{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9999830999721149, "eval_steps": 61000, "global_step": 118342, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0001690002788504601, "grad_norm": 8.151397705078125, "learning_rate": 8.449514152936206e-09, "loss": 0.5921, "step": 10 }, { "epoch": 0.0003380005577009202, "grad_norm": 11.88992977142334, "learning_rate": 1.6899028305872412e-08, "loss": 0.6201, "step": 20 }, { "epoch": 0.0005070008365513803, "grad_norm": 12.493903160095215, "learning_rate": 2.5348542458808618e-08, "loss": 0.627, "step": 30 }, { "epoch": 0.0006760011154018404, "grad_norm": 9.884626388549805, "learning_rate": 3.3798056611744824e-08, "loss": 0.6306, "step": 40 }, { "epoch": 0.0008450013942523005, "grad_norm": 7.7189764976501465, "learning_rate": 4.224757076468103e-08, "loss": 0.639, "step": 50 }, { "epoch": 0.0010140016731027606, "grad_norm": 9.185622215270996, "learning_rate": 5.0697084917617236e-08, "loss": 0.6557, "step": 60 }, { "epoch": 0.0011830019519532208, "grad_norm": 8.327007293701172, "learning_rate": 5.9146599070553445e-08, "loss": 0.617, "step": 70 }, { "epoch": 0.0013520022308036808, "grad_norm": 7.37695837020874, "learning_rate": 6.759611322348965e-08, "loss": 0.626, "step": 80 }, { "epoch": 0.001521002509654141, "grad_norm": 7.518730640411377, "learning_rate": 7.604562737642586e-08, "loss": 0.6532, "step": 90 }, { "epoch": 0.001690002788504601, "grad_norm": 7.305288791656494, "learning_rate": 8.449514152936207e-08, "loss": 0.5994, "step": 100 }, { "epoch": 0.0018590030673550611, "grad_norm": 8.757489204406738, "learning_rate": 9.294465568229827e-08, "loss": 0.6068, "step": 110 }, { "epoch": 0.0020280033462055213, "grad_norm": 10.401308059692383, "learning_rate": 1.0139416983523447e-07, "loss": 0.5565, "step": 120 }, { "epoch": 0.0021970036250559813, "grad_norm": 7.546243190765381, "learning_rate": 1.0984368398817069e-07, "loss": 0.5529, "step": 130 }, { "epoch": 0.0023660039039064417, "grad_norm": 5.31623649597168, "learning_rate": 1.1829319814110689e-07, "loss": 0.5421, "step": 140 }, { "epoch": 0.0025350041827569016, "grad_norm": 4.31443452835083, "learning_rate": 1.267427122940431e-07, "loss": 0.4825, "step": 150 }, { "epoch": 0.0027040044616073616, "grad_norm": 4.859823703765869, "learning_rate": 1.351922264469793e-07, "loss": 0.478, "step": 160 }, { "epoch": 0.0028730047404578215, "grad_norm": 4.5274834632873535, "learning_rate": 1.4364174059991553e-07, "loss": 0.446, "step": 170 }, { "epoch": 0.003042005019308282, "grad_norm": 3.7420647144317627, "learning_rate": 1.5209125475285173e-07, "loss": 0.4171, "step": 180 }, { "epoch": 0.003211005298158742, "grad_norm": 4.145787239074707, "learning_rate": 1.6054076890578793e-07, "loss": 0.3593, "step": 190 }, { "epoch": 0.003380005577009202, "grad_norm": 3.9338839054107666, "learning_rate": 1.6899028305872413e-07, "loss": 0.3165, "step": 200 }, { "epoch": 0.0035490058558596623, "grad_norm": 2.608560800552368, "learning_rate": 1.7743979721166034e-07, "loss": 0.2655, "step": 210 }, { "epoch": 0.0037180061347101222, "grad_norm": 2.9996230602264404, "learning_rate": 1.8588931136459654e-07, "loss": 0.2363, "step": 220 }, { "epoch": 0.003887006413560582, "grad_norm": 3.015291452407837, "learning_rate": 1.9433882551753277e-07, "loss": 0.222, "step": 230 }, { "epoch": 0.004056006692411043, "grad_norm": 2.3702433109283447, "learning_rate": 2.0278833967046894e-07, "loss": 0.205, "step": 240 }, { "epoch": 0.0042250069712615025, "grad_norm": 2.180356740951538, "learning_rate": 2.1123785382340517e-07, "loss": 0.1839, "step": 250 }, { "epoch": 0.0043940072501119625, "grad_norm": 2.714348793029785, "learning_rate": 2.1968736797634138e-07, "loss": 0.1725, "step": 260 }, { "epoch": 0.0045630075289624225, "grad_norm": 3.1054859161376953, "learning_rate": 2.281368821292776e-07, "loss": 0.1611, "step": 270 }, { "epoch": 0.004732007807812883, "grad_norm": 2.3040058612823486, "learning_rate": 2.3658639628221378e-07, "loss": 0.1358, "step": 280 }, { "epoch": 0.004901008086663343, "grad_norm": 2.4823195934295654, "learning_rate": 2.4503591043515e-07, "loss": 0.1308, "step": 290 }, { "epoch": 0.005070008365513803, "grad_norm": 2.2402455806732178, "learning_rate": 2.534854245880862e-07, "loss": 0.1192, "step": 300 }, { "epoch": 0.005239008644364263, "grad_norm": 1.8717119693756104, "learning_rate": 2.619349387410224e-07, "loss": 0.1085, "step": 310 }, { "epoch": 0.005408008923214723, "grad_norm": 1.9309443235397339, "learning_rate": 2.703844528939586e-07, "loss": 0.1231, "step": 320 }, { "epoch": 0.005577009202065183, "grad_norm": 1.9466526508331299, "learning_rate": 2.788339670468948e-07, "loss": 0.1119, "step": 330 }, { "epoch": 0.005746009480915643, "grad_norm": 2.3317859172821045, "learning_rate": 2.8728348119983105e-07, "loss": 0.1019, "step": 340 }, { "epoch": 0.005915009759766104, "grad_norm": 1.9998661279678345, "learning_rate": 2.957329953527673e-07, "loss": 0.0835, "step": 350 }, { "epoch": 0.006084010038616564, "grad_norm": 1.7425237894058228, "learning_rate": 3.0418250950570346e-07, "loss": 0.0885, "step": 360 }, { "epoch": 0.006253010317467024, "grad_norm": 2.8006510734558105, "learning_rate": 3.1263202365863963e-07, "loss": 0.0907, "step": 370 }, { "epoch": 0.006422010596317484, "grad_norm": 1.7340335845947266, "learning_rate": 3.2108153781157586e-07, "loss": 0.0846, "step": 380 }, { "epoch": 0.006591010875167944, "grad_norm": 1.3293828964233398, "learning_rate": 3.295310519645121e-07, "loss": 0.0819, "step": 390 }, { "epoch": 0.006760011154018404, "grad_norm": 1.7171714305877686, "learning_rate": 3.3798056611744827e-07, "loss": 0.0764, "step": 400 }, { "epoch": 0.0069290114328688646, "grad_norm": 1.758617639541626, "learning_rate": 3.4643008027038444e-07, "loss": 0.0693, "step": 410 }, { "epoch": 0.0070980117117193245, "grad_norm": 1.6397101879119873, "learning_rate": 3.5487959442332067e-07, "loss": 0.0707, "step": 420 }, { "epoch": 0.0072670119905697845, "grad_norm": 1.5723251104354858, "learning_rate": 3.633291085762569e-07, "loss": 0.065, "step": 430 }, { "epoch": 0.0074360122694202445, "grad_norm": 1.4874505996704102, "learning_rate": 3.717786227291931e-07, "loss": 0.0717, "step": 440 }, { "epoch": 0.007605012548270704, "grad_norm": 1.5094786882400513, "learning_rate": 3.802281368821293e-07, "loss": 0.0725, "step": 450 }, { "epoch": 0.007774012827121164, "grad_norm": 1.599475622177124, "learning_rate": 3.8867765103506554e-07, "loss": 0.0604, "step": 460 }, { "epoch": 0.007943013105971624, "grad_norm": 1.5444916486740112, "learning_rate": 3.9712716518800176e-07, "loss": 0.0615, "step": 470 }, { "epoch": 0.008112013384822085, "grad_norm": 1.5351428985595703, "learning_rate": 4.055766793409379e-07, "loss": 0.0579, "step": 480 }, { "epoch": 0.008281013663672544, "grad_norm": 2.069044351577759, "learning_rate": 4.140261934938741e-07, "loss": 0.0635, "step": 490 }, { "epoch": 0.008450013942523005, "grad_norm": 1.7014342546463013, "learning_rate": 4.2247570764681035e-07, "loss": 0.0567, "step": 500 }, { "epoch": 0.008619014221373466, "grad_norm": 1.249957799911499, "learning_rate": 4.309252217997466e-07, "loss": 0.0519, "step": 510 }, { "epoch": 0.008788014500223925, "grad_norm": 1.6389652490615845, "learning_rate": 4.3937473595268275e-07, "loss": 0.0501, "step": 520 }, { "epoch": 0.008957014779074386, "grad_norm": 1.3542331457138062, "learning_rate": 4.47824250105619e-07, "loss": 0.0532, "step": 530 }, { "epoch": 0.009126015057924845, "grad_norm": 1.6643306016921997, "learning_rate": 4.562737642585552e-07, "loss": 0.0481, "step": 540 }, { "epoch": 0.009295015336775306, "grad_norm": 2.3514788150787354, "learning_rate": 4.647232784114914e-07, "loss": 0.0563, "step": 550 }, { "epoch": 0.009464015615625767, "grad_norm": 1.8567042350769043, "learning_rate": 4.7317279256442756e-07, "loss": 0.0638, "step": 560 }, { "epoch": 0.009633015894476226, "grad_norm": 1.4401603937149048, "learning_rate": 4.816223067173637e-07, "loss": 0.051, "step": 570 }, { "epoch": 0.009802016173326687, "grad_norm": 1.580834984779358, "learning_rate": 4.900718208703e-07, "loss": 0.0394, "step": 580 }, { "epoch": 0.009971016452177146, "grad_norm": 1.2436542510986328, "learning_rate": 4.985213350232362e-07, "loss": 0.0534, "step": 590 }, { "epoch": 0.010140016731027606, "grad_norm": 1.244551420211792, "learning_rate": 5.069708491761724e-07, "loss": 0.0309, "step": 600 }, { "epoch": 0.010309017009878066, "grad_norm": 1.8506207466125488, "learning_rate": 5.154203633291087e-07, "loss": 0.0369, "step": 610 }, { "epoch": 0.010478017288728526, "grad_norm": 1.8189890384674072, "learning_rate": 5.238698774820448e-07, "loss": 0.0495, "step": 620 }, { "epoch": 0.010647017567578987, "grad_norm": 2.2815606594085693, "learning_rate": 5.32319391634981e-07, "loss": 0.051, "step": 630 }, { "epoch": 0.010816017846429446, "grad_norm": 1.1429193019866943, "learning_rate": 5.407689057879172e-07, "loss": 0.0381, "step": 640 }, { "epoch": 0.010985018125279907, "grad_norm": 1.6302450895309448, "learning_rate": 5.492184199408535e-07, "loss": 0.0389, "step": 650 }, { "epoch": 0.011154018404130366, "grad_norm": 1.1284207105636597, "learning_rate": 5.576679340937896e-07, "loss": 0.0418, "step": 660 }, { "epoch": 0.011323018682980827, "grad_norm": 1.0358737707138062, "learning_rate": 5.661174482467258e-07, "loss": 0.044, "step": 670 }, { "epoch": 0.011492018961831286, "grad_norm": 1.4662261009216309, "learning_rate": 5.745669623996621e-07, "loss": 0.0362, "step": 680 }, { "epoch": 0.011661019240681747, "grad_norm": 1.567488193511963, "learning_rate": 5.830164765525983e-07, "loss": 0.0455, "step": 690 }, { "epoch": 0.011830019519532208, "grad_norm": 1.529253363609314, "learning_rate": 5.914659907055346e-07, "loss": 0.0323, "step": 700 }, { "epoch": 0.011999019798382667, "grad_norm": 1.3436548709869385, "learning_rate": 5.999155048584706e-07, "loss": 0.0418, "step": 710 }, { "epoch": 0.012168020077233128, "grad_norm": 1.8246670961380005, "learning_rate": 6.083650190114069e-07, "loss": 0.0298, "step": 720 }, { "epoch": 0.012337020356083587, "grad_norm": 1.7654868364334106, "learning_rate": 6.168145331643431e-07, "loss": 0.0358, "step": 730 }, { "epoch": 0.012506020634934048, "grad_norm": 1.6966760158538818, "learning_rate": 6.252640473172793e-07, "loss": 0.0456, "step": 740 }, { "epoch": 0.012675020913784509, "grad_norm": 1.1358875036239624, "learning_rate": 6.337135614702155e-07, "loss": 0.0345, "step": 750 }, { "epoch": 0.012844021192634968, "grad_norm": 1.6618404388427734, "learning_rate": 6.421630756231517e-07, "loss": 0.0372, "step": 760 }, { "epoch": 0.013013021471485428, "grad_norm": 0.9701797366142273, "learning_rate": 6.506125897760879e-07, "loss": 0.0325, "step": 770 }, { "epoch": 0.013182021750335888, "grad_norm": 0.8507864475250244, "learning_rate": 6.590621039290242e-07, "loss": 0.0358, "step": 780 }, { "epoch": 0.013351022029186348, "grad_norm": 0.8990994095802307, "learning_rate": 6.675116180819604e-07, "loss": 0.0385, "step": 790 }, { "epoch": 0.013520022308036807, "grad_norm": 1.6713075637817383, "learning_rate": 6.759611322348965e-07, "loss": 0.0317, "step": 800 }, { "epoch": 0.013689022586887268, "grad_norm": 1.2208130359649658, "learning_rate": 6.844106463878328e-07, "loss": 0.0358, "step": 810 }, { "epoch": 0.013858022865737729, "grad_norm": 1.2064299583435059, "learning_rate": 6.928601605407689e-07, "loss": 0.0361, "step": 820 }, { "epoch": 0.014027023144588188, "grad_norm": 1.4440975189208984, "learning_rate": 7.013096746937053e-07, "loss": 0.0254, "step": 830 }, { "epoch": 0.014196023423438649, "grad_norm": 1.4820549488067627, "learning_rate": 7.097591888466413e-07, "loss": 0.0301, "step": 840 }, { "epoch": 0.014365023702289108, "grad_norm": 1.100009799003601, "learning_rate": 7.182087029995775e-07, "loss": 0.0288, "step": 850 }, { "epoch": 0.014534023981139569, "grad_norm": 1.205920696258545, "learning_rate": 7.266582171525138e-07, "loss": 0.0257, "step": 860 }, { "epoch": 0.01470302425999003, "grad_norm": 0.9258947372436523, "learning_rate": 7.3510773130545e-07, "loss": 0.0316, "step": 870 }, { "epoch": 0.014872024538840489, "grad_norm": 1.1105785369873047, "learning_rate": 7.435572454583862e-07, "loss": 0.0283, "step": 880 }, { "epoch": 0.01504102481769095, "grad_norm": 2.2304327487945557, "learning_rate": 7.520067596113224e-07, "loss": 0.0343, "step": 890 }, { "epoch": 0.015210025096541409, "grad_norm": 1.0406194925308228, "learning_rate": 7.604562737642586e-07, "loss": 0.0338, "step": 900 }, { "epoch": 0.01537902537539187, "grad_norm": 1.505669355392456, "learning_rate": 7.689057879171949e-07, "loss": 0.0237, "step": 910 }, { "epoch": 0.015548025654242329, "grad_norm": 1.3921010494232178, "learning_rate": 7.773553020701311e-07, "loss": 0.0349, "step": 920 }, { "epoch": 0.01571702593309279, "grad_norm": 0.5777738690376282, "learning_rate": 7.858048162230672e-07, "loss": 0.0275, "step": 930 }, { "epoch": 0.01588602621194325, "grad_norm": 0.9491533637046814, "learning_rate": 7.942543303760035e-07, "loss": 0.0269, "step": 940 }, { "epoch": 0.01605502649079371, "grad_norm": 2.254439353942871, "learning_rate": 8.027038445289396e-07, "loss": 0.0278, "step": 950 }, { "epoch": 0.01622402676964417, "grad_norm": 0.8885716795921326, "learning_rate": 8.111533586818758e-07, "loss": 0.024, "step": 960 }, { "epoch": 0.01639302704849463, "grad_norm": 1.567550539970398, "learning_rate": 8.196028728348121e-07, "loss": 0.0286, "step": 970 }, { "epoch": 0.01656202732734509, "grad_norm": 0.8156539797782898, "learning_rate": 8.280523869877482e-07, "loss": 0.0259, "step": 980 }, { "epoch": 0.01673102760619555, "grad_norm": 0.7024825811386108, "learning_rate": 8.365019011406844e-07, "loss": 0.0215, "step": 990 }, { "epoch": 0.01690002788504601, "grad_norm": 1.4939314126968384, "learning_rate": 8.449514152936207e-07, "loss": 0.0261, "step": 1000 }, { "epoch": 0.01706902816389647, "grad_norm": 0.7816221714019775, "learning_rate": 8.534009294465569e-07, "loss": 0.0257, "step": 1010 }, { "epoch": 0.017238028442746932, "grad_norm": 0.7380543947219849, "learning_rate": 8.618504435994932e-07, "loss": 0.0272, "step": 1020 }, { "epoch": 0.01740702872159739, "grad_norm": 0.7406052350997925, "learning_rate": 8.702999577524293e-07, "loss": 0.0232, "step": 1030 }, { "epoch": 0.01757602900044785, "grad_norm": 0.8127363920211792, "learning_rate": 8.787494719053655e-07, "loss": 0.0238, "step": 1040 }, { "epoch": 0.01774502927929831, "grad_norm": 1.1809587478637695, "learning_rate": 8.871989860583018e-07, "loss": 0.0264, "step": 1050 }, { "epoch": 0.01791402955814877, "grad_norm": 1.326064944267273, "learning_rate": 8.95648500211238e-07, "loss": 0.0277, "step": 1060 }, { "epoch": 0.01808302983699923, "grad_norm": 1.0980157852172852, "learning_rate": 9.04098014364174e-07, "loss": 0.018, "step": 1070 }, { "epoch": 0.01825203011584969, "grad_norm": 0.8225419521331787, "learning_rate": 9.125475285171104e-07, "loss": 0.0267, "step": 1080 }, { "epoch": 0.018421030394700152, "grad_norm": 0.9772644639015198, "learning_rate": 9.209970426700465e-07, "loss": 0.0212, "step": 1090 }, { "epoch": 0.01859003067355061, "grad_norm": 1.07682466506958, "learning_rate": 9.294465568229828e-07, "loss": 0.0244, "step": 1100 }, { "epoch": 0.01875903095240107, "grad_norm": 0.7112758159637451, "learning_rate": 9.37896070975919e-07, "loss": 0.0181, "step": 1110 }, { "epoch": 0.018928031231251533, "grad_norm": 0.8537008166313171, "learning_rate": 9.463455851288551e-07, "loss": 0.0185, "step": 1120 }, { "epoch": 0.019097031510101992, "grad_norm": 1.3446390628814697, "learning_rate": 9.547950992817914e-07, "loss": 0.0194, "step": 1130 }, { "epoch": 0.01926603178895245, "grad_norm": 1.150349736213684, "learning_rate": 9.632446134347275e-07, "loss": 0.0269, "step": 1140 }, { "epoch": 0.01943503206780291, "grad_norm": 0.9560878276824951, "learning_rate": 9.716941275876638e-07, "loss": 0.0261, "step": 1150 }, { "epoch": 0.019604032346653373, "grad_norm": 0.605726957321167, "learning_rate": 9.801436417406e-07, "loss": 0.0264, "step": 1160 }, { "epoch": 0.019773032625503832, "grad_norm": 1.082674264907837, "learning_rate": 9.885931558935361e-07, "loss": 0.0207, "step": 1170 }, { "epoch": 0.01994203290435429, "grad_norm": 1.2841850519180298, "learning_rate": 9.970426700464724e-07, "loss": 0.0246, "step": 1180 }, { "epoch": 0.020111033183204754, "grad_norm": 1.2737842798233032, "learning_rate": 1.0054921841994087e-06, "loss": 0.0183, "step": 1190 }, { "epoch": 0.020280033462055213, "grad_norm": 1.370842456817627, "learning_rate": 1.0139416983523447e-06, "loss": 0.0327, "step": 1200 }, { "epoch": 0.020449033740905672, "grad_norm": 1.343038558959961, "learning_rate": 1.022391212505281e-06, "loss": 0.0236, "step": 1210 }, { "epoch": 0.02061803401975613, "grad_norm": 0.7596139311790466, "learning_rate": 1.0308407266582173e-06, "loss": 0.0199, "step": 1220 }, { "epoch": 0.020787034298606594, "grad_norm": 0.6171843409538269, "learning_rate": 1.0392902408111534e-06, "loss": 0.0181, "step": 1230 }, { "epoch": 0.020956034577457053, "grad_norm": 0.9393839836120605, "learning_rate": 1.0477397549640897e-06, "loss": 0.0174, "step": 1240 }, { "epoch": 0.021125034856307512, "grad_norm": 0.6550285816192627, "learning_rate": 1.056189269117026e-06, "loss": 0.0191, "step": 1250 }, { "epoch": 0.021294035135157974, "grad_norm": 2.181431770324707, "learning_rate": 1.064638783269962e-06, "loss": 0.027, "step": 1260 }, { "epoch": 0.021463035414008434, "grad_norm": 1.4009298086166382, "learning_rate": 1.0730882974228983e-06, "loss": 0.018, "step": 1270 }, { "epoch": 0.021632035692858893, "grad_norm": 0.7100903987884521, "learning_rate": 1.0815378115758344e-06, "loss": 0.0233, "step": 1280 }, { "epoch": 0.02180103597170935, "grad_norm": 0.7620948553085327, "learning_rate": 1.0899873257287706e-06, "loss": 0.0169, "step": 1290 }, { "epoch": 0.021970036250559814, "grad_norm": 1.241431713104248, "learning_rate": 1.098436839881707e-06, "loss": 0.0158, "step": 1300 }, { "epoch": 0.022139036529410273, "grad_norm": 1.2758827209472656, "learning_rate": 1.106886354034643e-06, "loss": 0.0232, "step": 1310 }, { "epoch": 0.022308036808260732, "grad_norm": 0.9061040878295898, "learning_rate": 1.1153358681875793e-06, "loss": 0.0147, "step": 1320 }, { "epoch": 0.022477037087111195, "grad_norm": 0.692929744720459, "learning_rate": 1.1237853823405156e-06, "loss": 0.0265, "step": 1330 }, { "epoch": 0.022646037365961654, "grad_norm": 1.0171177387237549, "learning_rate": 1.1322348964934516e-06, "loss": 0.0176, "step": 1340 }, { "epoch": 0.022815037644812113, "grad_norm": 0.5499007105827332, "learning_rate": 1.140684410646388e-06, "loss": 0.0231, "step": 1350 }, { "epoch": 0.022984037923662572, "grad_norm": 0.607111930847168, "learning_rate": 1.1491339247993242e-06, "loss": 0.0183, "step": 1360 }, { "epoch": 0.023153038202513035, "grad_norm": 0.6044015288352966, "learning_rate": 1.1575834389522603e-06, "loss": 0.017, "step": 1370 }, { "epoch": 0.023322038481363494, "grad_norm": 1.118083119392395, "learning_rate": 1.1660329531051966e-06, "loss": 0.0186, "step": 1380 }, { "epoch": 0.023491038760213953, "grad_norm": 0.9161491394042969, "learning_rate": 1.1744824672581326e-06, "loss": 0.0225, "step": 1390 }, { "epoch": 0.023660039039064416, "grad_norm": 0.6185082793235779, "learning_rate": 1.1829319814110691e-06, "loss": 0.0187, "step": 1400 }, { "epoch": 0.023829039317914875, "grad_norm": 0.9716012477874756, "learning_rate": 1.1913814955640052e-06, "loss": 0.0167, "step": 1410 }, { "epoch": 0.023998039596765334, "grad_norm": 1.1802388429641724, "learning_rate": 1.1998310097169413e-06, "loss": 0.0262, "step": 1420 }, { "epoch": 0.024167039875615796, "grad_norm": 0.9881535172462463, "learning_rate": 1.2082805238698775e-06, "loss": 0.0187, "step": 1430 }, { "epoch": 0.024336040154466256, "grad_norm": 1.5982509851455688, "learning_rate": 1.2167300380228138e-06, "loss": 0.0158, "step": 1440 }, { "epoch": 0.024505040433316715, "grad_norm": 0.49811652302742004, "learning_rate": 1.2251795521757499e-06, "loss": 0.0197, "step": 1450 }, { "epoch": 0.024674040712167174, "grad_norm": 0.9442542195320129, "learning_rate": 1.2336290663286862e-06, "loss": 0.0206, "step": 1460 }, { "epoch": 0.024843040991017636, "grad_norm": 0.9874531030654907, "learning_rate": 1.2420785804816225e-06, "loss": 0.0197, "step": 1470 }, { "epoch": 0.025012041269868095, "grad_norm": 1.2066454887390137, "learning_rate": 1.2505280946345585e-06, "loss": 0.0183, "step": 1480 }, { "epoch": 0.025181041548718554, "grad_norm": 0.8461329936981201, "learning_rate": 1.2589776087874946e-06, "loss": 0.0188, "step": 1490 }, { "epoch": 0.025350041827569017, "grad_norm": 0.6502810716629028, "learning_rate": 1.267427122940431e-06, "loss": 0.0151, "step": 1500 }, { "epoch": 0.025519042106419476, "grad_norm": 1.0374168157577515, "learning_rate": 1.2758766370933674e-06, "loss": 0.0206, "step": 1510 }, { "epoch": 0.025688042385269935, "grad_norm": 0.5221607685089111, "learning_rate": 1.2843261512463034e-06, "loss": 0.0127, "step": 1520 }, { "epoch": 0.025857042664120394, "grad_norm": 0.7759193181991577, "learning_rate": 1.2927756653992395e-06, "loss": 0.02, "step": 1530 }, { "epoch": 0.026026042942970857, "grad_norm": 1.2694779634475708, "learning_rate": 1.3012251795521758e-06, "loss": 0.0163, "step": 1540 }, { "epoch": 0.026195043221821316, "grad_norm": 1.1054325103759766, "learning_rate": 1.3096746937051123e-06, "loss": 0.0199, "step": 1550 }, { "epoch": 0.026364043500671775, "grad_norm": 0.9145587682723999, "learning_rate": 1.3181242078580484e-06, "loss": 0.0164, "step": 1560 }, { "epoch": 0.026533043779522238, "grad_norm": 0.9101834893226624, "learning_rate": 1.3265737220109844e-06, "loss": 0.0157, "step": 1570 }, { "epoch": 0.026702044058372697, "grad_norm": 0.7743887901306152, "learning_rate": 1.3350232361639207e-06, "loss": 0.0137, "step": 1580 }, { "epoch": 0.026871044337223156, "grad_norm": 0.9588603973388672, "learning_rate": 1.3434727503168568e-06, "loss": 0.0188, "step": 1590 }, { "epoch": 0.027040044616073615, "grad_norm": 0.9868338108062744, "learning_rate": 1.351922264469793e-06, "loss": 0.0172, "step": 1600 }, { "epoch": 0.027209044894924077, "grad_norm": 0.7865809798240662, "learning_rate": 1.3603717786227293e-06, "loss": 0.0156, "step": 1610 }, { "epoch": 0.027378045173774537, "grad_norm": 0.6851559281349182, "learning_rate": 1.3688212927756656e-06, "loss": 0.0165, "step": 1620 }, { "epoch": 0.027547045452624996, "grad_norm": 0.6153374314308167, "learning_rate": 1.3772708069286017e-06, "loss": 0.019, "step": 1630 }, { "epoch": 0.027716045731475458, "grad_norm": 0.7208739519119263, "learning_rate": 1.3857203210815378e-06, "loss": 0.0173, "step": 1640 }, { "epoch": 0.027885046010325917, "grad_norm": 0.5045366287231445, "learning_rate": 1.394169835234474e-06, "loss": 0.0122, "step": 1650 }, { "epoch": 0.028054046289176376, "grad_norm": 0.7605059742927551, "learning_rate": 1.4026193493874105e-06, "loss": 0.0172, "step": 1660 }, { "epoch": 0.028223046568026836, "grad_norm": 0.7432957887649536, "learning_rate": 1.4110688635403466e-06, "loss": 0.0144, "step": 1670 }, { "epoch": 0.028392046846877298, "grad_norm": 0.7140432000160217, "learning_rate": 1.4195183776932827e-06, "loss": 0.0174, "step": 1680 }, { "epoch": 0.028561047125727757, "grad_norm": 1.02549409866333, "learning_rate": 1.427967891846219e-06, "loss": 0.0132, "step": 1690 }, { "epoch": 0.028730047404578216, "grad_norm": 0.8485150337219238, "learning_rate": 1.436417405999155e-06, "loss": 0.0145, "step": 1700 }, { "epoch": 0.02889904768342868, "grad_norm": 0.8102651238441467, "learning_rate": 1.4448669201520913e-06, "loss": 0.0108, "step": 1710 }, { "epoch": 0.029068047962279138, "grad_norm": 0.9968916773796082, "learning_rate": 1.4533164343050276e-06, "loss": 0.0152, "step": 1720 }, { "epoch": 0.029237048241129597, "grad_norm": 0.5318018794059753, "learning_rate": 1.4617659484579639e-06, "loss": 0.0124, "step": 1730 }, { "epoch": 0.02940604851998006, "grad_norm": 0.863592267036438, "learning_rate": 1.4702154626109e-06, "loss": 0.0155, "step": 1740 }, { "epoch": 0.02957504879883052, "grad_norm": 1.0146982669830322, "learning_rate": 1.4786649767638362e-06, "loss": 0.0133, "step": 1750 }, { "epoch": 0.029744049077680978, "grad_norm": 1.1718655824661255, "learning_rate": 1.4871144909167723e-06, "loss": 0.0141, "step": 1760 }, { "epoch": 0.029913049356531437, "grad_norm": 0.6279026865959167, "learning_rate": 1.4955640050697088e-06, "loss": 0.0118, "step": 1770 }, { "epoch": 0.0300820496353819, "grad_norm": 0.4484230875968933, "learning_rate": 1.5040135192226449e-06, "loss": 0.0167, "step": 1780 }, { "epoch": 0.03025104991423236, "grad_norm": 1.6238749027252197, "learning_rate": 1.512463033375581e-06, "loss": 0.0163, "step": 1790 }, { "epoch": 0.030420050193082818, "grad_norm": 0.5909531116485596, "learning_rate": 1.5209125475285172e-06, "loss": 0.015, "step": 1800 }, { "epoch": 0.03058905047193328, "grad_norm": 0.6374512910842896, "learning_rate": 1.5293620616814533e-06, "loss": 0.0153, "step": 1810 }, { "epoch": 0.03075805075078374, "grad_norm": 0.5122248530387878, "learning_rate": 1.5378115758343898e-06, "loss": 0.013, "step": 1820 }, { "epoch": 0.0309270510296342, "grad_norm": 1.1308633089065552, "learning_rate": 1.5462610899873259e-06, "loss": 0.0157, "step": 1830 }, { "epoch": 0.031096051308484658, "grad_norm": 0.7191557884216309, "learning_rate": 1.5547106041402621e-06, "loss": 0.0136, "step": 1840 }, { "epoch": 0.03126505158733512, "grad_norm": 0.8519582152366638, "learning_rate": 1.5631601182931982e-06, "loss": 0.0127, "step": 1850 }, { "epoch": 0.03143405186618558, "grad_norm": 0.6365352869033813, "learning_rate": 1.5716096324461345e-06, "loss": 0.0171, "step": 1860 }, { "epoch": 0.03160305214503604, "grad_norm": 0.7419239282608032, "learning_rate": 1.5800591465990706e-06, "loss": 0.0158, "step": 1870 }, { "epoch": 0.0317720524238865, "grad_norm": 0.8644031882286072, "learning_rate": 1.588508660752007e-06, "loss": 0.0174, "step": 1880 }, { "epoch": 0.031941052702736956, "grad_norm": 0.6560561060905457, "learning_rate": 1.5969581749049431e-06, "loss": 0.0136, "step": 1890 }, { "epoch": 0.03211005298158742, "grad_norm": 1.0905290842056274, "learning_rate": 1.6054076890578792e-06, "loss": 0.0113, "step": 1900 }, { "epoch": 0.03227905326043788, "grad_norm": 0.9088236689567566, "learning_rate": 1.6138572032108155e-06, "loss": 0.0131, "step": 1910 }, { "epoch": 0.03244805353928834, "grad_norm": 0.6915132403373718, "learning_rate": 1.6223067173637516e-06, "loss": 0.0225, "step": 1920 }, { "epoch": 0.0326170538181388, "grad_norm": 1.2276079654693604, "learning_rate": 1.630756231516688e-06, "loss": 0.0176, "step": 1930 }, { "epoch": 0.03278605409698926, "grad_norm": 0.8565443754196167, "learning_rate": 1.6392057456696241e-06, "loss": 0.0167, "step": 1940 }, { "epoch": 0.03295505437583972, "grad_norm": 0.7216575145721436, "learning_rate": 1.6476552598225604e-06, "loss": 0.0131, "step": 1950 }, { "epoch": 0.03312405465469018, "grad_norm": 0.8094061613082886, "learning_rate": 1.6561047739754965e-06, "loss": 0.0108, "step": 1960 }, { "epoch": 0.03329305493354064, "grad_norm": 1.2675161361694336, "learning_rate": 1.6645542881284328e-06, "loss": 0.0168, "step": 1970 }, { "epoch": 0.0334620552123911, "grad_norm": 0.8833696842193604, "learning_rate": 1.6730038022813688e-06, "loss": 0.0119, "step": 1980 }, { "epoch": 0.03363105549124156, "grad_norm": 0.6309747099876404, "learning_rate": 1.6814533164343053e-06, "loss": 0.016, "step": 1990 }, { "epoch": 0.03380005577009202, "grad_norm": 1.0539937019348145, "learning_rate": 1.6899028305872414e-06, "loss": 0.0148, "step": 2000 }, { "epoch": 0.03396905604894248, "grad_norm": 0.6724973320960999, "learning_rate": 1.6983523447401777e-06, "loss": 0.0122, "step": 2010 }, { "epoch": 0.03413805632779294, "grad_norm": 1.0127696990966797, "learning_rate": 1.7068018588931137e-06, "loss": 0.0137, "step": 2020 }, { "epoch": 0.0343070566066434, "grad_norm": 0.5368540287017822, "learning_rate": 1.7152513730460498e-06, "loss": 0.0145, "step": 2030 }, { "epoch": 0.034476056885493864, "grad_norm": 0.4811045825481415, "learning_rate": 1.7237008871989863e-06, "loss": 0.0112, "step": 2040 }, { "epoch": 0.03464505716434432, "grad_norm": 1.032359004020691, "learning_rate": 1.7321504013519224e-06, "loss": 0.0133, "step": 2050 }, { "epoch": 0.03481405744319478, "grad_norm": 0.5104614496231079, "learning_rate": 1.7405999155048587e-06, "loss": 0.0108, "step": 2060 }, { "epoch": 0.03498305772204524, "grad_norm": 0.8072396516799927, "learning_rate": 1.7490494296577947e-06, "loss": 0.0136, "step": 2070 }, { "epoch": 0.0351520580008957, "grad_norm": 0.6972222924232483, "learning_rate": 1.757498943810731e-06, "loss": 0.0158, "step": 2080 }, { "epoch": 0.03532105827974616, "grad_norm": 0.7694892883300781, "learning_rate": 1.765948457963667e-06, "loss": 0.0148, "step": 2090 }, { "epoch": 0.03549005855859662, "grad_norm": 0.694287121295929, "learning_rate": 1.7743979721166036e-06, "loss": 0.0086, "step": 2100 }, { "epoch": 0.035659058837447084, "grad_norm": 0.8641753196716309, "learning_rate": 1.7828474862695396e-06, "loss": 0.0162, "step": 2110 }, { "epoch": 0.03582805911629754, "grad_norm": 0.373634934425354, "learning_rate": 1.791297000422476e-06, "loss": 0.0116, "step": 2120 }, { "epoch": 0.035997059395148, "grad_norm": 0.5773941278457642, "learning_rate": 1.799746514575412e-06, "loss": 0.0126, "step": 2130 }, { "epoch": 0.03616605967399846, "grad_norm": 0.9467854499816895, "learning_rate": 1.808196028728348e-06, "loss": 0.0192, "step": 2140 }, { "epoch": 0.03633505995284892, "grad_norm": 0.6334127187728882, "learning_rate": 1.8166455428812846e-06, "loss": 0.0201, "step": 2150 }, { "epoch": 0.03650406023169938, "grad_norm": 0.6911800503730774, "learning_rate": 1.8250950570342208e-06, "loss": 0.0149, "step": 2160 }, { "epoch": 0.036673060510549846, "grad_norm": 0.7418361306190491, "learning_rate": 1.833544571187157e-06, "loss": 0.0102, "step": 2170 }, { "epoch": 0.036842060789400305, "grad_norm": 0.4494631588459015, "learning_rate": 1.841994085340093e-06, "loss": 0.0115, "step": 2180 }, { "epoch": 0.037011061068250764, "grad_norm": 0.5005876421928406, "learning_rate": 1.8504435994930293e-06, "loss": 0.0108, "step": 2190 }, { "epoch": 0.03718006134710122, "grad_norm": 0.6692945957183838, "learning_rate": 1.8588931136459655e-06, "loss": 0.0142, "step": 2200 }, { "epoch": 0.03734906162595168, "grad_norm": 0.7445424199104309, "learning_rate": 1.8673426277989018e-06, "loss": 0.0142, "step": 2210 }, { "epoch": 0.03751806190480214, "grad_norm": 0.3330194354057312, "learning_rate": 1.875792141951838e-06, "loss": 0.0118, "step": 2220 }, { "epoch": 0.0376870621836526, "grad_norm": 0.3205147385597229, "learning_rate": 1.8842416561047742e-06, "loss": 0.0137, "step": 2230 }, { "epoch": 0.037856062462503066, "grad_norm": 0.7221159934997559, "learning_rate": 1.8926911702577102e-06, "loss": 0.0136, "step": 2240 }, { "epoch": 0.038025062741353526, "grad_norm": 1.715714931488037, "learning_rate": 1.9011406844106463e-06, "loss": 0.0098, "step": 2250 }, { "epoch": 0.038194063020203985, "grad_norm": 0.8114831447601318, "learning_rate": 1.909590198563583e-06, "loss": 0.0109, "step": 2260 }, { "epoch": 0.038363063299054444, "grad_norm": 0.8724892735481262, "learning_rate": 1.918039712716519e-06, "loss": 0.0149, "step": 2270 }, { "epoch": 0.0385320635779049, "grad_norm": 0.6100572943687439, "learning_rate": 1.926489226869455e-06, "loss": 0.0122, "step": 2280 }, { "epoch": 0.03870106385675536, "grad_norm": 1.0169868469238281, "learning_rate": 1.9349387410223914e-06, "loss": 0.013, "step": 2290 }, { "epoch": 0.03887006413560582, "grad_norm": 0.4001051187515259, "learning_rate": 1.9433882551753275e-06, "loss": 0.0079, "step": 2300 }, { "epoch": 0.03903906441445629, "grad_norm": 0.804621160030365, "learning_rate": 1.951837769328264e-06, "loss": 0.0144, "step": 2310 }, { "epoch": 0.039208064693306746, "grad_norm": 0.4144463539123535, "learning_rate": 1.9602872834812e-06, "loss": 0.0107, "step": 2320 }, { "epoch": 0.039377064972157205, "grad_norm": 0.7317914962768555, "learning_rate": 1.968736797634136e-06, "loss": 0.0136, "step": 2330 }, { "epoch": 0.039546065251007664, "grad_norm": 0.6143588423728943, "learning_rate": 1.9771863117870722e-06, "loss": 0.013, "step": 2340 }, { "epoch": 0.03971506552985812, "grad_norm": 0.7639254331588745, "learning_rate": 1.9856358259400087e-06, "loss": 0.0136, "step": 2350 }, { "epoch": 0.03988406580870858, "grad_norm": 0.6767488121986389, "learning_rate": 1.9940853400929448e-06, "loss": 0.0155, "step": 2360 }, { "epoch": 0.04005306608755904, "grad_norm": 0.702724039554596, "learning_rate": 2.0025348542458813e-06, "loss": 0.0108, "step": 2370 }, { "epoch": 0.04022206636640951, "grad_norm": 0.7386316061019897, "learning_rate": 2.0109843683988174e-06, "loss": 0.0108, "step": 2380 }, { "epoch": 0.04039106664525997, "grad_norm": 0.966352105140686, "learning_rate": 2.0194338825517534e-06, "loss": 0.0143, "step": 2390 }, { "epoch": 0.040560066924110426, "grad_norm": 1.3236074447631836, "learning_rate": 2.0278833967046895e-06, "loss": 0.0156, "step": 2400 }, { "epoch": 0.040729067202960885, "grad_norm": 1.0675822496414185, "learning_rate": 2.0363329108576256e-06, "loss": 0.0138, "step": 2410 }, { "epoch": 0.040898067481811344, "grad_norm": 0.8293465375900269, "learning_rate": 2.044782425010562e-06, "loss": 0.0136, "step": 2420 }, { "epoch": 0.0410670677606618, "grad_norm": 0.675937294960022, "learning_rate": 2.053231939163498e-06, "loss": 0.0104, "step": 2430 }, { "epoch": 0.04123606803951226, "grad_norm": 0.8487566709518433, "learning_rate": 2.0616814533164346e-06, "loss": 0.0138, "step": 2440 }, { "epoch": 0.04140506831836273, "grad_norm": 0.3574985861778259, "learning_rate": 2.0701309674693707e-06, "loss": 0.012, "step": 2450 }, { "epoch": 0.04157406859721319, "grad_norm": 0.6251744031906128, "learning_rate": 2.0785804816223068e-06, "loss": 0.0148, "step": 2460 }, { "epoch": 0.041743068876063646, "grad_norm": 0.3150479197502136, "learning_rate": 2.087029995775243e-06, "loss": 0.0107, "step": 2470 }, { "epoch": 0.041912069154914106, "grad_norm": 0.6732006669044495, "learning_rate": 2.0954795099281793e-06, "loss": 0.0107, "step": 2480 }, { "epoch": 0.042081069433764565, "grad_norm": 0.5718549489974976, "learning_rate": 2.1039290240811154e-06, "loss": 0.0117, "step": 2490 }, { "epoch": 0.042250069712615024, "grad_norm": 0.6807505488395691, "learning_rate": 2.112378538234052e-06, "loss": 0.0101, "step": 2500 }, { "epoch": 0.04241906999146548, "grad_norm": 0.7062185406684875, "learning_rate": 2.120828052386988e-06, "loss": 0.0117, "step": 2510 }, { "epoch": 0.04258807027031595, "grad_norm": 0.4019978642463684, "learning_rate": 2.129277566539924e-06, "loss": 0.0128, "step": 2520 }, { "epoch": 0.04275707054916641, "grad_norm": 0.26033371686935425, "learning_rate": 2.1377270806928605e-06, "loss": 0.0124, "step": 2530 }, { "epoch": 0.04292607082801687, "grad_norm": 0.7266717553138733, "learning_rate": 2.1461765948457966e-06, "loss": 0.0151, "step": 2540 }, { "epoch": 0.043095071106867326, "grad_norm": 0.6882942914962769, "learning_rate": 2.1546261089987327e-06, "loss": 0.0124, "step": 2550 }, { "epoch": 0.043264071385717785, "grad_norm": 0.494502454996109, "learning_rate": 2.1630756231516687e-06, "loss": 0.0129, "step": 2560 }, { "epoch": 0.043433071664568244, "grad_norm": 0.35432446002960205, "learning_rate": 2.1715251373046052e-06, "loss": 0.0119, "step": 2570 }, { "epoch": 0.0436020719434187, "grad_norm": 0.4541483521461487, "learning_rate": 2.1799746514575413e-06, "loss": 0.0113, "step": 2580 }, { "epoch": 0.04377107222226917, "grad_norm": 0.5097981095314026, "learning_rate": 2.188424165610478e-06, "loss": 0.0104, "step": 2590 }, { "epoch": 0.04394007250111963, "grad_norm": 0.7258899807929993, "learning_rate": 2.196873679763414e-06, "loss": 0.0141, "step": 2600 }, { "epoch": 0.04410907277997009, "grad_norm": 0.5327528119087219, "learning_rate": 2.20532319391635e-06, "loss": 0.0113, "step": 2610 }, { "epoch": 0.04427807305882055, "grad_norm": 0.5078420639038086, "learning_rate": 2.213772708069286e-06, "loss": 0.0091, "step": 2620 }, { "epoch": 0.044447073337671006, "grad_norm": 0.7442695498466492, "learning_rate": 2.222222222222222e-06, "loss": 0.0084, "step": 2630 }, { "epoch": 0.044616073616521465, "grad_norm": 0.7157345414161682, "learning_rate": 2.2306717363751586e-06, "loss": 0.01, "step": 2640 }, { "epoch": 0.044785073895371924, "grad_norm": 0.7449740767478943, "learning_rate": 2.239121250528095e-06, "loss": 0.0136, "step": 2650 }, { "epoch": 0.04495407417422239, "grad_norm": 3.954612970352173, "learning_rate": 2.247570764681031e-06, "loss": 0.0105, "step": 2660 }, { "epoch": 0.04512307445307285, "grad_norm": 0.38182348012924194, "learning_rate": 2.256020278833967e-06, "loss": 0.0106, "step": 2670 }, { "epoch": 0.04529207473192331, "grad_norm": 0.6025572419166565, "learning_rate": 2.2644697929869033e-06, "loss": 0.0087, "step": 2680 }, { "epoch": 0.04546107501077377, "grad_norm": 0.47721076011657715, "learning_rate": 2.2729193071398398e-06, "loss": 0.0114, "step": 2690 }, { "epoch": 0.045630075289624226, "grad_norm": 1.120234727859497, "learning_rate": 2.281368821292776e-06, "loss": 0.0125, "step": 2700 }, { "epoch": 0.045799075568474686, "grad_norm": 0.42115306854248047, "learning_rate": 2.289818335445712e-06, "loss": 0.0099, "step": 2710 }, { "epoch": 0.045968075847325145, "grad_norm": 0.8091180324554443, "learning_rate": 2.2982678495986484e-06, "loss": 0.0104, "step": 2720 }, { "epoch": 0.04613707612617561, "grad_norm": 0.671550989151001, "learning_rate": 2.3067173637515845e-06, "loss": 0.0132, "step": 2730 }, { "epoch": 0.04630607640502607, "grad_norm": 0.43608835339546204, "learning_rate": 2.3151668779045205e-06, "loss": 0.0104, "step": 2740 }, { "epoch": 0.04647507668387653, "grad_norm": 0.9488043189048767, "learning_rate": 2.323616392057457e-06, "loss": 0.0086, "step": 2750 }, { "epoch": 0.04664407696272699, "grad_norm": 0.6109009385108948, "learning_rate": 2.332065906210393e-06, "loss": 0.0111, "step": 2760 }, { "epoch": 0.04681307724157745, "grad_norm": 0.6729733347892761, "learning_rate": 2.340515420363329e-06, "loss": 0.0123, "step": 2770 }, { "epoch": 0.046982077520427906, "grad_norm": 0.4502004086971283, "learning_rate": 2.3489649345162652e-06, "loss": 0.0091, "step": 2780 }, { "epoch": 0.04715107779927837, "grad_norm": 0.679175853729248, "learning_rate": 2.3574144486692017e-06, "loss": 0.0078, "step": 2790 }, { "epoch": 0.04732007807812883, "grad_norm": 0.7278356552124023, "learning_rate": 2.3658639628221382e-06, "loss": 0.0131, "step": 2800 }, { "epoch": 0.04748907835697929, "grad_norm": 0.36557623744010925, "learning_rate": 2.3743134769750743e-06, "loss": 0.0094, "step": 2810 }, { "epoch": 0.04765807863582975, "grad_norm": 0.6772125959396362, "learning_rate": 2.3827629911280104e-06, "loss": 0.0116, "step": 2820 }, { "epoch": 0.04782707891468021, "grad_norm": 0.6479217410087585, "learning_rate": 2.3912125052809464e-06, "loss": 0.0125, "step": 2830 }, { "epoch": 0.04799607919353067, "grad_norm": 0.28205257654190063, "learning_rate": 2.3996620194338825e-06, "loss": 0.0076, "step": 2840 }, { "epoch": 0.04816507947238113, "grad_norm": 0.5454665422439575, "learning_rate": 2.408111533586819e-06, "loss": 0.0136, "step": 2850 }, { "epoch": 0.04833407975123159, "grad_norm": 0.6780948638916016, "learning_rate": 2.416561047739755e-06, "loss": 0.0123, "step": 2860 }, { "epoch": 0.04850308003008205, "grad_norm": 0.6540535688400269, "learning_rate": 2.4250105618926916e-06, "loss": 0.0108, "step": 2870 }, { "epoch": 0.04867208030893251, "grad_norm": 0.6456772089004517, "learning_rate": 2.4334600760456276e-06, "loss": 0.0088, "step": 2880 }, { "epoch": 0.04884108058778297, "grad_norm": 0.6767243146896362, "learning_rate": 2.4419095901985637e-06, "loss": 0.0102, "step": 2890 }, { "epoch": 0.04901008086663343, "grad_norm": 0.8583904504776001, "learning_rate": 2.4503591043514998e-06, "loss": 0.0091, "step": 2900 }, { "epoch": 0.04917908114548389, "grad_norm": 0.24955904483795166, "learning_rate": 2.4588086185044363e-06, "loss": 0.0103, "step": 2910 }, { "epoch": 0.04934808142433435, "grad_norm": 0.4824363887310028, "learning_rate": 2.4672581326573723e-06, "loss": 0.0095, "step": 2920 }, { "epoch": 0.04951708170318481, "grad_norm": 0.6091395020484924, "learning_rate": 2.4757076468103084e-06, "loss": 0.0122, "step": 2930 }, { "epoch": 0.04968608198203527, "grad_norm": 0.5298306941986084, "learning_rate": 2.484157160963245e-06, "loss": 0.0104, "step": 2940 }, { "epoch": 0.04985508226088573, "grad_norm": 0.38689276576042175, "learning_rate": 2.492606675116181e-06, "loss": 0.0074, "step": 2950 }, { "epoch": 0.05002408253973619, "grad_norm": 0.6825554966926575, "learning_rate": 2.501056189269117e-06, "loss": 0.0114, "step": 2960 }, { "epoch": 0.05019308281858665, "grad_norm": 0.44893091917037964, "learning_rate": 2.509505703422053e-06, "loss": 0.0088, "step": 2970 }, { "epoch": 0.05036208309743711, "grad_norm": 0.4857201874256134, "learning_rate": 2.517955217574989e-06, "loss": 0.0091, "step": 2980 }, { "epoch": 0.05053108337628757, "grad_norm": 0.5426431894302368, "learning_rate": 2.526404731727926e-06, "loss": 0.0101, "step": 2990 }, { "epoch": 0.050700083655138034, "grad_norm": 0.7086471915245056, "learning_rate": 2.534854245880862e-06, "loss": 0.0091, "step": 3000 }, { "epoch": 0.05086908393398849, "grad_norm": 0.5153199434280396, "learning_rate": 2.5433037600337983e-06, "loss": 0.0126, "step": 3010 }, { "epoch": 0.05103808421283895, "grad_norm": 0.5115523338317871, "learning_rate": 2.5517532741867347e-06, "loss": 0.0119, "step": 3020 }, { "epoch": 0.05120708449168941, "grad_norm": 0.4097737669944763, "learning_rate": 2.560202788339671e-06, "loss": 0.0161, "step": 3030 }, { "epoch": 0.05137608477053987, "grad_norm": 0.4461556077003479, "learning_rate": 2.568652302492607e-06, "loss": 0.011, "step": 3040 }, { "epoch": 0.05154508504939033, "grad_norm": 0.6485928297042847, "learning_rate": 2.577101816645543e-06, "loss": 0.0099, "step": 3050 }, { "epoch": 0.05171408532824079, "grad_norm": 0.49462923407554626, "learning_rate": 2.585551330798479e-06, "loss": 0.0092, "step": 3060 }, { "epoch": 0.051883085607091255, "grad_norm": 0.496439129114151, "learning_rate": 2.5940008449514155e-06, "loss": 0.0102, "step": 3070 }, { "epoch": 0.052052085885941714, "grad_norm": 0.724690854549408, "learning_rate": 2.6024503591043516e-06, "loss": 0.0124, "step": 3080 }, { "epoch": 0.05222108616479217, "grad_norm": 0.4814421832561493, "learning_rate": 2.6108998732572877e-06, "loss": 0.0095, "step": 3090 }, { "epoch": 0.05239008644364263, "grad_norm": 0.4340079724788666, "learning_rate": 2.6193493874102246e-06, "loss": 0.0094, "step": 3100 }, { "epoch": 0.05255908672249309, "grad_norm": 0.33029723167419434, "learning_rate": 2.6277989015631607e-06, "loss": 0.0074, "step": 3110 }, { "epoch": 0.05272808700134355, "grad_norm": 0.4209522306919098, "learning_rate": 2.6362484157160967e-06, "loss": 0.0095, "step": 3120 }, { "epoch": 0.05289708728019401, "grad_norm": 0.7987039089202881, "learning_rate": 2.644697929869033e-06, "loss": 0.0121, "step": 3130 }, { "epoch": 0.053066087559044475, "grad_norm": 0.371640145778656, "learning_rate": 2.653147444021969e-06, "loss": 0.0145, "step": 3140 }, { "epoch": 0.053235087837894934, "grad_norm": 0.6809061169624329, "learning_rate": 2.6615969581749054e-06, "loss": 0.0104, "step": 3150 }, { "epoch": 0.05340408811674539, "grad_norm": 0.4903334379196167, "learning_rate": 2.6700464723278414e-06, "loss": 0.0108, "step": 3160 }, { "epoch": 0.05357308839559585, "grad_norm": 0.232858344912529, "learning_rate": 2.6784959864807775e-06, "loss": 0.008, "step": 3170 }, { "epoch": 0.05374208867444631, "grad_norm": 0.7675929069519043, "learning_rate": 2.6869455006337136e-06, "loss": 0.0086, "step": 3180 }, { "epoch": 0.05391108895329677, "grad_norm": 0.2953050136566162, "learning_rate": 2.6953950147866496e-06, "loss": 0.0084, "step": 3190 }, { "epoch": 0.05408008923214723, "grad_norm": 0.5588734149932861, "learning_rate": 2.703844528939586e-06, "loss": 0.0079, "step": 3200 }, { "epoch": 0.054249089510997696, "grad_norm": 0.428972989320755, "learning_rate": 2.7122940430925226e-06, "loss": 0.0118, "step": 3210 }, { "epoch": 0.054418089789848155, "grad_norm": 0.6115742921829224, "learning_rate": 2.7207435572454587e-06, "loss": 0.0221, "step": 3220 }, { "epoch": 0.054587090068698614, "grad_norm": 0.3895367980003357, "learning_rate": 2.7291930713983948e-06, "loss": 0.0099, "step": 3230 }, { "epoch": 0.05475609034754907, "grad_norm": 0.3974573314189911, "learning_rate": 2.7376425855513313e-06, "loss": 0.0106, "step": 3240 }, { "epoch": 0.05492509062639953, "grad_norm": 2.016308069229126, "learning_rate": 2.7460920997042673e-06, "loss": 0.011, "step": 3250 }, { "epoch": 0.05509409090524999, "grad_norm": 0.5943537950515747, "learning_rate": 2.7545416138572034e-06, "loss": 0.0085, "step": 3260 }, { "epoch": 0.05526309118410045, "grad_norm": 0.3754553496837616, "learning_rate": 2.7629911280101395e-06, "loss": 0.0078, "step": 3270 }, { "epoch": 0.055432091462950916, "grad_norm": 0.42528584599494934, "learning_rate": 2.7714406421630755e-06, "loss": 0.0083, "step": 3280 }, { "epoch": 0.055601091741801376, "grad_norm": 0.4286918342113495, "learning_rate": 2.779890156316012e-06, "loss": 0.0094, "step": 3290 }, { "epoch": 0.055770092020651835, "grad_norm": 0.2522464692592621, "learning_rate": 2.788339670468948e-06, "loss": 0.007, "step": 3300 }, { "epoch": 0.055939092299502294, "grad_norm": 0.4107299745082855, "learning_rate": 2.796789184621884e-06, "loss": 0.0072, "step": 3310 }, { "epoch": 0.05610809257835275, "grad_norm": 0.5001223683357239, "learning_rate": 2.805238698774821e-06, "loss": 0.0098, "step": 3320 }, { "epoch": 0.05627709285720321, "grad_norm": 0.3938818573951721, "learning_rate": 2.813688212927757e-06, "loss": 0.0114, "step": 3330 }, { "epoch": 0.05644609313605367, "grad_norm": 0.36034202575683594, "learning_rate": 2.8221377270806932e-06, "loss": 0.0122, "step": 3340 }, { "epoch": 0.05661509341490414, "grad_norm": 0.5340605974197388, "learning_rate": 2.8305872412336293e-06, "loss": 0.0089, "step": 3350 }, { "epoch": 0.056784093693754596, "grad_norm": 0.633908748626709, "learning_rate": 2.8390367553865654e-06, "loss": 0.0095, "step": 3360 }, { "epoch": 0.056953093972605055, "grad_norm": 0.9071553349494934, "learning_rate": 2.847486269539502e-06, "loss": 0.0119, "step": 3370 }, { "epoch": 0.057122094251455514, "grad_norm": 0.5576254725456238, "learning_rate": 2.855935783692438e-06, "loss": 0.0112, "step": 3380 }, { "epoch": 0.057291094530305973, "grad_norm": 0.2599470019340515, "learning_rate": 2.864385297845374e-06, "loss": 0.0101, "step": 3390 }, { "epoch": 0.05746009480915643, "grad_norm": 0.19270236790180206, "learning_rate": 2.87283481199831e-06, "loss": 0.0093, "step": 3400 }, { "epoch": 0.0576290950880069, "grad_norm": 0.36447200179100037, "learning_rate": 2.881284326151246e-06, "loss": 0.0098, "step": 3410 }, { "epoch": 0.05779809536685736, "grad_norm": 0.14679476618766785, "learning_rate": 2.8897338403041826e-06, "loss": 0.0102, "step": 3420 }, { "epoch": 0.05796709564570782, "grad_norm": 0.5648771524429321, "learning_rate": 2.898183354457119e-06, "loss": 0.0113, "step": 3430 }, { "epoch": 0.058136095924558276, "grad_norm": 0.6138327121734619, "learning_rate": 2.906632868610055e-06, "loss": 0.0097, "step": 3440 }, { "epoch": 0.058305096203408735, "grad_norm": 0.33649885654449463, "learning_rate": 2.9150823827629917e-06, "loss": 0.0079, "step": 3450 }, { "epoch": 0.058474096482259194, "grad_norm": 0.5774721503257751, "learning_rate": 2.9235318969159278e-06, "loss": 0.0087, "step": 3460 }, { "epoch": 0.05864309676110965, "grad_norm": 0.2652968168258667, "learning_rate": 2.931981411068864e-06, "loss": 0.0097, "step": 3470 }, { "epoch": 0.05881209703996012, "grad_norm": 0.546208381652832, "learning_rate": 2.9404309252218e-06, "loss": 0.0078, "step": 3480 }, { "epoch": 0.05898109731881058, "grad_norm": 0.4050743281841278, "learning_rate": 2.948880439374736e-06, "loss": 0.0092, "step": 3490 }, { "epoch": 0.05915009759766104, "grad_norm": 0.5059739947319031, "learning_rate": 2.9573299535276725e-06, "loss": 0.0061, "step": 3500 }, { "epoch": 0.059319097876511497, "grad_norm": 0.29913344979286194, "learning_rate": 2.9657794676806085e-06, "loss": 0.0112, "step": 3510 }, { "epoch": 0.059488098155361956, "grad_norm": 0.4305241107940674, "learning_rate": 2.9742289818335446e-06, "loss": 0.0103, "step": 3520 }, { "epoch": 0.059657098434212415, "grad_norm": 0.5640769004821777, "learning_rate": 2.982678495986481e-06, "loss": 0.0092, "step": 3530 }, { "epoch": 0.059826098713062874, "grad_norm": 0.39654386043548584, "learning_rate": 2.9911280101394176e-06, "loss": 0.0084, "step": 3540 }, { "epoch": 0.05999509899191334, "grad_norm": 0.2653600871562958, "learning_rate": 2.9995775242923537e-06, "loss": 0.0072, "step": 3550 }, { "epoch": 0.0601640992707638, "grad_norm": 0.6193217635154724, "learning_rate": 3.0080270384452897e-06, "loss": 0.0091, "step": 3560 }, { "epoch": 0.06033309954961426, "grad_norm": 0.48455023765563965, "learning_rate": 3.016476552598226e-06, "loss": 0.0078, "step": 3570 }, { "epoch": 0.06050209982846472, "grad_norm": 0.8907110691070557, "learning_rate": 3.024926066751162e-06, "loss": 0.0085, "step": 3580 }, { "epoch": 0.060671100107315176, "grad_norm": 0.5781303644180298, "learning_rate": 3.0333755809040984e-06, "loss": 0.0085, "step": 3590 }, { "epoch": 0.060840100386165635, "grad_norm": 0.2834400236606598, "learning_rate": 3.0418250950570345e-06, "loss": 0.0099, "step": 3600 }, { "epoch": 0.061009100665016094, "grad_norm": 0.5349828600883484, "learning_rate": 3.0502746092099705e-06, "loss": 0.0102, "step": 3610 }, { "epoch": 0.06117810094386656, "grad_norm": 1.0900537967681885, "learning_rate": 3.0587241233629066e-06, "loss": 0.0096, "step": 3620 }, { "epoch": 0.06134710122271702, "grad_norm": 0.8852835893630981, "learning_rate": 3.0671736375158427e-06, "loss": 0.0078, "step": 3630 }, { "epoch": 0.06151610150156748, "grad_norm": 0.6187847852706909, "learning_rate": 3.0756231516687796e-06, "loss": 0.0108, "step": 3640 }, { "epoch": 0.06168510178041794, "grad_norm": 0.730607807636261, "learning_rate": 3.0840726658217156e-06, "loss": 0.0098, "step": 3650 }, { "epoch": 0.0618541020592684, "grad_norm": 0.47413742542266846, "learning_rate": 3.0925221799746517e-06, "loss": 0.0116, "step": 3660 }, { "epoch": 0.062023102338118856, "grad_norm": 0.4965061843395233, "learning_rate": 3.1009716941275882e-06, "loss": 0.0079, "step": 3670 }, { "epoch": 0.062192102616969315, "grad_norm": 0.3510805666446686, "learning_rate": 3.1094212082805243e-06, "loss": 0.0104, "step": 3680 }, { "epoch": 0.06236110289581978, "grad_norm": 0.3473954498767853, "learning_rate": 3.1178707224334604e-06, "loss": 0.01, "step": 3690 }, { "epoch": 0.06253010317467024, "grad_norm": 0.5971751809120178, "learning_rate": 3.1263202365863964e-06, "loss": 0.0086, "step": 3700 }, { "epoch": 0.0626991034535207, "grad_norm": 0.6997776031494141, "learning_rate": 3.1347697507393325e-06, "loss": 0.0136, "step": 3710 }, { "epoch": 0.06286810373237116, "grad_norm": 0.5057122111320496, "learning_rate": 3.143219264892269e-06, "loss": 0.0094, "step": 3720 }, { "epoch": 0.06303710401122162, "grad_norm": 0.5285948514938354, "learning_rate": 3.151668779045205e-06, "loss": 0.0095, "step": 3730 }, { "epoch": 0.06320610429007208, "grad_norm": 0.7009038329124451, "learning_rate": 3.160118293198141e-06, "loss": 0.0121, "step": 3740 }, { "epoch": 0.06337510456892254, "grad_norm": 0.24084387719631195, "learning_rate": 3.1685678073510776e-06, "loss": 0.0061, "step": 3750 }, { "epoch": 0.063544104847773, "grad_norm": 0.40020808577537537, "learning_rate": 3.177017321504014e-06, "loss": 0.011, "step": 3760 }, { "epoch": 0.06371310512662345, "grad_norm": 0.4643620550632477, "learning_rate": 3.18546683565695e-06, "loss": 0.008, "step": 3770 }, { "epoch": 0.06388210540547391, "grad_norm": 0.19212964177131653, "learning_rate": 3.1939163498098863e-06, "loss": 0.0071, "step": 3780 }, { "epoch": 0.06405110568432439, "grad_norm": 0.4514760375022888, "learning_rate": 3.2023658639628223e-06, "loss": 0.0097, "step": 3790 }, { "epoch": 0.06422010596317484, "grad_norm": 0.4289383590221405, "learning_rate": 3.2108153781157584e-06, "loss": 0.006, "step": 3800 }, { "epoch": 0.0643891062420253, "grad_norm": 0.4901948869228363, "learning_rate": 3.219264892268695e-06, "loss": 0.0091, "step": 3810 }, { "epoch": 0.06455810652087576, "grad_norm": 0.36952874064445496, "learning_rate": 3.227714406421631e-06, "loss": 0.01, "step": 3820 }, { "epoch": 0.06472710679972622, "grad_norm": 0.3414541184902191, "learning_rate": 3.236163920574567e-06, "loss": 0.0112, "step": 3830 }, { "epoch": 0.06489610707857668, "grad_norm": 0.5098986625671387, "learning_rate": 3.244613434727503e-06, "loss": 0.0092, "step": 3840 }, { "epoch": 0.06506510735742714, "grad_norm": 0.33219394087791443, "learning_rate": 3.2530629488804396e-06, "loss": 0.0083, "step": 3850 }, { "epoch": 0.0652341076362776, "grad_norm": 0.27958717942237854, "learning_rate": 3.261512463033376e-06, "loss": 0.0055, "step": 3860 }, { "epoch": 0.06540310791512806, "grad_norm": 0.3715076446533203, "learning_rate": 3.269961977186312e-06, "loss": 0.0077, "step": 3870 }, { "epoch": 0.06557210819397852, "grad_norm": 0.4441938102245331, "learning_rate": 3.2784114913392482e-06, "loss": 0.0086, "step": 3880 }, { "epoch": 0.06574110847282898, "grad_norm": 0.4816913902759552, "learning_rate": 3.2868610054921847e-06, "loss": 0.0091, "step": 3890 }, { "epoch": 0.06591010875167944, "grad_norm": 0.40409502387046814, "learning_rate": 3.295310519645121e-06, "loss": 0.0088, "step": 3900 }, { "epoch": 0.0660791090305299, "grad_norm": 0.5365285873413086, "learning_rate": 3.303760033798057e-06, "loss": 0.0113, "step": 3910 }, { "epoch": 0.06624810930938035, "grad_norm": 0.6550111770629883, "learning_rate": 3.312209547950993e-06, "loss": 0.0079, "step": 3920 }, { "epoch": 0.06641710958823083, "grad_norm": 0.6132133603096008, "learning_rate": 3.320659062103929e-06, "loss": 0.0089, "step": 3930 }, { "epoch": 0.06658610986708129, "grad_norm": 0.7494462132453918, "learning_rate": 3.3291085762568655e-06, "loss": 0.0092, "step": 3940 }, { "epoch": 0.06675511014593175, "grad_norm": 0.5431383848190308, "learning_rate": 3.3375580904098016e-06, "loss": 0.0113, "step": 3950 }, { "epoch": 0.0669241104247822, "grad_norm": 0.2721993327140808, "learning_rate": 3.3460076045627376e-06, "loss": 0.0107, "step": 3960 }, { "epoch": 0.06709311070363266, "grad_norm": 0.1583024263381958, "learning_rate": 3.3544571187156746e-06, "loss": 0.0072, "step": 3970 }, { "epoch": 0.06726211098248312, "grad_norm": 0.5978319644927979, "learning_rate": 3.3629066328686106e-06, "loss": 0.0068, "step": 3980 }, { "epoch": 0.06743111126133358, "grad_norm": 0.5664768218994141, "learning_rate": 3.3713561470215467e-06, "loss": 0.0128, "step": 3990 }, { "epoch": 0.06760011154018404, "grad_norm": 0.4068852663040161, "learning_rate": 3.3798056611744828e-06, "loss": 0.006, "step": 4000 }, { "epoch": 0.0677691118190345, "grad_norm": 0.19173608720302582, "learning_rate": 3.388255175327419e-06, "loss": 0.0053, "step": 4010 }, { "epoch": 0.06793811209788496, "grad_norm": 0.5509228706359863, "learning_rate": 3.3967046894803553e-06, "loss": 0.0109, "step": 4020 }, { "epoch": 0.06810711237673542, "grad_norm": 0.23547115921974182, "learning_rate": 3.4051542036332914e-06, "loss": 0.007, "step": 4030 }, { "epoch": 0.06827611265558588, "grad_norm": 0.46924418210983276, "learning_rate": 3.4136037177862275e-06, "loss": 0.0072, "step": 4040 }, { "epoch": 0.06844511293443634, "grad_norm": 0.2977207601070404, "learning_rate": 3.4220532319391635e-06, "loss": 0.0054, "step": 4050 }, { "epoch": 0.0686141132132868, "grad_norm": 0.3505478799343109, "learning_rate": 3.4305027460920996e-06, "loss": 0.0067, "step": 4060 }, { "epoch": 0.06878311349213727, "grad_norm": 0.3547617495059967, "learning_rate": 3.438952260245036e-06, "loss": 0.0095, "step": 4070 }, { "epoch": 0.06895211377098773, "grad_norm": 0.49734488129615784, "learning_rate": 3.4474017743979726e-06, "loss": 0.0075, "step": 4080 }, { "epoch": 0.06912111404983819, "grad_norm": 0.5076583027839661, "learning_rate": 3.4558512885509087e-06, "loss": 0.0093, "step": 4090 }, { "epoch": 0.06929011432868865, "grad_norm": 0.4036179482936859, "learning_rate": 3.4643008027038447e-06, "loss": 0.0092, "step": 4100 }, { "epoch": 0.0694591146075391, "grad_norm": 0.8179068565368652, "learning_rate": 3.4727503168567812e-06, "loss": 0.0079, "step": 4110 }, { "epoch": 0.06962811488638956, "grad_norm": 0.6014817953109741, "learning_rate": 3.4811998310097173e-06, "loss": 0.008, "step": 4120 }, { "epoch": 0.06979711516524002, "grad_norm": 0.6647421717643738, "learning_rate": 3.4896493451626534e-06, "loss": 0.0086, "step": 4130 }, { "epoch": 0.06996611544409048, "grad_norm": 0.4266943335533142, "learning_rate": 3.4980988593155894e-06, "loss": 0.0077, "step": 4140 }, { "epoch": 0.07013511572294094, "grad_norm": 0.7118129134178162, "learning_rate": 3.5065483734685255e-06, "loss": 0.0075, "step": 4150 }, { "epoch": 0.0703041160017914, "grad_norm": 0.4971505105495453, "learning_rate": 3.514997887621462e-06, "loss": 0.0064, "step": 4160 }, { "epoch": 0.07047311628064186, "grad_norm": 0.19220784306526184, "learning_rate": 3.523447401774398e-06, "loss": 0.0079, "step": 4170 }, { "epoch": 0.07064211655949232, "grad_norm": 0.2972339987754822, "learning_rate": 3.531896915927334e-06, "loss": 0.0061, "step": 4180 }, { "epoch": 0.07081111683834278, "grad_norm": 0.5900372266769409, "learning_rate": 3.540346430080271e-06, "loss": 0.0068, "step": 4190 }, { "epoch": 0.07098011711719324, "grad_norm": 0.5239884853363037, "learning_rate": 3.548795944233207e-06, "loss": 0.0085, "step": 4200 }, { "epoch": 0.07114911739604371, "grad_norm": 0.35723498463630676, "learning_rate": 3.557245458386143e-06, "loss": 0.0076, "step": 4210 }, { "epoch": 0.07131811767489417, "grad_norm": 0.48376893997192383, "learning_rate": 3.5656949725390793e-06, "loss": 0.0075, "step": 4220 }, { "epoch": 0.07148711795374463, "grad_norm": 0.22889120876789093, "learning_rate": 3.5741444866920154e-06, "loss": 0.0083, "step": 4230 }, { "epoch": 0.07165611823259509, "grad_norm": 0.14473536610603333, "learning_rate": 3.582594000844952e-06, "loss": 0.008, "step": 4240 }, { "epoch": 0.07182511851144555, "grad_norm": 0.4018864631652832, "learning_rate": 3.591043514997888e-06, "loss": 0.0072, "step": 4250 }, { "epoch": 0.071994118790296, "grad_norm": 0.25340160727500916, "learning_rate": 3.599493029150824e-06, "loss": 0.007, "step": 4260 }, { "epoch": 0.07216311906914646, "grad_norm": 0.4823681116104126, "learning_rate": 3.60794254330376e-06, "loss": 0.01, "step": 4270 }, { "epoch": 0.07233211934799692, "grad_norm": 0.45038795471191406, "learning_rate": 3.616392057456696e-06, "loss": 0.0064, "step": 4280 }, { "epoch": 0.07250111962684738, "grad_norm": 0.511728048324585, "learning_rate": 3.624841571609633e-06, "loss": 0.0053, "step": 4290 }, { "epoch": 0.07267011990569784, "grad_norm": 0.4706685245037079, "learning_rate": 3.633291085762569e-06, "loss": 0.0063, "step": 4300 }, { "epoch": 0.0728391201845483, "grad_norm": 0.4813501834869385, "learning_rate": 3.641740599915505e-06, "loss": 0.0074, "step": 4310 }, { "epoch": 0.07300812046339876, "grad_norm": 0.28050094842910767, "learning_rate": 3.6501901140684417e-06, "loss": 0.0121, "step": 4320 }, { "epoch": 0.07317712074224922, "grad_norm": 0.44418439269065857, "learning_rate": 3.6586396282213777e-06, "loss": 0.0067, "step": 4330 }, { "epoch": 0.07334612102109969, "grad_norm": 0.2895668148994446, "learning_rate": 3.667089142374314e-06, "loss": 0.0065, "step": 4340 }, { "epoch": 0.07351512129995015, "grad_norm": 0.35102537274360657, "learning_rate": 3.67553865652725e-06, "loss": 0.0108, "step": 4350 }, { "epoch": 0.07368412157880061, "grad_norm": 0.5368096232414246, "learning_rate": 3.683988170680186e-06, "loss": 0.0076, "step": 4360 }, { "epoch": 0.07385312185765107, "grad_norm": 0.23388966917991638, "learning_rate": 3.6924376848331225e-06, "loss": 0.0081, "step": 4370 }, { "epoch": 0.07402212213650153, "grad_norm": 0.35802993178367615, "learning_rate": 3.7008871989860585e-06, "loss": 0.006, "step": 4380 }, { "epoch": 0.07419112241535199, "grad_norm": 0.24196232855319977, "learning_rate": 3.7093367131389946e-06, "loss": 0.0062, "step": 4390 }, { "epoch": 0.07436012269420245, "grad_norm": 0.6262447237968445, "learning_rate": 3.717786227291931e-06, "loss": 0.0089, "step": 4400 }, { "epoch": 0.0745291229730529, "grad_norm": 0.25484466552734375, "learning_rate": 3.7262357414448676e-06, "loss": 0.0086, "step": 4410 }, { "epoch": 0.07469812325190336, "grad_norm": 0.5549577474594116, "learning_rate": 3.7346852555978037e-06, "loss": 0.0074, "step": 4420 }, { "epoch": 0.07486712353075382, "grad_norm": 0.2302803099155426, "learning_rate": 3.7431347697507397e-06, "loss": 0.0055, "step": 4430 }, { "epoch": 0.07503612380960428, "grad_norm": 0.6471118927001953, "learning_rate": 3.751584283903676e-06, "loss": 0.0086, "step": 4440 }, { "epoch": 0.07520512408845474, "grad_norm": 0.4209013283252716, "learning_rate": 3.760033798056612e-06, "loss": 0.0078, "step": 4450 }, { "epoch": 0.0753741243673052, "grad_norm": 0.43538352847099304, "learning_rate": 3.7684833122095484e-06, "loss": 0.0072, "step": 4460 }, { "epoch": 0.07554312464615566, "grad_norm": 0.31720903515815735, "learning_rate": 3.7769328263624844e-06, "loss": 0.0103, "step": 4470 }, { "epoch": 0.07571212492500613, "grad_norm": 0.15655528008937836, "learning_rate": 3.7853823405154205e-06, "loss": 0.0064, "step": 4480 }, { "epoch": 0.07588112520385659, "grad_norm": 0.30433279275894165, "learning_rate": 3.7938318546683566e-06, "loss": 0.0065, "step": 4490 }, { "epoch": 0.07605012548270705, "grad_norm": 0.4596308767795563, "learning_rate": 3.8022813688212926e-06, "loss": 0.0087, "step": 4500 }, { "epoch": 0.07621912576155751, "grad_norm": 0.11451300978660583, "learning_rate": 3.8107308829742296e-06, "loss": 0.0091, "step": 4510 }, { "epoch": 0.07638812604040797, "grad_norm": 0.5014181137084961, "learning_rate": 3.819180397127166e-06, "loss": 0.008, "step": 4520 }, { "epoch": 0.07655712631925843, "grad_norm": 0.3703087270259857, "learning_rate": 3.827629911280102e-06, "loss": 0.0056, "step": 4530 }, { "epoch": 0.07672612659810889, "grad_norm": 1.3860195875167847, "learning_rate": 3.836079425433038e-06, "loss": 0.0099, "step": 4540 }, { "epoch": 0.07689512687695935, "grad_norm": 0.33820924162864685, "learning_rate": 3.844528939585974e-06, "loss": 0.0094, "step": 4550 }, { "epoch": 0.0770641271558098, "grad_norm": 0.4271750748157501, "learning_rate": 3.85297845373891e-06, "loss": 0.0076, "step": 4560 }, { "epoch": 0.07723312743466026, "grad_norm": 0.40488216280937195, "learning_rate": 3.861427967891846e-06, "loss": 0.0059, "step": 4570 }, { "epoch": 0.07740212771351072, "grad_norm": 0.3280101418495178, "learning_rate": 3.869877482044783e-06, "loss": 0.0069, "step": 4580 }, { "epoch": 0.07757112799236118, "grad_norm": 0.6588373184204102, "learning_rate": 3.8783269961977185e-06, "loss": 0.0092, "step": 4590 }, { "epoch": 0.07774012827121164, "grad_norm": 0.4867706894874573, "learning_rate": 3.886776510350655e-06, "loss": 0.009, "step": 4600 }, { "epoch": 0.0779091285500621, "grad_norm": 0.5322169661521912, "learning_rate": 3.8952260245035915e-06, "loss": 0.0114, "step": 4610 }, { "epoch": 0.07807812882891257, "grad_norm": 0.616223156452179, "learning_rate": 3.903675538656528e-06, "loss": 0.0098, "step": 4620 }, { "epoch": 0.07824712910776303, "grad_norm": 0.810210108757019, "learning_rate": 3.912125052809464e-06, "loss": 0.0098, "step": 4630 }, { "epoch": 0.07841612938661349, "grad_norm": 0.2605763375759125, "learning_rate": 3.9205745669624e-06, "loss": 0.0067, "step": 4640 }, { "epoch": 0.07858512966546395, "grad_norm": 0.16967809200286865, "learning_rate": 3.929024081115337e-06, "loss": 0.008, "step": 4650 }, { "epoch": 0.07875412994431441, "grad_norm": 0.15028618276119232, "learning_rate": 3.937473595268272e-06, "loss": 0.0063, "step": 4660 }, { "epoch": 0.07892313022316487, "grad_norm": 0.3279257118701935, "learning_rate": 3.945923109421209e-06, "loss": 0.0066, "step": 4670 }, { "epoch": 0.07909213050201533, "grad_norm": 0.7601935267448425, "learning_rate": 3.9543726235741444e-06, "loss": 0.01, "step": 4680 }, { "epoch": 0.07926113078086579, "grad_norm": 0.15642717480659485, "learning_rate": 3.962822137727081e-06, "loss": 0.0065, "step": 4690 }, { "epoch": 0.07943013105971625, "grad_norm": 0.36829888820648193, "learning_rate": 3.9712716518800174e-06, "loss": 0.0071, "step": 4700 }, { "epoch": 0.0795991313385667, "grad_norm": 0.42035579681396484, "learning_rate": 3.979721166032953e-06, "loss": 0.0056, "step": 4710 }, { "epoch": 0.07976813161741717, "grad_norm": 0.20596951246261597, "learning_rate": 3.9881706801858896e-06, "loss": 0.0065, "step": 4720 }, { "epoch": 0.07993713189626762, "grad_norm": 0.3290463089942932, "learning_rate": 3.996620194338826e-06, "loss": 0.0086, "step": 4730 }, { "epoch": 0.08010613217511808, "grad_norm": 0.2539362609386444, "learning_rate": 4.0050697084917626e-06, "loss": 0.006, "step": 4740 }, { "epoch": 0.08027513245396854, "grad_norm": 0.7381569147109985, "learning_rate": 4.013519222644698e-06, "loss": 0.0062, "step": 4750 }, { "epoch": 0.08044413273281902, "grad_norm": 0.17694604396820068, "learning_rate": 4.021968736797635e-06, "loss": 0.0079, "step": 4760 }, { "epoch": 0.08061313301166947, "grad_norm": 0.44804155826568604, "learning_rate": 4.03041825095057e-06, "loss": 0.008, "step": 4770 }, { "epoch": 0.08078213329051993, "grad_norm": 0.3938175439834595, "learning_rate": 4.038867765103507e-06, "loss": 0.0089, "step": 4780 }, { "epoch": 0.08095113356937039, "grad_norm": 0.6432244181632996, "learning_rate": 4.047317279256443e-06, "loss": 0.0055, "step": 4790 }, { "epoch": 0.08112013384822085, "grad_norm": 0.3865385949611664, "learning_rate": 4.055766793409379e-06, "loss": 0.0059, "step": 4800 }, { "epoch": 0.08128913412707131, "grad_norm": 0.3288101255893707, "learning_rate": 4.0642163075623155e-06, "loss": 0.008, "step": 4810 }, { "epoch": 0.08145813440592177, "grad_norm": 0.386060893535614, "learning_rate": 4.072665821715251e-06, "loss": 0.0073, "step": 4820 }, { "epoch": 0.08162713468477223, "grad_norm": 0.3735050559043884, "learning_rate": 4.081115335868188e-06, "loss": 0.0075, "step": 4830 }, { "epoch": 0.08179613496362269, "grad_norm": 0.28494900465011597, "learning_rate": 4.089564850021124e-06, "loss": 0.0086, "step": 4840 }, { "epoch": 0.08196513524247315, "grad_norm": 0.8604230284690857, "learning_rate": 4.098014364174061e-06, "loss": 0.0093, "step": 4850 }, { "epoch": 0.0821341355213236, "grad_norm": 0.32281509041786194, "learning_rate": 4.106463878326996e-06, "loss": 0.0125, "step": 4860 }, { "epoch": 0.08230313580017407, "grad_norm": 0.6922529935836792, "learning_rate": 4.114913392479933e-06, "loss": 0.0088, "step": 4870 }, { "epoch": 0.08247213607902452, "grad_norm": 0.15109947323799133, "learning_rate": 4.123362906632869e-06, "loss": 0.0082, "step": 4880 }, { "epoch": 0.08264113635787498, "grad_norm": 0.5638471841812134, "learning_rate": 4.131812420785805e-06, "loss": 0.0073, "step": 4890 }, { "epoch": 0.08281013663672546, "grad_norm": 0.5557750463485718, "learning_rate": 4.140261934938741e-06, "loss": 0.0065, "step": 4900 }, { "epoch": 0.08297913691557592, "grad_norm": 0.3340568244457245, "learning_rate": 4.148711449091677e-06, "loss": 0.008, "step": 4910 }, { "epoch": 0.08314813719442637, "grad_norm": 0.4634811282157898, "learning_rate": 4.1571609632446135e-06, "loss": 0.0082, "step": 4920 }, { "epoch": 0.08331713747327683, "grad_norm": 0.45580101013183594, "learning_rate": 4.16561047739755e-06, "loss": 0.0062, "step": 4930 }, { "epoch": 0.08348613775212729, "grad_norm": 0.3351278007030487, "learning_rate": 4.174059991550486e-06, "loss": 0.0135, "step": 4940 }, { "epoch": 0.08365513803097775, "grad_norm": 0.16381919384002686, "learning_rate": 4.182509505703423e-06, "loss": 0.0078, "step": 4950 }, { "epoch": 0.08382413830982821, "grad_norm": 0.7266935706138611, "learning_rate": 4.190959019856359e-06, "loss": 0.0056, "step": 4960 }, { "epoch": 0.08399313858867867, "grad_norm": 0.44963306188583374, "learning_rate": 4.199408534009295e-06, "loss": 0.0097, "step": 4970 }, { "epoch": 0.08416213886752913, "grad_norm": 0.32577261328697205, "learning_rate": 4.207858048162231e-06, "loss": 0.0092, "step": 4980 }, { "epoch": 0.08433113914637959, "grad_norm": 0.42351993918418884, "learning_rate": 4.216307562315167e-06, "loss": 0.0065, "step": 4990 }, { "epoch": 0.08450013942523005, "grad_norm": 0.38387471437454224, "learning_rate": 4.224757076468104e-06, "loss": 0.0073, "step": 5000 }, { "epoch": 0.0846691397040805, "grad_norm": 0.5673115849494934, "learning_rate": 4.2332065906210394e-06, "loss": 0.0077, "step": 5010 }, { "epoch": 0.08483813998293097, "grad_norm": 0.46692800521850586, "learning_rate": 4.241656104773976e-06, "loss": 0.0081, "step": 5020 }, { "epoch": 0.08500714026178144, "grad_norm": 0.27370235323905945, "learning_rate": 4.2501056189269116e-06, "loss": 0.0092, "step": 5030 }, { "epoch": 0.0851761405406319, "grad_norm": 0.2749960720539093, "learning_rate": 4.258555133079848e-06, "loss": 0.008, "step": 5040 }, { "epoch": 0.08534514081948236, "grad_norm": 0.34917569160461426, "learning_rate": 4.2670046472327846e-06, "loss": 0.0112, "step": 5050 }, { "epoch": 0.08551414109833282, "grad_norm": 0.26685452461242676, "learning_rate": 4.275454161385721e-06, "loss": 0.0065, "step": 5060 }, { "epoch": 0.08568314137718328, "grad_norm": 0.18275409936904907, "learning_rate": 4.283903675538657e-06, "loss": 0.0066, "step": 5070 }, { "epoch": 0.08585214165603373, "grad_norm": 0.30578580498695374, "learning_rate": 4.292353189691593e-06, "loss": 0.0057, "step": 5080 }, { "epoch": 0.0860211419348842, "grad_norm": 0.21141697466373444, "learning_rate": 4.30080270384453e-06, "loss": 0.0055, "step": 5090 }, { "epoch": 0.08619014221373465, "grad_norm": 0.5646255612373352, "learning_rate": 4.309252217997465e-06, "loss": 0.0083, "step": 5100 }, { "epoch": 0.08635914249258511, "grad_norm": 0.2993975579738617, "learning_rate": 4.317701732150402e-06, "loss": 0.0071, "step": 5110 }, { "epoch": 0.08652814277143557, "grad_norm": 0.33375898003578186, "learning_rate": 4.3261512463033375e-06, "loss": 0.008, "step": 5120 }, { "epoch": 0.08669714305028603, "grad_norm": 0.5091102719306946, "learning_rate": 4.334600760456274e-06, "loss": 0.0059, "step": 5130 }, { "epoch": 0.08686614332913649, "grad_norm": 0.36565321683883667, "learning_rate": 4.3430502746092105e-06, "loss": 0.0076, "step": 5140 }, { "epoch": 0.08703514360798695, "grad_norm": 0.683262288570404, "learning_rate": 4.351499788762146e-06, "loss": 0.0061, "step": 5150 }, { "epoch": 0.0872041438868374, "grad_norm": 0.650642991065979, "learning_rate": 4.359949302915083e-06, "loss": 0.0078, "step": 5160 }, { "epoch": 0.08737314416568788, "grad_norm": 0.5229181051254272, "learning_rate": 4.368398817068019e-06, "loss": 0.0096, "step": 5170 }, { "epoch": 0.08754214444453834, "grad_norm": 0.4177519679069519, "learning_rate": 4.376848331220956e-06, "loss": 0.0093, "step": 5180 }, { "epoch": 0.0877111447233888, "grad_norm": 0.5535804629325867, "learning_rate": 4.385297845373891e-06, "loss": 0.0081, "step": 5190 }, { "epoch": 0.08788014500223926, "grad_norm": 0.294405460357666, "learning_rate": 4.393747359526828e-06, "loss": 0.0053, "step": 5200 }, { "epoch": 0.08804914528108972, "grad_norm": 0.38873642683029175, "learning_rate": 4.402196873679763e-06, "loss": 0.006, "step": 5210 }, { "epoch": 0.08821814555994018, "grad_norm": 0.45184701681137085, "learning_rate": 4.4106463878327e-06, "loss": 0.0073, "step": 5220 }, { "epoch": 0.08838714583879063, "grad_norm": 0.5146165490150452, "learning_rate": 4.419095901985636e-06, "loss": 0.0082, "step": 5230 }, { "epoch": 0.0885561461176411, "grad_norm": 0.4142181873321533, "learning_rate": 4.427545416138572e-06, "loss": 0.0051, "step": 5240 }, { "epoch": 0.08872514639649155, "grad_norm": 0.13999409973621368, "learning_rate": 4.4359949302915085e-06, "loss": 0.0064, "step": 5250 }, { "epoch": 0.08889414667534201, "grad_norm": 0.5468639731407166, "learning_rate": 4.444444444444444e-06, "loss": 0.0077, "step": 5260 }, { "epoch": 0.08906314695419247, "grad_norm": 0.45908740162849426, "learning_rate": 4.4528939585973815e-06, "loss": 0.0085, "step": 5270 }, { "epoch": 0.08923214723304293, "grad_norm": 0.6285553574562073, "learning_rate": 4.461343472750317e-06, "loss": 0.0086, "step": 5280 }, { "epoch": 0.08940114751189339, "grad_norm": 0.3353530764579773, "learning_rate": 4.469792986903254e-06, "loss": 0.0079, "step": 5290 }, { "epoch": 0.08957014779074385, "grad_norm": 0.4628467261791229, "learning_rate": 4.47824250105619e-06, "loss": 0.0085, "step": 5300 }, { "epoch": 0.08973914806959432, "grad_norm": 0.35261330008506775, "learning_rate": 4.486692015209126e-06, "loss": 0.0092, "step": 5310 }, { "epoch": 0.08990814834844478, "grad_norm": 0.33638912439346313, "learning_rate": 4.495141529362062e-06, "loss": 0.0082, "step": 5320 }, { "epoch": 0.09007714862729524, "grad_norm": 0.3847145736217499, "learning_rate": 4.503591043514998e-06, "loss": 0.0078, "step": 5330 }, { "epoch": 0.0902461489061457, "grad_norm": 0.41521865129470825, "learning_rate": 4.512040557667934e-06, "loss": 0.0083, "step": 5340 }, { "epoch": 0.09041514918499616, "grad_norm": 0.5259934663772583, "learning_rate": 4.520490071820871e-06, "loss": 0.0094, "step": 5350 }, { "epoch": 0.09058414946384662, "grad_norm": 0.4790876805782318, "learning_rate": 4.5289395859738065e-06, "loss": 0.0064, "step": 5360 }, { "epoch": 0.09075314974269708, "grad_norm": 0.3402535319328308, "learning_rate": 4.537389100126743e-06, "loss": 0.0054, "step": 5370 }, { "epoch": 0.09092215002154753, "grad_norm": 0.46615660190582275, "learning_rate": 4.5458386142796795e-06, "loss": 0.0073, "step": 5380 }, { "epoch": 0.091091150300398, "grad_norm": 0.39094266295433044, "learning_rate": 4.554288128432616e-06, "loss": 0.0077, "step": 5390 }, { "epoch": 0.09126015057924845, "grad_norm": 0.2667132616043091, "learning_rate": 4.562737642585552e-06, "loss": 0.0077, "step": 5400 }, { "epoch": 0.09142915085809891, "grad_norm": 0.483772337436676, "learning_rate": 4.571187156738488e-06, "loss": 0.0076, "step": 5410 }, { "epoch": 0.09159815113694937, "grad_norm": 0.4861210882663727, "learning_rate": 4.579636670891424e-06, "loss": 0.0107, "step": 5420 }, { "epoch": 0.09176715141579983, "grad_norm": 0.5079867839813232, "learning_rate": 4.58808618504436e-06, "loss": 0.0089, "step": 5430 }, { "epoch": 0.09193615169465029, "grad_norm": 0.33771970868110657, "learning_rate": 4.596535699197297e-06, "loss": 0.0081, "step": 5440 }, { "epoch": 0.09210515197350076, "grad_norm": 0.10134998708963394, "learning_rate": 4.6049852133502325e-06, "loss": 0.0043, "step": 5450 }, { "epoch": 0.09227415225235122, "grad_norm": 0.15871858596801758, "learning_rate": 4.613434727503169e-06, "loss": 0.0104, "step": 5460 }, { "epoch": 0.09244315253120168, "grad_norm": 0.4372641444206238, "learning_rate": 4.621884241656105e-06, "loss": 0.0167, "step": 5470 }, { "epoch": 0.09261215281005214, "grad_norm": 0.4832150340080261, "learning_rate": 4.630333755809041e-06, "loss": 0.0088, "step": 5480 }, { "epoch": 0.0927811530889026, "grad_norm": 0.34747639298439026, "learning_rate": 4.638783269961978e-06, "loss": 0.0052, "step": 5490 }, { "epoch": 0.09295015336775306, "grad_norm": 0.15239928662776947, "learning_rate": 4.647232784114914e-06, "loss": 0.0064, "step": 5500 }, { "epoch": 0.09311915364660352, "grad_norm": 0.48043081164360046, "learning_rate": 4.65568229826785e-06, "loss": 0.006, "step": 5510 }, { "epoch": 0.09328815392545398, "grad_norm": 0.30117249488830566, "learning_rate": 4.664131812420786e-06, "loss": 0.0062, "step": 5520 }, { "epoch": 0.09345715420430444, "grad_norm": 0.5305731892585754, "learning_rate": 4.672581326573723e-06, "loss": 0.0069, "step": 5530 }, { "epoch": 0.0936261544831549, "grad_norm": 0.31124499440193176, "learning_rate": 4.681030840726658e-06, "loss": 0.0065, "step": 5540 }, { "epoch": 0.09379515476200535, "grad_norm": 0.15525099635124207, "learning_rate": 4.689480354879595e-06, "loss": 0.006, "step": 5550 }, { "epoch": 0.09396415504085581, "grad_norm": 0.5805965065956116, "learning_rate": 4.6979298690325305e-06, "loss": 0.0067, "step": 5560 }, { "epoch": 0.09413315531970627, "grad_norm": 0.21559707820415497, "learning_rate": 4.706379383185467e-06, "loss": 0.0055, "step": 5570 }, { "epoch": 0.09430215559855674, "grad_norm": 0.4127371311187744, "learning_rate": 4.7148288973384035e-06, "loss": 0.0064, "step": 5580 }, { "epoch": 0.0944711558774072, "grad_norm": 0.28241196274757385, "learning_rate": 4.723278411491339e-06, "loss": 0.0067, "step": 5590 }, { "epoch": 0.09464015615625766, "grad_norm": 0.3856637477874756, "learning_rate": 4.7317279256442765e-06, "loss": 0.0086, "step": 5600 }, { "epoch": 0.09480915643510812, "grad_norm": 0.4850485622882843, "learning_rate": 4.740177439797212e-06, "loss": 0.0076, "step": 5610 }, { "epoch": 0.09497815671395858, "grad_norm": 0.3252734839916229, "learning_rate": 4.748626953950149e-06, "loss": 0.0068, "step": 5620 }, { "epoch": 0.09514715699280904, "grad_norm": 0.27155035734176636, "learning_rate": 4.757076468103084e-06, "loss": 0.0065, "step": 5630 }, { "epoch": 0.0953161572716595, "grad_norm": 0.21099352836608887, "learning_rate": 4.765525982256021e-06, "loss": 0.0074, "step": 5640 }, { "epoch": 0.09548515755050996, "grad_norm": 0.23718442022800446, "learning_rate": 4.773975496408957e-06, "loss": 0.0066, "step": 5650 }, { "epoch": 0.09565415782936042, "grad_norm": 0.12414859235286713, "learning_rate": 4.782425010561893e-06, "loss": 0.0072, "step": 5660 }, { "epoch": 0.09582315810821088, "grad_norm": 0.1519409865140915, "learning_rate": 4.790874524714829e-06, "loss": 0.0063, "step": 5670 }, { "epoch": 0.09599215838706134, "grad_norm": 0.16129052639007568, "learning_rate": 4.799324038867765e-06, "loss": 0.0069, "step": 5680 }, { "epoch": 0.0961611586659118, "grad_norm": 0.394161194562912, "learning_rate": 4.8077735530207015e-06, "loss": 0.0042, "step": 5690 }, { "epoch": 0.09633015894476225, "grad_norm": 0.15623830258846283, "learning_rate": 4.816223067173638e-06, "loss": 0.0068, "step": 5700 }, { "epoch": 0.09649915922361271, "grad_norm": 0.4517795145511627, "learning_rate": 4.8246725813265745e-06, "loss": 0.0098, "step": 5710 }, { "epoch": 0.09666815950246319, "grad_norm": 0.37541139125823975, "learning_rate": 4.83312209547951e-06, "loss": 0.0087, "step": 5720 }, { "epoch": 0.09683715978131364, "grad_norm": 0.23171207308769226, "learning_rate": 4.841571609632447e-06, "loss": 0.0046, "step": 5730 }, { "epoch": 0.0970061600601641, "grad_norm": 0.23239940404891968, "learning_rate": 4.850021123785383e-06, "loss": 0.0051, "step": 5740 }, { "epoch": 0.09717516033901456, "grad_norm": 0.16493850946426392, "learning_rate": 4.858470637938319e-06, "loss": 0.0063, "step": 5750 }, { "epoch": 0.09734416061786502, "grad_norm": 0.3797301650047302, "learning_rate": 4.866920152091255e-06, "loss": 0.0086, "step": 5760 }, { "epoch": 0.09751316089671548, "grad_norm": 0.35541290044784546, "learning_rate": 4.875369666244191e-06, "loss": 0.0075, "step": 5770 }, { "epoch": 0.09768216117556594, "grad_norm": 0.24128615856170654, "learning_rate": 4.8838191803971274e-06, "loss": 0.0058, "step": 5780 }, { "epoch": 0.0978511614544164, "grad_norm": 0.29352807998657227, "learning_rate": 4.892268694550064e-06, "loss": 0.0084, "step": 5790 }, { "epoch": 0.09802016173326686, "grad_norm": 0.10192513465881348, "learning_rate": 4.9007182087029996e-06, "loss": 0.008, "step": 5800 }, { "epoch": 0.09818916201211732, "grad_norm": 0.3217865228652954, "learning_rate": 4.909167722855936e-06, "loss": 0.0056, "step": 5810 }, { "epoch": 0.09835816229096778, "grad_norm": 0.25088396668434143, "learning_rate": 4.9176172370088726e-06, "loss": 0.0054, "step": 5820 }, { "epoch": 0.09852716256981824, "grad_norm": 0.6246805787086487, "learning_rate": 4.926066751161809e-06, "loss": 0.0077, "step": 5830 }, { "epoch": 0.0986961628486687, "grad_norm": 0.5747209787368774, "learning_rate": 4.934516265314745e-06, "loss": 0.0054, "step": 5840 }, { "epoch": 0.09886516312751915, "grad_norm": 0.22276253998279572, "learning_rate": 4.942965779467681e-06, "loss": 0.0076, "step": 5850 }, { "epoch": 0.09903416340636963, "grad_norm": 0.26171597838401794, "learning_rate": 4.951415293620617e-06, "loss": 0.0062, "step": 5860 }, { "epoch": 0.09920316368522009, "grad_norm": 0.4480033218860626, "learning_rate": 4.959864807773553e-06, "loss": 0.006, "step": 5870 }, { "epoch": 0.09937216396407055, "grad_norm": 0.44686561822891235, "learning_rate": 4.96831432192649e-06, "loss": 0.0073, "step": 5880 }, { "epoch": 0.099541164242921, "grad_norm": 0.186452716588974, "learning_rate": 4.9767638360794255e-06, "loss": 0.0104, "step": 5890 }, { "epoch": 0.09971016452177146, "grad_norm": 0.3807360529899597, "learning_rate": 4.985213350232362e-06, "loss": 0.0086, "step": 5900 }, { "epoch": 0.09987916480062192, "grad_norm": 0.4487653076648712, "learning_rate": 4.993662864385298e-06, "loss": 0.0073, "step": 5910 }, { "epoch": 0.10004816507947238, "grad_norm": 0.2705078125, "learning_rate": 5.002112378538234e-06, "loss": 0.0114, "step": 5920 }, { "epoch": 0.10021716535832284, "grad_norm": 0.33181098103523254, "learning_rate": 5.010561892691171e-06, "loss": 0.0046, "step": 5930 }, { "epoch": 0.1003861656371733, "grad_norm": 0.25259125232696533, "learning_rate": 5.019011406844106e-06, "loss": 0.0089, "step": 5940 }, { "epoch": 0.10055516591602376, "grad_norm": 0.5207515954971313, "learning_rate": 5.027460920997043e-06, "loss": 0.0089, "step": 5950 }, { "epoch": 0.10072416619487422, "grad_norm": 0.1929161250591278, "learning_rate": 5.035910435149978e-06, "loss": 0.0055, "step": 5960 }, { "epoch": 0.10089316647372468, "grad_norm": 0.28532758355140686, "learning_rate": 5.044359949302915e-06, "loss": 0.0056, "step": 5970 }, { "epoch": 0.10106216675257514, "grad_norm": 0.0873856320977211, "learning_rate": 5.052809463455852e-06, "loss": 0.0061, "step": 5980 }, { "epoch": 0.1012311670314256, "grad_norm": 0.2607017755508423, "learning_rate": 5.061258977608789e-06, "loss": 0.0064, "step": 5990 }, { "epoch": 0.10140016731027607, "grad_norm": 0.3413243293762207, "learning_rate": 5.069708491761724e-06, "loss": 0.0064, "step": 6000 }, { "epoch": 0.10156916758912653, "grad_norm": 0.5261040329933167, "learning_rate": 5.078158005914661e-06, "loss": 0.0074, "step": 6010 }, { "epoch": 0.10173816786797699, "grad_norm": 0.21714960038661957, "learning_rate": 5.0866075200675965e-06, "loss": 0.005, "step": 6020 }, { "epoch": 0.10190716814682745, "grad_norm": 0.26323312520980835, "learning_rate": 5.095057034220533e-06, "loss": 0.0079, "step": 6030 }, { "epoch": 0.1020761684256779, "grad_norm": 0.2866646647453308, "learning_rate": 5.1035065483734695e-06, "loss": 0.0062, "step": 6040 }, { "epoch": 0.10224516870452836, "grad_norm": 0.21184338629245758, "learning_rate": 5.111956062526405e-06, "loss": 0.0063, "step": 6050 }, { "epoch": 0.10241416898337882, "grad_norm": 0.1997213065624237, "learning_rate": 5.120405576679342e-06, "loss": 0.0072, "step": 6060 }, { "epoch": 0.10258316926222928, "grad_norm": 0.38983890414237976, "learning_rate": 5.128855090832277e-06, "loss": 0.0065, "step": 6070 }, { "epoch": 0.10275216954107974, "grad_norm": 0.23241905868053436, "learning_rate": 5.137304604985214e-06, "loss": 0.0059, "step": 6080 }, { "epoch": 0.1029211698199302, "grad_norm": 0.44172483682632446, "learning_rate": 5.14575411913815e-06, "loss": 0.0053, "step": 6090 }, { "epoch": 0.10309017009878066, "grad_norm": 0.33180946111679077, "learning_rate": 5.154203633291086e-06, "loss": 0.01, "step": 6100 }, { "epoch": 0.10325917037763112, "grad_norm": 0.38379010558128357, "learning_rate": 5.162653147444022e-06, "loss": 0.0061, "step": 6110 }, { "epoch": 0.10342817065648158, "grad_norm": 0.2291935533285141, "learning_rate": 5.171102661596958e-06, "loss": 0.0058, "step": 6120 }, { "epoch": 0.10359717093533205, "grad_norm": 0.13137617707252502, "learning_rate": 5.1795521757498946e-06, "loss": 0.0055, "step": 6130 }, { "epoch": 0.10376617121418251, "grad_norm": 0.3997475504875183, "learning_rate": 5.188001689902831e-06, "loss": 0.0071, "step": 6140 }, { "epoch": 0.10393517149303297, "grad_norm": 0.4803593158721924, "learning_rate": 5.196451204055767e-06, "loss": 0.0078, "step": 6150 }, { "epoch": 0.10410417177188343, "grad_norm": 0.2848051190376282, "learning_rate": 5.204900718208703e-06, "loss": 0.0064, "step": 6160 }, { "epoch": 0.10427317205073389, "grad_norm": 0.4575238823890686, "learning_rate": 5.213350232361639e-06, "loss": 0.0077, "step": 6170 }, { "epoch": 0.10444217232958435, "grad_norm": 0.2880443036556244, "learning_rate": 5.221799746514575e-06, "loss": 0.006, "step": 6180 }, { "epoch": 0.1046111726084348, "grad_norm": 0.2867361605167389, "learning_rate": 5.230249260667513e-06, "loss": 0.0033, "step": 6190 }, { "epoch": 0.10478017288728526, "grad_norm": 0.2843003273010254, "learning_rate": 5.238698774820449e-06, "loss": 0.0089, "step": 6200 }, { "epoch": 0.10494917316613572, "grad_norm": 0.38200074434280396, "learning_rate": 5.247148288973385e-06, "loss": 0.0083, "step": 6210 }, { "epoch": 0.10511817344498618, "grad_norm": 0.3250083029270172, "learning_rate": 5.255597803126321e-06, "loss": 0.0067, "step": 6220 }, { "epoch": 0.10528717372383664, "grad_norm": 0.15679076313972473, "learning_rate": 5.264047317279257e-06, "loss": 0.0083, "step": 6230 }, { "epoch": 0.1054561740026871, "grad_norm": 0.23474538326263428, "learning_rate": 5.2724968314321934e-06, "loss": 0.0071, "step": 6240 }, { "epoch": 0.10562517428153756, "grad_norm": 0.21750317513942719, "learning_rate": 5.28094634558513e-06, "loss": 0.0075, "step": 6250 }, { "epoch": 0.10579417456038802, "grad_norm": 0.31989774107933044, "learning_rate": 5.289395859738066e-06, "loss": 0.006, "step": 6260 }, { "epoch": 0.10596317483923849, "grad_norm": 0.28282850980758667, "learning_rate": 5.297845373891002e-06, "loss": 0.0057, "step": 6270 }, { "epoch": 0.10613217511808895, "grad_norm": 0.5023276209831238, "learning_rate": 5.306294888043938e-06, "loss": 0.0073, "step": 6280 }, { "epoch": 0.10630117539693941, "grad_norm": 0.25754910707473755, "learning_rate": 5.314744402196874e-06, "loss": 0.0077, "step": 6290 }, { "epoch": 0.10647017567578987, "grad_norm": 0.38880106806755066, "learning_rate": 5.323193916349811e-06, "loss": 0.0058, "step": 6300 }, { "epoch": 0.10663917595464033, "grad_norm": 0.3246299922466278, "learning_rate": 5.331643430502746e-06, "loss": 0.0058, "step": 6310 }, { "epoch": 0.10680817623349079, "grad_norm": 0.27981093525886536, "learning_rate": 5.340092944655683e-06, "loss": 0.0058, "step": 6320 }, { "epoch": 0.10697717651234125, "grad_norm": 0.22193048894405365, "learning_rate": 5.3485424588086185e-06, "loss": 0.0075, "step": 6330 }, { "epoch": 0.1071461767911917, "grad_norm": 0.6009519100189209, "learning_rate": 5.356991972961555e-06, "loss": 0.0063, "step": 6340 }, { "epoch": 0.10731517707004216, "grad_norm": 0.2525694966316223, "learning_rate": 5.3654414871144915e-06, "loss": 0.006, "step": 6350 }, { "epoch": 0.10748417734889262, "grad_norm": 0.45865657925605774, "learning_rate": 5.373891001267427e-06, "loss": 0.0078, "step": 6360 }, { "epoch": 0.10765317762774308, "grad_norm": 0.5306742191314697, "learning_rate": 5.382340515420364e-06, "loss": 0.0074, "step": 6370 }, { "epoch": 0.10782217790659354, "grad_norm": 0.35447096824645996, "learning_rate": 5.390790029573299e-06, "loss": 0.0083, "step": 6380 }, { "epoch": 0.107991178185444, "grad_norm": 0.32727372646331787, "learning_rate": 5.399239543726236e-06, "loss": 0.0052, "step": 6390 }, { "epoch": 0.10816017846429446, "grad_norm": 0.2099214345216751, "learning_rate": 5.407689057879172e-06, "loss": 0.0044, "step": 6400 }, { "epoch": 0.10832917874314493, "grad_norm": 0.18449358642101288, "learning_rate": 5.416138572032109e-06, "loss": 0.0068, "step": 6410 }, { "epoch": 0.10849817902199539, "grad_norm": 0.24026605486869812, "learning_rate": 5.424588086185045e-06, "loss": 0.0067, "step": 6420 }, { "epoch": 0.10866717930084585, "grad_norm": 0.2846910059452057, "learning_rate": 5.433037600337982e-06, "loss": 0.0052, "step": 6430 }, { "epoch": 0.10883617957969631, "grad_norm": 0.3208889365196228, "learning_rate": 5.441487114490917e-06, "loss": 0.0063, "step": 6440 }, { "epoch": 0.10900517985854677, "grad_norm": 0.2054966241121292, "learning_rate": 5.449936628643854e-06, "loss": 0.0067, "step": 6450 }, { "epoch": 0.10917418013739723, "grad_norm": 0.33208614587783813, "learning_rate": 5.4583861427967895e-06, "loss": 0.0076, "step": 6460 }, { "epoch": 0.10934318041624769, "grad_norm": 0.22714951634407043, "learning_rate": 5.466835656949726e-06, "loss": 0.0092, "step": 6470 }, { "epoch": 0.10951218069509815, "grad_norm": 0.26246002316474915, "learning_rate": 5.4752851711026625e-06, "loss": 0.0042, "step": 6480 }, { "epoch": 0.1096811809739486, "grad_norm": 0.46016496419906616, "learning_rate": 5.483734685255598e-06, "loss": 0.0112, "step": 6490 }, { "epoch": 0.10985018125279906, "grad_norm": 0.17244432866573334, "learning_rate": 5.492184199408535e-06, "loss": 0.0054, "step": 6500 }, { "epoch": 0.11001918153164952, "grad_norm": 0.1076434776186943, "learning_rate": 5.50063371356147e-06, "loss": 0.0057, "step": 6510 }, { "epoch": 0.11018818181049998, "grad_norm": 0.332721471786499, "learning_rate": 5.509083227714407e-06, "loss": 0.005, "step": 6520 }, { "epoch": 0.11035718208935044, "grad_norm": 0.12702441215515137, "learning_rate": 5.517532741867343e-06, "loss": 0.0071, "step": 6530 }, { "epoch": 0.1105261823682009, "grad_norm": 0.13691650331020355, "learning_rate": 5.525982256020279e-06, "loss": 0.0048, "step": 6540 }, { "epoch": 0.11069518264705137, "grad_norm": 0.16740848124027252, "learning_rate": 5.5344317701732154e-06, "loss": 0.0045, "step": 6550 }, { "epoch": 0.11086418292590183, "grad_norm": 0.14890030026435852, "learning_rate": 5.542881284326151e-06, "loss": 0.0083, "step": 6560 }, { "epoch": 0.11103318320475229, "grad_norm": 0.5786375999450684, "learning_rate": 5.5513307984790876e-06, "loss": 0.0069, "step": 6570 }, { "epoch": 0.11120218348360275, "grad_norm": 0.30092403292655945, "learning_rate": 5.559780312632024e-06, "loss": 0.0044, "step": 6580 }, { "epoch": 0.11137118376245321, "grad_norm": 0.21859614551067352, "learning_rate": 5.56822982678496e-06, "loss": 0.0075, "step": 6590 }, { "epoch": 0.11154018404130367, "grad_norm": 0.22003114223480225, "learning_rate": 5.576679340937896e-06, "loss": 0.0095, "step": 6600 }, { "epoch": 0.11170918432015413, "grad_norm": 0.28145134449005127, "learning_rate": 5.585128855090832e-06, "loss": 0.0063, "step": 6610 }, { "epoch": 0.11187818459900459, "grad_norm": 0.09334838390350342, "learning_rate": 5.593578369243768e-06, "loss": 0.0068, "step": 6620 }, { "epoch": 0.11204718487785505, "grad_norm": 0.40071040391921997, "learning_rate": 5.602027883396706e-06, "loss": 0.0068, "step": 6630 }, { "epoch": 0.1122161851567055, "grad_norm": 0.2164887636899948, "learning_rate": 5.610477397549642e-06, "loss": 0.0044, "step": 6640 }, { "epoch": 0.11238518543555596, "grad_norm": 0.2855166792869568, "learning_rate": 5.618926911702578e-06, "loss": 0.0062, "step": 6650 }, { "epoch": 0.11255418571440642, "grad_norm": 0.2651625871658325, "learning_rate": 5.627376425855514e-06, "loss": 0.0053, "step": 6660 }, { "epoch": 0.11272318599325688, "grad_norm": 0.2063087671995163, "learning_rate": 5.63582594000845e-06, "loss": 0.0073, "step": 6670 }, { "epoch": 0.11289218627210734, "grad_norm": 0.07739928364753723, "learning_rate": 5.6442754541613865e-06, "loss": 0.0055, "step": 6680 }, { "epoch": 0.11306118655095782, "grad_norm": 0.4230482876300812, "learning_rate": 5.652724968314323e-06, "loss": 0.0075, "step": 6690 }, { "epoch": 0.11323018682980827, "grad_norm": 0.26819807291030884, "learning_rate": 5.661174482467259e-06, "loss": 0.0064, "step": 6700 }, { "epoch": 0.11339918710865873, "grad_norm": 0.5605406761169434, "learning_rate": 5.669623996620195e-06, "loss": 0.0057, "step": 6710 }, { "epoch": 0.11356818738750919, "grad_norm": 0.27150842547416687, "learning_rate": 5.678073510773131e-06, "loss": 0.0055, "step": 6720 }, { "epoch": 0.11373718766635965, "grad_norm": 0.3509826362133026, "learning_rate": 5.686523024926067e-06, "loss": 0.0053, "step": 6730 }, { "epoch": 0.11390618794521011, "grad_norm": 0.2064315527677536, "learning_rate": 5.694972539079004e-06, "loss": 0.0055, "step": 6740 }, { "epoch": 0.11407518822406057, "grad_norm": 0.2994695007801056, "learning_rate": 5.703422053231939e-06, "loss": 0.0055, "step": 6750 }, { "epoch": 0.11424418850291103, "grad_norm": 0.5383670330047607, "learning_rate": 5.711871567384876e-06, "loss": 0.0062, "step": 6760 }, { "epoch": 0.11441318878176149, "grad_norm": 0.20319953560829163, "learning_rate": 5.7203210815378115e-06, "loss": 0.004, "step": 6770 }, { "epoch": 0.11458218906061195, "grad_norm": 0.28655731678009033, "learning_rate": 5.728770595690748e-06, "loss": 0.0032, "step": 6780 }, { "epoch": 0.1147511893394624, "grad_norm": 0.225058451294899, "learning_rate": 5.7372201098436845e-06, "loss": 0.007, "step": 6790 }, { "epoch": 0.11492018961831287, "grad_norm": 0.1450721025466919, "learning_rate": 5.74566962399662e-06, "loss": 0.0066, "step": 6800 }, { "epoch": 0.11508918989716332, "grad_norm": 0.25479888916015625, "learning_rate": 5.754119138149557e-06, "loss": 0.0078, "step": 6810 }, { "epoch": 0.1152581901760138, "grad_norm": 0.25367265939712524, "learning_rate": 5.762568652302492e-06, "loss": 0.0071, "step": 6820 }, { "epoch": 0.11542719045486426, "grad_norm": 0.16044245660305023, "learning_rate": 5.771018166455429e-06, "loss": 0.0062, "step": 6830 }, { "epoch": 0.11559619073371472, "grad_norm": 0.32665905356407166, "learning_rate": 5.779467680608365e-06, "loss": 0.0044, "step": 6840 }, { "epoch": 0.11576519101256517, "grad_norm": 0.47442156076431274, "learning_rate": 5.787917194761303e-06, "loss": 0.0063, "step": 6850 }, { "epoch": 0.11593419129141563, "grad_norm": 0.4188691973686218, "learning_rate": 5.796366708914238e-06, "loss": 0.0076, "step": 6860 }, { "epoch": 0.11610319157026609, "grad_norm": 0.1815645545721054, "learning_rate": 5.804816223067175e-06, "loss": 0.0075, "step": 6870 }, { "epoch": 0.11627219184911655, "grad_norm": 0.15755246579647064, "learning_rate": 5.81326573722011e-06, "loss": 0.0046, "step": 6880 }, { "epoch": 0.11644119212796701, "grad_norm": 0.4051576852798462, "learning_rate": 5.821715251373047e-06, "loss": 0.0062, "step": 6890 }, { "epoch": 0.11661019240681747, "grad_norm": 0.39070749282836914, "learning_rate": 5.830164765525983e-06, "loss": 0.0068, "step": 6900 }, { "epoch": 0.11677919268566793, "grad_norm": 0.458823025226593, "learning_rate": 5.838614279678919e-06, "loss": 0.008, "step": 6910 }, { "epoch": 0.11694819296451839, "grad_norm": 0.03617480769753456, "learning_rate": 5.8470637938318555e-06, "loss": 0.005, "step": 6920 }, { "epoch": 0.11711719324336885, "grad_norm": 0.2267484962940216, "learning_rate": 5.855513307984791e-06, "loss": 0.0051, "step": 6930 }, { "epoch": 0.1172861935222193, "grad_norm": 0.2612169682979584, "learning_rate": 5.863962822137728e-06, "loss": 0.0062, "step": 6940 }, { "epoch": 0.11745519380106977, "grad_norm": 0.42087146639823914, "learning_rate": 5.872412336290664e-06, "loss": 0.0127, "step": 6950 }, { "epoch": 0.11762419407992024, "grad_norm": 0.3855131268501282, "learning_rate": 5.8808618504436e-06, "loss": 0.0056, "step": 6960 }, { "epoch": 0.1177931943587707, "grad_norm": 0.4098065197467804, "learning_rate": 5.889311364596536e-06, "loss": 0.0071, "step": 6970 }, { "epoch": 0.11796219463762116, "grad_norm": 0.18783697485923767, "learning_rate": 5.897760878749472e-06, "loss": 0.0093, "step": 6980 }, { "epoch": 0.11813119491647162, "grad_norm": 0.1892809122800827, "learning_rate": 5.9062103929024085e-06, "loss": 0.0102, "step": 6990 }, { "epoch": 0.11830019519532207, "grad_norm": 0.14258983731269836, "learning_rate": 5.914659907055345e-06, "loss": 0.0059, "step": 7000 }, { "epoch": 0.11846919547417253, "grad_norm": 0.47493696212768555, "learning_rate": 5.923109421208281e-06, "loss": 0.005, "step": 7010 }, { "epoch": 0.11863819575302299, "grad_norm": 0.12930482625961304, "learning_rate": 5.931558935361217e-06, "loss": 0.0053, "step": 7020 }, { "epoch": 0.11880719603187345, "grad_norm": 0.26768070459365845, "learning_rate": 5.940008449514153e-06, "loss": 0.0052, "step": 7030 }, { "epoch": 0.11897619631072391, "grad_norm": 0.4684838354587555, "learning_rate": 5.948457963667089e-06, "loss": 0.0037, "step": 7040 }, { "epoch": 0.11914519658957437, "grad_norm": 0.33800092339515686, "learning_rate": 5.956907477820026e-06, "loss": 0.0094, "step": 7050 }, { "epoch": 0.11931419686842483, "grad_norm": 0.1822284311056137, "learning_rate": 5.965356991972962e-06, "loss": 0.0069, "step": 7060 }, { "epoch": 0.11948319714727529, "grad_norm": 0.3310371935367584, "learning_rate": 5.973806506125899e-06, "loss": 0.0073, "step": 7070 }, { "epoch": 0.11965219742612575, "grad_norm": 0.1805610954761505, "learning_rate": 5.982256020278835e-06, "loss": 0.0067, "step": 7080 }, { "epoch": 0.1198211977049762, "grad_norm": 0.22287240624427795, "learning_rate": 5.990705534431771e-06, "loss": 0.0062, "step": 7090 }, { "epoch": 0.11999019798382668, "grad_norm": 0.22043073177337646, "learning_rate": 5.999155048584707e-06, "loss": 0.0054, "step": 7100 }, { "epoch": 0.12015919826267714, "grad_norm": 0.36443981528282166, "learning_rate": 6.007604562737643e-06, "loss": 0.0065, "step": 7110 }, { "epoch": 0.1203281985415276, "grad_norm": 0.22064130008220673, "learning_rate": 6.0160540768905795e-06, "loss": 0.0073, "step": 7120 }, { "epoch": 0.12049719882037806, "grad_norm": 0.48764801025390625, "learning_rate": 6.024503591043516e-06, "loss": 0.0122, "step": 7130 }, { "epoch": 0.12066619909922852, "grad_norm": 0.15102821588516235, "learning_rate": 6.032953105196452e-06, "loss": 0.0062, "step": 7140 }, { "epoch": 0.12083519937807898, "grad_norm": 0.22233891487121582, "learning_rate": 6.041402619349388e-06, "loss": 0.0058, "step": 7150 }, { "epoch": 0.12100419965692943, "grad_norm": 0.4343150854110718, "learning_rate": 6.049852133502324e-06, "loss": 0.0065, "step": 7160 }, { "epoch": 0.1211731999357799, "grad_norm": 0.28188592195510864, "learning_rate": 6.05830164765526e-06, "loss": 0.0102, "step": 7170 }, { "epoch": 0.12134220021463035, "grad_norm": 0.5624836683273315, "learning_rate": 6.066751161808197e-06, "loss": 0.0052, "step": 7180 }, { "epoch": 0.12151120049348081, "grad_norm": 0.3889232873916626, "learning_rate": 6.075200675961132e-06, "loss": 0.0056, "step": 7190 }, { "epoch": 0.12168020077233127, "grad_norm": 0.22911286354064941, "learning_rate": 6.083650190114069e-06, "loss": 0.0035, "step": 7200 }, { "epoch": 0.12184920105118173, "grad_norm": 0.42093756794929504, "learning_rate": 6.0920997042670045e-06, "loss": 0.0062, "step": 7210 }, { "epoch": 0.12201820133003219, "grad_norm": 0.39620158076286316, "learning_rate": 6.100549218419941e-06, "loss": 0.0049, "step": 7220 }, { "epoch": 0.12218720160888265, "grad_norm": 0.25324249267578125, "learning_rate": 6.1089987325728775e-06, "loss": 0.0085, "step": 7230 }, { "epoch": 0.12235620188773312, "grad_norm": 0.5439518094062805, "learning_rate": 6.117448246725813e-06, "loss": 0.0076, "step": 7240 }, { "epoch": 0.12252520216658358, "grad_norm": 0.2995526194572449, "learning_rate": 6.12589776087875e-06, "loss": 0.0091, "step": 7250 }, { "epoch": 0.12269420244543404, "grad_norm": 0.07601664960384369, "learning_rate": 6.134347275031685e-06, "loss": 0.0048, "step": 7260 }, { "epoch": 0.1228632027242845, "grad_norm": 0.196809783577919, "learning_rate": 6.142796789184622e-06, "loss": 0.0073, "step": 7270 }, { "epoch": 0.12303220300313496, "grad_norm": 0.1969756931066513, "learning_rate": 6.151246303337559e-06, "loss": 0.0057, "step": 7280 }, { "epoch": 0.12320120328198542, "grad_norm": 0.21454773843288422, "learning_rate": 6.159695817490496e-06, "loss": 0.0037, "step": 7290 }, { "epoch": 0.12337020356083588, "grad_norm": 0.21385742723941803, "learning_rate": 6.168145331643431e-06, "loss": 0.006, "step": 7300 }, { "epoch": 0.12353920383968633, "grad_norm": 0.2406219244003296, "learning_rate": 6.176594845796368e-06, "loss": 0.0084, "step": 7310 }, { "epoch": 0.1237082041185368, "grad_norm": 0.23890087008476257, "learning_rate": 6.1850443599493034e-06, "loss": 0.0097, "step": 7320 }, { "epoch": 0.12387720439738725, "grad_norm": 0.4540402293205261, "learning_rate": 6.19349387410224e-06, "loss": 0.0056, "step": 7330 }, { "epoch": 0.12404620467623771, "grad_norm": 0.2217201441526413, "learning_rate": 6.2019433882551764e-06, "loss": 0.0068, "step": 7340 }, { "epoch": 0.12421520495508817, "grad_norm": 0.4099099040031433, "learning_rate": 6.210392902408112e-06, "loss": 0.0056, "step": 7350 }, { "epoch": 0.12438420523393863, "grad_norm": 0.3341483175754547, "learning_rate": 6.2188424165610486e-06, "loss": 0.0051, "step": 7360 }, { "epoch": 0.1245532055127891, "grad_norm": 0.23542355000972748, "learning_rate": 6.227291930713984e-06, "loss": 0.0039, "step": 7370 }, { "epoch": 0.12472220579163956, "grad_norm": 0.24790506064891815, "learning_rate": 6.235741444866921e-06, "loss": 0.0063, "step": 7380 }, { "epoch": 0.12489120607049002, "grad_norm": 0.5249748826026917, "learning_rate": 6.244190959019857e-06, "loss": 0.0057, "step": 7390 }, { "epoch": 0.12506020634934048, "grad_norm": 0.3524332642555237, "learning_rate": 6.252640473172793e-06, "loss": 0.0049, "step": 7400 }, { "epoch": 0.12522920662819093, "grad_norm": 0.21495512127876282, "learning_rate": 6.261089987325729e-06, "loss": 0.0055, "step": 7410 }, { "epoch": 0.1253982069070414, "grad_norm": 0.17406326532363892, "learning_rate": 6.269539501478665e-06, "loss": 0.0059, "step": 7420 }, { "epoch": 0.12556720718589184, "grad_norm": 0.17095540463924408, "learning_rate": 6.2779890156316015e-06, "loss": 0.005, "step": 7430 }, { "epoch": 0.12573620746474232, "grad_norm": 0.12662187218666077, "learning_rate": 6.286438529784538e-06, "loss": 0.0072, "step": 7440 }, { "epoch": 0.1259052077435928, "grad_norm": 0.14037641882896423, "learning_rate": 6.294888043937474e-06, "loss": 0.005, "step": 7450 }, { "epoch": 0.12607420802244323, "grad_norm": 0.4467966854572296, "learning_rate": 6.30333755809041e-06, "loss": 0.0068, "step": 7460 }, { "epoch": 0.1262432083012937, "grad_norm": 0.29418110847473145, "learning_rate": 6.311787072243346e-06, "loss": 0.0072, "step": 7470 }, { "epoch": 0.12641220858014415, "grad_norm": 0.2855652868747711, "learning_rate": 6.320236586396282e-06, "loss": 0.0045, "step": 7480 }, { "epoch": 0.12658120885899463, "grad_norm": 0.249783456325531, "learning_rate": 6.328686100549219e-06, "loss": 0.0053, "step": 7490 }, { "epoch": 0.12675020913784507, "grad_norm": 0.1699150800704956, "learning_rate": 6.337135614702155e-06, "loss": 0.0074, "step": 7500 }, { "epoch": 0.12691920941669554, "grad_norm": 0.1926853507757187, "learning_rate": 6.345585128855092e-06, "loss": 0.0058, "step": 7510 }, { "epoch": 0.127088209695546, "grad_norm": 0.18150867521762848, "learning_rate": 6.354034643008028e-06, "loss": 0.0057, "step": 7520 }, { "epoch": 0.12725720997439646, "grad_norm": 0.23561467230319977, "learning_rate": 6.362484157160964e-06, "loss": 0.0057, "step": 7530 }, { "epoch": 0.1274262102532469, "grad_norm": 0.12343981117010117, "learning_rate": 6.3709336713139e-06, "loss": 0.006, "step": 7540 }, { "epoch": 0.12759521053209738, "grad_norm": 0.23419040441513062, "learning_rate": 6.379383185466836e-06, "loss": 0.0041, "step": 7550 }, { "epoch": 0.12776421081094783, "grad_norm": 0.2999023497104645, "learning_rate": 6.3878326996197725e-06, "loss": 0.0096, "step": 7560 }, { "epoch": 0.1279332110897983, "grad_norm": 0.513978123664856, "learning_rate": 6.396282213772709e-06, "loss": 0.0049, "step": 7570 }, { "epoch": 0.12810221136864877, "grad_norm": 0.49737265706062317, "learning_rate": 6.404731727925645e-06, "loss": 0.0061, "step": 7580 }, { "epoch": 0.12827121164749922, "grad_norm": 0.2800723612308502, "learning_rate": 6.413181242078581e-06, "loss": 0.0079, "step": 7590 }, { "epoch": 0.1284402119263497, "grad_norm": 0.2312825322151184, "learning_rate": 6.421630756231517e-06, "loss": 0.0068, "step": 7600 }, { "epoch": 0.12860921220520014, "grad_norm": 0.10857295989990234, "learning_rate": 6.430080270384453e-06, "loss": 0.0045, "step": 7610 }, { "epoch": 0.1287782124840506, "grad_norm": 0.2910762131214142, "learning_rate": 6.43852978453739e-06, "loss": 0.0064, "step": 7620 }, { "epoch": 0.12894721276290105, "grad_norm": 0.09099793434143066, "learning_rate": 6.4469792986903254e-06, "loss": 0.0046, "step": 7630 }, { "epoch": 0.12911621304175153, "grad_norm": 0.5014476180076599, "learning_rate": 6.455428812843262e-06, "loss": 0.0064, "step": 7640 }, { "epoch": 0.12928521332060197, "grad_norm": 0.4255957305431366, "learning_rate": 6.4638783269961976e-06, "loss": 0.0084, "step": 7650 }, { "epoch": 0.12945421359945244, "grad_norm": 0.4127286672592163, "learning_rate": 6.472327841149134e-06, "loss": 0.0055, "step": 7660 }, { "epoch": 0.1296232138783029, "grad_norm": 0.3005797266960144, "learning_rate": 6.4807773553020706e-06, "loss": 0.0066, "step": 7670 }, { "epoch": 0.12979221415715336, "grad_norm": 0.24604128301143646, "learning_rate": 6.489226869455006e-06, "loss": 0.0062, "step": 7680 }, { "epoch": 0.1299612144360038, "grad_norm": 0.11351402848958969, "learning_rate": 6.497676383607943e-06, "loss": 0.0068, "step": 7690 }, { "epoch": 0.13013021471485428, "grad_norm": 0.31498342752456665, "learning_rate": 6.506125897760879e-06, "loss": 0.0052, "step": 7700 }, { "epoch": 0.13029921499370473, "grad_norm": 0.17923851311206818, "learning_rate": 6.514575411913816e-06, "loss": 0.0085, "step": 7710 }, { "epoch": 0.1304682152725552, "grad_norm": 0.1601186990737915, "learning_rate": 6.523024926066752e-06, "loss": 0.0044, "step": 7720 }, { "epoch": 0.13063721555140567, "grad_norm": 0.3751295506954193, "learning_rate": 6.531474440219689e-06, "loss": 0.0076, "step": 7730 }, { "epoch": 0.13080621583025612, "grad_norm": 0.33075717091560364, "learning_rate": 6.539923954372624e-06, "loss": 0.0067, "step": 7740 }, { "epoch": 0.1309752161091066, "grad_norm": 0.12268586456775665, "learning_rate": 6.548373468525561e-06, "loss": 0.0061, "step": 7750 }, { "epoch": 0.13114421638795704, "grad_norm": 0.4134727418422699, "learning_rate": 6.5568229826784965e-06, "loss": 0.0084, "step": 7760 }, { "epoch": 0.1313132166668075, "grad_norm": 0.030967149883508682, "learning_rate": 6.565272496831433e-06, "loss": 0.0048, "step": 7770 }, { "epoch": 0.13148221694565795, "grad_norm": 0.10871484875679016, "learning_rate": 6.5737220109843695e-06, "loss": 0.007, "step": 7780 }, { "epoch": 0.13165121722450843, "grad_norm": 0.3758116364479065, "learning_rate": 6.582171525137305e-06, "loss": 0.0055, "step": 7790 }, { "epoch": 0.13182021750335887, "grad_norm": 0.90146404504776, "learning_rate": 6.590621039290242e-06, "loss": 0.0051, "step": 7800 }, { "epoch": 0.13198921778220934, "grad_norm": 0.4192593991756439, "learning_rate": 6.599070553443177e-06, "loss": 0.0123, "step": 7810 }, { "epoch": 0.1321582180610598, "grad_norm": 0.4820626378059387, "learning_rate": 6.607520067596114e-06, "loss": 0.0082, "step": 7820 }, { "epoch": 0.13232721833991026, "grad_norm": 0.0934116318821907, "learning_rate": 6.61596958174905e-06, "loss": 0.0058, "step": 7830 }, { "epoch": 0.1324962186187607, "grad_norm": 0.46475282311439514, "learning_rate": 6.624419095901986e-06, "loss": 0.0053, "step": 7840 }, { "epoch": 0.13266521889761118, "grad_norm": 0.15889766812324524, "learning_rate": 6.632868610054922e-06, "loss": 0.0036, "step": 7850 }, { "epoch": 0.13283421917646165, "grad_norm": 0.1389142870903015, "learning_rate": 6.641318124207858e-06, "loss": 0.0069, "step": 7860 }, { "epoch": 0.1330032194553121, "grad_norm": 0.31071558594703674, "learning_rate": 6.6497676383607945e-06, "loss": 0.0075, "step": 7870 }, { "epoch": 0.13317221973416257, "grad_norm": 0.2616838812828064, "learning_rate": 6.658217152513731e-06, "loss": 0.006, "step": 7880 }, { "epoch": 0.13334122001301302, "grad_norm": 0.4653517007827759, "learning_rate": 6.666666666666667e-06, "loss": 0.0053, "step": 7890 }, { "epoch": 0.1335102202918635, "grad_norm": 0.33546552062034607, "learning_rate": 6.675116180819603e-06, "loss": 0.0059, "step": 7900 }, { "epoch": 0.13367922057071394, "grad_norm": 0.21979467570781708, "learning_rate": 6.683565694972539e-06, "loss": 0.0053, "step": 7910 }, { "epoch": 0.1338482208495644, "grad_norm": 0.24753090739250183, "learning_rate": 6.692015209125475e-06, "loss": 0.0055, "step": 7920 }, { "epoch": 0.13401722112841485, "grad_norm": 0.4232696294784546, "learning_rate": 6.700464723278413e-06, "loss": 0.0077, "step": 7930 }, { "epoch": 0.13418622140726533, "grad_norm": 0.14672839641571045, "learning_rate": 6.708914237431349e-06, "loss": 0.0058, "step": 7940 }, { "epoch": 0.13435522168611577, "grad_norm": 0.1414640247821808, "learning_rate": 6.717363751584285e-06, "loss": 0.0064, "step": 7950 }, { "epoch": 0.13452422196496625, "grad_norm": 0.17650949954986572, "learning_rate": 6.725813265737221e-06, "loss": 0.0055, "step": 7960 }, { "epoch": 0.1346932222438167, "grad_norm": 0.14567339420318604, "learning_rate": 6.734262779890157e-06, "loss": 0.0063, "step": 7970 }, { "epoch": 0.13486222252266716, "grad_norm": 0.24944984912872314, "learning_rate": 6.742712294043093e-06, "loss": 0.0073, "step": 7980 }, { "epoch": 0.13503122280151764, "grad_norm": 0.5359504818916321, "learning_rate": 6.75116180819603e-06, "loss": 0.0056, "step": 7990 }, { "epoch": 0.13520022308036808, "grad_norm": 0.43457257747650146, "learning_rate": 6.7596113223489655e-06, "loss": 0.0068, "step": 8000 }, { "epoch": 0.13536922335921855, "grad_norm": 0.38084739446640015, "learning_rate": 6.768060836501902e-06, "loss": 0.0065, "step": 8010 }, { "epoch": 0.135538223638069, "grad_norm": 6.1073760986328125, "learning_rate": 6.776510350654838e-06, "loss": 0.0073, "step": 8020 }, { "epoch": 0.13570722391691947, "grad_norm": 0.7895514369010925, "learning_rate": 6.784959864807774e-06, "loss": 0.0088, "step": 8030 }, { "epoch": 0.13587622419576992, "grad_norm": 0.2781662046909332, "learning_rate": 6.793409378960711e-06, "loss": 0.0047, "step": 8040 }, { "epoch": 0.1360452244746204, "grad_norm": 0.16183938086032867, "learning_rate": 6.801858893113646e-06, "loss": 0.0062, "step": 8050 }, { "epoch": 0.13621422475347084, "grad_norm": 0.2602287828922272, "learning_rate": 6.810308407266583e-06, "loss": 0.0055, "step": 8060 }, { "epoch": 0.1363832250323213, "grad_norm": 0.1746702939271927, "learning_rate": 6.8187579214195185e-06, "loss": 0.007, "step": 8070 }, { "epoch": 0.13655222531117175, "grad_norm": 0.26238518953323364, "learning_rate": 6.827207435572455e-06, "loss": 0.0073, "step": 8080 }, { "epoch": 0.13672122559002223, "grad_norm": 0.22383089363574982, "learning_rate": 6.8356569497253914e-06, "loss": 0.0064, "step": 8090 }, { "epoch": 0.13689022586887267, "grad_norm": 0.33924341201782227, "learning_rate": 6.844106463878327e-06, "loss": 0.0068, "step": 8100 }, { "epoch": 0.13705922614772315, "grad_norm": 0.25141218304634094, "learning_rate": 6.852555978031264e-06, "loss": 0.0041, "step": 8110 }, { "epoch": 0.1372282264265736, "grad_norm": 0.15798458456993103, "learning_rate": 6.861005492184199e-06, "loss": 0.006, "step": 8120 }, { "epoch": 0.13739722670542406, "grad_norm": 0.6393792629241943, "learning_rate": 6.869455006337136e-06, "loss": 0.0043, "step": 8130 }, { "epoch": 0.13756622698427454, "grad_norm": 0.12887583673000336, "learning_rate": 6.877904520490072e-06, "loss": 0.0069, "step": 8140 }, { "epoch": 0.13773522726312498, "grad_norm": 0.3371468484401703, "learning_rate": 6.886354034643009e-06, "loss": 0.0036, "step": 8150 }, { "epoch": 0.13790422754197545, "grad_norm": 1.44239342212677, "learning_rate": 6.894803548795945e-06, "loss": 0.0063, "step": 8160 }, { "epoch": 0.1380732278208259, "grad_norm": 0.2369224727153778, "learning_rate": 6.903253062948882e-06, "loss": 0.0062, "step": 8170 }, { "epoch": 0.13824222809967637, "grad_norm": 0.30964913964271545, "learning_rate": 6.911702577101817e-06, "loss": 0.0074, "step": 8180 }, { "epoch": 0.13841122837852682, "grad_norm": 0.27758997678756714, "learning_rate": 6.920152091254754e-06, "loss": 0.0073, "step": 8190 }, { "epoch": 0.1385802286573773, "grad_norm": 0.295818030834198, "learning_rate": 6.9286016054076895e-06, "loss": 0.0076, "step": 8200 }, { "epoch": 0.13874922893622774, "grad_norm": 0.2563367784023285, "learning_rate": 6.937051119560626e-06, "loss": 0.0062, "step": 8210 }, { "epoch": 0.1389182292150782, "grad_norm": 0.406826913356781, "learning_rate": 6.9455006337135625e-06, "loss": 0.0056, "step": 8220 }, { "epoch": 0.13908722949392865, "grad_norm": 0.34856709837913513, "learning_rate": 6.953950147866498e-06, "loss": 0.0057, "step": 8230 }, { "epoch": 0.13925622977277913, "grad_norm": 0.04294797405600548, "learning_rate": 6.962399662019435e-06, "loss": 0.0047, "step": 8240 }, { "epoch": 0.13942523005162957, "grad_norm": 0.28117257356643677, "learning_rate": 6.97084917617237e-06, "loss": 0.0088, "step": 8250 }, { "epoch": 0.13959423033048005, "grad_norm": 0.15075592696666718, "learning_rate": 6.979298690325307e-06, "loss": 0.0051, "step": 8260 }, { "epoch": 0.13976323060933052, "grad_norm": 0.2688157558441162, "learning_rate": 6.987748204478243e-06, "loss": 0.0093, "step": 8270 }, { "epoch": 0.13993223088818096, "grad_norm": 0.277136892080307, "learning_rate": 6.996197718631179e-06, "loss": 0.0041, "step": 8280 }, { "epoch": 0.14010123116703144, "grad_norm": 0.15421010553836823, "learning_rate": 7.004647232784115e-06, "loss": 0.0052, "step": 8290 }, { "epoch": 0.14027023144588188, "grad_norm": 0.20295974612236023, "learning_rate": 7.013096746937051e-06, "loss": 0.006, "step": 8300 }, { "epoch": 0.14043923172473236, "grad_norm": 0.24425384402275085, "learning_rate": 7.0215462610899875e-06, "loss": 0.0055, "step": 8310 }, { "epoch": 0.1406082320035828, "grad_norm": 0.20600204169750214, "learning_rate": 7.029995775242924e-06, "loss": 0.0041, "step": 8320 }, { "epoch": 0.14077723228243327, "grad_norm": 0.3156473636627197, "learning_rate": 7.03844528939586e-06, "loss": 0.0092, "step": 8330 }, { "epoch": 0.14094623256128372, "grad_norm": 0.09433050453662872, "learning_rate": 7.046894803548796e-06, "loss": 0.0041, "step": 8340 }, { "epoch": 0.1411152328401342, "grad_norm": 0.14726519584655762, "learning_rate": 7.055344317701732e-06, "loss": 0.0072, "step": 8350 }, { "epoch": 0.14128423311898464, "grad_norm": 0.40286463499069214, "learning_rate": 7.063793831854668e-06, "loss": 0.0052, "step": 8360 }, { "epoch": 0.1414532333978351, "grad_norm": 0.3082537055015564, "learning_rate": 7.072243346007606e-06, "loss": 0.0037, "step": 8370 }, { "epoch": 0.14162223367668555, "grad_norm": 0.1920309066772461, "learning_rate": 7.080692860160542e-06, "loss": 0.008, "step": 8380 }, { "epoch": 0.14179123395553603, "grad_norm": 0.30830442905426025, "learning_rate": 7.089142374313478e-06, "loss": 0.0067, "step": 8390 }, { "epoch": 0.14196023423438647, "grad_norm": 0.23955096304416656, "learning_rate": 7.097591888466414e-06, "loss": 0.0042, "step": 8400 }, { "epoch": 0.14212923451323695, "grad_norm": 0.44451770186424255, "learning_rate": 7.10604140261935e-06, "loss": 0.0055, "step": 8410 }, { "epoch": 0.14229823479208742, "grad_norm": 0.15314729511737823, "learning_rate": 7.114490916772286e-06, "loss": 0.0032, "step": 8420 }, { "epoch": 0.14246723507093786, "grad_norm": 0.27596771717071533, "learning_rate": 7.122940430925223e-06, "loss": 0.0051, "step": 8430 }, { "epoch": 0.14263623534978834, "grad_norm": 0.11215230822563171, "learning_rate": 7.1313899450781586e-06, "loss": 0.0043, "step": 8440 }, { "epoch": 0.14280523562863878, "grad_norm": 0.35231873393058777, "learning_rate": 7.139839459231095e-06, "loss": 0.0051, "step": 8450 }, { "epoch": 0.14297423590748926, "grad_norm": 0.26749947667121887, "learning_rate": 7.148288973384031e-06, "loss": 0.0057, "step": 8460 }, { "epoch": 0.1431432361863397, "grad_norm": 0.1803816854953766, "learning_rate": 7.156738487536967e-06, "loss": 0.0042, "step": 8470 }, { "epoch": 0.14331223646519017, "grad_norm": 0.264035701751709, "learning_rate": 7.165188001689904e-06, "loss": 0.0067, "step": 8480 }, { "epoch": 0.14348123674404062, "grad_norm": 0.22792071104049683, "learning_rate": 7.173637515842839e-06, "loss": 0.0047, "step": 8490 }, { "epoch": 0.1436502370228911, "grad_norm": 0.0940081998705864, "learning_rate": 7.182087029995776e-06, "loss": 0.006, "step": 8500 }, { "epoch": 0.14381923730174154, "grad_norm": 0.33512696623802185, "learning_rate": 7.1905365441487115e-06, "loss": 0.0049, "step": 8510 }, { "epoch": 0.143988237580592, "grad_norm": 0.1594853699207306, "learning_rate": 7.198986058301648e-06, "loss": 0.0035, "step": 8520 }, { "epoch": 0.14415723785944246, "grad_norm": 0.33963024616241455, "learning_rate": 7.2074355724545845e-06, "loss": 0.0043, "step": 8530 }, { "epoch": 0.14432623813829293, "grad_norm": 2.5675406455993652, "learning_rate": 7.21588508660752e-06, "loss": 0.0066, "step": 8540 }, { "epoch": 0.1444952384171434, "grad_norm": 0.38166311383247375, "learning_rate": 7.224334600760457e-06, "loss": 0.0055, "step": 8550 }, { "epoch": 0.14466423869599385, "grad_norm": 0.15342292189598083, "learning_rate": 7.232784114913392e-06, "loss": 0.0049, "step": 8560 }, { "epoch": 0.14483323897484432, "grad_norm": 0.20990873873233795, "learning_rate": 7.241233629066329e-06, "loss": 0.0051, "step": 8570 }, { "epoch": 0.14500223925369476, "grad_norm": 0.17052221298217773, "learning_rate": 7.249683143219266e-06, "loss": 0.0049, "step": 8580 }, { "epoch": 0.14517123953254524, "grad_norm": 0.4310913681983948, "learning_rate": 7.258132657372203e-06, "loss": 0.0063, "step": 8590 }, { "epoch": 0.14534023981139568, "grad_norm": 0.25773683190345764, "learning_rate": 7.266582171525138e-06, "loss": 0.0108, "step": 8600 }, { "epoch": 0.14550924009024616, "grad_norm": 0.20230819284915924, "learning_rate": 7.275031685678075e-06, "loss": 0.008, "step": 8610 }, { "epoch": 0.1456782403690966, "grad_norm": 0.21330519020557404, "learning_rate": 7.28348119983101e-06, "loss": 0.0057, "step": 8620 }, { "epoch": 0.14584724064794707, "grad_norm": 0.22225132584571838, "learning_rate": 7.291930713983947e-06, "loss": 0.0047, "step": 8630 }, { "epoch": 0.14601624092679752, "grad_norm": 0.23057548701763153, "learning_rate": 7.300380228136883e-06, "loss": 0.0075, "step": 8640 }, { "epoch": 0.146185241205648, "grad_norm": 0.10859983414411545, "learning_rate": 7.308829742289819e-06, "loss": 0.0053, "step": 8650 }, { "epoch": 0.14635424148449844, "grad_norm": 0.3728826642036438, "learning_rate": 7.3172792564427555e-06, "loss": 0.0076, "step": 8660 }, { "epoch": 0.1465232417633489, "grad_norm": 0.24150754511356354, "learning_rate": 7.325728770595691e-06, "loss": 0.0065, "step": 8670 }, { "epoch": 0.14669224204219938, "grad_norm": 0.2621629536151886, "learning_rate": 7.334178284748628e-06, "loss": 0.0038, "step": 8680 }, { "epoch": 0.14686124232104983, "grad_norm": 0.1941870152950287, "learning_rate": 7.342627798901564e-06, "loss": 0.0056, "step": 8690 }, { "epoch": 0.1470302425999003, "grad_norm": 0.3373866677284241, "learning_rate": 7.3510773130545e-06, "loss": 0.007, "step": 8700 }, { "epoch": 0.14719924287875075, "grad_norm": 0.27150392532348633, "learning_rate": 7.359526827207436e-06, "loss": 0.0042, "step": 8710 }, { "epoch": 0.14736824315760122, "grad_norm": 0.271064430475235, "learning_rate": 7.367976341360372e-06, "loss": 0.0072, "step": 8720 }, { "epoch": 0.14753724343645166, "grad_norm": 0.11885469406843185, "learning_rate": 7.376425855513308e-06, "loss": 0.0064, "step": 8730 }, { "epoch": 0.14770624371530214, "grad_norm": 0.36805516481399536, "learning_rate": 7.384875369666245e-06, "loss": 0.0046, "step": 8740 }, { "epoch": 0.14787524399415258, "grad_norm": 0.3535545766353607, "learning_rate": 7.3933248838191806e-06, "loss": 0.0052, "step": 8750 }, { "epoch": 0.14804424427300306, "grad_norm": 0.4129721224308014, "learning_rate": 7.401774397972117e-06, "loss": 0.0063, "step": 8760 }, { "epoch": 0.1482132445518535, "grad_norm": 0.24997562170028687, "learning_rate": 7.410223912125053e-06, "loss": 0.0063, "step": 8770 }, { "epoch": 0.14838224483070397, "grad_norm": 0.28022584319114685, "learning_rate": 7.418673426277989e-06, "loss": 0.0059, "step": 8780 }, { "epoch": 0.14855124510955442, "grad_norm": 0.2782224118709564, "learning_rate": 7.427122940430926e-06, "loss": 0.0051, "step": 8790 }, { "epoch": 0.1487202453884049, "grad_norm": 0.17080119252204895, "learning_rate": 7.435572454583862e-06, "loss": 0.0047, "step": 8800 }, { "epoch": 0.14888924566725534, "grad_norm": 0.2720976769924164, "learning_rate": 7.444021968736799e-06, "loss": 0.0042, "step": 8810 }, { "epoch": 0.1490582459461058, "grad_norm": 0.1649436503648758, "learning_rate": 7.452471482889735e-06, "loss": 0.0037, "step": 8820 }, { "epoch": 0.14922724622495628, "grad_norm": 0.1506228744983673, "learning_rate": 7.460920997042671e-06, "loss": 0.0087, "step": 8830 }, { "epoch": 0.14939624650380673, "grad_norm": 0.21330970525741577, "learning_rate": 7.469370511195607e-06, "loss": 0.0055, "step": 8840 }, { "epoch": 0.1495652467826572, "grad_norm": 0.12506476044654846, "learning_rate": 7.477820025348543e-06, "loss": 0.0058, "step": 8850 }, { "epoch": 0.14973424706150765, "grad_norm": 0.1709899604320526, "learning_rate": 7.4862695395014794e-06, "loss": 0.0043, "step": 8860 }, { "epoch": 0.14990324734035812, "grad_norm": 0.16848011314868927, "learning_rate": 7.494719053654416e-06, "loss": 0.0042, "step": 8870 }, { "epoch": 0.15007224761920857, "grad_norm": 0.12850339710712433, "learning_rate": 7.503168567807352e-06, "loss": 0.0056, "step": 8880 }, { "epoch": 0.15024124789805904, "grad_norm": 0.3136073648929596, "learning_rate": 7.511618081960288e-06, "loss": 0.0029, "step": 8890 }, { "epoch": 0.15041024817690948, "grad_norm": 0.15144753456115723, "learning_rate": 7.520067596113224e-06, "loss": 0.0045, "step": 8900 }, { "epoch": 0.15057924845575996, "grad_norm": 0.4413564205169678, "learning_rate": 7.52851711026616e-06, "loss": 0.0054, "step": 8910 }, { "epoch": 0.1507482487346104, "grad_norm": 0.5044617056846619, "learning_rate": 7.536966624419097e-06, "loss": 0.008, "step": 8920 }, { "epoch": 0.15091724901346087, "grad_norm": 0.16861020028591156, "learning_rate": 7.545416138572032e-06, "loss": 0.004, "step": 8930 }, { "epoch": 0.15108624929231132, "grad_norm": 0.2561737298965454, "learning_rate": 7.553865652724969e-06, "loss": 0.0082, "step": 8940 }, { "epoch": 0.1512552495711618, "grad_norm": 0.12116044759750366, "learning_rate": 7.5623151668779045e-06, "loss": 0.005, "step": 8950 }, { "epoch": 0.15142424985001227, "grad_norm": 0.16940058767795563, "learning_rate": 7.570764681030841e-06, "loss": 0.0058, "step": 8960 }, { "epoch": 0.1515932501288627, "grad_norm": 0.18350815773010254, "learning_rate": 7.5792141951837775e-06, "loss": 0.0041, "step": 8970 }, { "epoch": 0.15176225040771318, "grad_norm": 0.1131502166390419, "learning_rate": 7.587663709336713e-06, "loss": 0.0044, "step": 8980 }, { "epoch": 0.15193125068656363, "grad_norm": 0.15428772568702698, "learning_rate": 7.59611322348965e-06, "loss": 0.0053, "step": 8990 }, { "epoch": 0.1521002509654141, "grad_norm": 0.19609490036964417, "learning_rate": 7.604562737642585e-06, "loss": 0.006, "step": 9000 }, { "epoch": 0.15226925124426455, "grad_norm": 0.2686976194381714, "learning_rate": 7.613012251795522e-06, "loss": 0.0036, "step": 9010 }, { "epoch": 0.15243825152311502, "grad_norm": 0.11909843981266022, "learning_rate": 7.621461765948459e-06, "loss": 0.0039, "step": 9020 }, { "epoch": 0.15260725180196547, "grad_norm": 0.26848167181015015, "learning_rate": 7.629911280101395e-06, "loss": 0.0048, "step": 9030 }, { "epoch": 0.15277625208081594, "grad_norm": 0.23602674901485443, "learning_rate": 7.638360794254331e-06, "loss": 0.0063, "step": 9040 }, { "epoch": 0.15294525235966638, "grad_norm": 0.47805437445640564, "learning_rate": 7.646810308407268e-06, "loss": 0.0057, "step": 9050 }, { "epoch": 0.15311425263851686, "grad_norm": 0.3113015294075012, "learning_rate": 7.655259822560204e-06, "loss": 0.0066, "step": 9060 }, { "epoch": 0.1532832529173673, "grad_norm": 0.1774858981370926, "learning_rate": 7.663709336713139e-06, "loss": 0.0048, "step": 9070 }, { "epoch": 0.15345225319621777, "grad_norm": 0.1720050722360611, "learning_rate": 7.672158850866076e-06, "loss": 0.0055, "step": 9080 }, { "epoch": 0.15362125347506822, "grad_norm": 0.2195959836244583, "learning_rate": 7.680608365019012e-06, "loss": 0.0076, "step": 9090 }, { "epoch": 0.1537902537539187, "grad_norm": 0.209056556224823, "learning_rate": 7.689057879171949e-06, "loss": 0.0069, "step": 9100 }, { "epoch": 0.15395925403276917, "grad_norm": 0.31863802671432495, "learning_rate": 7.697507393324885e-06, "loss": 0.0036, "step": 9110 }, { "epoch": 0.1541282543116196, "grad_norm": 0.1689879596233368, "learning_rate": 7.70595690747782e-06, "loss": 0.0035, "step": 9120 }, { "epoch": 0.15429725459047008, "grad_norm": 0.3629443049430847, "learning_rate": 7.714406421630756e-06, "loss": 0.0052, "step": 9130 }, { "epoch": 0.15446625486932053, "grad_norm": 0.2640196681022644, "learning_rate": 7.722855935783693e-06, "loss": 0.0094, "step": 9140 }, { "epoch": 0.154635255148171, "grad_norm": 0.06258325278759003, "learning_rate": 7.73130544993663e-06, "loss": 0.0059, "step": 9150 }, { "epoch": 0.15480425542702145, "grad_norm": 0.02285955473780632, "learning_rate": 7.739754964089566e-06, "loss": 0.0054, "step": 9160 }, { "epoch": 0.15497325570587192, "grad_norm": 0.1107051745057106, "learning_rate": 7.7482044782425e-06, "loss": 0.0037, "step": 9170 }, { "epoch": 0.15514225598472237, "grad_norm": 0.24938778579235077, "learning_rate": 7.756653992395437e-06, "loss": 0.0074, "step": 9180 }, { "epoch": 0.15531125626357284, "grad_norm": 0.18850697576999664, "learning_rate": 7.765103506548374e-06, "loss": 0.0047, "step": 9190 }, { "epoch": 0.15548025654242328, "grad_norm": 0.2750691771507263, "learning_rate": 7.77355302070131e-06, "loss": 0.0075, "step": 9200 }, { "epoch": 0.15564925682127376, "grad_norm": 0.13034598529338837, "learning_rate": 7.782002534854247e-06, "loss": 0.0044, "step": 9210 }, { "epoch": 0.1558182571001242, "grad_norm": 0.21011145412921906, "learning_rate": 7.790452049007183e-06, "loss": 0.0053, "step": 9220 }, { "epoch": 0.15598725737897468, "grad_norm": 0.30511417984962463, "learning_rate": 7.79890156316012e-06, "loss": 0.0055, "step": 9230 }, { "epoch": 0.15615625765782515, "grad_norm": 0.17314879596233368, "learning_rate": 7.807351077313056e-06, "loss": 0.0039, "step": 9240 }, { "epoch": 0.1563252579366756, "grad_norm": 0.2600672245025635, "learning_rate": 7.815800591465993e-06, "loss": 0.0065, "step": 9250 }, { "epoch": 0.15649425821552607, "grad_norm": 0.30571845173835754, "learning_rate": 7.824250105618927e-06, "loss": 0.0046, "step": 9260 }, { "epoch": 0.1566632584943765, "grad_norm": 0.2151174694299698, "learning_rate": 7.832699619771864e-06, "loss": 0.0062, "step": 9270 }, { "epoch": 0.15683225877322698, "grad_norm": 0.14800691604614258, "learning_rate": 7.8411491339248e-06, "loss": 0.0058, "step": 9280 }, { "epoch": 0.15700125905207743, "grad_norm": 0.2288718819618225, "learning_rate": 7.849598648077737e-06, "loss": 0.0062, "step": 9290 }, { "epoch": 0.1571702593309279, "grad_norm": 0.24087797105312347, "learning_rate": 7.858048162230673e-06, "loss": 0.0059, "step": 9300 }, { "epoch": 0.15733925960977835, "grad_norm": 0.2878643274307251, "learning_rate": 7.866497676383608e-06, "loss": 0.006, "step": 9310 }, { "epoch": 0.15750825988862882, "grad_norm": 0.14686961472034454, "learning_rate": 7.874947190536545e-06, "loss": 0.0054, "step": 9320 }, { "epoch": 0.15767726016747927, "grad_norm": 0.29242226481437683, "learning_rate": 7.883396704689481e-06, "loss": 0.0059, "step": 9330 }, { "epoch": 0.15784626044632974, "grad_norm": 0.32249775528907776, "learning_rate": 7.891846218842418e-06, "loss": 0.005, "step": 9340 }, { "epoch": 0.15801526072518018, "grad_norm": 0.6770492196083069, "learning_rate": 7.900295732995354e-06, "loss": 0.0064, "step": 9350 }, { "epoch": 0.15818426100403066, "grad_norm": 0.2366202175617218, "learning_rate": 7.908745247148289e-06, "loss": 0.0077, "step": 9360 }, { "epoch": 0.15835326128288113, "grad_norm": 0.36044564843177795, "learning_rate": 7.917194761301225e-06, "loss": 0.0071, "step": 9370 }, { "epoch": 0.15852226156173158, "grad_norm": 0.37434545159339905, "learning_rate": 7.925644275454162e-06, "loss": 0.0056, "step": 9380 }, { "epoch": 0.15869126184058205, "grad_norm": 0.4788360893726349, "learning_rate": 7.934093789607098e-06, "loss": 0.006, "step": 9390 }, { "epoch": 0.1588602621194325, "grad_norm": 0.28214511275291443, "learning_rate": 7.942543303760035e-06, "loss": 0.004, "step": 9400 }, { "epoch": 0.15902926239828297, "grad_norm": 0.11522787064313889, "learning_rate": 7.95099281791297e-06, "loss": 0.0041, "step": 9410 }, { "epoch": 0.1591982626771334, "grad_norm": 0.33369603753089905, "learning_rate": 7.959442332065906e-06, "loss": 0.0064, "step": 9420 }, { "epoch": 0.15936726295598388, "grad_norm": 0.15886670351028442, "learning_rate": 7.967891846218843e-06, "loss": 0.0045, "step": 9430 }, { "epoch": 0.15953626323483433, "grad_norm": 0.378985732793808, "learning_rate": 7.976341360371779e-06, "loss": 0.0095, "step": 9440 }, { "epoch": 0.1597052635136848, "grad_norm": 0.7028583884239197, "learning_rate": 7.984790874524716e-06, "loss": 0.0075, "step": 9450 }, { "epoch": 0.15987426379253525, "grad_norm": 0.5431110262870789, "learning_rate": 7.993240388677652e-06, "loss": 0.0071, "step": 9460 }, { "epoch": 0.16004326407138572, "grad_norm": 0.21072594821453094, "learning_rate": 8.001689902830589e-06, "loss": 0.0059, "step": 9470 }, { "epoch": 0.16021226435023617, "grad_norm": 0.2906375527381897, "learning_rate": 8.010139416983525e-06, "loss": 0.0061, "step": 9480 }, { "epoch": 0.16038126462908664, "grad_norm": 0.5510354042053223, "learning_rate": 8.01858893113646e-06, "loss": 0.0108, "step": 9490 }, { "epoch": 0.16055026490793708, "grad_norm": 0.0482487753033638, "learning_rate": 8.027038445289396e-06, "loss": 0.0063, "step": 9500 }, { "epoch": 0.16071926518678756, "grad_norm": 0.34865421056747437, "learning_rate": 8.035487959442333e-06, "loss": 0.0073, "step": 9510 }, { "epoch": 0.16088826546563803, "grad_norm": 0.1495426744222641, "learning_rate": 8.04393747359527e-06, "loss": 0.0062, "step": 9520 }, { "epoch": 0.16105726574448848, "grad_norm": 0.3056929409503937, "learning_rate": 8.052386987748206e-06, "loss": 0.0077, "step": 9530 }, { "epoch": 0.16122626602333895, "grad_norm": 0.13944317400455475, "learning_rate": 8.06083650190114e-06, "loss": 0.0044, "step": 9540 }, { "epoch": 0.1613952663021894, "grad_norm": 0.2999503016471863, "learning_rate": 8.069286016054077e-06, "loss": 0.0073, "step": 9550 }, { "epoch": 0.16156426658103987, "grad_norm": 0.22944676876068115, "learning_rate": 8.077735530207014e-06, "loss": 0.0043, "step": 9560 }, { "epoch": 0.1617332668598903, "grad_norm": 0.18615588545799255, "learning_rate": 8.08618504435995e-06, "loss": 0.0032, "step": 9570 }, { "epoch": 0.16190226713874079, "grad_norm": 0.10034358501434326, "learning_rate": 8.094634558512887e-06, "loss": 0.0052, "step": 9580 }, { "epoch": 0.16207126741759123, "grad_norm": 0.19554342329502106, "learning_rate": 8.103084072665821e-06, "loss": 0.0052, "step": 9590 }, { "epoch": 0.1622402676964417, "grad_norm": 0.18776758015155792, "learning_rate": 8.111533586818758e-06, "loss": 0.0092, "step": 9600 }, { "epoch": 0.16240926797529215, "grad_norm": 0.16730335354804993, "learning_rate": 8.119983100971694e-06, "loss": 0.0082, "step": 9610 }, { "epoch": 0.16257826825414262, "grad_norm": 0.2268962264060974, "learning_rate": 8.128432615124631e-06, "loss": 0.0031, "step": 9620 }, { "epoch": 0.16274726853299307, "grad_norm": 0.2297000288963318, "learning_rate": 8.136882129277567e-06, "loss": 0.0051, "step": 9630 }, { "epoch": 0.16291626881184354, "grad_norm": 0.29321032762527466, "learning_rate": 8.145331643430502e-06, "loss": 0.0077, "step": 9640 }, { "epoch": 0.163085269090694, "grad_norm": 0.10185467451810837, "learning_rate": 8.153781157583439e-06, "loss": 0.0075, "step": 9650 }, { "epoch": 0.16325426936954446, "grad_norm": 0.23513555526733398, "learning_rate": 8.162230671736375e-06, "loss": 0.0052, "step": 9660 }, { "epoch": 0.16342326964839493, "grad_norm": 0.29854118824005127, "learning_rate": 8.170680185889312e-06, "loss": 0.0057, "step": 9670 }, { "epoch": 0.16359226992724538, "grad_norm": 0.11316808313131332, "learning_rate": 8.179129700042248e-06, "loss": 0.0039, "step": 9680 }, { "epoch": 0.16376127020609585, "grad_norm": 0.11226405948400497, "learning_rate": 8.187579214195185e-06, "loss": 0.0053, "step": 9690 }, { "epoch": 0.1639302704849463, "grad_norm": 0.5727311372756958, "learning_rate": 8.196028728348121e-06, "loss": 0.006, "step": 9700 }, { "epoch": 0.16409927076379677, "grad_norm": 0.09775212407112122, "learning_rate": 8.204478242501058e-06, "loss": 0.0033, "step": 9710 }, { "epoch": 0.1642682710426472, "grad_norm": 0.11045091599225998, "learning_rate": 8.212927756653993e-06, "loss": 0.0039, "step": 9720 }, { "epoch": 0.16443727132149769, "grad_norm": 0.38140159845352173, "learning_rate": 8.221377270806929e-06, "loss": 0.0075, "step": 9730 }, { "epoch": 0.16460627160034813, "grad_norm": 0.2585310935974121, "learning_rate": 8.229826784959865e-06, "loss": 0.0073, "step": 9740 }, { "epoch": 0.1647752718791986, "grad_norm": 0.7000439167022705, "learning_rate": 8.238276299112802e-06, "loss": 0.0055, "step": 9750 }, { "epoch": 0.16494427215804905, "grad_norm": 0.12472444027662277, "learning_rate": 8.246725813265738e-06, "loss": 0.0044, "step": 9760 }, { "epoch": 0.16511327243689952, "grad_norm": 0.09951147437095642, "learning_rate": 8.255175327418673e-06, "loss": 0.0049, "step": 9770 }, { "epoch": 0.16528227271574997, "grad_norm": 0.23709943890571594, "learning_rate": 8.26362484157161e-06, "loss": 0.0069, "step": 9780 }, { "epoch": 0.16545127299460044, "grad_norm": 0.16604699194431305, "learning_rate": 8.272074355724546e-06, "loss": 0.005, "step": 9790 }, { "epoch": 0.1656202732734509, "grad_norm": 0.4178698658943176, "learning_rate": 8.280523869877483e-06, "loss": 0.0061, "step": 9800 }, { "epoch": 0.16578927355230136, "grad_norm": 1.2892835140228271, "learning_rate": 8.28897338403042e-06, "loss": 0.0044, "step": 9810 }, { "epoch": 0.16595827383115183, "grad_norm": 0.6563330292701721, "learning_rate": 8.297422898183354e-06, "loss": 0.0104, "step": 9820 }, { "epoch": 0.16612727411000228, "grad_norm": 0.11363092809915543, "learning_rate": 8.30587241233629e-06, "loss": 0.0057, "step": 9830 }, { "epoch": 0.16629627438885275, "grad_norm": 0.14816680550575256, "learning_rate": 8.314321926489227e-06, "loss": 0.0037, "step": 9840 }, { "epoch": 0.1664652746677032, "grad_norm": 0.134018212556839, "learning_rate": 8.322771440642164e-06, "loss": 0.0058, "step": 9850 }, { "epoch": 0.16663427494655367, "grad_norm": 0.20563648641109467, "learning_rate": 8.3312209547951e-06, "loss": 0.0073, "step": 9860 }, { "epoch": 0.1668032752254041, "grad_norm": 0.10587208718061447, "learning_rate": 8.339670468948035e-06, "loss": 0.0059, "step": 9870 }, { "epoch": 0.16697227550425459, "grad_norm": 0.2292458564043045, "learning_rate": 8.348119983100971e-06, "loss": 0.0078, "step": 9880 }, { "epoch": 0.16714127578310503, "grad_norm": 0.24652674794197083, "learning_rate": 8.35656949725391e-06, "loss": 0.0054, "step": 9890 }, { "epoch": 0.1673102760619555, "grad_norm": 0.15121500194072723, "learning_rate": 8.365019011406846e-06, "loss": 0.0047, "step": 9900 }, { "epoch": 0.16747927634080595, "grad_norm": 0.07074388861656189, "learning_rate": 8.37346852555978e-06, "loss": 0.0068, "step": 9910 }, { "epoch": 0.16764827661965642, "grad_norm": 0.0782100260257721, "learning_rate": 8.381918039712717e-06, "loss": 0.0041, "step": 9920 }, { "epoch": 0.1678172768985069, "grad_norm": 0.14748837053775787, "learning_rate": 8.390367553865654e-06, "loss": 0.0043, "step": 9930 }, { "epoch": 0.16798627717735734, "grad_norm": 0.1910974383354187, "learning_rate": 8.39881706801859e-06, "loss": 0.0063, "step": 9940 }, { "epoch": 0.1681552774562078, "grad_norm": 0.26988935470581055, "learning_rate": 8.407266582171527e-06, "loss": 0.0045, "step": 9950 }, { "epoch": 0.16832427773505826, "grad_norm": 0.22018760442733765, "learning_rate": 8.415716096324462e-06, "loss": 0.0067, "step": 9960 }, { "epoch": 0.16849327801390873, "grad_norm": 0.15286549925804138, "learning_rate": 8.424165610477398e-06, "loss": 0.005, "step": 9970 }, { "epoch": 0.16866227829275918, "grad_norm": 0.04976119101047516, "learning_rate": 8.432615124630335e-06, "loss": 0.0045, "step": 9980 }, { "epoch": 0.16883127857160965, "grad_norm": 0.1893671154975891, "learning_rate": 8.441064638783271e-06, "loss": 0.0047, "step": 9990 }, { "epoch": 0.1690002788504601, "grad_norm": 0.2919318974018097, "learning_rate": 8.449514152936208e-06, "loss": 0.0057, "step": 10000 }, { "epoch": 0.16916927912931057, "grad_norm": 0.21773135662078857, "learning_rate": 8.457963667089142e-06, "loss": 0.0059, "step": 10010 }, { "epoch": 0.169338279408161, "grad_norm": 0.13610824942588806, "learning_rate": 8.466413181242079e-06, "loss": 0.0052, "step": 10020 }, { "epoch": 0.1695072796870115, "grad_norm": 0.17957353591918945, "learning_rate": 8.474862695395015e-06, "loss": 0.0049, "step": 10030 }, { "epoch": 0.16967627996586193, "grad_norm": 0.2154211401939392, "learning_rate": 8.483312209547952e-06, "loss": 0.0064, "step": 10040 }, { "epoch": 0.1698452802447124, "grad_norm": 0.24149611592292786, "learning_rate": 8.491761723700888e-06, "loss": 0.0035, "step": 10050 }, { "epoch": 0.17001428052356288, "grad_norm": 0.43823128938674927, "learning_rate": 8.500211237853823e-06, "loss": 0.0052, "step": 10060 }, { "epoch": 0.17018328080241332, "grad_norm": 0.4004029333591461, "learning_rate": 8.50866075200676e-06, "loss": 0.004, "step": 10070 }, { "epoch": 0.1703522810812638, "grad_norm": 0.23778338730335236, "learning_rate": 8.517110266159696e-06, "loss": 0.0046, "step": 10080 }, { "epoch": 0.17052128136011424, "grad_norm": 0.09642806649208069, "learning_rate": 8.525559780312633e-06, "loss": 0.0073, "step": 10090 }, { "epoch": 0.1706902816389647, "grad_norm": 0.1605379730463028, "learning_rate": 8.534009294465569e-06, "loss": 0.0043, "step": 10100 }, { "epoch": 0.17085928191781516, "grad_norm": 0.24077445268630981, "learning_rate": 8.542458808618506e-06, "loss": 0.0057, "step": 10110 }, { "epoch": 0.17102828219666563, "grad_norm": 0.43282195925712585, "learning_rate": 8.550908322771442e-06, "loss": 0.0053, "step": 10120 }, { "epoch": 0.17119728247551608, "grad_norm": 0.23690132796764374, "learning_rate": 8.559357836924379e-06, "loss": 0.0047, "step": 10130 }, { "epoch": 0.17136628275436655, "grad_norm": 0.1514607071876526, "learning_rate": 8.567807351077313e-06, "loss": 0.0049, "step": 10140 }, { "epoch": 0.171535283033217, "grad_norm": 0.23836401104927063, "learning_rate": 8.57625686523025e-06, "loss": 0.0044, "step": 10150 }, { "epoch": 0.17170428331206747, "grad_norm": 0.17873355746269226, "learning_rate": 8.584706379383186e-06, "loss": 0.0039, "step": 10160 }, { "epoch": 0.1718732835909179, "grad_norm": 0.22357626259326935, "learning_rate": 8.593155893536123e-06, "loss": 0.0067, "step": 10170 }, { "epoch": 0.1720422838697684, "grad_norm": 0.3310186266899109, "learning_rate": 8.60160540768906e-06, "loss": 0.0048, "step": 10180 }, { "epoch": 0.17221128414861883, "grad_norm": 0.2950020134449005, "learning_rate": 8.610054921841994e-06, "loss": 0.0081, "step": 10190 }, { "epoch": 0.1723802844274693, "grad_norm": 0.1528913527727127, "learning_rate": 8.61850443599493e-06, "loss": 0.0035, "step": 10200 }, { "epoch": 0.17254928470631978, "grad_norm": 0.09008288383483887, "learning_rate": 8.626953950147867e-06, "loss": 0.0065, "step": 10210 }, { "epoch": 0.17271828498517022, "grad_norm": 0.28624215722084045, "learning_rate": 8.635403464300804e-06, "loss": 0.0055, "step": 10220 }, { "epoch": 0.1728872852640207, "grad_norm": 0.33840370178222656, "learning_rate": 8.64385297845374e-06, "loss": 0.0045, "step": 10230 }, { "epoch": 0.17305628554287114, "grad_norm": 0.2384103536605835, "learning_rate": 8.652302492606675e-06, "loss": 0.0059, "step": 10240 }, { "epoch": 0.17322528582172161, "grad_norm": 0.1607908308506012, "learning_rate": 8.660752006759611e-06, "loss": 0.0047, "step": 10250 }, { "epoch": 0.17339428610057206, "grad_norm": 0.31546470522880554, "learning_rate": 8.669201520912548e-06, "loss": 0.0043, "step": 10260 }, { "epoch": 0.17356328637942253, "grad_norm": 0.21398183703422546, "learning_rate": 8.677651035065484e-06, "loss": 0.0086, "step": 10270 }, { "epoch": 0.17373228665827298, "grad_norm": 0.35206085443496704, "learning_rate": 8.686100549218421e-06, "loss": 0.0052, "step": 10280 }, { "epoch": 0.17390128693712345, "grad_norm": 0.15468555688858032, "learning_rate": 8.694550063371356e-06, "loss": 0.004, "step": 10290 }, { "epoch": 0.1740702872159739, "grad_norm": 0.2814697027206421, "learning_rate": 8.702999577524292e-06, "loss": 0.0064, "step": 10300 }, { "epoch": 0.17423928749482437, "grad_norm": 0.3659762144088745, "learning_rate": 8.711449091677229e-06, "loss": 0.0088, "step": 10310 }, { "epoch": 0.1744082877736748, "grad_norm": 0.11075379699468613, "learning_rate": 8.719898605830165e-06, "loss": 0.007, "step": 10320 }, { "epoch": 0.1745772880525253, "grad_norm": 0.45485588908195496, "learning_rate": 8.728348119983102e-06, "loss": 0.0052, "step": 10330 }, { "epoch": 0.17474628833137576, "grad_norm": 0.1501804143190384, "learning_rate": 8.736797634136038e-06, "loss": 0.0037, "step": 10340 }, { "epoch": 0.1749152886102262, "grad_norm": 0.2205362170934677, "learning_rate": 8.745247148288975e-06, "loss": 0.007, "step": 10350 }, { "epoch": 0.17508428888907668, "grad_norm": 0.18038053810596466, "learning_rate": 8.753696662441911e-06, "loss": 0.003, "step": 10360 }, { "epoch": 0.17525328916792712, "grad_norm": 0.22610805928707123, "learning_rate": 8.762146176594846e-06, "loss": 0.0062, "step": 10370 }, { "epoch": 0.1754222894467776, "grad_norm": 0.3337898254394531, "learning_rate": 8.770595690747782e-06, "loss": 0.0055, "step": 10380 }, { "epoch": 0.17559128972562804, "grad_norm": 0.16095170378684998, "learning_rate": 8.779045204900719e-06, "loss": 0.0063, "step": 10390 }, { "epoch": 0.17576029000447851, "grad_norm": 0.22648227214813232, "learning_rate": 8.787494719053655e-06, "loss": 0.0039, "step": 10400 }, { "epoch": 0.17592929028332896, "grad_norm": 0.21391649544239044, "learning_rate": 8.795944233206592e-06, "loss": 0.0025, "step": 10410 }, { "epoch": 0.17609829056217943, "grad_norm": 0.1315697729587555, "learning_rate": 8.804393747359527e-06, "loss": 0.0049, "step": 10420 }, { "epoch": 0.17626729084102988, "grad_norm": 0.1340722292661667, "learning_rate": 8.812843261512463e-06, "loss": 0.0049, "step": 10430 }, { "epoch": 0.17643629111988035, "grad_norm": 0.2515009045600891, "learning_rate": 8.8212927756654e-06, "loss": 0.0042, "step": 10440 }, { "epoch": 0.1766052913987308, "grad_norm": 0.33529043197631836, "learning_rate": 8.829742289818336e-06, "loss": 0.0051, "step": 10450 }, { "epoch": 0.17677429167758127, "grad_norm": 0.880221962928772, "learning_rate": 8.838191803971273e-06, "loss": 0.0053, "step": 10460 }, { "epoch": 0.17694329195643174, "grad_norm": 0.22810405492782593, "learning_rate": 8.846641318124208e-06, "loss": 0.0056, "step": 10470 }, { "epoch": 0.1771122922352822, "grad_norm": 0.5324404239654541, "learning_rate": 8.855090832277144e-06, "loss": 0.0042, "step": 10480 }, { "epoch": 0.17728129251413266, "grad_norm": 0.30728843808174133, "learning_rate": 8.86354034643008e-06, "loss": 0.0052, "step": 10490 }, { "epoch": 0.1774502927929831, "grad_norm": 0.26511019468307495, "learning_rate": 8.871989860583017e-06, "loss": 0.0065, "step": 10500 }, { "epoch": 0.17761929307183358, "grad_norm": 0.3257712125778198, "learning_rate": 8.880439374735953e-06, "loss": 0.0076, "step": 10510 }, { "epoch": 0.17778829335068402, "grad_norm": 0.40313950181007385, "learning_rate": 8.888888888888888e-06, "loss": 0.0057, "step": 10520 }, { "epoch": 0.1779572936295345, "grad_norm": 0.4945511519908905, "learning_rate": 8.897338403041825e-06, "loss": 0.0065, "step": 10530 }, { "epoch": 0.17812629390838494, "grad_norm": 0.4786947965621948, "learning_rate": 8.905787917194763e-06, "loss": 0.0034, "step": 10540 }, { "epoch": 0.17829529418723541, "grad_norm": 0.21327023208141327, "learning_rate": 8.9142374313477e-06, "loss": 0.0039, "step": 10550 }, { "epoch": 0.17846429446608586, "grad_norm": 0.33647388219833374, "learning_rate": 8.922686945500634e-06, "loss": 0.0048, "step": 10560 }, { "epoch": 0.17863329474493633, "grad_norm": 0.5618618726730347, "learning_rate": 8.93113645965357e-06, "loss": 0.0049, "step": 10570 }, { "epoch": 0.17880229502378678, "grad_norm": 0.324091374874115, "learning_rate": 8.939585973806507e-06, "loss": 0.0055, "step": 10580 }, { "epoch": 0.17897129530263725, "grad_norm": 0.09489540755748749, "learning_rate": 8.948035487959444e-06, "loss": 0.0037, "step": 10590 }, { "epoch": 0.1791402955814877, "grad_norm": 0.1073186919093132, "learning_rate": 8.95648500211238e-06, "loss": 0.0056, "step": 10600 }, { "epoch": 0.17930929586033817, "grad_norm": 0.027942942455410957, "learning_rate": 8.964934516265315e-06, "loss": 0.0046, "step": 10610 }, { "epoch": 0.17947829613918864, "grad_norm": 0.21087293326854706, "learning_rate": 8.973384030418252e-06, "loss": 0.0036, "step": 10620 }, { "epoch": 0.1796472964180391, "grad_norm": 0.2617759704589844, "learning_rate": 8.981833544571188e-06, "loss": 0.0051, "step": 10630 }, { "epoch": 0.17981629669688956, "grad_norm": 0.3466041684150696, "learning_rate": 8.990283058724125e-06, "loss": 0.0122, "step": 10640 }, { "epoch": 0.17998529697574, "grad_norm": 0.1437150090932846, "learning_rate": 8.998732572877061e-06, "loss": 0.0045, "step": 10650 }, { "epoch": 0.18015429725459048, "grad_norm": 0.21957731246948242, "learning_rate": 9.007182087029996e-06, "loss": 0.0041, "step": 10660 }, { "epoch": 0.18032329753344092, "grad_norm": 0.1508844494819641, "learning_rate": 9.015631601182932e-06, "loss": 0.0075, "step": 10670 }, { "epoch": 0.1804922978122914, "grad_norm": 0.2619732916355133, "learning_rate": 9.024081115335869e-06, "loss": 0.0046, "step": 10680 }, { "epoch": 0.18066129809114184, "grad_norm": 0.18197904527187347, "learning_rate": 9.032530629488805e-06, "loss": 0.0043, "step": 10690 }, { "epoch": 0.18083029836999231, "grad_norm": 0.5457032322883606, "learning_rate": 9.040980143641742e-06, "loss": 0.0069, "step": 10700 }, { "epoch": 0.18099929864884276, "grad_norm": 0.12650050222873688, "learning_rate": 9.049429657794677e-06, "loss": 0.0044, "step": 10710 }, { "epoch": 0.18116829892769323, "grad_norm": 0.20195062458515167, "learning_rate": 9.057879171947613e-06, "loss": 0.0047, "step": 10720 }, { "epoch": 0.18133729920654368, "grad_norm": 0.3241768479347229, "learning_rate": 9.06632868610055e-06, "loss": 0.0059, "step": 10730 }, { "epoch": 0.18150629948539415, "grad_norm": 0.16222558915615082, "learning_rate": 9.074778200253486e-06, "loss": 0.0057, "step": 10740 }, { "epoch": 0.18167529976424462, "grad_norm": 0.21460038423538208, "learning_rate": 9.083227714406423e-06, "loss": 0.0047, "step": 10750 }, { "epoch": 0.18184430004309507, "grad_norm": 0.28541454672813416, "learning_rate": 9.091677228559359e-06, "loss": 0.0047, "step": 10760 }, { "epoch": 0.18201330032194554, "grad_norm": 0.20260149240493774, "learning_rate": 9.100126742712296e-06, "loss": 0.0083, "step": 10770 }, { "epoch": 0.182182300600796, "grad_norm": 0.5409811735153198, "learning_rate": 9.108576256865232e-06, "loss": 0.0039, "step": 10780 }, { "epoch": 0.18235130087964646, "grad_norm": 0.17389380931854248, "learning_rate": 9.117025771018167e-06, "loss": 0.0057, "step": 10790 }, { "epoch": 0.1825203011584969, "grad_norm": 0.23779360949993134, "learning_rate": 9.125475285171103e-06, "loss": 0.0038, "step": 10800 }, { "epoch": 0.18268930143734738, "grad_norm": 0.10513414442539215, "learning_rate": 9.13392479932404e-06, "loss": 0.0037, "step": 10810 }, { "epoch": 0.18285830171619782, "grad_norm": 0.17357668280601501, "learning_rate": 9.142374313476976e-06, "loss": 0.0061, "step": 10820 }, { "epoch": 0.1830273019950483, "grad_norm": 0.15187624096870422, "learning_rate": 9.150823827629913e-06, "loss": 0.0066, "step": 10830 }, { "epoch": 0.18319630227389874, "grad_norm": 0.14482544362545013, "learning_rate": 9.159273341782848e-06, "loss": 0.0057, "step": 10840 }, { "epoch": 0.18336530255274922, "grad_norm": 0.15263095498085022, "learning_rate": 9.167722855935784e-06, "loss": 0.0042, "step": 10850 }, { "epoch": 0.18353430283159966, "grad_norm": 0.12884816527366638, "learning_rate": 9.17617237008872e-06, "loss": 0.004, "step": 10860 }, { "epoch": 0.18370330311045013, "grad_norm": 0.1251261681318283, "learning_rate": 9.184621884241657e-06, "loss": 0.0049, "step": 10870 }, { "epoch": 0.18387230338930058, "grad_norm": 0.1957172453403473, "learning_rate": 9.193071398394594e-06, "loss": 0.0043, "step": 10880 }, { "epoch": 0.18404130366815105, "grad_norm": 0.3047395348548889, "learning_rate": 9.201520912547528e-06, "loss": 0.0054, "step": 10890 }, { "epoch": 0.18421030394700152, "grad_norm": 0.6501251459121704, "learning_rate": 9.209970426700465e-06, "loss": 0.0055, "step": 10900 }, { "epoch": 0.18437930422585197, "grad_norm": 0.10551861673593521, "learning_rate": 9.218419940853401e-06, "loss": 0.003, "step": 10910 }, { "epoch": 0.18454830450470244, "grad_norm": 0.08184027671813965, "learning_rate": 9.226869455006338e-06, "loss": 0.0072, "step": 10920 }, { "epoch": 0.1847173047835529, "grad_norm": 0.09533879905939102, "learning_rate": 9.235318969159274e-06, "loss": 0.0032, "step": 10930 }, { "epoch": 0.18488630506240336, "grad_norm": 0.3011474311351776, "learning_rate": 9.24376848331221e-06, "loss": 0.0048, "step": 10940 }, { "epoch": 0.1850553053412538, "grad_norm": 0.13552433252334595, "learning_rate": 9.252217997465146e-06, "loss": 0.0044, "step": 10950 }, { "epoch": 0.18522430562010428, "grad_norm": 1.5320141315460205, "learning_rate": 9.260667511618082e-06, "loss": 0.0098, "step": 10960 }, { "epoch": 0.18539330589895472, "grad_norm": 0.41396188735961914, "learning_rate": 9.269117025771019e-06, "loss": 0.0054, "step": 10970 }, { "epoch": 0.1855623061778052, "grad_norm": 0.27680331468582153, "learning_rate": 9.277566539923955e-06, "loss": 0.0067, "step": 10980 }, { "epoch": 0.18573130645665564, "grad_norm": 0.17900198698043823, "learning_rate": 9.286016054076892e-06, "loss": 0.0084, "step": 10990 }, { "epoch": 0.18590030673550612, "grad_norm": 0.09265404939651489, "learning_rate": 9.294465568229828e-06, "loss": 0.0049, "step": 11000 }, { "epoch": 0.18606930701435656, "grad_norm": 0.22261707484722137, "learning_rate": 9.302915082382765e-06, "loss": 0.0057, "step": 11010 }, { "epoch": 0.18623830729320703, "grad_norm": 0.32542920112609863, "learning_rate": 9.3113645965357e-06, "loss": 0.0043, "step": 11020 }, { "epoch": 0.1864073075720575, "grad_norm": 0.054340165108442307, "learning_rate": 9.319814110688636e-06, "loss": 0.004, "step": 11030 }, { "epoch": 0.18657630785090795, "grad_norm": 0.28003227710723877, "learning_rate": 9.328263624841572e-06, "loss": 0.006, "step": 11040 }, { "epoch": 0.18674530812975842, "grad_norm": 0.263875275850296, "learning_rate": 9.336713138994509e-06, "loss": 0.0058, "step": 11050 }, { "epoch": 0.18691430840860887, "grad_norm": 0.28672799468040466, "learning_rate": 9.345162653147445e-06, "loss": 0.0057, "step": 11060 }, { "epoch": 0.18708330868745934, "grad_norm": 0.16641516983509064, "learning_rate": 9.35361216730038e-06, "loss": 0.0075, "step": 11070 }, { "epoch": 0.1872523089663098, "grad_norm": 0.017938191071152687, "learning_rate": 9.362061681453317e-06, "loss": 0.0064, "step": 11080 }, { "epoch": 0.18742130924516026, "grad_norm": 0.17037849128246307, "learning_rate": 9.370511195606253e-06, "loss": 0.0069, "step": 11090 }, { "epoch": 0.1875903095240107, "grad_norm": 0.17609409987926483, "learning_rate": 9.37896070975919e-06, "loss": 0.0057, "step": 11100 }, { "epoch": 0.18775930980286118, "grad_norm": 0.1867038458585739, "learning_rate": 9.387410223912126e-06, "loss": 0.0042, "step": 11110 }, { "epoch": 0.18792831008171162, "grad_norm": 0.2825443744659424, "learning_rate": 9.395859738065061e-06, "loss": 0.005, "step": 11120 }, { "epoch": 0.1880973103605621, "grad_norm": 0.7442421913146973, "learning_rate": 9.404309252217997e-06, "loss": 0.0066, "step": 11130 }, { "epoch": 0.18826631063941254, "grad_norm": 0.43998825550079346, "learning_rate": 9.412758766370934e-06, "loss": 0.0054, "step": 11140 }, { "epoch": 0.18843531091826302, "grad_norm": 0.3201293647289276, "learning_rate": 9.42120828052387e-06, "loss": 0.0047, "step": 11150 }, { "epoch": 0.1886043111971135, "grad_norm": 0.541490912437439, "learning_rate": 9.429657794676807e-06, "loss": 0.0041, "step": 11160 }, { "epoch": 0.18877331147596393, "grad_norm": 0.24210959672927856, "learning_rate": 9.438107308829742e-06, "loss": 0.0045, "step": 11170 }, { "epoch": 0.1889423117548144, "grad_norm": 0.2670549154281616, "learning_rate": 9.446556822982678e-06, "loss": 0.0039, "step": 11180 }, { "epoch": 0.18911131203366485, "grad_norm": 0.5198076367378235, "learning_rate": 9.455006337135616e-06, "loss": 0.0061, "step": 11190 }, { "epoch": 0.18928031231251533, "grad_norm": 0.08111140131950378, "learning_rate": 9.463455851288553e-06, "loss": 0.0064, "step": 11200 }, { "epoch": 0.18944931259136577, "grad_norm": 0.10334686934947968, "learning_rate": 9.471905365441488e-06, "loss": 0.0072, "step": 11210 }, { "epoch": 0.18961831287021624, "grad_norm": 0.12224064022302628, "learning_rate": 9.480354879594424e-06, "loss": 0.005, "step": 11220 }, { "epoch": 0.1897873131490667, "grad_norm": 0.16509070992469788, "learning_rate": 9.48880439374736e-06, "loss": 0.005, "step": 11230 }, { "epoch": 0.18995631342791716, "grad_norm": 0.19058158993721008, "learning_rate": 9.497253907900297e-06, "loss": 0.0044, "step": 11240 }, { "epoch": 0.1901253137067676, "grad_norm": 0.14141295850276947, "learning_rate": 9.505703422053234e-06, "loss": 0.005, "step": 11250 }, { "epoch": 0.19029431398561808, "grad_norm": 0.6358665227890015, "learning_rate": 9.514152936206169e-06, "loss": 0.0063, "step": 11260 }, { "epoch": 0.19046331426446853, "grad_norm": 0.11508552730083466, "learning_rate": 9.522602450359105e-06, "loss": 0.0079, "step": 11270 }, { "epoch": 0.190632314543319, "grad_norm": 0.18003638088703156, "learning_rate": 9.531051964512042e-06, "loss": 0.0044, "step": 11280 }, { "epoch": 0.19080131482216944, "grad_norm": 0.10557529330253601, "learning_rate": 9.539501478664978e-06, "loss": 0.0054, "step": 11290 }, { "epoch": 0.19097031510101992, "grad_norm": 0.09687496721744537, "learning_rate": 9.547950992817914e-06, "loss": 0.007, "step": 11300 }, { "epoch": 0.1911393153798704, "grad_norm": 0.25146037340164185, "learning_rate": 9.55640050697085e-06, "loss": 0.0046, "step": 11310 }, { "epoch": 0.19130831565872083, "grad_norm": 0.15835073590278625, "learning_rate": 9.564850021123786e-06, "loss": 0.0037, "step": 11320 }, { "epoch": 0.1914773159375713, "grad_norm": 0.053235359489917755, "learning_rate": 9.573299535276722e-06, "loss": 0.0029, "step": 11330 }, { "epoch": 0.19164631621642175, "grad_norm": 0.16447119414806366, "learning_rate": 9.581749049429659e-06, "loss": 0.0043, "step": 11340 }, { "epoch": 0.19181531649527223, "grad_norm": 0.05096621811389923, "learning_rate": 9.590198563582595e-06, "loss": 0.0033, "step": 11350 }, { "epoch": 0.19198431677412267, "grad_norm": 0.11670669168233871, "learning_rate": 9.59864807773553e-06, "loss": 0.0055, "step": 11360 }, { "epoch": 0.19215331705297314, "grad_norm": 0.22420817613601685, "learning_rate": 9.607097591888467e-06, "loss": 0.0081, "step": 11370 }, { "epoch": 0.1923223173318236, "grad_norm": 0.18114367127418518, "learning_rate": 9.615547106041403e-06, "loss": 0.0052, "step": 11380 }, { "epoch": 0.19249131761067406, "grad_norm": 0.1688271462917328, "learning_rate": 9.62399662019434e-06, "loss": 0.0044, "step": 11390 }, { "epoch": 0.1926603178895245, "grad_norm": 0.2208949476480484, "learning_rate": 9.632446134347276e-06, "loss": 0.0045, "step": 11400 }, { "epoch": 0.19282931816837498, "grad_norm": 0.04804624989628792, "learning_rate": 9.640895648500213e-06, "loss": 0.0052, "step": 11410 }, { "epoch": 0.19299831844722543, "grad_norm": 0.037728212773799896, "learning_rate": 9.649345162653149e-06, "loss": 0.0049, "step": 11420 }, { "epoch": 0.1931673187260759, "grad_norm": 0.0866350308060646, "learning_rate": 9.657794676806086e-06, "loss": 0.0038, "step": 11430 }, { "epoch": 0.19333631900492637, "grad_norm": 0.18869557976722717, "learning_rate": 9.66624419095902e-06, "loss": 0.0041, "step": 11440 }, { "epoch": 0.19350531928377682, "grad_norm": 0.2800125181674957, "learning_rate": 9.674693705111957e-06, "loss": 0.0081, "step": 11450 }, { "epoch": 0.1936743195626273, "grad_norm": 0.07992298156023026, "learning_rate": 9.683143219264893e-06, "loss": 0.0055, "step": 11460 }, { "epoch": 0.19384331984147773, "grad_norm": 0.34909525513648987, "learning_rate": 9.69159273341783e-06, "loss": 0.0044, "step": 11470 }, { "epoch": 0.1940123201203282, "grad_norm": 0.2653554677963257, "learning_rate": 9.700042247570766e-06, "loss": 0.0047, "step": 11480 }, { "epoch": 0.19418132039917865, "grad_norm": 0.13063736259937286, "learning_rate": 9.708491761723701e-06, "loss": 0.0034, "step": 11490 }, { "epoch": 0.19435032067802913, "grad_norm": 0.1563275158405304, "learning_rate": 9.716941275876638e-06, "loss": 0.0025, "step": 11500 }, { "epoch": 0.19451932095687957, "grad_norm": 0.07389644533395767, "learning_rate": 9.725390790029574e-06, "loss": 0.0054, "step": 11510 }, { "epoch": 0.19468832123573004, "grad_norm": 0.27305924892425537, "learning_rate": 9.73384030418251e-06, "loss": 0.0043, "step": 11520 }, { "epoch": 0.1948573215145805, "grad_norm": 0.21362082660198212, "learning_rate": 9.742289818335447e-06, "loss": 0.0053, "step": 11530 }, { "epoch": 0.19502632179343096, "grad_norm": 0.3026335835456848, "learning_rate": 9.750739332488382e-06, "loss": 0.0055, "step": 11540 }, { "epoch": 0.1951953220722814, "grad_norm": 0.14857229590415955, "learning_rate": 9.759188846641318e-06, "loss": 0.0041, "step": 11550 }, { "epoch": 0.19536432235113188, "grad_norm": 0.2065477967262268, "learning_rate": 9.767638360794255e-06, "loss": 0.0044, "step": 11560 }, { "epoch": 0.19553332262998233, "grad_norm": 0.39100638031959534, "learning_rate": 9.776087874947191e-06, "loss": 0.0028, "step": 11570 }, { "epoch": 0.1957023229088328, "grad_norm": 0.5789624452590942, "learning_rate": 9.784537389100128e-06, "loss": 0.0028, "step": 11580 }, { "epoch": 0.19587132318768327, "grad_norm": 0.23838987946510315, "learning_rate": 9.792986903253063e-06, "loss": 0.0048, "step": 11590 }, { "epoch": 0.19604032346653372, "grad_norm": 0.1580793559551239, "learning_rate": 9.801436417405999e-06, "loss": 0.0048, "step": 11600 }, { "epoch": 0.1962093237453842, "grad_norm": 0.1848992258310318, "learning_rate": 9.809885931558936e-06, "loss": 0.0031, "step": 11610 }, { "epoch": 0.19637832402423464, "grad_norm": 0.21419130265712738, "learning_rate": 9.818335445711872e-06, "loss": 0.0055, "step": 11620 }, { "epoch": 0.1965473243030851, "grad_norm": 0.13937658071517944, "learning_rate": 9.826784959864809e-06, "loss": 0.0049, "step": 11630 }, { "epoch": 0.19671632458193555, "grad_norm": 0.29598268866539, "learning_rate": 9.835234474017745e-06, "loss": 0.0031, "step": 11640 }, { "epoch": 0.19688532486078603, "grad_norm": 0.18870386481285095, "learning_rate": 9.843683988170682e-06, "loss": 0.0043, "step": 11650 }, { "epoch": 0.19705432513963647, "grad_norm": 0.2481774091720581, "learning_rate": 9.852133502323618e-06, "loss": 0.0048, "step": 11660 }, { "epoch": 0.19722332541848694, "grad_norm": 0.19238494336605072, "learning_rate": 9.860583016476553e-06, "loss": 0.009, "step": 11670 }, { "epoch": 0.1973923256973374, "grad_norm": 0.18605957925319672, "learning_rate": 9.86903253062949e-06, "loss": 0.0086, "step": 11680 }, { "epoch": 0.19756132597618786, "grad_norm": 0.38150960206985474, "learning_rate": 9.877482044782426e-06, "loss": 0.0047, "step": 11690 }, { "epoch": 0.1977303262550383, "grad_norm": 0.2200096994638443, "learning_rate": 9.885931558935362e-06, "loss": 0.0046, "step": 11700 }, { "epoch": 0.19789932653388878, "grad_norm": 0.34150439500808716, "learning_rate": 9.894381073088299e-06, "loss": 0.0058, "step": 11710 }, { "epoch": 0.19806832681273925, "grad_norm": 0.24692192673683167, "learning_rate": 9.902830587241234e-06, "loss": 0.0061, "step": 11720 }, { "epoch": 0.1982373270915897, "grad_norm": 0.19490328431129456, "learning_rate": 9.91128010139417e-06, "loss": 0.0064, "step": 11730 }, { "epoch": 0.19840632737044017, "grad_norm": 0.2527712881565094, "learning_rate": 9.919729615547107e-06, "loss": 0.0036, "step": 11740 }, { "epoch": 0.19857532764929062, "grad_norm": 0.13580848276615143, "learning_rate": 9.928179129700043e-06, "loss": 0.0033, "step": 11750 }, { "epoch": 0.1987443279281411, "grad_norm": 0.4252132773399353, "learning_rate": 9.93662864385298e-06, "loss": 0.0046, "step": 11760 }, { "epoch": 0.19891332820699154, "grad_norm": 0.08987266570329666, "learning_rate": 9.945078158005914e-06, "loss": 0.004, "step": 11770 }, { "epoch": 0.199082328485842, "grad_norm": 0.13477776944637299, "learning_rate": 9.953527672158851e-06, "loss": 0.006, "step": 11780 }, { "epoch": 0.19925132876469245, "grad_norm": 0.3274911046028137, "learning_rate": 9.961977186311787e-06, "loss": 0.005, "step": 11790 }, { "epoch": 0.19942032904354293, "grad_norm": 0.059423573315143585, "learning_rate": 9.970426700464724e-06, "loss": 0.0035, "step": 11800 }, { "epoch": 0.19958932932239337, "grad_norm": 0.06708939373493195, "learning_rate": 9.97887621461766e-06, "loss": 0.005, "step": 11810 }, { "epoch": 0.19975832960124384, "grad_norm": 0.05580686032772064, "learning_rate": 9.987325728770595e-06, "loss": 0.0049, "step": 11820 }, { "epoch": 0.1999273298800943, "grad_norm": 0.09520850330591202, "learning_rate": 9.995775242923532e-06, "loss": 0.0036, "step": 11830 }, { "epoch": 0.20009633015894476, "grad_norm": 0.23919424414634705, "learning_rate": 9.999999945621971e-06, "loss": 0.0068, "step": 11840 }, { "epoch": 0.20026533043779524, "grad_norm": 0.14946545660495758, "learning_rate": 9.99999951059775e-06, "loss": 0.0044, "step": 11850 }, { "epoch": 0.20043433071664568, "grad_norm": 0.25027787685394287, "learning_rate": 9.999998640549348e-06, "loss": 0.0048, "step": 11860 }, { "epoch": 0.20060333099549615, "grad_norm": 0.1638702154159546, "learning_rate": 9.999997335476836e-06, "loss": 0.0077, "step": 11870 }, { "epoch": 0.2007723312743466, "grad_norm": 0.42983323335647583, "learning_rate": 9.999995595380332e-06, "loss": 0.0062, "step": 11880 }, { "epoch": 0.20094133155319707, "grad_norm": 0.23045586049556732, "learning_rate": 9.999993420259984e-06, "loss": 0.0055, "step": 11890 }, { "epoch": 0.20111033183204752, "grad_norm": 0.27778342366218567, "learning_rate": 9.999990810115985e-06, "loss": 0.0068, "step": 11900 }, { "epoch": 0.201279332110898, "grad_norm": 0.20811425149440765, "learning_rate": 9.999987764948558e-06, "loss": 0.006, "step": 11910 }, { "epoch": 0.20144833238974844, "grad_norm": 0.4125745892524719, "learning_rate": 9.999984284757971e-06, "loss": 0.0061, "step": 11920 }, { "epoch": 0.2016173326685989, "grad_norm": 0.26201602816581726, "learning_rate": 9.999980369544525e-06, "loss": 0.0058, "step": 11930 }, { "epoch": 0.20178633294744935, "grad_norm": 0.18778546154499054, "learning_rate": 9.999976019308562e-06, "loss": 0.006, "step": 11940 }, { "epoch": 0.20195533322629983, "grad_norm": 0.19676972925662994, "learning_rate": 9.99997123405046e-06, "loss": 0.0065, "step": 11950 }, { "epoch": 0.20212433350515027, "grad_norm": 0.24656665325164795, "learning_rate": 9.999966013770634e-06, "loss": 0.005, "step": 11960 }, { "epoch": 0.20229333378400075, "grad_norm": 0.11270641535520554, "learning_rate": 9.999960358469542e-06, "loss": 0.0057, "step": 11970 }, { "epoch": 0.2024623340628512, "grad_norm": 0.3879396915435791, "learning_rate": 9.999954268147671e-06, "loss": 0.0051, "step": 11980 }, { "epoch": 0.20263133434170166, "grad_norm": 0.047397516667842865, "learning_rate": 9.999947742805554e-06, "loss": 0.0045, "step": 11990 }, { "epoch": 0.20280033462055214, "grad_norm": 0.5076800584793091, "learning_rate": 9.999940782443758e-06, "loss": 0.0106, "step": 12000 }, { "epoch": 0.20296933489940258, "grad_norm": 0.12176298350095749, "learning_rate": 9.99993338706289e-06, "loss": 0.0052, "step": 12010 }, { "epoch": 0.20313833517825305, "grad_norm": 0.17117035388946533, "learning_rate": 9.99992555666359e-06, "loss": 0.0031, "step": 12020 }, { "epoch": 0.2033073354571035, "grad_norm": 0.1444518268108368, "learning_rate": 9.999917291246543e-06, "loss": 0.0044, "step": 12030 }, { "epoch": 0.20347633573595397, "grad_norm": 0.33203762769699097, "learning_rate": 9.999908590812466e-06, "loss": 0.0044, "step": 12040 }, { "epoch": 0.20364533601480442, "grad_norm": 0.17868980765342712, "learning_rate": 9.999899455362117e-06, "loss": 0.004, "step": 12050 }, { "epoch": 0.2038143362936549, "grad_norm": 0.03706150874495506, "learning_rate": 9.999889884896288e-06, "loss": 0.0063, "step": 12060 }, { "epoch": 0.20398333657250534, "grad_norm": 0.12980827689170837, "learning_rate": 9.999879879415816e-06, "loss": 0.005, "step": 12070 }, { "epoch": 0.2041523368513558, "grad_norm": 0.25462740659713745, "learning_rate": 9.99986943892157e-06, "loss": 0.0041, "step": 12080 }, { "epoch": 0.20432133713020625, "grad_norm": 0.0007251466158777475, "learning_rate": 9.999858563414457e-06, "loss": 0.006, "step": 12090 }, { "epoch": 0.20449033740905673, "grad_norm": 0.18278099596500397, "learning_rate": 9.999847252895423e-06, "loss": 0.0057, "step": 12100 }, { "epoch": 0.20465933768790717, "grad_norm": 0.4333384335041046, "learning_rate": 9.999835507365454e-06, "loss": 0.0051, "step": 12110 }, { "epoch": 0.20482833796675765, "grad_norm": 0.1187647208571434, "learning_rate": 9.999823326825572e-06, "loss": 0.0085, "step": 12120 }, { "epoch": 0.20499733824560812, "grad_norm": 0.17793165147304535, "learning_rate": 9.999810711276837e-06, "loss": 0.0067, "step": 12130 }, { "epoch": 0.20516633852445856, "grad_norm": 0.3019254207611084, "learning_rate": 9.999797660720343e-06, "loss": 0.0085, "step": 12140 }, { "epoch": 0.20533533880330904, "grad_norm": 0.12040320783853531, "learning_rate": 9.999784175157228e-06, "loss": 0.0049, "step": 12150 }, { "epoch": 0.20550433908215948, "grad_norm": 0.2843913733959198, "learning_rate": 9.999770254588666e-06, "loss": 0.0058, "step": 12160 }, { "epoch": 0.20567333936100995, "grad_norm": 0.25708165764808655, "learning_rate": 9.999755899015866e-06, "loss": 0.0053, "step": 12170 }, { "epoch": 0.2058423396398604, "grad_norm": 0.28958919644355774, "learning_rate": 9.99974110844008e-06, "loss": 0.006, "step": 12180 }, { "epoch": 0.20601133991871087, "grad_norm": 0.15175886452198029, "learning_rate": 9.999725882862594e-06, "loss": 0.0044, "step": 12190 }, { "epoch": 0.20618034019756132, "grad_norm": 0.17259806394577026, "learning_rate": 9.999710222284731e-06, "loss": 0.0075, "step": 12200 }, { "epoch": 0.2063493404764118, "grad_norm": 0.310881644487381, "learning_rate": 9.999694126707856e-06, "loss": 0.0054, "step": 12210 }, { "epoch": 0.20651834075526224, "grad_norm": 0.09686554968357086, "learning_rate": 9.999677596133364e-06, "loss": 0.0053, "step": 12220 }, { "epoch": 0.2066873410341127, "grad_norm": 0.17074330151081085, "learning_rate": 9.999660630562699e-06, "loss": 0.0036, "step": 12230 }, { "epoch": 0.20685634131296315, "grad_norm": 0.41990748047828674, "learning_rate": 9.999643229997337e-06, "loss": 0.0053, "step": 12240 }, { "epoch": 0.20702534159181363, "grad_norm": 0.23658278584480286, "learning_rate": 9.99962539443879e-06, "loss": 0.0059, "step": 12250 }, { "epoch": 0.2071943418706641, "grad_norm": 0.38540852069854736, "learning_rate": 9.999607123888608e-06, "loss": 0.0063, "step": 12260 }, { "epoch": 0.20736334214951455, "grad_norm": 0.14451487362384796, "learning_rate": 9.999588418348384e-06, "loss": 0.0065, "step": 12270 }, { "epoch": 0.20753234242836502, "grad_norm": 0.19181889295578003, "learning_rate": 9.999569277819743e-06, "loss": 0.0044, "step": 12280 }, { "epoch": 0.20770134270721546, "grad_norm": 0.29033470153808594, "learning_rate": 9.999549702304351e-06, "loss": 0.0039, "step": 12290 }, { "epoch": 0.20787034298606594, "grad_norm": 0.13398291170597076, "learning_rate": 9.999529691803913e-06, "loss": 0.0059, "step": 12300 }, { "epoch": 0.20803934326491638, "grad_norm": 0.39608412981033325, "learning_rate": 9.999509246320169e-06, "loss": 0.0062, "step": 12310 }, { "epoch": 0.20820834354376686, "grad_norm": 0.03699997439980507, "learning_rate": 9.999488365854894e-06, "loss": 0.0053, "step": 12320 }, { "epoch": 0.2083773438226173, "grad_norm": 0.14555566012859344, "learning_rate": 9.999467050409912e-06, "loss": 0.0038, "step": 12330 }, { "epoch": 0.20854634410146777, "grad_norm": 0.20318925380706787, "learning_rate": 9.999445299987072e-06, "loss": 0.0061, "step": 12340 }, { "epoch": 0.20871534438031822, "grad_norm": 0.29658663272857666, "learning_rate": 9.999423114588268e-06, "loss": 0.0059, "step": 12350 }, { "epoch": 0.2088843446591687, "grad_norm": 0.1962607055902481, "learning_rate": 9.99940049421543e-06, "loss": 0.0042, "step": 12360 }, { "epoch": 0.20905334493801914, "grad_norm": 0.12968648970127106, "learning_rate": 9.999377438870528e-06, "loss": 0.0033, "step": 12370 }, { "epoch": 0.2092223452168696, "grad_norm": 1.8838653564453125, "learning_rate": 9.999353948555563e-06, "loss": 0.0058, "step": 12380 }, { "epoch": 0.20939134549572005, "grad_norm": 0.09402038902044296, "learning_rate": 9.999330023272584e-06, "loss": 0.0041, "step": 12390 }, { "epoch": 0.20956034577457053, "grad_norm": 0.13466863334178925, "learning_rate": 9.999305663023672e-06, "loss": 0.0052, "step": 12400 }, { "epoch": 0.209729346053421, "grad_norm": 0.32404276728630066, "learning_rate": 9.999280867810943e-06, "loss": 0.0065, "step": 12410 }, { "epoch": 0.20989834633227145, "grad_norm": 0.14441674947738647, "learning_rate": 9.999255637636558e-06, "loss": 0.0055, "step": 12420 }, { "epoch": 0.21006734661112192, "grad_norm": 0.39780494570732117, "learning_rate": 9.999229972502708e-06, "loss": 0.0066, "step": 12430 }, { "epoch": 0.21023634688997236, "grad_norm": 0.31614312529563904, "learning_rate": 9.999203872411632e-06, "loss": 0.0062, "step": 12440 }, { "epoch": 0.21040534716882284, "grad_norm": 0.23963358998298645, "learning_rate": 9.999177337365596e-06, "loss": 0.0046, "step": 12450 }, { "epoch": 0.21057434744767328, "grad_norm": 0.15409396588802338, "learning_rate": 9.99915036736691e-06, "loss": 0.0046, "step": 12460 }, { "epoch": 0.21074334772652376, "grad_norm": 0.12386608868837357, "learning_rate": 9.999122962417919e-06, "loss": 0.0033, "step": 12470 }, { "epoch": 0.2109123480053742, "grad_norm": 0.1905480921268463, "learning_rate": 9.999095122521012e-06, "loss": 0.0061, "step": 12480 }, { "epoch": 0.21108134828422467, "grad_norm": 0.27343010902404785, "learning_rate": 9.999066847678606e-06, "loss": 0.0117, "step": 12490 }, { "epoch": 0.21125034856307512, "grad_norm": 0.10000277310609818, "learning_rate": 9.999038137893163e-06, "loss": 0.0034, "step": 12500 }, { "epoch": 0.2114193488419256, "grad_norm": 0.2904250919818878, "learning_rate": 9.999008993167183e-06, "loss": 0.0053, "step": 12510 }, { "epoch": 0.21158834912077604, "grad_norm": 0.10671574622392654, "learning_rate": 9.998979413503199e-06, "loss": 0.0038, "step": 12520 }, { "epoch": 0.2117573493996265, "grad_norm": 0.04285205155611038, "learning_rate": 9.998949398903784e-06, "loss": 0.0053, "step": 12530 }, { "epoch": 0.21192634967847698, "grad_norm": 0.1715831607580185, "learning_rate": 9.998918949371552e-06, "loss": 0.0054, "step": 12540 }, { "epoch": 0.21209534995732743, "grad_norm": 0.12727899849414825, "learning_rate": 9.998888064909152e-06, "loss": 0.0064, "step": 12550 }, { "epoch": 0.2122643502361779, "grad_norm": 0.20481939613819122, "learning_rate": 9.998856745519269e-06, "loss": 0.0041, "step": 12560 }, { "epoch": 0.21243335051502835, "grad_norm": 0.20393306016921997, "learning_rate": 9.998824991204628e-06, "loss": 0.0031, "step": 12570 }, { "epoch": 0.21260235079387882, "grad_norm": 0.3028099834918976, "learning_rate": 9.998792801967996e-06, "loss": 0.0097, "step": 12580 }, { "epoch": 0.21277135107272926, "grad_norm": 0.20722444355487823, "learning_rate": 9.998760177812167e-06, "loss": 0.0032, "step": 12590 }, { "epoch": 0.21294035135157974, "grad_norm": 0.2849743068218231, "learning_rate": 9.998727118739986e-06, "loss": 0.005, "step": 12600 }, { "epoch": 0.21310935163043018, "grad_norm": 0.10578224062919617, "learning_rate": 9.998693624754325e-06, "loss": 0.0038, "step": 12610 }, { "epoch": 0.21327835190928066, "grad_norm": 0.17871376872062683, "learning_rate": 9.998659695858099e-06, "loss": 0.0049, "step": 12620 }, { "epoch": 0.2134473521881311, "grad_norm": 0.1357841044664383, "learning_rate": 9.998625332054262e-06, "loss": 0.0027, "step": 12630 }, { "epoch": 0.21361635246698157, "grad_norm": 0.1257234364748001, "learning_rate": 9.9985905333458e-06, "loss": 0.0063, "step": 12640 }, { "epoch": 0.21378535274583202, "grad_norm": 0.22507745027542114, "learning_rate": 9.998555299735744e-06, "loss": 0.005, "step": 12650 }, { "epoch": 0.2139543530246825, "grad_norm": 0.1501859575510025, "learning_rate": 9.998519631227158e-06, "loss": 0.0045, "step": 12660 }, { "epoch": 0.21412335330353294, "grad_norm": 0.13181540369987488, "learning_rate": 9.998483527823146e-06, "loss": 0.0036, "step": 12670 }, { "epoch": 0.2142923535823834, "grad_norm": 0.11986613273620605, "learning_rate": 9.998446989526849e-06, "loss": 0.0042, "step": 12680 }, { "epoch": 0.21446135386123388, "grad_norm": 0.18259811401367188, "learning_rate": 9.998410016341447e-06, "loss": 0.0049, "step": 12690 }, { "epoch": 0.21463035414008433, "grad_norm": 0.17600387334823608, "learning_rate": 9.998372608270152e-06, "loss": 0.0053, "step": 12700 }, { "epoch": 0.2147993544189348, "grad_norm": 0.11839409172534943, "learning_rate": 9.998334765316226e-06, "loss": 0.0041, "step": 12710 }, { "epoch": 0.21496835469778525, "grad_norm": 0.10443863272666931, "learning_rate": 9.998296487482956e-06, "loss": 0.0043, "step": 12720 }, { "epoch": 0.21513735497663572, "grad_norm": 0.3014087677001953, "learning_rate": 9.998257774773676e-06, "loss": 0.0054, "step": 12730 }, { "epoch": 0.21530635525548616, "grad_norm": 0.06494636088609695, "learning_rate": 9.99821862719175e-06, "loss": 0.0044, "step": 12740 }, { "epoch": 0.21547535553433664, "grad_norm": 0.1542501151561737, "learning_rate": 9.99817904474059e-06, "loss": 0.0068, "step": 12750 }, { "epoch": 0.21564435581318708, "grad_norm": 0.21469393372535706, "learning_rate": 9.998139027423635e-06, "loss": 0.0048, "step": 12760 }, { "epoch": 0.21581335609203756, "grad_norm": 0.20954947173595428, "learning_rate": 9.998098575244367e-06, "loss": 0.0073, "step": 12770 }, { "epoch": 0.215982356370888, "grad_norm": 0.24559859931468964, "learning_rate": 9.998057688206307e-06, "loss": 0.0045, "step": 12780 }, { "epoch": 0.21615135664973847, "grad_norm": 0.24553582072257996, "learning_rate": 9.998016366313012e-06, "loss": 0.0036, "step": 12790 }, { "epoch": 0.21632035692858892, "grad_norm": 0.267829954624176, "learning_rate": 9.997974609568077e-06, "loss": 0.0041, "step": 12800 }, { "epoch": 0.2164893572074394, "grad_norm": 0.16241054236888885, "learning_rate": 9.997932417975135e-06, "loss": 0.0038, "step": 12810 }, { "epoch": 0.21665835748628987, "grad_norm": 0.10603076964616776, "learning_rate": 9.997889791537859e-06, "loss": 0.0027, "step": 12820 }, { "epoch": 0.2168273577651403, "grad_norm": 0.020528987050056458, "learning_rate": 9.997846730259955e-06, "loss": 0.0046, "step": 12830 }, { "epoch": 0.21699635804399078, "grad_norm": 0.18575718998908997, "learning_rate": 9.99780323414517e-06, "loss": 0.0039, "step": 12840 }, { "epoch": 0.21716535832284123, "grad_norm": 0.21036140620708466, "learning_rate": 9.997759303197287e-06, "loss": 0.003, "step": 12850 }, { "epoch": 0.2173343586016917, "grad_norm": 0.22635206580162048, "learning_rate": 9.99771493742013e-06, "loss": 0.0051, "step": 12860 }, { "epoch": 0.21750335888054215, "grad_norm": 0.116457998752594, "learning_rate": 9.997670136817561e-06, "loss": 0.005, "step": 12870 }, { "epoch": 0.21767235915939262, "grad_norm": 0.17869815230369568, "learning_rate": 9.997624901393477e-06, "loss": 0.0035, "step": 12880 }, { "epoch": 0.21784135943824307, "grad_norm": 0.2751922309398651, "learning_rate": 9.99757923115181e-06, "loss": 0.0049, "step": 12890 }, { "epoch": 0.21801035971709354, "grad_norm": 0.4475092589855194, "learning_rate": 9.997533126096537e-06, "loss": 0.0066, "step": 12900 }, { "epoch": 0.21817935999594398, "grad_norm": 0.48583754897117615, "learning_rate": 9.997486586231668e-06, "loss": 0.0025, "step": 12910 }, { "epoch": 0.21834836027479446, "grad_norm": 0.20378488302230835, "learning_rate": 9.997439611561252e-06, "loss": 0.0034, "step": 12920 }, { "epoch": 0.2185173605536449, "grad_norm": 0.058693524450063705, "learning_rate": 9.997392202089378e-06, "loss": 0.0057, "step": 12930 }, { "epoch": 0.21868636083249537, "grad_norm": 0.1765138804912567, "learning_rate": 9.997344357820168e-06, "loss": 0.0042, "step": 12940 }, { "epoch": 0.21885536111134585, "grad_norm": 0.2786969542503357, "learning_rate": 9.997296078757788e-06, "loss": 0.0047, "step": 12950 }, { "epoch": 0.2190243613901963, "grad_norm": 0.1344185769557953, "learning_rate": 9.997247364906435e-06, "loss": 0.0039, "step": 12960 }, { "epoch": 0.21919336166904677, "grad_norm": 0.14174804091453552, "learning_rate": 9.99719821627035e-06, "loss": 0.0056, "step": 12970 }, { "epoch": 0.2193623619478972, "grad_norm": 0.055436067283153534, "learning_rate": 9.99714863285381e-06, "loss": 0.0027, "step": 12980 }, { "epoch": 0.21953136222674768, "grad_norm": 0.20866861939430237, "learning_rate": 9.997098614661124e-06, "loss": 0.0069, "step": 12990 }, { "epoch": 0.21970036250559813, "grad_norm": 0.08676794171333313, "learning_rate": 9.997048161696649e-06, "loss": 0.005, "step": 13000 }, { "epoch": 0.2198693627844486, "grad_norm": 0.09381774812936783, "learning_rate": 9.996997273964771e-06, "loss": 0.0027, "step": 13010 }, { "epoch": 0.22003836306329905, "grad_norm": 0.12107016891241074, "learning_rate": 9.996945951469921e-06, "loss": 0.0041, "step": 13020 }, { "epoch": 0.22020736334214952, "grad_norm": 0.07253371924161911, "learning_rate": 9.996894194216562e-06, "loss": 0.0029, "step": 13030 }, { "epoch": 0.22037636362099997, "grad_norm": 0.08685167133808136, "learning_rate": 9.996842002209196e-06, "loss": 0.0068, "step": 13040 }, { "epoch": 0.22054536389985044, "grad_norm": 0.10493378341197968, "learning_rate": 9.996789375452367e-06, "loss": 0.0054, "step": 13050 }, { "epoch": 0.22071436417870088, "grad_norm": 0.12263305485248566, "learning_rate": 9.996736313950652e-06, "loss": 0.0058, "step": 13060 }, { "epoch": 0.22088336445755136, "grad_norm": 0.18619713187217712, "learning_rate": 9.996682817708668e-06, "loss": 0.0034, "step": 13070 }, { "epoch": 0.2210523647364018, "grad_norm": 0.053724195808172226, "learning_rate": 9.996628886731071e-06, "loss": 0.0032, "step": 13080 }, { "epoch": 0.22122136501525227, "grad_norm": 0.023195017129182816, "learning_rate": 9.996574521022548e-06, "loss": 0.0053, "step": 13090 }, { "epoch": 0.22139036529410275, "grad_norm": 0.2828596830368042, "learning_rate": 9.996519720587835e-06, "loss": 0.0035, "step": 13100 }, { "epoch": 0.2215593655729532, "grad_norm": 0.19304883480072021, "learning_rate": 9.996464485431699e-06, "loss": 0.004, "step": 13110 }, { "epoch": 0.22172836585180367, "grad_norm": 0.15398164093494415, "learning_rate": 9.996408815558941e-06, "loss": 0.0043, "step": 13120 }, { "epoch": 0.2218973661306541, "grad_norm": 0.1082804799079895, "learning_rate": 9.99635271097441e-06, "loss": 0.0053, "step": 13130 }, { "epoch": 0.22206636640950458, "grad_norm": 0.09701705724000931, "learning_rate": 9.996296171682984e-06, "loss": 0.0043, "step": 13140 }, { "epoch": 0.22223536668835503, "grad_norm": 0.28328144550323486, "learning_rate": 9.996239197689586e-06, "loss": 0.0054, "step": 13150 }, { "epoch": 0.2224043669672055, "grad_norm": 0.1199091449379921, "learning_rate": 9.996181788999168e-06, "loss": 0.0038, "step": 13160 }, { "epoch": 0.22257336724605595, "grad_norm": 0.1811678409576416, "learning_rate": 9.996123945616727e-06, "loss": 0.0034, "step": 13170 }, { "epoch": 0.22274236752490642, "grad_norm": 0.05057886987924576, "learning_rate": 9.996065667547298e-06, "loss": 0.0039, "step": 13180 }, { "epoch": 0.22291136780375687, "grad_norm": 0.11425051838159561, "learning_rate": 9.996006954795949e-06, "loss": 0.0028, "step": 13190 }, { "epoch": 0.22308036808260734, "grad_norm": 0.1719987392425537, "learning_rate": 9.995947807367787e-06, "loss": 0.0037, "step": 13200 }, { "epoch": 0.22324936836145778, "grad_norm": 0.1575050801038742, "learning_rate": 9.99588822526796e-06, "loss": 0.0032, "step": 13210 }, { "epoch": 0.22341836864030826, "grad_norm": 0.3843485116958618, "learning_rate": 9.995828208501654e-06, "loss": 0.0034, "step": 13220 }, { "epoch": 0.22358736891915873, "grad_norm": 0.17446254193782806, "learning_rate": 9.995767757074087e-06, "loss": 0.0036, "step": 13230 }, { "epoch": 0.22375636919800918, "grad_norm": 0.059524331241846085, "learning_rate": 9.99570687099052e-06, "loss": 0.0047, "step": 13240 }, { "epoch": 0.22392536947685965, "grad_norm": 0.15919871628284454, "learning_rate": 9.995645550256251e-06, "loss": 0.0094, "step": 13250 }, { "epoch": 0.2240943697557101, "grad_norm": 0.22587934136390686, "learning_rate": 9.995583794876616e-06, "loss": 0.0054, "step": 13260 }, { "epoch": 0.22426337003456057, "grad_norm": 0.13634823262691498, "learning_rate": 9.995521604856983e-06, "loss": 0.007, "step": 13270 }, { "epoch": 0.224432370313411, "grad_norm": 0.06666050106287003, "learning_rate": 9.99545898020277e-06, "loss": 0.0052, "step": 13280 }, { "epoch": 0.22460137059226148, "grad_norm": 0.3350073993206024, "learning_rate": 9.99539592091942e-06, "loss": 0.0062, "step": 13290 }, { "epoch": 0.22477037087111193, "grad_norm": 0.10985402017831802, "learning_rate": 9.995332427012422e-06, "loss": 0.0033, "step": 13300 }, { "epoch": 0.2249393711499624, "grad_norm": 0.12870152294635773, "learning_rate": 9.9952684984873e-06, "loss": 0.0066, "step": 13310 }, { "epoch": 0.22510837142881285, "grad_norm": 0.09722672402858734, "learning_rate": 9.995204135349617e-06, "loss": 0.0038, "step": 13320 }, { "epoch": 0.22527737170766332, "grad_norm": 0.49585485458374023, "learning_rate": 9.995139337604971e-06, "loss": 0.0064, "step": 13330 }, { "epoch": 0.22544637198651377, "grad_norm": 0.21925857663154602, "learning_rate": 9.995074105259001e-06, "loss": 0.0032, "step": 13340 }, { "epoch": 0.22561537226536424, "grad_norm": 0.18533477187156677, "learning_rate": 9.995008438317384e-06, "loss": 0.0038, "step": 13350 }, { "epoch": 0.22578437254421468, "grad_norm": 0.18802869319915771, "learning_rate": 9.994942336785829e-06, "loss": 0.0033, "step": 13360 }, { "epoch": 0.22595337282306516, "grad_norm": 0.17008714377880096, "learning_rate": 9.99487580067009e-06, "loss": 0.0039, "step": 13370 }, { "epoch": 0.22612237310191563, "grad_norm": 0.15874530375003815, "learning_rate": 9.994808829975955e-06, "loss": 0.0054, "step": 13380 }, { "epoch": 0.22629137338076608, "grad_norm": 0.16346663236618042, "learning_rate": 9.994741424709253e-06, "loss": 0.0061, "step": 13390 }, { "epoch": 0.22646037365961655, "grad_norm": 0.21573509275913239, "learning_rate": 9.994673584875846e-06, "loss": 0.0048, "step": 13400 }, { "epoch": 0.226629373938467, "grad_norm": 0.18036310374736786, "learning_rate": 9.994605310481637e-06, "loss": 0.0044, "step": 13410 }, { "epoch": 0.22679837421731747, "grad_norm": 0.5115329623222351, "learning_rate": 9.994536601532568e-06, "loss": 0.0047, "step": 13420 }, { "epoch": 0.2269673744961679, "grad_norm": 0.09441306442022324, "learning_rate": 9.994467458034613e-06, "loss": 0.0052, "step": 13430 }, { "epoch": 0.22713637477501838, "grad_norm": 0.09542250633239746, "learning_rate": 9.994397879993793e-06, "loss": 0.0049, "step": 13440 }, { "epoch": 0.22730537505386883, "grad_norm": 0.15195991098880768, "learning_rate": 9.994327867416159e-06, "loss": 0.0063, "step": 13450 }, { "epoch": 0.2274743753327193, "grad_norm": 0.039866454899311066, "learning_rate": 9.9942574203078e-06, "loss": 0.0039, "step": 13460 }, { "epoch": 0.22764337561156975, "grad_norm": 0.17334040999412537, "learning_rate": 9.99418653867485e-06, "loss": 0.004, "step": 13470 }, { "epoch": 0.22781237589042022, "grad_norm": 0.1515452116727829, "learning_rate": 9.994115222523472e-06, "loss": 0.0044, "step": 13480 }, { "epoch": 0.22798137616927067, "grad_norm": 0.11643579602241516, "learning_rate": 9.994043471859875e-06, "loss": 0.0074, "step": 13490 }, { "epoch": 0.22815037644812114, "grad_norm": 0.14805810153484344, "learning_rate": 9.993971286690299e-06, "loss": 0.0057, "step": 13500 }, { "epoch": 0.2283193767269716, "grad_norm": 0.09401725232601166, "learning_rate": 9.993898667021023e-06, "loss": 0.005, "step": 13510 }, { "epoch": 0.22848837700582206, "grad_norm": 0.22201889753341675, "learning_rate": 9.993825612858367e-06, "loss": 0.0054, "step": 13520 }, { "epoch": 0.22865737728467253, "grad_norm": 0.13076730072498322, "learning_rate": 9.993752124208687e-06, "loss": 0.0049, "step": 13530 }, { "epoch": 0.22882637756352298, "grad_norm": 0.08481460809707642, "learning_rate": 9.993678201078378e-06, "loss": 0.0084, "step": 13540 }, { "epoch": 0.22899537784237345, "grad_norm": 0.02569173462688923, "learning_rate": 9.99360384347387e-06, "loss": 0.0055, "step": 13550 }, { "epoch": 0.2291643781212239, "grad_norm": 0.2832903563976288, "learning_rate": 9.993529051401633e-06, "loss": 0.0044, "step": 13560 }, { "epoch": 0.22933337840007437, "grad_norm": 0.156660258769989, "learning_rate": 9.993453824868175e-06, "loss": 0.0042, "step": 13570 }, { "epoch": 0.2295023786789248, "grad_norm": 0.3335116505622864, "learning_rate": 9.993378163880038e-06, "loss": 0.0073, "step": 13580 }, { "epoch": 0.22967137895777529, "grad_norm": 0.055631548166275024, "learning_rate": 9.993302068443811e-06, "loss": 0.0034, "step": 13590 }, { "epoch": 0.22984037923662573, "grad_norm": 0.18227708339691162, "learning_rate": 9.993225538566109e-06, "loss": 0.0073, "step": 13600 }, { "epoch": 0.2300093795154762, "grad_norm": 0.28870803117752075, "learning_rate": 9.993148574253592e-06, "loss": 0.0063, "step": 13610 }, { "epoch": 0.23017837979432665, "grad_norm": 0.09444483369588852, "learning_rate": 9.993071175512957e-06, "loss": 0.005, "step": 13620 }, { "epoch": 0.23034738007317712, "grad_norm": 0.1265878528356552, "learning_rate": 9.992993342350939e-06, "loss": 0.0032, "step": 13630 }, { "epoch": 0.2305163803520276, "grad_norm": 0.06479588150978088, "learning_rate": 9.992915074774307e-06, "loss": 0.0044, "step": 13640 }, { "epoch": 0.23068538063087804, "grad_norm": 0.23029182851314545, "learning_rate": 9.992836372789871e-06, "loss": 0.0049, "step": 13650 }, { "epoch": 0.2308543809097285, "grad_norm": 0.19470201432704926, "learning_rate": 9.992757236404483e-06, "loss": 0.0044, "step": 13660 }, { "epoch": 0.23102338118857896, "grad_norm": 0.3662036657333374, "learning_rate": 9.992677665625023e-06, "loss": 0.0042, "step": 13670 }, { "epoch": 0.23119238146742943, "grad_norm": 0.27756965160369873, "learning_rate": 9.992597660458416e-06, "loss": 0.0049, "step": 13680 }, { "epoch": 0.23136138174627988, "grad_norm": 0.3882659673690796, "learning_rate": 9.992517220911623e-06, "loss": 0.0051, "step": 13690 }, { "epoch": 0.23153038202513035, "grad_norm": 0.11957082152366638, "learning_rate": 9.992436346991642e-06, "loss": 0.0041, "step": 13700 }, { "epoch": 0.2316993823039808, "grad_norm": 0.15464948117733002, "learning_rate": 9.992355038705512e-06, "loss": 0.0051, "step": 13710 }, { "epoch": 0.23186838258283127, "grad_norm": 0.08093637973070145, "learning_rate": 9.992273296060304e-06, "loss": 0.0043, "step": 13720 }, { "epoch": 0.2320373828616817, "grad_norm": 0.38809144496917725, "learning_rate": 9.99219111906313e-06, "loss": 0.006, "step": 13730 }, { "epoch": 0.23220638314053219, "grad_norm": 0.18487603962421417, "learning_rate": 9.992108507721143e-06, "loss": 0.0062, "step": 13740 }, { "epoch": 0.23237538341938263, "grad_norm": 0.12486666440963745, "learning_rate": 9.992025462041526e-06, "loss": 0.0048, "step": 13750 }, { "epoch": 0.2325443836982331, "grad_norm": 0.21606610715389252, "learning_rate": 9.99194198203151e-06, "loss": 0.0052, "step": 13760 }, { "epoch": 0.23271338397708355, "grad_norm": 0.11294318735599518, "learning_rate": 9.991858067698353e-06, "loss": 0.0031, "step": 13770 }, { "epoch": 0.23288238425593402, "grad_norm": 0.15346220135688782, "learning_rate": 9.991773719049358e-06, "loss": 0.0054, "step": 13780 }, { "epoch": 0.2330513845347845, "grad_norm": 0.19085946679115295, "learning_rate": 9.991688936091863e-06, "loss": 0.0027, "step": 13790 }, { "epoch": 0.23322038481363494, "grad_norm": 0.15518149733543396, "learning_rate": 9.991603718833248e-06, "loss": 0.0055, "step": 13800 }, { "epoch": 0.2333893850924854, "grad_norm": 0.08052901178598404, "learning_rate": 9.991518067280921e-06, "loss": 0.0067, "step": 13810 }, { "epoch": 0.23355838537133586, "grad_norm": 0.08903773874044418, "learning_rate": 9.991431981442341e-06, "loss": 0.0048, "step": 13820 }, { "epoch": 0.23372738565018633, "grad_norm": 0.12610691785812378, "learning_rate": 9.991345461324992e-06, "loss": 0.0054, "step": 13830 }, { "epoch": 0.23389638592903678, "grad_norm": 0.2848169505596161, "learning_rate": 9.991258506936406e-06, "loss": 0.0054, "step": 13840 }, { "epoch": 0.23406538620788725, "grad_norm": 0.10816321521997452, "learning_rate": 9.991171118284147e-06, "loss": 0.0043, "step": 13850 }, { "epoch": 0.2342343864867377, "grad_norm": 0.053277160972356796, "learning_rate": 9.991083295375817e-06, "loss": 0.0052, "step": 13860 }, { "epoch": 0.23440338676558817, "grad_norm": 0.09209351986646652, "learning_rate": 9.990995038219056e-06, "loss": 0.0033, "step": 13870 }, { "epoch": 0.2345723870444386, "grad_norm": 0.13402052223682404, "learning_rate": 9.990906346821548e-06, "loss": 0.0038, "step": 13880 }, { "epoch": 0.23474138732328909, "grad_norm": 0.19042134284973145, "learning_rate": 9.990817221191005e-06, "loss": 0.0041, "step": 13890 }, { "epoch": 0.23491038760213953, "grad_norm": 0.1634221225976944, "learning_rate": 9.990727661335183e-06, "loss": 0.0058, "step": 13900 }, { "epoch": 0.23507938788099, "grad_norm": 0.06429363042116165, "learning_rate": 9.990637667261873e-06, "loss": 0.0052, "step": 13910 }, { "epoch": 0.23524838815984048, "grad_norm": 0.07568903267383575, "learning_rate": 9.990547238978907e-06, "loss": 0.0033, "step": 13920 }, { "epoch": 0.23541738843869092, "grad_norm": 0.23190951347351074, "learning_rate": 9.99045637649415e-06, "loss": 0.0061, "step": 13930 }, { "epoch": 0.2355863887175414, "grad_norm": 0.03557845950126648, "learning_rate": 9.990365079815509e-06, "loss": 0.0037, "step": 13940 }, { "epoch": 0.23575538899639184, "grad_norm": 0.10422530025243759, "learning_rate": 9.990273348950928e-06, "loss": 0.0051, "step": 13950 }, { "epoch": 0.2359243892752423, "grad_norm": 0.2304789423942566, "learning_rate": 9.990181183908387e-06, "loss": 0.0036, "step": 13960 }, { "epoch": 0.23609338955409276, "grad_norm": 0.1503400355577469, "learning_rate": 9.990088584695905e-06, "loss": 0.0067, "step": 13970 }, { "epoch": 0.23626238983294323, "grad_norm": 0.2617582678794861, "learning_rate": 9.989995551321538e-06, "loss": 0.0047, "step": 13980 }, { "epoch": 0.23643139011179368, "grad_norm": 0.2739869952201843, "learning_rate": 9.989902083793383e-06, "loss": 0.0047, "step": 13990 }, { "epoch": 0.23660039039064415, "grad_norm": 0.12977376580238342, "learning_rate": 9.989808182119569e-06, "loss": 0.0034, "step": 14000 }, { "epoch": 0.2367693906694946, "grad_norm": 0.3293931782245636, "learning_rate": 9.989713846308267e-06, "loss": 0.0039, "step": 14010 }, { "epoch": 0.23693839094834507, "grad_norm": 0.11267578601837158, "learning_rate": 9.989619076367683e-06, "loss": 0.0038, "step": 14020 }, { "epoch": 0.2371073912271955, "grad_norm": 0.33272507786750793, "learning_rate": 9.989523872306067e-06, "loss": 0.0038, "step": 14030 }, { "epoch": 0.23727639150604599, "grad_norm": 0.20745112001895905, "learning_rate": 9.989428234131697e-06, "loss": 0.0048, "step": 14040 }, { "epoch": 0.23744539178489643, "grad_norm": 0.1488361805677414, "learning_rate": 9.989332161852896e-06, "loss": 0.0037, "step": 14050 }, { "epoch": 0.2376143920637469, "grad_norm": 0.036361102014780045, "learning_rate": 9.989235655478025e-06, "loss": 0.0051, "step": 14060 }, { "epoch": 0.23778339234259738, "grad_norm": 0.2059086710214615, "learning_rate": 9.989138715015478e-06, "loss": 0.0039, "step": 14070 }, { "epoch": 0.23795239262144782, "grad_norm": 0.08130540698766708, "learning_rate": 9.989041340473688e-06, "loss": 0.0047, "step": 14080 }, { "epoch": 0.2381213929002983, "grad_norm": 0.1256265938282013, "learning_rate": 9.98894353186113e-06, "loss": 0.0036, "step": 14090 }, { "epoch": 0.23829039317914874, "grad_norm": 0.09473961591720581, "learning_rate": 9.988845289186314e-06, "loss": 0.0075, "step": 14100 }, { "epoch": 0.2384593934579992, "grad_norm": 0.14110074937343597, "learning_rate": 9.988746612457786e-06, "loss": 0.0036, "step": 14110 }, { "epoch": 0.23862839373684966, "grad_norm": 0.3486090302467346, "learning_rate": 9.988647501684131e-06, "loss": 0.0057, "step": 14120 }, { "epoch": 0.23879739401570013, "grad_norm": 0.377188503742218, "learning_rate": 9.988547956873974e-06, "loss": 0.0035, "step": 14130 }, { "epoch": 0.23896639429455058, "grad_norm": 0.2691023349761963, "learning_rate": 9.988447978035974e-06, "loss": 0.0036, "step": 14140 }, { "epoch": 0.23913539457340105, "grad_norm": 0.24957741796970367, "learning_rate": 9.988347565178828e-06, "loss": 0.0042, "step": 14150 }, { "epoch": 0.2393043948522515, "grad_norm": 0.2357136458158493, "learning_rate": 9.988246718311276e-06, "loss": 0.004, "step": 14160 }, { "epoch": 0.23947339513110197, "grad_norm": 0.7160472273826599, "learning_rate": 9.988145437442093e-06, "loss": 0.0036, "step": 14170 }, { "epoch": 0.2396423954099524, "grad_norm": 0.24182184040546417, "learning_rate": 9.988043722580088e-06, "loss": 0.0042, "step": 14180 }, { "epoch": 0.2398113956888029, "grad_norm": 0.3829415440559387, "learning_rate": 9.98794157373411e-06, "loss": 0.0038, "step": 14190 }, { "epoch": 0.23998039596765336, "grad_norm": 0.15229769051074982, "learning_rate": 9.987838990913049e-06, "loss": 0.0045, "step": 14200 }, { "epoch": 0.2401493962465038, "grad_norm": 0.3212862014770508, "learning_rate": 9.98773597412583e-06, "loss": 0.0061, "step": 14210 }, { "epoch": 0.24031839652535428, "grad_norm": 0.2428811639547348, "learning_rate": 9.987632523381412e-06, "loss": 0.0045, "step": 14220 }, { "epoch": 0.24048739680420472, "grad_norm": 0.329699844121933, "learning_rate": 9.9875286386888e-06, "loss": 0.0074, "step": 14230 }, { "epoch": 0.2406563970830552, "grad_norm": 0.148408442735672, "learning_rate": 9.987424320057035e-06, "loss": 0.0042, "step": 14240 }, { "epoch": 0.24082539736190564, "grad_norm": 0.28386399149894714, "learning_rate": 9.987319567495187e-06, "loss": 0.0038, "step": 14250 }, { "epoch": 0.24099439764075611, "grad_norm": 0.15869386494159698, "learning_rate": 9.987214381012372e-06, "loss": 0.0039, "step": 14260 }, { "epoch": 0.24116339791960656, "grad_norm": 0.12310047447681427, "learning_rate": 9.987108760617741e-06, "loss": 0.003, "step": 14270 }, { "epoch": 0.24133239819845703, "grad_norm": 0.14694136381149292, "learning_rate": 9.987002706320488e-06, "loss": 0.0042, "step": 14280 }, { "epoch": 0.24150139847730748, "grad_norm": 0.07741039246320724, "learning_rate": 9.986896218129834e-06, "loss": 0.004, "step": 14290 }, { "epoch": 0.24167039875615795, "grad_norm": 0.3411828875541687, "learning_rate": 9.986789296055048e-06, "loss": 0.0042, "step": 14300 }, { "epoch": 0.2418393990350084, "grad_norm": 0.185929074883461, "learning_rate": 9.986681940105432e-06, "loss": 0.0046, "step": 14310 }, { "epoch": 0.24200839931385887, "grad_norm": 0.06522829830646515, "learning_rate": 9.986574150290327e-06, "loss": 0.0048, "step": 14320 }, { "epoch": 0.24217739959270934, "grad_norm": 0.23070041835308075, "learning_rate": 9.986465926619109e-06, "loss": 0.0044, "step": 14330 }, { "epoch": 0.2423463998715598, "grad_norm": 0.07087959349155426, "learning_rate": 9.986357269101197e-06, "loss": 0.0026, "step": 14340 }, { "epoch": 0.24251540015041026, "grad_norm": 0.13839539885520935, "learning_rate": 9.986248177746041e-06, "loss": 0.0041, "step": 14350 }, { "epoch": 0.2426844004292607, "grad_norm": 0.3412225842475891, "learning_rate": 9.986138652563135e-06, "loss": 0.0033, "step": 14360 }, { "epoch": 0.24285340070811118, "grad_norm": 0.11813254654407501, "learning_rate": 9.98602869356201e-06, "loss": 0.0035, "step": 14370 }, { "epoch": 0.24302240098696162, "grad_norm": 0.12311132252216339, "learning_rate": 9.985918300752229e-06, "loss": 0.0029, "step": 14380 }, { "epoch": 0.2431914012658121, "grad_norm": 0.008245767094194889, "learning_rate": 9.985807474143398e-06, "loss": 0.0034, "step": 14390 }, { "epoch": 0.24336040154466254, "grad_norm": 0.19633473455905914, "learning_rate": 9.985696213745162e-06, "loss": 0.0063, "step": 14400 }, { "epoch": 0.24352940182351301, "grad_norm": 0.1152830645442009, "learning_rate": 9.985584519567197e-06, "loss": 0.0034, "step": 14410 }, { "epoch": 0.24369840210236346, "grad_norm": 0.30895453691482544, "learning_rate": 9.985472391619225e-06, "loss": 0.0049, "step": 14420 }, { "epoch": 0.24386740238121393, "grad_norm": 0.09675145894289017, "learning_rate": 9.985359829910998e-06, "loss": 0.0049, "step": 14430 }, { "epoch": 0.24403640266006438, "grad_norm": 0.15377745032310486, "learning_rate": 9.98524683445231e-06, "loss": 0.007, "step": 14440 }, { "epoch": 0.24420540293891485, "grad_norm": 0.1542172133922577, "learning_rate": 9.985133405252997e-06, "loss": 0.0051, "step": 14450 }, { "epoch": 0.2443744032177653, "grad_norm": 0.09837678074836731, "learning_rate": 9.985019542322923e-06, "loss": 0.0047, "step": 14460 }, { "epoch": 0.24454340349661577, "grad_norm": 0.2775014638900757, "learning_rate": 9.984905245671995e-06, "loss": 0.004, "step": 14470 }, { "epoch": 0.24471240377546624, "grad_norm": 0.11706257611513138, "learning_rate": 9.984790515310158e-06, "loss": 0.0027, "step": 14480 }, { "epoch": 0.2448814040543167, "grad_norm": 0.20297011733055115, "learning_rate": 9.984675351247395e-06, "loss": 0.0052, "step": 14490 }, { "epoch": 0.24505040433316716, "grad_norm": 0.12076503783464432, "learning_rate": 9.984559753493724e-06, "loss": 0.0027, "step": 14500 }, { "epoch": 0.2452194046120176, "grad_norm": 0.1017741933465004, "learning_rate": 9.984443722059206e-06, "loss": 0.0047, "step": 14510 }, { "epoch": 0.24538840489086808, "grad_norm": 0.1515427678823471, "learning_rate": 9.984327256953932e-06, "loss": 0.0059, "step": 14520 }, { "epoch": 0.24555740516971852, "grad_norm": 0.11836301535367966, "learning_rate": 9.984210358188038e-06, "loss": 0.003, "step": 14530 }, { "epoch": 0.245726405448569, "grad_norm": 0.14873522520065308, "learning_rate": 9.984093025771693e-06, "loss": 0.0032, "step": 14540 }, { "epoch": 0.24589540572741944, "grad_norm": 0.13684223592281342, "learning_rate": 9.983975259715106e-06, "loss": 0.0048, "step": 14550 }, { "epoch": 0.24606440600626991, "grad_norm": 0.22442513704299927, "learning_rate": 9.983857060028524e-06, "loss": 0.0029, "step": 14560 }, { "epoch": 0.24623340628512036, "grad_norm": 0.11228632926940918, "learning_rate": 9.983738426722232e-06, "loss": 0.0063, "step": 14570 }, { "epoch": 0.24640240656397083, "grad_norm": 0.13161133229732513, "learning_rate": 9.983619359806547e-06, "loss": 0.0031, "step": 14580 }, { "epoch": 0.24657140684282128, "grad_norm": 0.17749890685081482, "learning_rate": 9.983499859291834e-06, "loss": 0.005, "step": 14590 }, { "epoch": 0.24674040712167175, "grad_norm": 0.1559005081653595, "learning_rate": 9.983379925188488e-06, "loss": 0.0049, "step": 14600 }, { "epoch": 0.24690940740052222, "grad_norm": 0.11561734974384308, "learning_rate": 9.983259557506941e-06, "loss": 0.0076, "step": 14610 }, { "epoch": 0.24707840767937267, "grad_norm": 0.20156532526016235, "learning_rate": 9.98313875625767e-06, "loss": 0.0062, "step": 14620 }, { "epoch": 0.24724740795822314, "grad_norm": 0.08245956152677536, "learning_rate": 9.983017521451183e-06, "loss": 0.0033, "step": 14630 }, { "epoch": 0.2474164082370736, "grad_norm": 0.09482322633266449, "learning_rate": 9.982895853098027e-06, "loss": 0.0037, "step": 14640 }, { "epoch": 0.24758540851592406, "grad_norm": 0.09171206504106522, "learning_rate": 9.98277375120879e-06, "loss": 0.0037, "step": 14650 }, { "epoch": 0.2477544087947745, "grad_norm": 0.049698662012815475, "learning_rate": 9.982651215794096e-06, "loss": 0.0028, "step": 14660 }, { "epoch": 0.24792340907362498, "grad_norm": 0.1317257583141327, "learning_rate": 9.982528246864603e-06, "loss": 0.0029, "step": 14670 }, { "epoch": 0.24809240935247542, "grad_norm": 0.2114088535308838, "learning_rate": 9.982404844431013e-06, "loss": 0.0031, "step": 14680 }, { "epoch": 0.2482614096313259, "grad_norm": 0.08456574380397797, "learning_rate": 9.98228100850406e-06, "loss": 0.0031, "step": 14690 }, { "epoch": 0.24843040991017634, "grad_norm": 0.26122376322746277, "learning_rate": 9.98215673909452e-06, "loss": 0.0055, "step": 14700 }, { "epoch": 0.24859941018902681, "grad_norm": 0.3789493143558502, "learning_rate": 9.982032036213202e-06, "loss": 0.0029, "step": 14710 }, { "epoch": 0.24876841046787726, "grad_norm": 0.2621791660785675, "learning_rate": 9.981906899870962e-06, "loss": 0.0056, "step": 14720 }, { "epoch": 0.24893741074672773, "grad_norm": 0.06250303238630295, "learning_rate": 9.981781330078683e-06, "loss": 0.0031, "step": 14730 }, { "epoch": 0.2491064110255782, "grad_norm": 0.3181895613670349, "learning_rate": 9.98165532684729e-06, "loss": 0.0054, "step": 14740 }, { "epoch": 0.24927541130442865, "grad_norm": 0.15799207985401154, "learning_rate": 9.981528890187749e-06, "loss": 0.0058, "step": 14750 }, { "epoch": 0.24944441158327912, "grad_norm": 0.1720704287290573, "learning_rate": 9.981402020111057e-06, "loss": 0.0031, "step": 14760 }, { "epoch": 0.24961341186212957, "grad_norm": 0.13233742117881775, "learning_rate": 9.981274716628253e-06, "loss": 0.004, "step": 14770 }, { "epoch": 0.24978241214098004, "grad_norm": 0.15866127610206604, "learning_rate": 9.981146979750414e-06, "loss": 0.0043, "step": 14780 }, { "epoch": 0.2499514124198305, "grad_norm": 0.08606252819299698, "learning_rate": 9.981018809488655e-06, "loss": 0.004, "step": 14790 }, { "epoch": 0.25012041269868096, "grad_norm": 0.14118173718452454, "learning_rate": 9.980890205854125e-06, "loss": 0.0039, "step": 14800 }, { "epoch": 0.25028941297753143, "grad_norm": 0.057821471244096756, "learning_rate": 9.980761168858015e-06, "loss": 0.0035, "step": 14810 }, { "epoch": 0.25045841325638185, "grad_norm": 0.3060109615325928, "learning_rate": 9.98063169851155e-06, "loss": 0.0048, "step": 14820 }, { "epoch": 0.2506274135352323, "grad_norm": 0.39351001381874084, "learning_rate": 9.980501794825995e-06, "loss": 0.0069, "step": 14830 }, { "epoch": 0.2507964138140828, "grad_norm": 0.2610602378845215, "learning_rate": 9.980371457812654e-06, "loss": 0.0054, "step": 14840 }, { "epoch": 0.25096541409293327, "grad_norm": 0.08033086359500885, "learning_rate": 9.980240687482864e-06, "loss": 0.0038, "step": 14850 }, { "epoch": 0.2511344143717837, "grad_norm": 0.2523294985294342, "learning_rate": 9.980109483848005e-06, "loss": 0.0047, "step": 14860 }, { "epoch": 0.25130341465063416, "grad_norm": 0.09582140296697617, "learning_rate": 9.979977846919494e-06, "loss": 0.0029, "step": 14870 }, { "epoch": 0.25147241492948463, "grad_norm": 0.07006009668111801, "learning_rate": 9.979845776708779e-06, "loss": 0.0043, "step": 14880 }, { "epoch": 0.2516414152083351, "grad_norm": 0.2001093178987503, "learning_rate": 9.979713273227356e-06, "loss": 0.0037, "step": 14890 }, { "epoch": 0.2518104154871856, "grad_norm": 0.3163839876651764, "learning_rate": 9.979580336486749e-06, "loss": 0.0052, "step": 14900 }, { "epoch": 0.251979415766036, "grad_norm": 0.15247350931167603, "learning_rate": 9.979446966498527e-06, "loss": 0.004, "step": 14910 }, { "epoch": 0.25214841604488647, "grad_norm": 0.207497701048851, "learning_rate": 9.979313163274293e-06, "loss": 0.0054, "step": 14920 }, { "epoch": 0.25231741632373694, "grad_norm": 0.05913986265659332, "learning_rate": 9.979178926825688e-06, "loss": 0.0025, "step": 14930 }, { "epoch": 0.2524864166025874, "grad_norm": 0.11547628045082092, "learning_rate": 9.979044257164391e-06, "loss": 0.0053, "step": 14940 }, { "epoch": 0.25265541688143783, "grad_norm": 0.4229731857776642, "learning_rate": 9.978909154302122e-06, "loss": 0.0034, "step": 14950 }, { "epoch": 0.2528244171602883, "grad_norm": 0.09081882983446121, "learning_rate": 9.97877361825063e-06, "loss": 0.0111, "step": 14960 }, { "epoch": 0.2529934174391388, "grad_norm": 0.1778619885444641, "learning_rate": 9.978637649021712e-06, "loss": 0.0062, "step": 14970 }, { "epoch": 0.25316241771798925, "grad_norm": 0.22037698328495026, "learning_rate": 9.978501246627197e-06, "loss": 0.0048, "step": 14980 }, { "epoch": 0.25333141799683967, "grad_norm": 0.22217440605163574, "learning_rate": 9.978364411078951e-06, "loss": 0.0035, "step": 14990 }, { "epoch": 0.25350041827569014, "grad_norm": 0.1691211760044098, "learning_rate": 9.97822714238888e-06, "loss": 0.0059, "step": 15000 }, { "epoch": 0.2536694185545406, "grad_norm": 0.3209209740161896, "learning_rate": 9.97808944056893e-06, "loss": 0.0058, "step": 15010 }, { "epoch": 0.2538384188333911, "grad_norm": 0.17985154688358307, "learning_rate": 9.97795130563108e-06, "loss": 0.0034, "step": 15020 }, { "epoch": 0.25400741911224156, "grad_norm": 0.31680458784103394, "learning_rate": 9.977812737587345e-06, "loss": 0.0048, "step": 15030 }, { "epoch": 0.254176419391092, "grad_norm": 0.3769959807395935, "learning_rate": 9.977673736449783e-06, "loss": 0.003, "step": 15040 }, { "epoch": 0.25434541966994245, "grad_norm": 0.18421614170074463, "learning_rate": 9.97753430223049e-06, "loss": 0.0032, "step": 15050 }, { "epoch": 0.2545144199487929, "grad_norm": 0.24074816703796387, "learning_rate": 9.977394434941597e-06, "loss": 0.0049, "step": 15060 }, { "epoch": 0.2546834202276434, "grad_norm": 0.34638261795043945, "learning_rate": 9.977254134595271e-06, "loss": 0.0047, "step": 15070 }, { "epoch": 0.2548524205064938, "grad_norm": 0.0570184588432312, "learning_rate": 9.97711340120372e-06, "loss": 0.0046, "step": 15080 }, { "epoch": 0.2550214207853443, "grad_norm": 0.15112407505512238, "learning_rate": 9.976972234779188e-06, "loss": 0.0074, "step": 15090 }, { "epoch": 0.25519042106419476, "grad_norm": 0.0630689412355423, "learning_rate": 9.976830635333958e-06, "loss": 0.005, "step": 15100 }, { "epoch": 0.25535942134304523, "grad_norm": 0.07248848676681519, "learning_rate": 9.97668860288035e-06, "loss": 0.0033, "step": 15110 }, { "epoch": 0.25552842162189565, "grad_norm": 0.321814626455307, "learning_rate": 9.97654613743072e-06, "loss": 0.0048, "step": 15120 }, { "epoch": 0.2556974219007461, "grad_norm": 0.09801140427589417, "learning_rate": 9.976403238997466e-06, "loss": 0.0043, "step": 15130 }, { "epoch": 0.2558664221795966, "grad_norm": 0.09544934332370758, "learning_rate": 9.976259907593017e-06, "loss": 0.0035, "step": 15140 }, { "epoch": 0.25603542245844707, "grad_norm": 0.16570298373699188, "learning_rate": 9.976116143229846e-06, "loss": 0.003, "step": 15150 }, { "epoch": 0.25620442273729754, "grad_norm": 0.26754939556121826, "learning_rate": 9.975971945920459e-06, "loss": 0.0046, "step": 15160 }, { "epoch": 0.25637342301614796, "grad_norm": 0.3961745798587799, "learning_rate": 9.975827315677406e-06, "loss": 0.004, "step": 15170 }, { "epoch": 0.25654242329499843, "grad_norm": 0.1366022229194641, "learning_rate": 9.975682252513269e-06, "loss": 0.0052, "step": 15180 }, { "epoch": 0.2567114235738489, "grad_norm": 0.3830682337284088, "learning_rate": 9.975536756440666e-06, "loss": 0.0032, "step": 15190 }, { "epoch": 0.2568804238526994, "grad_norm": 0.07213328778743744, "learning_rate": 9.975390827472258e-06, "loss": 0.0035, "step": 15200 }, { "epoch": 0.2570494241315498, "grad_norm": 0.14138446748256683, "learning_rate": 9.975244465620744e-06, "loss": 0.0038, "step": 15210 }, { "epoch": 0.25721842441040027, "grad_norm": 0.1475197672843933, "learning_rate": 9.975097670898855e-06, "loss": 0.0031, "step": 15220 }, { "epoch": 0.25738742468925074, "grad_norm": 0.013520384207367897, "learning_rate": 9.974950443319362e-06, "loss": 0.0044, "step": 15230 }, { "epoch": 0.2575564249681012, "grad_norm": 0.14274832606315613, "learning_rate": 9.974802782895077e-06, "loss": 0.003, "step": 15240 }, { "epoch": 0.25772542524695163, "grad_norm": 0.19510887563228607, "learning_rate": 9.974654689638847e-06, "loss": 0.0048, "step": 15250 }, { "epoch": 0.2578944255258021, "grad_norm": 0.12843619287014008, "learning_rate": 9.974506163563557e-06, "loss": 0.0047, "step": 15260 }, { "epoch": 0.2580634258046526, "grad_norm": 0.1665114313364029, "learning_rate": 9.974357204682127e-06, "loss": 0.004, "step": 15270 }, { "epoch": 0.25823242608350305, "grad_norm": 0.17540499567985535, "learning_rate": 9.97420781300752e-06, "loss": 0.0041, "step": 15280 }, { "epoch": 0.25840142636235347, "grad_norm": 0.061821773648262024, "learning_rate": 9.974057988552733e-06, "loss": 0.0047, "step": 15290 }, { "epoch": 0.25857042664120394, "grad_norm": 0.13247564435005188, "learning_rate": 9.9739077313308e-06, "loss": 0.0037, "step": 15300 }, { "epoch": 0.2587394269200544, "grad_norm": 0.21004728972911835, "learning_rate": 9.973757041354795e-06, "loss": 0.0037, "step": 15310 }, { "epoch": 0.2589084271989049, "grad_norm": 0.06903041154146194, "learning_rate": 9.97360591863783e-06, "loss": 0.0059, "step": 15320 }, { "epoch": 0.25907742747775536, "grad_norm": 0.11489465087652206, "learning_rate": 9.973454363193053e-06, "loss": 0.0039, "step": 15330 }, { "epoch": 0.2592464277566058, "grad_norm": 0.28296104073524475, "learning_rate": 9.973302375033648e-06, "loss": 0.0035, "step": 15340 }, { "epoch": 0.25941542803545625, "grad_norm": 0.038037847727537155, "learning_rate": 9.97314995417284e-06, "loss": 0.0036, "step": 15350 }, { "epoch": 0.2595844283143067, "grad_norm": 0.24455541372299194, "learning_rate": 9.972997100623892e-06, "loss": 0.0051, "step": 15360 }, { "epoch": 0.2597534285931572, "grad_norm": 0.1739683598279953, "learning_rate": 9.9728438144001e-06, "loss": 0.0057, "step": 15370 }, { "epoch": 0.2599224288720076, "grad_norm": 0.014664621092379093, "learning_rate": 9.972690095514802e-06, "loss": 0.0034, "step": 15380 }, { "epoch": 0.2600914291508581, "grad_norm": 0.13587848842144012, "learning_rate": 9.972535943981374e-06, "loss": 0.0029, "step": 15390 }, { "epoch": 0.26026042942970856, "grad_norm": 0.09054923802614212, "learning_rate": 9.972381359813227e-06, "loss": 0.0051, "step": 15400 }, { "epoch": 0.26042942970855903, "grad_norm": 0.14758436381816864, "learning_rate": 9.972226343023809e-06, "loss": 0.0034, "step": 15410 }, { "epoch": 0.26059842998740945, "grad_norm": 0.23704244196414948, "learning_rate": 9.972070893626607e-06, "loss": 0.0048, "step": 15420 }, { "epoch": 0.2607674302662599, "grad_norm": 0.11782558262348175, "learning_rate": 9.971915011635149e-06, "loss": 0.0038, "step": 15430 }, { "epoch": 0.2609364305451104, "grad_norm": 0.19053632020950317, "learning_rate": 9.971758697062994e-06, "loss": 0.0038, "step": 15440 }, { "epoch": 0.26110543082396087, "grad_norm": 0.02710811421275139, "learning_rate": 9.971601949923744e-06, "loss": 0.0044, "step": 15450 }, { "epoch": 0.26127443110281134, "grad_norm": 0.18848317861557007, "learning_rate": 9.971444770231036e-06, "loss": 0.0058, "step": 15460 }, { "epoch": 0.26144343138166176, "grad_norm": 0.09840314090251923, "learning_rate": 9.971287157998546e-06, "loss": 0.0035, "step": 15470 }, { "epoch": 0.26161243166051223, "grad_norm": 0.17986223101615906, "learning_rate": 9.971129113239988e-06, "loss": 0.0056, "step": 15480 }, { "epoch": 0.2617814319393627, "grad_norm": 0.10968855768442154, "learning_rate": 9.970970635969111e-06, "loss": 0.0032, "step": 15490 }, { "epoch": 0.2619504322182132, "grad_norm": 0.06092964857816696, "learning_rate": 9.970811726199702e-06, "loss": 0.0094, "step": 15500 }, { "epoch": 0.2621194324970636, "grad_norm": 0.04663922265172005, "learning_rate": 9.970652383945591e-06, "loss": 0.0034, "step": 15510 }, { "epoch": 0.26228843277591407, "grad_norm": 0.06795930862426758, "learning_rate": 9.970492609220638e-06, "loss": 0.0035, "step": 15520 }, { "epoch": 0.26245743305476454, "grad_norm": 0.2160760760307312, "learning_rate": 9.970332402038745e-06, "loss": 0.0046, "step": 15530 }, { "epoch": 0.262626433333615, "grad_norm": 0.15789270401000977, "learning_rate": 9.970171762413852e-06, "loss": 0.0044, "step": 15540 }, { "epoch": 0.26279543361246543, "grad_norm": 0.3337605893611908, "learning_rate": 9.970010690359935e-06, "loss": 0.0051, "step": 15550 }, { "epoch": 0.2629644338913159, "grad_norm": 0.013609246350824833, "learning_rate": 9.969849185891007e-06, "loss": 0.0034, "step": 15560 }, { "epoch": 0.2631334341701664, "grad_norm": 0.1721629798412323, "learning_rate": 9.96968724902112e-06, "loss": 0.0052, "step": 15570 }, { "epoch": 0.26330243444901685, "grad_norm": 0.07175387442111969, "learning_rate": 9.969524879764364e-06, "loss": 0.0045, "step": 15580 }, { "epoch": 0.2634714347278673, "grad_norm": 0.09726123511791229, "learning_rate": 9.969362078134867e-06, "loss": 0.006, "step": 15590 }, { "epoch": 0.26364043500671774, "grad_norm": 0.16029754281044006, "learning_rate": 9.96919884414679e-06, "loss": 0.0029, "step": 15600 }, { "epoch": 0.2638094352855682, "grad_norm": 0.14958487451076508, "learning_rate": 9.96903517781434e-06, "loss": 0.0041, "step": 15610 }, { "epoch": 0.2639784355644187, "grad_norm": 0.2625289857387543, "learning_rate": 9.96887107915175e-06, "loss": 0.0043, "step": 15620 }, { "epoch": 0.26414743584326916, "grad_norm": 0.011520381085574627, "learning_rate": 9.968706548173304e-06, "loss": 0.004, "step": 15630 }, { "epoch": 0.2643164361221196, "grad_norm": 0.38226380944252014, "learning_rate": 9.968541584893314e-06, "loss": 0.0042, "step": 15640 }, { "epoch": 0.26448543640097005, "grad_norm": 0.09610099345445633, "learning_rate": 9.968376189326131e-06, "loss": 0.0028, "step": 15650 }, { "epoch": 0.2646544366798205, "grad_norm": 0.21713967621326447, "learning_rate": 9.96821036148615e-06, "loss": 0.0039, "step": 15660 }, { "epoch": 0.264823436958671, "grad_norm": 0.4078729450702667, "learning_rate": 9.968044101387794e-06, "loss": 0.0071, "step": 15670 }, { "epoch": 0.2649924372375214, "grad_norm": 0.15719065070152283, "learning_rate": 9.967877409045533e-06, "loss": 0.0045, "step": 15680 }, { "epoch": 0.2651614375163719, "grad_norm": 0.22879549860954285, "learning_rate": 9.967710284473866e-06, "loss": 0.0042, "step": 15690 }, { "epoch": 0.26533043779522236, "grad_norm": 0.3180147409439087, "learning_rate": 9.967542727687334e-06, "loss": 0.0077, "step": 15700 }, { "epoch": 0.26549943807407284, "grad_norm": 0.05374595522880554, "learning_rate": 9.967374738700518e-06, "loss": 0.0026, "step": 15710 }, { "epoch": 0.2656684383529233, "grad_norm": 0.38689932227134705, "learning_rate": 9.967206317528032e-06, "loss": 0.0033, "step": 15720 }, { "epoch": 0.2658374386317737, "grad_norm": 0.09185245633125305, "learning_rate": 9.967037464184529e-06, "loss": 0.0041, "step": 15730 }, { "epoch": 0.2660064389106242, "grad_norm": 0.2640765905380249, "learning_rate": 9.9668681786847e-06, "loss": 0.0045, "step": 15740 }, { "epoch": 0.26617543918947467, "grad_norm": 0.12624865770339966, "learning_rate": 9.966698461043277e-06, "loss": 0.005, "step": 15750 }, { "epoch": 0.26634443946832514, "grad_norm": 0.20609816908836365, "learning_rate": 9.966528311275022e-06, "loss": 0.0037, "step": 15760 }, { "epoch": 0.26651343974717556, "grad_norm": 0.1872451901435852, "learning_rate": 9.96635772939474e-06, "loss": 0.0063, "step": 15770 }, { "epoch": 0.26668244002602604, "grad_norm": 0.11002341657876968, "learning_rate": 9.966186715417274e-06, "loss": 0.0052, "step": 15780 }, { "epoch": 0.2668514403048765, "grad_norm": 0.08211161196231842, "learning_rate": 9.966015269357502e-06, "loss": 0.0076, "step": 15790 }, { "epoch": 0.267020440583727, "grad_norm": 0.08170771598815918, "learning_rate": 9.96584339123034e-06, "loss": 0.0029, "step": 15800 }, { "epoch": 0.2671894408625774, "grad_norm": 0.11849153786897659, "learning_rate": 9.965671081050745e-06, "loss": 0.0057, "step": 15810 }, { "epoch": 0.26735844114142787, "grad_norm": 0.09837999939918518, "learning_rate": 9.965498338833705e-06, "loss": 0.0043, "step": 15820 }, { "epoch": 0.26752744142027834, "grad_norm": 0.4591045081615448, "learning_rate": 9.96532516459425e-06, "loss": 0.007, "step": 15830 }, { "epoch": 0.2676964416991288, "grad_norm": 0.29004165530204773, "learning_rate": 9.96515155834745e-06, "loss": 0.0033, "step": 15840 }, { "epoch": 0.2678654419779793, "grad_norm": 0.16533708572387695, "learning_rate": 9.964977520108407e-06, "loss": 0.0034, "step": 15850 }, { "epoch": 0.2680344422568297, "grad_norm": 0.26187756657600403, "learning_rate": 9.964803049892265e-06, "loss": 0.0049, "step": 15860 }, { "epoch": 0.2682034425356802, "grad_norm": 0.1001354232430458, "learning_rate": 9.964628147714202e-06, "loss": 0.004, "step": 15870 }, { "epoch": 0.26837244281453065, "grad_norm": 0.17999950051307678, "learning_rate": 9.964452813589435e-06, "loss": 0.0053, "step": 15880 }, { "epoch": 0.2685414430933811, "grad_norm": 0.1562132090330124, "learning_rate": 9.96427704753322e-06, "loss": 0.0032, "step": 15890 }, { "epoch": 0.26871044337223154, "grad_norm": 0.04589414224028587, "learning_rate": 9.96410084956085e-06, "loss": 0.0016, "step": 15900 }, { "epoch": 0.268879443651082, "grad_norm": 0.2619129419326782, "learning_rate": 9.963924219687655e-06, "loss": 0.005, "step": 15910 }, { "epoch": 0.2690484439299325, "grad_norm": 0.1960720419883728, "learning_rate": 9.963747157929e-06, "loss": 0.0046, "step": 15920 }, { "epoch": 0.26921744420878296, "grad_norm": 0.056682687252759933, "learning_rate": 9.963569664300294e-06, "loss": 0.0034, "step": 15930 }, { "epoch": 0.2693864444876334, "grad_norm": 0.1986301988363266, "learning_rate": 9.963391738816979e-06, "loss": 0.0027, "step": 15940 }, { "epoch": 0.26955544476648385, "grad_norm": 0.21491877734661102, "learning_rate": 9.963213381494532e-06, "loss": 0.006, "step": 15950 }, { "epoch": 0.2697244450453343, "grad_norm": 0.14494657516479492, "learning_rate": 9.963034592348476e-06, "loss": 0.0042, "step": 15960 }, { "epoch": 0.2698934453241848, "grad_norm": 0.11323204636573792, "learning_rate": 9.962855371394362e-06, "loss": 0.0042, "step": 15970 }, { "epoch": 0.2700624456030353, "grad_norm": 0.14095667004585266, "learning_rate": 9.962675718647788e-06, "loss": 0.0035, "step": 15980 }, { "epoch": 0.2702314458818857, "grad_norm": 0.14305680990219116, "learning_rate": 9.962495634124378e-06, "loss": 0.0026, "step": 15990 }, { "epoch": 0.27040044616073616, "grad_norm": 0.2864258289337158, "learning_rate": 9.962315117839807e-06, "loss": 0.0041, "step": 16000 }, { "epoch": 0.27056944643958664, "grad_norm": 0.07068732380867004, "learning_rate": 9.962134169809776e-06, "loss": 0.0027, "step": 16010 }, { "epoch": 0.2707384467184371, "grad_norm": 0.0339294970035553, "learning_rate": 9.961952790050031e-06, "loss": 0.0058, "step": 16020 }, { "epoch": 0.2709074469972875, "grad_norm": 0.15254400670528412, "learning_rate": 9.961770978576352e-06, "loss": 0.0059, "step": 16030 }, { "epoch": 0.271076447276138, "grad_norm": 0.18165895342826843, "learning_rate": 9.961588735404557e-06, "loss": 0.0042, "step": 16040 }, { "epoch": 0.2712454475549885, "grad_norm": 0.2015973925590515, "learning_rate": 9.961406060550503e-06, "loss": 0.0036, "step": 16050 }, { "epoch": 0.27141444783383895, "grad_norm": 0.14254651963710785, "learning_rate": 9.961222954030084e-06, "loss": 0.0029, "step": 16060 }, { "epoch": 0.27158344811268936, "grad_norm": 0.3237738013267517, "learning_rate": 9.961039415859228e-06, "loss": 0.0047, "step": 16070 }, { "epoch": 0.27175244839153984, "grad_norm": 0.03651570901274681, "learning_rate": 9.960855446053908e-06, "loss": 0.0059, "step": 16080 }, { "epoch": 0.2719214486703903, "grad_norm": 0.13540002703666687, "learning_rate": 9.960671044630129e-06, "loss": 0.0028, "step": 16090 }, { "epoch": 0.2720904489492408, "grad_norm": 0.1004335880279541, "learning_rate": 9.960486211603932e-06, "loss": 0.0044, "step": 16100 }, { "epoch": 0.2722594492280912, "grad_norm": 0.03541584312915802, "learning_rate": 9.960300946991402e-06, "loss": 0.0029, "step": 16110 }, { "epoch": 0.2724284495069417, "grad_norm": 0.2536197602748871, "learning_rate": 9.960115250808654e-06, "loss": 0.003, "step": 16120 }, { "epoch": 0.27259744978579215, "grad_norm": 0.3499818742275238, "learning_rate": 9.959929123071848e-06, "loss": 0.0057, "step": 16130 }, { "epoch": 0.2727664500646426, "grad_norm": 0.018729379400610924, "learning_rate": 9.959742563797178e-06, "loss": 0.0041, "step": 16140 }, { "epoch": 0.2729354503434931, "grad_norm": 0.20778696238994598, "learning_rate": 9.959555573000874e-06, "loss": 0.0038, "step": 16150 }, { "epoch": 0.2731044506223435, "grad_norm": 0.03607981279492378, "learning_rate": 9.959368150699205e-06, "loss": 0.0048, "step": 16160 }, { "epoch": 0.273273450901194, "grad_norm": 0.04568514972925186, "learning_rate": 9.95918029690848e-06, "loss": 0.0036, "step": 16170 }, { "epoch": 0.27344245118004445, "grad_norm": 0.08559437096118927, "learning_rate": 9.958992011645038e-06, "loss": 0.0034, "step": 16180 }, { "epoch": 0.2736114514588949, "grad_norm": 0.027680907398462296, "learning_rate": 9.958803294925268e-06, "loss": 0.0036, "step": 16190 }, { "epoch": 0.27378045173774535, "grad_norm": 0.092922143638134, "learning_rate": 9.958614146765583e-06, "loss": 0.0029, "step": 16200 }, { "epoch": 0.2739494520165958, "grad_norm": 0.14137724041938782, "learning_rate": 9.958424567182443e-06, "loss": 0.0056, "step": 16210 }, { "epoch": 0.2741184522954463, "grad_norm": 0.19524511694908142, "learning_rate": 9.95823455619234e-06, "loss": 0.0022, "step": 16220 }, { "epoch": 0.27428745257429676, "grad_norm": 0.0908576250076294, "learning_rate": 9.95804411381181e-06, "loss": 0.0026, "step": 16230 }, { "epoch": 0.2744564528531472, "grad_norm": 0.16146492958068848, "learning_rate": 9.957853240057418e-06, "loss": 0.0034, "step": 16240 }, { "epoch": 0.27462545313199765, "grad_norm": 0.23391632735729218, "learning_rate": 9.957661934945773e-06, "loss": 0.0046, "step": 16250 }, { "epoch": 0.2747944534108481, "grad_norm": 0.20969712734222412, "learning_rate": 9.95747019849352e-06, "loss": 0.0047, "step": 16260 }, { "epoch": 0.2749634536896986, "grad_norm": 0.026646453887224197, "learning_rate": 9.957278030717338e-06, "loss": 0.0022, "step": 16270 }, { "epoch": 0.2751324539685491, "grad_norm": 0.10912485420703888, "learning_rate": 9.957085431633953e-06, "loss": 0.0026, "step": 16280 }, { "epoch": 0.2753014542473995, "grad_norm": 0.29427626729011536, "learning_rate": 9.956892401260115e-06, "loss": 0.0045, "step": 16290 }, { "epoch": 0.27547045452624996, "grad_norm": 0.12857367098331451, "learning_rate": 9.956698939612622e-06, "loss": 0.0033, "step": 16300 }, { "epoch": 0.27563945480510044, "grad_norm": 0.16636571288108826, "learning_rate": 9.956505046708304e-06, "loss": 0.0052, "step": 16310 }, { "epoch": 0.2758084550839509, "grad_norm": 0.10839074850082397, "learning_rate": 9.956310722564033e-06, "loss": 0.0036, "step": 16320 }, { "epoch": 0.2759774553628013, "grad_norm": 0.11550406366586685, "learning_rate": 9.956115967196716e-06, "loss": 0.0037, "step": 16330 }, { "epoch": 0.2761464556416518, "grad_norm": 0.10233989357948303, "learning_rate": 9.955920780623296e-06, "loss": 0.0042, "step": 16340 }, { "epoch": 0.2763154559205023, "grad_norm": 0.20577624440193176, "learning_rate": 9.955725162860758e-06, "loss": 0.0025, "step": 16350 }, { "epoch": 0.27648445619935275, "grad_norm": 0.16312482953071594, "learning_rate": 9.955529113926119e-06, "loss": 0.0054, "step": 16360 }, { "epoch": 0.27665345647820316, "grad_norm": 0.14550970494747162, "learning_rate": 9.955332633836435e-06, "loss": 0.0054, "step": 16370 }, { "epoch": 0.27682245675705364, "grad_norm": 0.35984915494918823, "learning_rate": 9.955135722608804e-06, "loss": 0.0052, "step": 16380 }, { "epoch": 0.2769914570359041, "grad_norm": 0.199117511510849, "learning_rate": 9.954938380260357e-06, "loss": 0.0045, "step": 16390 }, { "epoch": 0.2771604573147546, "grad_norm": 0.2786180377006531, "learning_rate": 9.954740606808265e-06, "loss": 0.0045, "step": 16400 }, { "epoch": 0.27732945759360506, "grad_norm": 0.08469782024621964, "learning_rate": 9.95454240226973e-06, "loss": 0.0043, "step": 16410 }, { "epoch": 0.2774984578724555, "grad_norm": 0.10433948785066605, "learning_rate": 9.954343766662004e-06, "loss": 0.0037, "step": 16420 }, { "epoch": 0.27766745815130595, "grad_norm": 0.3268999457359314, "learning_rate": 9.954144700002367e-06, "loss": 0.0046, "step": 16430 }, { "epoch": 0.2778364584301564, "grad_norm": 0.06011407449841499, "learning_rate": 9.953945202308135e-06, "loss": 0.0039, "step": 16440 }, { "epoch": 0.2780054587090069, "grad_norm": 0.05640736222267151, "learning_rate": 9.953745273596669e-06, "loss": 0.004, "step": 16450 }, { "epoch": 0.2781744589878573, "grad_norm": 0.11180564761161804, "learning_rate": 9.953544913885362e-06, "loss": 0.0052, "step": 16460 }, { "epoch": 0.2783434592667078, "grad_norm": 0.030079906806349754, "learning_rate": 9.953344123191649e-06, "loss": 0.0026, "step": 16470 }, { "epoch": 0.27851245954555826, "grad_norm": 0.22607122361660004, "learning_rate": 9.953142901532996e-06, "loss": 0.004, "step": 16480 }, { "epoch": 0.27868145982440873, "grad_norm": 0.04031490907073021, "learning_rate": 9.952941248926913e-06, "loss": 0.0056, "step": 16490 }, { "epoch": 0.27885046010325915, "grad_norm": 0.047156427055597305, "learning_rate": 9.952739165390944e-06, "loss": 0.0033, "step": 16500 }, { "epoch": 0.2790194603821096, "grad_norm": 0.01596965454518795, "learning_rate": 9.95253665094267e-06, "loss": 0.0017, "step": 16510 }, { "epoch": 0.2791884606609601, "grad_norm": 0.19963178038597107, "learning_rate": 9.952333705599712e-06, "loss": 0.0047, "step": 16520 }, { "epoch": 0.27935746093981056, "grad_norm": 0.07075642049312592, "learning_rate": 9.952130329379728e-06, "loss": 0.0033, "step": 16530 }, { "epoch": 0.27952646121866104, "grad_norm": 0.15401625633239746, "learning_rate": 9.95192652230041e-06, "loss": 0.0042, "step": 16540 }, { "epoch": 0.27969546149751146, "grad_norm": 0.15883556008338928, "learning_rate": 9.951722284379493e-06, "loss": 0.0035, "step": 16550 }, { "epoch": 0.27986446177636193, "grad_norm": 0.3158673346042633, "learning_rate": 9.951517615634745e-06, "loss": 0.0031, "step": 16560 }, { "epoch": 0.2800334620552124, "grad_norm": 0.07894056290388107, "learning_rate": 9.951312516083975e-06, "loss": 0.0033, "step": 16570 }, { "epoch": 0.2802024623340629, "grad_norm": 0.1073017567396164, "learning_rate": 9.951106985745024e-06, "loss": 0.0032, "step": 16580 }, { "epoch": 0.2803714626129133, "grad_norm": 0.0701015368103981, "learning_rate": 9.95090102463578e-06, "loss": 0.0037, "step": 16590 }, { "epoch": 0.28054046289176376, "grad_norm": 0.08924991637468338, "learning_rate": 9.950694632774157e-06, "loss": 0.0047, "step": 16600 }, { "epoch": 0.28070946317061424, "grad_norm": 0.1637372523546219, "learning_rate": 9.950487810178115e-06, "loss": 0.0028, "step": 16610 }, { "epoch": 0.2808784634494647, "grad_norm": 0.04116513207554817, "learning_rate": 9.950280556865649e-06, "loss": 0.0053, "step": 16620 }, { "epoch": 0.28104746372831513, "grad_norm": 0.14847958087921143, "learning_rate": 9.950072872854787e-06, "loss": 0.0024, "step": 16630 }, { "epoch": 0.2812164640071656, "grad_norm": 0.06650839000940323, "learning_rate": 9.949864758163603e-06, "loss": 0.0027, "step": 16640 }, { "epoch": 0.2813854642860161, "grad_norm": 0.0613737478852272, "learning_rate": 9.949656212810201e-06, "loss": 0.0046, "step": 16650 }, { "epoch": 0.28155446456486655, "grad_norm": 0.13115599751472473, "learning_rate": 9.94944723681273e-06, "loss": 0.0041, "step": 16660 }, { "epoch": 0.281723464843717, "grad_norm": 0.14419174194335938, "learning_rate": 9.949237830189366e-06, "loss": 0.0045, "step": 16670 }, { "epoch": 0.28189246512256744, "grad_norm": 0.12032968550920486, "learning_rate": 9.949027992958333e-06, "loss": 0.0029, "step": 16680 }, { "epoch": 0.2820614654014179, "grad_norm": 0.2870495319366455, "learning_rate": 9.948817725137884e-06, "loss": 0.0047, "step": 16690 }, { "epoch": 0.2822304656802684, "grad_norm": 0.1272762268781662, "learning_rate": 9.948607026746316e-06, "loss": 0.0047, "step": 16700 }, { "epoch": 0.28239946595911886, "grad_norm": 0.2602250874042511, "learning_rate": 9.948395897801962e-06, "loss": 0.0046, "step": 16710 }, { "epoch": 0.2825684662379693, "grad_norm": 0.01893269270658493, "learning_rate": 9.948184338323188e-06, "loss": 0.0033, "step": 16720 }, { "epoch": 0.28273746651681975, "grad_norm": 0.1346733719110489, "learning_rate": 9.9479723483284e-06, "loss": 0.0032, "step": 16730 }, { "epoch": 0.2829064667956702, "grad_norm": 0.08700961619615555, "learning_rate": 9.947759927836048e-06, "loss": 0.0035, "step": 16740 }, { "epoch": 0.2830754670745207, "grad_norm": 0.15652678906917572, "learning_rate": 9.947547076864607e-06, "loss": 0.0038, "step": 16750 }, { "epoch": 0.2832444673533711, "grad_norm": 0.20715713500976562, "learning_rate": 9.9473337954326e-06, "loss": 0.0062, "step": 16760 }, { "epoch": 0.2834134676322216, "grad_norm": 0.2449064552783966, "learning_rate": 9.947120083558582e-06, "loss": 0.0028, "step": 16770 }, { "epoch": 0.28358246791107206, "grad_norm": 0.11783338338136673, "learning_rate": 9.946905941261148e-06, "loss": 0.0056, "step": 16780 }, { "epoch": 0.28375146818992253, "grad_norm": 0.1863008588552475, "learning_rate": 9.946691368558929e-06, "loss": 0.0034, "step": 16790 }, { "epoch": 0.28392046846877295, "grad_norm": 0.23544996976852417, "learning_rate": 9.946476365470591e-06, "loss": 0.0068, "step": 16800 }, { "epoch": 0.2840894687476234, "grad_norm": 0.2011137753725052, "learning_rate": 9.946260932014847e-06, "loss": 0.0038, "step": 16810 }, { "epoch": 0.2842584690264739, "grad_norm": 0.17312456667423248, "learning_rate": 9.946045068210434e-06, "loss": 0.0035, "step": 16820 }, { "epoch": 0.28442746930532437, "grad_norm": 0.1152682825922966, "learning_rate": 9.945828774076138e-06, "loss": 0.0022, "step": 16830 }, { "epoch": 0.28459646958417484, "grad_norm": 0.1757705807685852, "learning_rate": 9.945612049630774e-06, "loss": 0.0036, "step": 16840 }, { "epoch": 0.28476546986302526, "grad_norm": 0.06967198848724365, "learning_rate": 9.9453948948932e-06, "loss": 0.0026, "step": 16850 }, { "epoch": 0.28493447014187573, "grad_norm": 0.0472136065363884, "learning_rate": 9.94517730988231e-06, "loss": 0.0037, "step": 16860 }, { "epoch": 0.2851034704207262, "grad_norm": 0.10674723237752914, "learning_rate": 9.944959294617031e-06, "loss": 0.0043, "step": 16870 }, { "epoch": 0.2852724706995767, "grad_norm": 0.24035988748073578, "learning_rate": 9.944740849116338e-06, "loss": 0.0041, "step": 16880 }, { "epoch": 0.2854414709784271, "grad_norm": 0.3217427730560303, "learning_rate": 9.944521973399233e-06, "loss": 0.0041, "step": 16890 }, { "epoch": 0.28561047125727757, "grad_norm": 0.21625342965126038, "learning_rate": 9.944302667484757e-06, "loss": 0.0033, "step": 16900 }, { "epoch": 0.28577947153612804, "grad_norm": 0.18237721920013428, "learning_rate": 9.944082931391997e-06, "loss": 0.0042, "step": 16910 }, { "epoch": 0.2859484718149785, "grad_norm": 0.11755727231502533, "learning_rate": 9.943862765140065e-06, "loss": 0.004, "step": 16920 }, { "epoch": 0.28611747209382893, "grad_norm": 0.1553824096918106, "learning_rate": 9.943642168748117e-06, "loss": 0.0064, "step": 16930 }, { "epoch": 0.2862864723726794, "grad_norm": 0.1014246940612793, "learning_rate": 9.94342114223535e-06, "loss": 0.0065, "step": 16940 }, { "epoch": 0.2864554726515299, "grad_norm": 0.13518716394901276, "learning_rate": 9.943199685620992e-06, "loss": 0.0041, "step": 16950 }, { "epoch": 0.28662447293038035, "grad_norm": 0.2961231768131256, "learning_rate": 9.942977798924312e-06, "loss": 0.0026, "step": 16960 }, { "epoch": 0.2867934732092308, "grad_norm": 0.24852749705314636, "learning_rate": 9.942755482164612e-06, "loss": 0.0048, "step": 16970 }, { "epoch": 0.28696247348808124, "grad_norm": 0.09043409675359726, "learning_rate": 9.94253273536124e-06, "loss": 0.0025, "step": 16980 }, { "epoch": 0.2871314737669317, "grad_norm": 0.08916544169187546, "learning_rate": 9.942309558533569e-06, "loss": 0.0062, "step": 16990 }, { "epoch": 0.2873004740457822, "grad_norm": 0.1598898470401764, "learning_rate": 9.942085951701024e-06, "loss": 0.0044, "step": 17000 }, { "epoch": 0.28746947432463266, "grad_norm": 0.13179609179496765, "learning_rate": 9.941861914883055e-06, "loss": 0.003, "step": 17010 }, { "epoch": 0.2876384746034831, "grad_norm": 0.1741129755973816, "learning_rate": 9.941637448099155e-06, "loss": 0.0036, "step": 17020 }, { "epoch": 0.28780747488233355, "grad_norm": 0.08615001291036606, "learning_rate": 9.941412551368854e-06, "loss": 0.0174, "step": 17030 }, { "epoch": 0.287976475161184, "grad_norm": 0.17396552860736847, "learning_rate": 9.941187224711719e-06, "loss": 0.0055, "step": 17040 }, { "epoch": 0.2881454754400345, "grad_norm": 0.08446299284696579, "learning_rate": 9.940961468147356e-06, "loss": 0.0025, "step": 17050 }, { "epoch": 0.2883144757188849, "grad_norm": 0.18342605233192444, "learning_rate": 9.940735281695406e-06, "loss": 0.0037, "step": 17060 }, { "epoch": 0.2884834759977354, "grad_norm": 0.1513669490814209, "learning_rate": 9.940508665375547e-06, "loss": 0.0038, "step": 17070 }, { "epoch": 0.28865247627658586, "grad_norm": 0.13163068890571594, "learning_rate": 9.940281619207497e-06, "loss": 0.0026, "step": 17080 }, { "epoch": 0.28882147655543633, "grad_norm": 0.09194418042898178, "learning_rate": 9.94005414321101e-06, "loss": 0.0026, "step": 17090 }, { "epoch": 0.2889904768342868, "grad_norm": 0.10978283733129501, "learning_rate": 9.939826237405878e-06, "loss": 0.0032, "step": 17100 }, { "epoch": 0.2891594771131372, "grad_norm": 0.09779514372348785, "learning_rate": 9.939597901811929e-06, "loss": 0.0055, "step": 17110 }, { "epoch": 0.2893284773919877, "grad_norm": 0.13635870814323425, "learning_rate": 9.939369136449029e-06, "loss": 0.0034, "step": 17120 }, { "epoch": 0.28949747767083817, "grad_norm": 0.1415141522884369, "learning_rate": 9.939139941337084e-06, "loss": 0.0028, "step": 17130 }, { "epoch": 0.28966647794968864, "grad_norm": 0.8230068683624268, "learning_rate": 9.93891031649603e-06, "loss": 0.0057, "step": 17140 }, { "epoch": 0.28983547822853906, "grad_norm": 0.15510357916355133, "learning_rate": 9.938680261945853e-06, "loss": 0.0047, "step": 17150 }, { "epoch": 0.29000447850738953, "grad_norm": 0.07704704254865646, "learning_rate": 9.938449777706562e-06, "loss": 0.0046, "step": 17160 }, { "epoch": 0.29017347878624, "grad_norm": 0.1460547000169754, "learning_rate": 9.938218863798215e-06, "loss": 0.0042, "step": 17170 }, { "epoch": 0.2903424790650905, "grad_norm": 0.16609102487564087, "learning_rate": 9.937987520240899e-06, "loss": 0.0043, "step": 17180 }, { "epoch": 0.2905114793439409, "grad_norm": 0.05784421041607857, "learning_rate": 9.937755747054745e-06, "loss": 0.0033, "step": 17190 }, { "epoch": 0.29068047962279137, "grad_norm": 0.23304054141044617, "learning_rate": 9.937523544259915e-06, "loss": 0.0037, "step": 17200 }, { "epoch": 0.29084947990164184, "grad_norm": 0.0587453655898571, "learning_rate": 9.937290911876616e-06, "loss": 0.0041, "step": 17210 }, { "epoch": 0.2910184801804923, "grad_norm": 0.14351072907447815, "learning_rate": 9.937057849925084e-06, "loss": 0.0033, "step": 17220 }, { "epoch": 0.2911874804593428, "grad_norm": 0.07545598596334457, "learning_rate": 9.9368243584256e-06, "loss": 0.0028, "step": 17230 }, { "epoch": 0.2913564807381932, "grad_norm": 0.21592868864536285, "learning_rate": 9.936590437398476e-06, "loss": 0.0038, "step": 17240 }, { "epoch": 0.2915254810170437, "grad_norm": 0.18980614840984344, "learning_rate": 9.936356086864067e-06, "loss": 0.0042, "step": 17250 }, { "epoch": 0.29169448129589415, "grad_norm": 0.09946509450674057, "learning_rate": 9.936121306842761e-06, "loss": 0.0034, "step": 17260 }, { "epoch": 0.2918634815747446, "grad_norm": 0.09308329969644547, "learning_rate": 9.935886097354985e-06, "loss": 0.0039, "step": 17270 }, { "epoch": 0.29203248185359504, "grad_norm": 0.11000710725784302, "learning_rate": 9.935650458421202e-06, "loss": 0.0029, "step": 17280 }, { "epoch": 0.2922014821324455, "grad_norm": 0.12374405562877655, "learning_rate": 9.935414390061918e-06, "loss": 0.0041, "step": 17290 }, { "epoch": 0.292370482411296, "grad_norm": 0.08498592674732208, "learning_rate": 9.935177892297668e-06, "loss": 0.004, "step": 17300 }, { "epoch": 0.29253948269014646, "grad_norm": 0.1327739804983139, "learning_rate": 9.93494096514903e-06, "loss": 0.0042, "step": 17310 }, { "epoch": 0.2927084829689969, "grad_norm": 0.0903698205947876, "learning_rate": 9.934703608636618e-06, "loss": 0.0042, "step": 17320 }, { "epoch": 0.29287748324784735, "grad_norm": 0.1082349494099617, "learning_rate": 9.934465822781083e-06, "loss": 0.0023, "step": 17330 }, { "epoch": 0.2930464835266978, "grad_norm": 0.14297062158584595, "learning_rate": 9.934227607603112e-06, "loss": 0.0048, "step": 17340 }, { "epoch": 0.2932154838055483, "grad_norm": 0.013389919884502888, "learning_rate": 9.933988963123435e-06, "loss": 0.0043, "step": 17350 }, { "epoch": 0.29338448408439877, "grad_norm": 0.30560749769210815, "learning_rate": 9.93374988936281e-06, "loss": 0.0037, "step": 17360 }, { "epoch": 0.2935534843632492, "grad_norm": 0.1624475121498108, "learning_rate": 9.93351038634204e-06, "loss": 0.0025, "step": 17370 }, { "epoch": 0.29372248464209966, "grad_norm": 0.04842815920710564, "learning_rate": 9.933270454081964e-06, "loss": 0.0026, "step": 17380 }, { "epoch": 0.29389148492095013, "grad_norm": 0.12428752332925797, "learning_rate": 9.933030092603458e-06, "loss": 0.0034, "step": 17390 }, { "epoch": 0.2940604851998006, "grad_norm": 0.05903220921754837, "learning_rate": 9.93278930192743e-06, "loss": 0.0022, "step": 17400 }, { "epoch": 0.294229485478651, "grad_norm": 0.15623825788497925, "learning_rate": 9.932548082074833e-06, "loss": 0.0051, "step": 17410 }, { "epoch": 0.2943984857575015, "grad_norm": 0.16055922210216522, "learning_rate": 9.932306433066656e-06, "loss": 0.0041, "step": 17420 }, { "epoch": 0.29456748603635197, "grad_norm": 0.3821702301502228, "learning_rate": 9.932064354923921e-06, "loss": 0.0057, "step": 17430 }, { "epoch": 0.29473648631520244, "grad_norm": 0.37602075934410095, "learning_rate": 9.93182184766769e-06, "loss": 0.0031, "step": 17440 }, { "epoch": 0.29490548659405286, "grad_norm": 0.11960271745920181, "learning_rate": 9.931578911319063e-06, "loss": 0.0039, "step": 17450 }, { "epoch": 0.29507448687290333, "grad_norm": 0.4761727452278137, "learning_rate": 9.931335545899177e-06, "loss": 0.0043, "step": 17460 }, { "epoch": 0.2952434871517538, "grad_norm": 0.17506860196590424, "learning_rate": 9.931091751429207e-06, "loss": 0.0032, "step": 17470 }, { "epoch": 0.2954124874306043, "grad_norm": 0.12806014716625214, "learning_rate": 9.93084752793036e-06, "loss": 0.0046, "step": 17480 }, { "epoch": 0.2955814877094547, "grad_norm": 0.11130823940038681, "learning_rate": 9.930602875423889e-06, "loss": 0.0039, "step": 17490 }, { "epoch": 0.29575048798830517, "grad_norm": 0.09847419708967209, "learning_rate": 9.930357793931081e-06, "loss": 0.004, "step": 17500 }, { "epoch": 0.29591948826715564, "grad_norm": 0.2048054039478302, "learning_rate": 9.930112283473253e-06, "loss": 0.0032, "step": 17510 }, { "epoch": 0.2960884885460061, "grad_norm": 0.41881263256073, "learning_rate": 9.92986634407177e-06, "loss": 0.0033, "step": 17520 }, { "epoch": 0.2962574888248566, "grad_norm": 0.13665935397148132, "learning_rate": 9.92961997574803e-06, "loss": 0.0041, "step": 17530 }, { "epoch": 0.296426489103707, "grad_norm": 0.1357094794511795, "learning_rate": 9.929373178523469e-06, "loss": 0.0034, "step": 17540 }, { "epoch": 0.2965954893825575, "grad_norm": 0.11714345961809158, "learning_rate": 9.929125952419558e-06, "loss": 0.0031, "step": 17550 }, { "epoch": 0.29676448966140795, "grad_norm": 0.21937525272369385, "learning_rate": 9.928878297457804e-06, "loss": 0.0038, "step": 17560 }, { "epoch": 0.2969334899402584, "grad_norm": 0.5983049273490906, "learning_rate": 9.92863021365976e-06, "loss": 0.0035, "step": 17570 }, { "epoch": 0.29710249021910884, "grad_norm": 0.06767556816339493, "learning_rate": 9.928381701047006e-06, "loss": 0.0049, "step": 17580 }, { "epoch": 0.2972714904979593, "grad_norm": 0.13499832153320312, "learning_rate": 9.928132759641166e-06, "loss": 0.0045, "step": 17590 }, { "epoch": 0.2974404907768098, "grad_norm": 0.26578205823898315, "learning_rate": 9.9278833894639e-06, "loss": 0.0046, "step": 17600 }, { "epoch": 0.29760949105566026, "grad_norm": 0.06498241424560547, "learning_rate": 9.9276335905369e-06, "loss": 0.0046, "step": 17610 }, { "epoch": 0.2977784913345107, "grad_norm": 0.1358170062303543, "learning_rate": 9.927383362881906e-06, "loss": 0.0038, "step": 17620 }, { "epoch": 0.29794749161336115, "grad_norm": 0.16412805020809174, "learning_rate": 9.927132706520684e-06, "loss": 0.003, "step": 17630 }, { "epoch": 0.2981164918922116, "grad_norm": 0.07152877002954483, "learning_rate": 9.926881621475044e-06, "loss": 0.0044, "step": 17640 }, { "epoch": 0.2982854921710621, "grad_norm": 0.12229660153388977, "learning_rate": 9.92663010776683e-06, "loss": 0.0027, "step": 17650 }, { "epoch": 0.29845449244991257, "grad_norm": 0.2264779806137085, "learning_rate": 9.926378165417928e-06, "loss": 0.0025, "step": 17660 }, { "epoch": 0.298623492728763, "grad_norm": 0.18397444486618042, "learning_rate": 9.926125794450258e-06, "loss": 0.002, "step": 17670 }, { "epoch": 0.29879249300761346, "grad_norm": 0.022286036983132362, "learning_rate": 9.925872994885776e-06, "loss": 0.0028, "step": 17680 }, { "epoch": 0.29896149328646393, "grad_norm": 0.18695835769176483, "learning_rate": 9.925619766746476e-06, "loss": 0.0033, "step": 17690 }, { "epoch": 0.2991304935653144, "grad_norm": 0.14758239686489105, "learning_rate": 9.925366110054391e-06, "loss": 0.0044, "step": 17700 }, { "epoch": 0.2992994938441648, "grad_norm": 0.0414218008518219, "learning_rate": 9.925112024831591e-06, "loss": 0.0029, "step": 17710 }, { "epoch": 0.2994684941230153, "grad_norm": 0.23307713866233826, "learning_rate": 9.924857511100181e-06, "loss": 0.0042, "step": 17720 }, { "epoch": 0.29963749440186577, "grad_norm": 0.11337589472532272, "learning_rate": 9.924602568882308e-06, "loss": 0.0031, "step": 17730 }, { "epoch": 0.29980649468071624, "grad_norm": 0.10292692482471466, "learning_rate": 9.92434719820015e-06, "loss": 0.0041, "step": 17740 }, { "epoch": 0.29997549495956666, "grad_norm": 0.1608804613351822, "learning_rate": 9.924091399075928e-06, "loss": 0.0048, "step": 17750 }, { "epoch": 0.30014449523841713, "grad_norm": 0.12451475858688354, "learning_rate": 9.923835171531896e-06, "loss": 0.0039, "step": 17760 }, { "epoch": 0.3003134955172676, "grad_norm": 0.35020482540130615, "learning_rate": 9.923578515590347e-06, "loss": 0.0028, "step": 17770 }, { "epoch": 0.3004824957961181, "grad_norm": 0.14740000665187836, "learning_rate": 9.923321431273611e-06, "loss": 0.0063, "step": 17780 }, { "epoch": 0.30065149607496855, "grad_norm": 0.1438693404197693, "learning_rate": 9.923063918604057e-06, "loss": 0.0032, "step": 17790 }, { "epoch": 0.30082049635381897, "grad_norm": 0.15545758605003357, "learning_rate": 9.92280597760409e-06, "loss": 0.0055, "step": 17800 }, { "epoch": 0.30098949663266944, "grad_norm": 0.057372089475393295, "learning_rate": 9.922547608296151e-06, "loss": 0.0027, "step": 17810 }, { "epoch": 0.3011584969115199, "grad_norm": 0.08195595443248749, "learning_rate": 9.92228881070272e-06, "loss": 0.0024, "step": 17820 }, { "epoch": 0.3013274971903704, "grad_norm": 0.13404421508312225, "learning_rate": 9.922029584846314e-06, "loss": 0.0027, "step": 17830 }, { "epoch": 0.3014964974692208, "grad_norm": 0.0030572651885449886, "learning_rate": 9.921769930749485e-06, "loss": 0.0032, "step": 17840 }, { "epoch": 0.3016654977480713, "grad_norm": 0.08229197561740875, "learning_rate": 9.921509848434827e-06, "loss": 0.0036, "step": 17850 }, { "epoch": 0.30183449802692175, "grad_norm": 0.07791959494352341, "learning_rate": 9.921249337924966e-06, "loss": 0.0031, "step": 17860 }, { "epoch": 0.3020034983057722, "grad_norm": 0.23877891898155212, "learning_rate": 9.920988399242569e-06, "loss": 0.0087, "step": 17870 }, { "epoch": 0.30217249858462264, "grad_norm": 0.0586976483464241, "learning_rate": 9.920727032410337e-06, "loss": 0.0033, "step": 17880 }, { "epoch": 0.3023414988634731, "grad_norm": 0.06728573143482208, "learning_rate": 9.920465237451013e-06, "loss": 0.003, "step": 17890 }, { "epoch": 0.3025104991423236, "grad_norm": 0.1478877067565918, "learning_rate": 9.920203014387373e-06, "loss": 0.0033, "step": 17900 }, { "epoch": 0.30267949942117406, "grad_norm": 0.08358483016490936, "learning_rate": 9.919940363242233e-06, "loss": 0.0041, "step": 17910 }, { "epoch": 0.30284849970002453, "grad_norm": 0.12691207230091095, "learning_rate": 9.919677284038443e-06, "loss": 0.0036, "step": 17920 }, { "epoch": 0.30301749997887495, "grad_norm": 0.056970302015542984, "learning_rate": 9.919413776798892e-06, "loss": 0.0034, "step": 17930 }, { "epoch": 0.3031865002577254, "grad_norm": 0.11025747656822205, "learning_rate": 9.919149841546509e-06, "loss": 0.0033, "step": 17940 }, { "epoch": 0.3033555005365759, "grad_norm": 0.15607964992523193, "learning_rate": 9.918885478304253e-06, "loss": 0.0033, "step": 17950 }, { "epoch": 0.30352450081542637, "grad_norm": 0.1555921882390976, "learning_rate": 9.91862068709513e-06, "loss": 0.0025, "step": 17960 }, { "epoch": 0.3036935010942768, "grad_norm": 0.09082618355751038, "learning_rate": 9.918355467942176e-06, "loss": 0.0023, "step": 17970 }, { "epoch": 0.30386250137312726, "grad_norm": 0.3090972900390625, "learning_rate": 9.918089820868466e-06, "loss": 0.0019, "step": 17980 }, { "epoch": 0.30403150165197773, "grad_norm": 0.057993724942207336, "learning_rate": 9.917823745897113e-06, "loss": 0.0031, "step": 17990 }, { "epoch": 0.3042005019308282, "grad_norm": 0.06888581812381744, "learning_rate": 9.917557243051266e-06, "loss": 0.004, "step": 18000 }, { "epoch": 0.3043695022096786, "grad_norm": 0.13699199259281158, "learning_rate": 9.917290312354113e-06, "loss": 0.0028, "step": 18010 }, { "epoch": 0.3045385024885291, "grad_norm": 0.1704409122467041, "learning_rate": 9.917022953828879e-06, "loss": 0.0052, "step": 18020 }, { "epoch": 0.30470750276737957, "grad_norm": 0.24415592849254608, "learning_rate": 9.916755167498824e-06, "loss": 0.0046, "step": 18030 }, { "epoch": 0.30487650304623004, "grad_norm": 0.09228435158729553, "learning_rate": 9.916486953387247e-06, "loss": 0.0037, "step": 18040 }, { "epoch": 0.3050455033250805, "grad_norm": 0.24966703355312347, "learning_rate": 9.916218311517484e-06, "loss": 0.0032, "step": 18050 }, { "epoch": 0.30521450360393093, "grad_norm": 0.07386928051710129, "learning_rate": 9.915949241912908e-06, "loss": 0.0035, "step": 18060 }, { "epoch": 0.3053835038827814, "grad_norm": 0.183248370885849, "learning_rate": 9.91567974459693e-06, "loss": 0.006, "step": 18070 }, { "epoch": 0.3055525041616319, "grad_norm": 0.3374176621437073, "learning_rate": 9.915409819592998e-06, "loss": 0.0043, "step": 18080 }, { "epoch": 0.30572150444048235, "grad_norm": 0.034306954592466354, "learning_rate": 9.915139466924594e-06, "loss": 0.0044, "step": 18090 }, { "epoch": 0.30589050471933277, "grad_norm": 0.2904483675956726, "learning_rate": 9.914868686615244e-06, "loss": 0.0037, "step": 18100 }, { "epoch": 0.30605950499818324, "grad_norm": 0.18893156945705414, "learning_rate": 9.914597478688503e-06, "loss": 0.0033, "step": 18110 }, { "epoch": 0.3062285052770337, "grad_norm": 0.22676680982112885, "learning_rate": 9.91432584316797e-06, "loss": 0.0047, "step": 18120 }, { "epoch": 0.3063975055558842, "grad_norm": 0.13738246262073517, "learning_rate": 9.91405378007728e-06, "loss": 0.0047, "step": 18130 }, { "epoch": 0.3065665058347346, "grad_norm": 0.08430465310811996, "learning_rate": 9.913781289440102e-06, "loss": 0.0024, "step": 18140 }, { "epoch": 0.3067355061135851, "grad_norm": 0.05394160374999046, "learning_rate": 9.913508371280143e-06, "loss": 0.0036, "step": 18150 }, { "epoch": 0.30690450639243555, "grad_norm": 0.12102994322776794, "learning_rate": 9.913235025621148e-06, "loss": 0.0035, "step": 18160 }, { "epoch": 0.307073506671286, "grad_norm": 0.20373289287090302, "learning_rate": 9.912961252486903e-06, "loss": 0.003, "step": 18170 }, { "epoch": 0.30724250695013644, "grad_norm": 0.33118996024131775, "learning_rate": 9.912687051901224e-06, "loss": 0.004, "step": 18180 }, { "epoch": 0.3074115072289869, "grad_norm": 0.1756182610988617, "learning_rate": 9.91241242388797e-06, "loss": 0.003, "step": 18190 }, { "epoch": 0.3075805075078374, "grad_norm": 0.3151039183139801, "learning_rate": 9.912137368471032e-06, "loss": 0.0035, "step": 18200 }, { "epoch": 0.30774950778668786, "grad_norm": 0.08164257556200027, "learning_rate": 9.911861885674344e-06, "loss": 0.004, "step": 18210 }, { "epoch": 0.30791850806553833, "grad_norm": 0.2701294422149658, "learning_rate": 9.911585975521873e-06, "loss": 0.0043, "step": 18220 }, { "epoch": 0.30808750834438875, "grad_norm": 0.16288606822490692, "learning_rate": 9.911309638037626e-06, "loss": 0.0034, "step": 18230 }, { "epoch": 0.3082565086232392, "grad_norm": 0.08081130683422089, "learning_rate": 9.911032873245645e-06, "loss": 0.0033, "step": 18240 }, { "epoch": 0.3084255089020897, "grad_norm": 0.1643926352262497, "learning_rate": 9.910755681170009e-06, "loss": 0.0051, "step": 18250 }, { "epoch": 0.30859450918094017, "grad_norm": 0.0930451974272728, "learning_rate": 9.910478061834834e-06, "loss": 0.0039, "step": 18260 }, { "epoch": 0.3087635094597906, "grad_norm": 0.12892429530620575, "learning_rate": 9.910200015264278e-06, "loss": 0.0049, "step": 18270 }, { "epoch": 0.30893250973864106, "grad_norm": 0.14059489965438843, "learning_rate": 9.909921541482527e-06, "loss": 0.0049, "step": 18280 }, { "epoch": 0.30910151001749153, "grad_norm": 0.028928019106388092, "learning_rate": 9.909642640513817e-06, "loss": 0.0031, "step": 18290 }, { "epoch": 0.309270510296342, "grad_norm": 0.08975011855363846, "learning_rate": 9.909363312382408e-06, "loss": 0.0036, "step": 18300 }, { "epoch": 0.3094395105751924, "grad_norm": 0.2675141990184784, "learning_rate": 9.909083557112602e-06, "loss": 0.0116, "step": 18310 }, { "epoch": 0.3096085108540429, "grad_norm": 0.08267785608768463, "learning_rate": 9.908803374728744e-06, "loss": 0.0043, "step": 18320 }, { "epoch": 0.30977751113289337, "grad_norm": 0.17032286524772644, "learning_rate": 9.908522765255208e-06, "loss": 0.0026, "step": 18330 }, { "epoch": 0.30994651141174384, "grad_norm": 0.12598468363285065, "learning_rate": 9.90824172871641e-06, "loss": 0.0033, "step": 18340 }, { "epoch": 0.3101155116905943, "grad_norm": 0.14041945338249207, "learning_rate": 9.907960265136801e-06, "loss": 0.0043, "step": 18350 }, { "epoch": 0.31028451196944473, "grad_norm": 0.09343992173671722, "learning_rate": 9.907678374540866e-06, "loss": 0.0017, "step": 18360 }, { "epoch": 0.3104535122482952, "grad_norm": 0.080118827521801, "learning_rate": 9.907396056953137e-06, "loss": 0.003, "step": 18370 }, { "epoch": 0.3106225125271457, "grad_norm": 0.05156289041042328, "learning_rate": 9.907113312398173e-06, "loss": 0.0037, "step": 18380 }, { "epoch": 0.31079151280599615, "grad_norm": 0.17726033926010132, "learning_rate": 9.906830140900577e-06, "loss": 0.0066, "step": 18390 }, { "epoch": 0.31096051308484657, "grad_norm": 0.01746257022023201, "learning_rate": 9.906546542484984e-06, "loss": 0.0033, "step": 18400 }, { "epoch": 0.31112951336369704, "grad_norm": 0.14338521659374237, "learning_rate": 9.906262517176066e-06, "loss": 0.0019, "step": 18410 }, { "epoch": 0.3112985136425475, "grad_norm": 0.04118502512574196, "learning_rate": 9.90597806499854e-06, "loss": 0.0024, "step": 18420 }, { "epoch": 0.311467513921398, "grad_norm": 0.25993072986602783, "learning_rate": 9.905693185977152e-06, "loss": 0.0045, "step": 18430 }, { "epoch": 0.3116365142002484, "grad_norm": 0.2973000407218933, "learning_rate": 9.90540788013669e-06, "loss": 0.0035, "step": 18440 }, { "epoch": 0.3118055144790989, "grad_norm": 0.11956703662872314, "learning_rate": 9.905122147501973e-06, "loss": 0.0023, "step": 18450 }, { "epoch": 0.31197451475794935, "grad_norm": 0.11619943380355835, "learning_rate": 9.904835988097865e-06, "loss": 0.0022, "step": 18460 }, { "epoch": 0.3121435150367998, "grad_norm": 0.28381380438804626, "learning_rate": 9.90454940194926e-06, "loss": 0.0048, "step": 18470 }, { "epoch": 0.3123125153156503, "grad_norm": 0.403537392616272, "learning_rate": 9.904262389081093e-06, "loss": 0.0049, "step": 18480 }, { "epoch": 0.3124815155945007, "grad_norm": 0.042656026780605316, "learning_rate": 9.90397494951834e-06, "loss": 0.0039, "step": 18490 }, { "epoch": 0.3126505158733512, "grad_norm": 0.25615885853767395, "learning_rate": 9.903687083286003e-06, "loss": 0.0048, "step": 18500 }, { "epoch": 0.31281951615220166, "grad_norm": 0.16681556403636932, "learning_rate": 9.903398790409132e-06, "loss": 0.0039, "step": 18510 }, { "epoch": 0.31298851643105213, "grad_norm": 0.08900097012519836, "learning_rate": 9.90311007091281e-06, "loss": 0.0022, "step": 18520 }, { "epoch": 0.31315751670990255, "grad_norm": 0.26374733448028564, "learning_rate": 9.902820924822153e-06, "loss": 0.0039, "step": 18530 }, { "epoch": 0.313326516988753, "grad_norm": 0.11339973658323288, "learning_rate": 9.902531352162322e-06, "loss": 0.0041, "step": 18540 }, { "epoch": 0.3134955172676035, "grad_norm": 0.15740644931793213, "learning_rate": 9.902241352958511e-06, "loss": 0.0047, "step": 18550 }, { "epoch": 0.31366451754645397, "grad_norm": 0.1276281625032425, "learning_rate": 9.90195092723595e-06, "loss": 0.0031, "step": 18560 }, { "epoch": 0.3138335178253044, "grad_norm": 0.07520712167024612, "learning_rate": 9.901660075019907e-06, "loss": 0.0035, "step": 18570 }, { "epoch": 0.31400251810415486, "grad_norm": 0.09208130836486816, "learning_rate": 9.901368796335688e-06, "loss": 0.0026, "step": 18580 }, { "epoch": 0.31417151838300533, "grad_norm": 0.10288451611995697, "learning_rate": 9.901077091208638e-06, "loss": 0.0049, "step": 18590 }, { "epoch": 0.3143405186618558, "grad_norm": 0.2334715723991394, "learning_rate": 9.900784959664134e-06, "loss": 0.004, "step": 18600 }, { "epoch": 0.3145095189407063, "grad_norm": 0.11997061967849731, "learning_rate": 9.900492401727592e-06, "loss": 0.0022, "step": 18610 }, { "epoch": 0.3146785192195567, "grad_norm": 0.019103042781352997, "learning_rate": 9.90019941742447e-06, "loss": 0.0023, "step": 18620 }, { "epoch": 0.31484751949840717, "grad_norm": 0.15194158256053925, "learning_rate": 9.899906006780256e-06, "loss": 0.0024, "step": 18630 }, { "epoch": 0.31501651977725764, "grad_norm": 0.12645916640758514, "learning_rate": 9.899612169820477e-06, "loss": 0.004, "step": 18640 }, { "epoch": 0.3151855200561081, "grad_norm": 0.11526061594486237, "learning_rate": 9.899317906570702e-06, "loss": 0.0048, "step": 18650 }, { "epoch": 0.31535452033495853, "grad_norm": 0.03650401905179024, "learning_rate": 9.89902321705653e-06, "loss": 0.0041, "step": 18660 }, { "epoch": 0.315523520613809, "grad_norm": 0.14329461753368378, "learning_rate": 9.898728101303603e-06, "loss": 0.0025, "step": 18670 }, { "epoch": 0.3156925208926595, "grad_norm": 0.21272212266921997, "learning_rate": 9.898432559337596e-06, "loss": 0.0046, "step": 18680 }, { "epoch": 0.31586152117150995, "grad_norm": 0.15306775271892548, "learning_rate": 9.898136591184222e-06, "loss": 0.004, "step": 18690 }, { "epoch": 0.31603052145036037, "grad_norm": 0.13471972942352295, "learning_rate": 9.897840196869235e-06, "loss": 0.0029, "step": 18700 }, { "epoch": 0.31619952172921084, "grad_norm": 0.2979624271392822, "learning_rate": 9.897543376418419e-06, "loss": 0.0038, "step": 18710 }, { "epoch": 0.3163685220080613, "grad_norm": 0.11547307670116425, "learning_rate": 9.897246129857598e-06, "loss": 0.0033, "step": 18720 }, { "epoch": 0.3165375222869118, "grad_norm": 0.30266544222831726, "learning_rate": 9.896948457212638e-06, "loss": 0.0057, "step": 18730 }, { "epoch": 0.31670652256576226, "grad_norm": 0.08800787478685379, "learning_rate": 9.896650358509437e-06, "loss": 0.0021, "step": 18740 }, { "epoch": 0.3168755228446127, "grad_norm": 0.09709448367357254, "learning_rate": 9.896351833773929e-06, "loss": 0.0039, "step": 18750 }, { "epoch": 0.31704452312346315, "grad_norm": 0.07401283830404282, "learning_rate": 9.896052883032087e-06, "loss": 0.003, "step": 18760 }, { "epoch": 0.3172135234023136, "grad_norm": 0.14612315595149994, "learning_rate": 9.895753506309923e-06, "loss": 0.003, "step": 18770 }, { "epoch": 0.3173825236811641, "grad_norm": 0.06939349323511124, "learning_rate": 9.895453703633485e-06, "loss": 0.0043, "step": 18780 }, { "epoch": 0.3175515239600145, "grad_norm": 0.1797635704278946, "learning_rate": 9.895153475028853e-06, "loss": 0.0033, "step": 18790 }, { "epoch": 0.317720524238865, "grad_norm": 0.12291466444730759, "learning_rate": 9.894852820522152e-06, "loss": 0.0037, "step": 18800 }, { "epoch": 0.31788952451771546, "grad_norm": 0.1714431196451187, "learning_rate": 9.89455174013954e-06, "loss": 0.0031, "step": 18810 }, { "epoch": 0.31805852479656593, "grad_norm": 0.2693570554256439, "learning_rate": 9.894250233907213e-06, "loss": 0.0048, "step": 18820 }, { "epoch": 0.31822752507541635, "grad_norm": 0.1704714298248291, "learning_rate": 9.893948301851402e-06, "loss": 0.0029, "step": 18830 }, { "epoch": 0.3183965253542668, "grad_norm": 0.23571054637432098, "learning_rate": 9.893645943998376e-06, "loss": 0.0038, "step": 18840 }, { "epoch": 0.3185655256331173, "grad_norm": 0.1295132339000702, "learning_rate": 9.893343160374442e-06, "loss": 0.0046, "step": 18850 }, { "epoch": 0.31873452591196777, "grad_norm": 0.2537764310836792, "learning_rate": 9.893039951005945e-06, "loss": 0.0044, "step": 18860 }, { "epoch": 0.3189035261908182, "grad_norm": 0.08305542916059494, "learning_rate": 9.892736315919265e-06, "loss": 0.0022, "step": 18870 }, { "epoch": 0.31907252646966866, "grad_norm": 0.04880817234516144, "learning_rate": 9.892432255140821e-06, "loss": 0.0027, "step": 18880 }, { "epoch": 0.31924152674851913, "grad_norm": 0.11052796244621277, "learning_rate": 9.892127768697064e-06, "loss": 0.0035, "step": 18890 }, { "epoch": 0.3194105270273696, "grad_norm": 0.19982464611530304, "learning_rate": 9.89182285661449e-06, "loss": 0.0026, "step": 18900 }, { "epoch": 0.3195795273062201, "grad_norm": 0.46795204281806946, "learning_rate": 9.891517518919627e-06, "loss": 0.0036, "step": 18910 }, { "epoch": 0.3197485275850705, "grad_norm": 0.03222557529807091, "learning_rate": 9.891211755639037e-06, "loss": 0.0022, "step": 18920 }, { "epoch": 0.31991752786392097, "grad_norm": 0.1941870152950287, "learning_rate": 9.890905566799328e-06, "loss": 0.0044, "step": 18930 }, { "epoch": 0.32008652814277144, "grad_norm": 0.17623929679393768, "learning_rate": 9.890598952427137e-06, "loss": 0.0048, "step": 18940 }, { "epoch": 0.3202555284216219, "grad_norm": 0.014388482086360455, "learning_rate": 9.890291912549141e-06, "loss": 0.0032, "step": 18950 }, { "epoch": 0.32042452870047233, "grad_norm": 0.07924115657806396, "learning_rate": 9.889984447192056e-06, "loss": 0.0051, "step": 18960 }, { "epoch": 0.3205935289793228, "grad_norm": 0.11756382137537003, "learning_rate": 9.88967655638263e-06, "loss": 0.0046, "step": 18970 }, { "epoch": 0.3207625292581733, "grad_norm": 0.08961915224790573, "learning_rate": 9.889368240147654e-06, "loss": 0.0038, "step": 18980 }, { "epoch": 0.32093152953702375, "grad_norm": 0.16066645085811615, "learning_rate": 9.889059498513951e-06, "loss": 0.0033, "step": 18990 }, { "epoch": 0.32110052981587417, "grad_norm": 0.27878034114837646, "learning_rate": 9.888750331508383e-06, "loss": 0.0022, "step": 19000 }, { "epoch": 0.32126953009472464, "grad_norm": 0.1295364946126938, "learning_rate": 9.88844073915785e-06, "loss": 0.0026, "step": 19010 }, { "epoch": 0.3214385303735751, "grad_norm": 0.21508057415485382, "learning_rate": 9.888130721489288e-06, "loss": 0.0033, "step": 19020 }, { "epoch": 0.3216075306524256, "grad_norm": 0.17439287900924683, "learning_rate": 9.88782027852967e-06, "loss": 0.0033, "step": 19030 }, { "epoch": 0.32177653093127606, "grad_norm": 0.10152928531169891, "learning_rate": 9.887509410306005e-06, "loss": 0.0054, "step": 19040 }, { "epoch": 0.3219455312101265, "grad_norm": 0.06065661087632179, "learning_rate": 9.887198116845341e-06, "loss": 0.0023, "step": 19050 }, { "epoch": 0.32211453148897695, "grad_norm": 0.10322712361812592, "learning_rate": 9.88688639817476e-06, "loss": 0.0037, "step": 19060 }, { "epoch": 0.3222835317678274, "grad_norm": 0.23588989675045013, "learning_rate": 9.886574254321388e-06, "loss": 0.0035, "step": 19070 }, { "epoch": 0.3224525320466779, "grad_norm": 0.10690174996852875, "learning_rate": 9.886261685312378e-06, "loss": 0.0043, "step": 19080 }, { "epoch": 0.3226215323255283, "grad_norm": 0.18640218675136566, "learning_rate": 9.885948691174927e-06, "loss": 0.0029, "step": 19090 }, { "epoch": 0.3227905326043788, "grad_norm": 0.12204626947641373, "learning_rate": 9.885635271936268e-06, "loss": 0.0042, "step": 19100 }, { "epoch": 0.32295953288322926, "grad_norm": 0.09853368997573853, "learning_rate": 9.88532142762367e-06, "loss": 0.0039, "step": 19110 }, { "epoch": 0.32312853316207973, "grad_norm": 0.06033041328191757, "learning_rate": 9.885007158264435e-06, "loss": 0.0024, "step": 19120 }, { "epoch": 0.32329753344093015, "grad_norm": 0.03601764515042305, "learning_rate": 9.88469246388591e-06, "loss": 0.0021, "step": 19130 }, { "epoch": 0.3234665337197806, "grad_norm": 0.27800238132476807, "learning_rate": 9.884377344515473e-06, "loss": 0.0042, "step": 19140 }, { "epoch": 0.3236355339986311, "grad_norm": 0.37190690636634827, "learning_rate": 9.884061800180542e-06, "loss": 0.0044, "step": 19150 }, { "epoch": 0.32380453427748157, "grad_norm": 0.2755439281463623, "learning_rate": 9.88374583090857e-06, "loss": 0.0036, "step": 19160 }, { "epoch": 0.32397353455633204, "grad_norm": 0.334343284368515, "learning_rate": 9.88342943672705e-06, "loss": 0.0044, "step": 19170 }, { "epoch": 0.32414253483518246, "grad_norm": 0.12950769066810608, "learning_rate": 9.883112617663508e-06, "loss": 0.0031, "step": 19180 }, { "epoch": 0.32431153511403293, "grad_norm": 0.07064332067966461, "learning_rate": 9.882795373745508e-06, "loss": 0.0027, "step": 19190 }, { "epoch": 0.3244805353928834, "grad_norm": 0.2044609934091568, "learning_rate": 9.882477705000654e-06, "loss": 0.0051, "step": 19200 }, { "epoch": 0.3246495356717339, "grad_norm": 0.18798178434371948, "learning_rate": 9.882159611456584e-06, "loss": 0.0031, "step": 19210 }, { "epoch": 0.3248185359505843, "grad_norm": 0.10369912534952164, "learning_rate": 9.88184109314097e-06, "loss": 0.0041, "step": 19220 }, { "epoch": 0.32498753622943477, "grad_norm": 0.39374813437461853, "learning_rate": 9.881522150081531e-06, "loss": 0.0027, "step": 19230 }, { "epoch": 0.32515653650828524, "grad_norm": 0.1317203789949417, "learning_rate": 9.881202782306011e-06, "loss": 0.013, "step": 19240 }, { "epoch": 0.3253255367871357, "grad_norm": 0.08882055431604385, "learning_rate": 9.880882989842202e-06, "loss": 0.0032, "step": 19250 }, { "epoch": 0.32549453706598613, "grad_norm": 0.27533435821533203, "learning_rate": 9.880562772717923e-06, "loss": 0.004, "step": 19260 }, { "epoch": 0.3256635373448366, "grad_norm": 0.2720194458961487, "learning_rate": 9.880242130961035e-06, "loss": 0.0043, "step": 19270 }, { "epoch": 0.3258325376236871, "grad_norm": 0.06423293799161911, "learning_rate": 9.879921064599438e-06, "loss": 0.0029, "step": 19280 }, { "epoch": 0.32600153790253755, "grad_norm": 0.13293801248073578, "learning_rate": 9.879599573661063e-06, "loss": 0.0032, "step": 19290 }, { "epoch": 0.326170538181388, "grad_norm": 0.48314961791038513, "learning_rate": 9.879277658173883e-06, "loss": 0.0026, "step": 19300 }, { "epoch": 0.32633953846023844, "grad_norm": 0.1730279177427292, "learning_rate": 9.878955318165907e-06, "loss": 0.0043, "step": 19310 }, { "epoch": 0.3265085387390889, "grad_norm": 0.0645437240600586, "learning_rate": 9.87863255366518e-06, "loss": 0.0056, "step": 19320 }, { "epoch": 0.3266775390179394, "grad_norm": 0.3200550973415375, "learning_rate": 9.878309364699781e-06, "loss": 0.0049, "step": 19330 }, { "epoch": 0.32684653929678986, "grad_norm": 0.0750180333852768, "learning_rate": 9.877985751297831e-06, "loss": 0.0054, "step": 19340 }, { "epoch": 0.3270155395756403, "grad_norm": 0.14632025361061096, "learning_rate": 9.87766171348749e-06, "loss": 0.003, "step": 19350 }, { "epoch": 0.32718453985449075, "grad_norm": 0.07658658921718597, "learning_rate": 9.877337251296943e-06, "loss": 0.0026, "step": 19360 }, { "epoch": 0.3273535401333412, "grad_norm": 0.03774444758892059, "learning_rate": 9.877012364754425e-06, "loss": 0.0039, "step": 19370 }, { "epoch": 0.3275225404121917, "grad_norm": 0.38548749685287476, "learning_rate": 9.876687053888203e-06, "loss": 0.0044, "step": 19380 }, { "epoch": 0.3276915406910421, "grad_norm": 0.09892215579748154, "learning_rate": 9.87636131872658e-06, "loss": 0.0038, "step": 19390 }, { "epoch": 0.3278605409698926, "grad_norm": 0.167256698012352, "learning_rate": 9.876035159297894e-06, "loss": 0.0039, "step": 19400 }, { "epoch": 0.32802954124874306, "grad_norm": 0.19858378171920776, "learning_rate": 9.875708575630524e-06, "loss": 0.003, "step": 19410 }, { "epoch": 0.32819854152759353, "grad_norm": 0.05991840735077858, "learning_rate": 9.875381567752884e-06, "loss": 0.0026, "step": 19420 }, { "epoch": 0.328367541806444, "grad_norm": 0.2287953794002533, "learning_rate": 9.875054135693428e-06, "loss": 0.0063, "step": 19430 }, { "epoch": 0.3285365420852944, "grad_norm": 0.09663691371679306, "learning_rate": 9.87472627948064e-06, "loss": 0.0039, "step": 19440 }, { "epoch": 0.3287055423641449, "grad_norm": 0.13738645613193512, "learning_rate": 9.874397999143048e-06, "loss": 0.0022, "step": 19450 }, { "epoch": 0.32887454264299537, "grad_norm": 0.1286608725786209, "learning_rate": 9.874069294709215e-06, "loss": 0.0026, "step": 19460 }, { "epoch": 0.32904354292184584, "grad_norm": 0.12891557812690735, "learning_rate": 9.873740166207735e-06, "loss": 0.0025, "step": 19470 }, { "epoch": 0.32921254320069626, "grad_norm": 0.10180335491895676, "learning_rate": 9.873410613667247e-06, "loss": 0.0027, "step": 19480 }, { "epoch": 0.32938154347954673, "grad_norm": 0.06524354964494705, "learning_rate": 9.873080637116425e-06, "loss": 0.0028, "step": 19490 }, { "epoch": 0.3295505437583972, "grad_norm": 0.22627593576908112, "learning_rate": 9.872750236583976e-06, "loss": 0.0045, "step": 19500 }, { "epoch": 0.3297195440372477, "grad_norm": 0.18337315320968628, "learning_rate": 9.872419412098648e-06, "loss": 0.0046, "step": 19510 }, { "epoch": 0.3298885443160981, "grad_norm": 0.2440752238035202, "learning_rate": 9.872088163689222e-06, "loss": 0.0037, "step": 19520 }, { "epoch": 0.33005754459494857, "grad_norm": 0.17850267887115479, "learning_rate": 9.871756491384522e-06, "loss": 0.0039, "step": 19530 }, { "epoch": 0.33022654487379904, "grad_norm": 0.21067939698696136, "learning_rate": 9.871424395213402e-06, "loss": 0.003, "step": 19540 }, { "epoch": 0.3303955451526495, "grad_norm": 0.046429593116045, "learning_rate": 9.871091875204756e-06, "loss": 0.0027, "step": 19550 }, { "epoch": 0.33056454543149993, "grad_norm": 0.062241218984127045, "learning_rate": 9.870758931387519e-06, "loss": 0.0048, "step": 19560 }, { "epoch": 0.3307335457103504, "grad_norm": 0.055095065385103226, "learning_rate": 9.870425563790654e-06, "loss": 0.0036, "step": 19570 }, { "epoch": 0.3309025459892009, "grad_norm": 0.24390147626399994, "learning_rate": 9.870091772443166e-06, "loss": 0.0032, "step": 19580 }, { "epoch": 0.33107154626805135, "grad_norm": 0.04408691078424454, "learning_rate": 9.869757557374099e-06, "loss": 0.0034, "step": 19590 }, { "epoch": 0.3312405465469018, "grad_norm": 0.300178200006485, "learning_rate": 9.86942291861253e-06, "loss": 0.0035, "step": 19600 }, { "epoch": 0.33140954682575224, "grad_norm": 0.4546404778957367, "learning_rate": 9.869087856187574e-06, "loss": 0.0029, "step": 19610 }, { "epoch": 0.3315785471046027, "grad_norm": 0.18573160469532013, "learning_rate": 9.868752370128383e-06, "loss": 0.0035, "step": 19620 }, { "epoch": 0.3317475473834532, "grad_norm": 0.38010624051094055, "learning_rate": 9.868416460464145e-06, "loss": 0.0047, "step": 19630 }, { "epoch": 0.33191654766230366, "grad_norm": 0.2465725839138031, "learning_rate": 9.86808012722409e-06, "loss": 0.002, "step": 19640 }, { "epoch": 0.3320855479411541, "grad_norm": 0.07605840265750885, "learning_rate": 9.867743370437476e-06, "loss": 0.0033, "step": 19650 }, { "epoch": 0.33225454822000455, "grad_norm": 0.34282565116882324, "learning_rate": 9.867406190133602e-06, "loss": 0.0038, "step": 19660 }, { "epoch": 0.332423548498855, "grad_norm": 0.08095477521419525, "learning_rate": 9.867068586341808e-06, "loss": 0.0044, "step": 19670 }, { "epoch": 0.3325925487777055, "grad_norm": 0.22404147684574127, "learning_rate": 9.866730559091465e-06, "loss": 0.0035, "step": 19680 }, { "epoch": 0.3327615490565559, "grad_norm": 0.1436876505613327, "learning_rate": 9.866392108411985e-06, "loss": 0.0037, "step": 19690 }, { "epoch": 0.3329305493354064, "grad_norm": 0.103852778673172, "learning_rate": 9.86605323433281e-06, "loss": 0.0034, "step": 19700 }, { "epoch": 0.33309954961425686, "grad_norm": 0.35399582982063293, "learning_rate": 9.865713936883429e-06, "loss": 0.0042, "step": 19710 }, { "epoch": 0.33326854989310734, "grad_norm": 0.025398628786206245, "learning_rate": 9.865374216093363e-06, "loss": 0.0049, "step": 19720 }, { "epoch": 0.3334375501719578, "grad_norm": 0.16962707042694092, "learning_rate": 9.865034071992165e-06, "loss": 0.0079, "step": 19730 }, { "epoch": 0.3336065504508082, "grad_norm": 0.3511894643306732, "learning_rate": 9.86469350460943e-06, "loss": 0.0044, "step": 19740 }, { "epoch": 0.3337755507296587, "grad_norm": 0.012258529663085938, "learning_rate": 9.86435251397479e-06, "loss": 0.0044, "step": 19750 }, { "epoch": 0.33394455100850917, "grad_norm": 0.20916761457920074, "learning_rate": 9.864011100117916e-06, "loss": 0.0043, "step": 19760 }, { "epoch": 0.33411355128735964, "grad_norm": 0.24661147594451904, "learning_rate": 9.863669263068506e-06, "loss": 0.0042, "step": 19770 }, { "epoch": 0.33428255156621006, "grad_norm": 0.3546825349330902, "learning_rate": 9.863327002856308e-06, "loss": 0.004, "step": 19780 }, { "epoch": 0.33445155184506054, "grad_norm": 0.06682464480400085, "learning_rate": 9.862984319511095e-06, "loss": 0.0035, "step": 19790 }, { "epoch": 0.334620552123911, "grad_norm": 0.14287163317203522, "learning_rate": 9.862641213062688e-06, "loss": 0.0037, "step": 19800 }, { "epoch": 0.3347895524027615, "grad_norm": 0.19845755398273468, "learning_rate": 9.862297683540932e-06, "loss": 0.0026, "step": 19810 }, { "epoch": 0.3349585526816119, "grad_norm": 0.4925055205821991, "learning_rate": 9.86195373097572e-06, "loss": 0.0042, "step": 19820 }, { "epoch": 0.33512755296046237, "grad_norm": 0.14126281440258026, "learning_rate": 9.861609355396978e-06, "loss": 0.0039, "step": 19830 }, { "epoch": 0.33529655323931284, "grad_norm": 0.08112350106239319, "learning_rate": 9.861264556834665e-06, "loss": 0.0032, "step": 19840 }, { "epoch": 0.3354655535181633, "grad_norm": 0.07324805855751038, "learning_rate": 9.860919335318783e-06, "loss": 0.0032, "step": 19850 }, { "epoch": 0.3356345537970138, "grad_norm": 0.15092162787914276, "learning_rate": 9.860573690879367e-06, "loss": 0.0046, "step": 19860 }, { "epoch": 0.3358035540758642, "grad_norm": 0.07325884699821472, "learning_rate": 9.86022762354649e-06, "loss": 0.005, "step": 19870 }, { "epoch": 0.3359725543547147, "grad_norm": 0.27018794417381287, "learning_rate": 9.859881133350262e-06, "loss": 0.0041, "step": 19880 }, { "epoch": 0.33614155463356515, "grad_norm": 0.27263039350509644, "learning_rate": 9.859534220320828e-06, "loss": 0.004, "step": 19890 }, { "epoch": 0.3363105549124156, "grad_norm": 0.15515533089637756, "learning_rate": 9.859186884488372e-06, "loss": 0.0031, "step": 19900 }, { "epoch": 0.33647955519126604, "grad_norm": 0.10629349946975708, "learning_rate": 9.858839125883112e-06, "loss": 0.0036, "step": 19910 }, { "epoch": 0.3366485554701165, "grad_norm": 0.2211560159921646, "learning_rate": 9.858490944535311e-06, "loss": 0.0034, "step": 19920 }, { "epoch": 0.336817555748967, "grad_norm": 0.12569260597229004, "learning_rate": 9.858142340475254e-06, "loss": 0.005, "step": 19930 }, { "epoch": 0.33698655602781746, "grad_norm": 0.10294578224420547, "learning_rate": 9.857793313733277e-06, "loss": 0.0037, "step": 19940 }, { "epoch": 0.3371555563066679, "grad_norm": 0.4229937195777893, "learning_rate": 9.857443864339744e-06, "loss": 0.0033, "step": 19950 }, { "epoch": 0.33732455658551835, "grad_norm": 0.15797141194343567, "learning_rate": 9.85709399232506e-06, "loss": 0.0032, "step": 19960 }, { "epoch": 0.3374935568643688, "grad_norm": 0.23842860758304596, "learning_rate": 9.856743697719666e-06, "loss": 0.0044, "step": 19970 }, { "epoch": 0.3376625571432193, "grad_norm": 0.17768318951129913, "learning_rate": 9.85639298055404e-06, "loss": 0.0037, "step": 19980 }, { "epoch": 0.3378315574220698, "grad_norm": 0.011220584623515606, "learning_rate": 9.856041840858693e-06, "loss": 0.0029, "step": 19990 }, { "epoch": 0.3380005577009202, "grad_norm": 0.144292950630188, "learning_rate": 9.855690278664179e-06, "loss": 0.0041, "step": 20000 }, { "epoch": 0.33816955797977066, "grad_norm": 0.14557820558547974, "learning_rate": 9.855338294001083e-06, "loss": 0.0033, "step": 20010 }, { "epoch": 0.33833855825862114, "grad_norm": 0.08057982474565506, "learning_rate": 9.854985886900032e-06, "loss": 0.005, "step": 20020 }, { "epoch": 0.3385075585374716, "grad_norm": 0.10894902795553207, "learning_rate": 9.854633057391685e-06, "loss": 0.004, "step": 20030 }, { "epoch": 0.338676558816322, "grad_norm": 0.2983936071395874, "learning_rate": 9.854279805506741e-06, "loss": 0.0037, "step": 20040 }, { "epoch": 0.3388455590951725, "grad_norm": 0.12957635521888733, "learning_rate": 9.853926131275934e-06, "loss": 0.0038, "step": 20050 }, { "epoch": 0.339014559374023, "grad_norm": 0.17585639655590057, "learning_rate": 9.853572034730036e-06, "loss": 0.0037, "step": 20060 }, { "epoch": 0.33918355965287345, "grad_norm": 0.7723053097724915, "learning_rate": 9.853217515899857e-06, "loss": 0.0079, "step": 20070 }, { "epoch": 0.33935255993172386, "grad_norm": 0.10039085894823074, "learning_rate": 9.852862574816237e-06, "loss": 0.0036, "step": 20080 }, { "epoch": 0.33952156021057434, "grad_norm": 0.1289125382900238, "learning_rate": 9.852507211510063e-06, "loss": 0.0035, "step": 20090 }, { "epoch": 0.3396905604894248, "grad_norm": 0.18636161088943481, "learning_rate": 9.852151426012249e-06, "loss": 0.0042, "step": 20100 }, { "epoch": 0.3398595607682753, "grad_norm": 0.023119311779737473, "learning_rate": 9.851795218353751e-06, "loss": 0.0037, "step": 20110 }, { "epoch": 0.34002856104712575, "grad_norm": 0.09347156435251236, "learning_rate": 9.851438588565562e-06, "loss": 0.0018, "step": 20120 }, { "epoch": 0.34019756132597617, "grad_norm": 0.10636084526777267, "learning_rate": 9.851081536678711e-06, "loss": 0.0031, "step": 20130 }, { "epoch": 0.34036656160482665, "grad_norm": 0.0867602601647377, "learning_rate": 9.850724062724261e-06, "loss": 0.0031, "step": 20140 }, { "epoch": 0.3405355618836771, "grad_norm": 0.31015679240226746, "learning_rate": 9.850366166733316e-06, "loss": 0.0051, "step": 20150 }, { "epoch": 0.3407045621625276, "grad_norm": 0.11224543303251266, "learning_rate": 9.850007848737013e-06, "loss": 0.0035, "step": 20160 }, { "epoch": 0.340873562441378, "grad_norm": 0.14716555178165436, "learning_rate": 9.84964910876653e-06, "loss": 0.0024, "step": 20170 }, { "epoch": 0.3410425627202285, "grad_norm": 0.4090542197227478, "learning_rate": 9.849289946853077e-06, "loss": 0.005, "step": 20180 }, { "epoch": 0.34121156299907895, "grad_norm": 0.13513973355293274, "learning_rate": 9.848930363027901e-06, "loss": 0.0025, "step": 20190 }, { "epoch": 0.3413805632779294, "grad_norm": 0.3776029646396637, "learning_rate": 9.848570357322294e-06, "loss": 0.0056, "step": 20200 }, { "epoch": 0.34154956355677984, "grad_norm": 0.07301806658506393, "learning_rate": 9.848209929767571e-06, "loss": 0.005, "step": 20210 }, { "epoch": 0.3417185638356303, "grad_norm": 0.03997489809989929, "learning_rate": 9.847849080395094e-06, "loss": 0.0047, "step": 20220 }, { "epoch": 0.3418875641144808, "grad_norm": 0.33692535758018494, "learning_rate": 9.84748780923626e-06, "loss": 0.004, "step": 20230 }, { "epoch": 0.34205656439333126, "grad_norm": 0.09017347544431686, "learning_rate": 9.847126116322498e-06, "loss": 0.0037, "step": 20240 }, { "epoch": 0.34222556467218174, "grad_norm": 0.13404417037963867, "learning_rate": 9.84676400168528e-06, "loss": 0.0047, "step": 20250 }, { "epoch": 0.34239456495103215, "grad_norm": 0.11601067334413528, "learning_rate": 9.846401465356112e-06, "loss": 0.0036, "step": 20260 }, { "epoch": 0.3425635652298826, "grad_norm": 0.17043401300907135, "learning_rate": 9.846038507366536e-06, "loss": 0.0038, "step": 20270 }, { "epoch": 0.3427325655087331, "grad_norm": 0.31567105650901794, "learning_rate": 9.845675127748126e-06, "loss": 0.0045, "step": 20280 }, { "epoch": 0.3429015657875836, "grad_norm": 0.09197935461997986, "learning_rate": 9.845311326532504e-06, "loss": 0.0026, "step": 20290 }, { "epoch": 0.343070566066434, "grad_norm": 0.10412884503602982, "learning_rate": 9.844947103751324e-06, "loss": 0.0051, "step": 20300 }, { "epoch": 0.34323956634528446, "grad_norm": 0.16309162974357605, "learning_rate": 9.844582459436267e-06, "loss": 0.0035, "step": 20310 }, { "epoch": 0.34340856662413494, "grad_norm": 0.020650912076234818, "learning_rate": 9.844217393619066e-06, "loss": 0.0029, "step": 20320 }, { "epoch": 0.3435775669029854, "grad_norm": 0.06039857119321823, "learning_rate": 9.84385190633148e-06, "loss": 0.0025, "step": 20330 }, { "epoch": 0.3437465671818358, "grad_norm": 0.21292872726917267, "learning_rate": 9.843485997605309e-06, "loss": 0.0034, "step": 20340 }, { "epoch": 0.3439155674606863, "grad_norm": 0.19261842966079712, "learning_rate": 9.843119667472389e-06, "loss": 0.0028, "step": 20350 }, { "epoch": 0.3440845677395368, "grad_norm": 0.04296991974115372, "learning_rate": 9.842752915964592e-06, "loss": 0.0027, "step": 20360 }, { "epoch": 0.34425356801838725, "grad_norm": 0.4843842089176178, "learning_rate": 9.842385743113829e-06, "loss": 0.0055, "step": 20370 }, { "epoch": 0.34442256829723766, "grad_norm": 0.5954232811927795, "learning_rate": 9.842018148952044e-06, "loss": 0.0051, "step": 20380 }, { "epoch": 0.34459156857608814, "grad_norm": 0.10331881791353226, "learning_rate": 9.84165013351122e-06, "loss": 0.0033, "step": 20390 }, { "epoch": 0.3447605688549386, "grad_norm": 0.16192494332790375, "learning_rate": 9.841281696823377e-06, "loss": 0.0034, "step": 20400 }, { "epoch": 0.3449295691337891, "grad_norm": 0.10578694939613342, "learning_rate": 9.840912838920568e-06, "loss": 0.0028, "step": 20410 }, { "epoch": 0.34509856941263956, "grad_norm": 0.1332312375307083, "learning_rate": 9.840543559834888e-06, "loss": 0.0032, "step": 20420 }, { "epoch": 0.34526756969149, "grad_norm": 0.0674179196357727, "learning_rate": 9.840173859598465e-06, "loss": 0.0023, "step": 20430 }, { "epoch": 0.34543656997034045, "grad_norm": 0.22473275661468506, "learning_rate": 9.839803738243466e-06, "loss": 0.0042, "step": 20440 }, { "epoch": 0.3456055702491909, "grad_norm": 0.05025557801127434, "learning_rate": 9.83943319580209e-06, "loss": 0.0045, "step": 20450 }, { "epoch": 0.3457745705280414, "grad_norm": 0.03869166225194931, "learning_rate": 9.83906223230658e-06, "loss": 0.0029, "step": 20460 }, { "epoch": 0.3459435708068918, "grad_norm": 0.09461043030023575, "learning_rate": 9.838690847789211e-06, "loss": 0.0047, "step": 20470 }, { "epoch": 0.3461125710857423, "grad_norm": 0.2004118263721466, "learning_rate": 9.838319042282293e-06, "loss": 0.007, "step": 20480 }, { "epoch": 0.34628157136459276, "grad_norm": 0.12171358615159988, "learning_rate": 9.837946815818176e-06, "loss": 0.0041, "step": 20490 }, { "epoch": 0.34645057164344323, "grad_norm": 0.12448956072330475, "learning_rate": 9.837574168429247e-06, "loss": 0.0032, "step": 20500 }, { "epoch": 0.34661957192229365, "grad_norm": 0.17204810678958893, "learning_rate": 9.837201100147925e-06, "loss": 0.0033, "step": 20510 }, { "epoch": 0.3467885722011441, "grad_norm": 0.08777932077646255, "learning_rate": 9.836827611006672e-06, "loss": 0.0027, "step": 20520 }, { "epoch": 0.3469575724799946, "grad_norm": 0.14681030809879303, "learning_rate": 9.83645370103798e-06, "loss": 0.0032, "step": 20530 }, { "epoch": 0.34712657275884506, "grad_norm": 0.09471159428358078, "learning_rate": 9.836079370274385e-06, "loss": 0.0022, "step": 20540 }, { "epoch": 0.34729557303769554, "grad_norm": 0.09500308334827423, "learning_rate": 9.835704618748451e-06, "loss": 0.0019, "step": 20550 }, { "epoch": 0.34746457331654595, "grad_norm": 0.3364561200141907, "learning_rate": 9.835329446492788e-06, "loss": 0.0018, "step": 20560 }, { "epoch": 0.34763357359539643, "grad_norm": 0.192582905292511, "learning_rate": 9.834953853540035e-06, "loss": 0.0031, "step": 20570 }, { "epoch": 0.3478025738742469, "grad_norm": 0.05873227119445801, "learning_rate": 9.834577839922869e-06, "loss": 0.0029, "step": 20580 }, { "epoch": 0.3479715741530974, "grad_norm": 0.09160672128200531, "learning_rate": 9.834201405674008e-06, "loss": 0.004, "step": 20590 }, { "epoch": 0.3481405744319478, "grad_norm": 0.14228111505508423, "learning_rate": 9.833824550826203e-06, "loss": 0.0035, "step": 20600 }, { "epoch": 0.34830957471079826, "grad_norm": 0.05157274380326271, "learning_rate": 9.833447275412243e-06, "loss": 0.0028, "step": 20610 }, { "epoch": 0.34847857498964874, "grad_norm": 0.06791481375694275, "learning_rate": 9.833069579464949e-06, "loss": 0.0031, "step": 20620 }, { "epoch": 0.3486475752684992, "grad_norm": 0.03289634734392166, "learning_rate": 9.832691463017186e-06, "loss": 0.002, "step": 20630 }, { "epoch": 0.3488165755473496, "grad_norm": 0.11063172668218613, "learning_rate": 9.83231292610185e-06, "loss": 0.0039, "step": 20640 }, { "epoch": 0.3489855758262001, "grad_norm": 0.24149391055107117, "learning_rate": 9.831933968751877e-06, "loss": 0.0029, "step": 20650 }, { "epoch": 0.3491545761050506, "grad_norm": 0.19239841401576996, "learning_rate": 9.831554591000236e-06, "loss": 0.0028, "step": 20660 }, { "epoch": 0.34932357638390105, "grad_norm": 0.14838945865631104, "learning_rate": 9.831174792879938e-06, "loss": 0.0029, "step": 20670 }, { "epoch": 0.3494925766627515, "grad_norm": 0.258220911026001, "learning_rate": 9.830794574424026e-06, "loss": 0.0017, "step": 20680 }, { "epoch": 0.34966157694160194, "grad_norm": 0.2164478600025177, "learning_rate": 9.83041393566558e-06, "loss": 0.0026, "step": 20690 }, { "epoch": 0.3498305772204524, "grad_norm": 0.01935494877398014, "learning_rate": 9.830032876637714e-06, "loss": 0.0032, "step": 20700 }, { "epoch": 0.3499995774993029, "grad_norm": 0.22248001396656036, "learning_rate": 9.829651397373589e-06, "loss": 0.0065, "step": 20710 }, { "epoch": 0.35016857777815336, "grad_norm": 0.08571802824735641, "learning_rate": 9.829269497906393e-06, "loss": 0.0039, "step": 20720 }, { "epoch": 0.3503375780570038, "grad_norm": 0.060072824358940125, "learning_rate": 9.82888717826935e-06, "loss": 0.003, "step": 20730 }, { "epoch": 0.35050657833585425, "grad_norm": 0.06358876824378967, "learning_rate": 9.828504438495728e-06, "loss": 0.0027, "step": 20740 }, { "epoch": 0.3506755786147047, "grad_norm": 0.01844388246536255, "learning_rate": 9.828121278618824e-06, "loss": 0.0025, "step": 20750 }, { "epoch": 0.3508445788935552, "grad_norm": 0.07648131251335144, "learning_rate": 9.827737698671976e-06, "loss": 0.0026, "step": 20760 }, { "epoch": 0.3510135791724056, "grad_norm": 0.04285912215709686, "learning_rate": 9.827353698688557e-06, "loss": 0.0042, "step": 20770 }, { "epoch": 0.3511825794512561, "grad_norm": 0.0519292838871479, "learning_rate": 9.82696927870198e-06, "loss": 0.0023, "step": 20780 }, { "epoch": 0.35135157973010656, "grad_norm": 0.11216392368078232, "learning_rate": 9.826584438745685e-06, "loss": 0.0034, "step": 20790 }, { "epoch": 0.35152058000895703, "grad_norm": 0.029105642810463905, "learning_rate": 9.82619917885316e-06, "loss": 0.0024, "step": 20800 }, { "epoch": 0.3516895802878075, "grad_norm": 0.19928911328315735, "learning_rate": 9.825813499057922e-06, "loss": 0.0019, "step": 20810 }, { "epoch": 0.3518585805666579, "grad_norm": 0.04720110446214676, "learning_rate": 9.825427399393527e-06, "loss": 0.0018, "step": 20820 }, { "epoch": 0.3520275808455084, "grad_norm": 0.1498241126537323, "learning_rate": 9.825040879893571e-06, "loss": 0.0035, "step": 20830 }, { "epoch": 0.35219658112435887, "grad_norm": 0.10597819089889526, "learning_rate": 9.824653940591679e-06, "loss": 0.0055, "step": 20840 }, { "epoch": 0.35236558140320934, "grad_norm": 0.14786836504936218, "learning_rate": 9.82426658152152e-06, "loss": 0.0051, "step": 20850 }, { "epoch": 0.35253458168205976, "grad_norm": 0.09650187194347382, "learning_rate": 9.823878802716792e-06, "loss": 0.0031, "step": 20860 }, { "epoch": 0.35270358196091023, "grad_norm": 0.06578878313302994, "learning_rate": 9.823490604211238e-06, "loss": 0.0036, "step": 20870 }, { "epoch": 0.3528725822397607, "grad_norm": 0.09055297821760178, "learning_rate": 9.823101986038632e-06, "loss": 0.0042, "step": 20880 }, { "epoch": 0.3530415825186112, "grad_norm": 0.35521236062049866, "learning_rate": 9.822712948232782e-06, "loss": 0.003, "step": 20890 }, { "epoch": 0.3532105827974616, "grad_norm": 0.17009131610393524, "learning_rate": 9.82232349082754e-06, "loss": 0.0037, "step": 20900 }, { "epoch": 0.35337958307631206, "grad_norm": 0.052262403070926666, "learning_rate": 9.82193361385679e-06, "loss": 0.0028, "step": 20910 }, { "epoch": 0.35354858335516254, "grad_norm": 0.09378935396671295, "learning_rate": 9.821543317354451e-06, "loss": 0.0052, "step": 20920 }, { "epoch": 0.353717583634013, "grad_norm": 0.11036661267280579, "learning_rate": 9.821152601354484e-06, "loss": 0.0037, "step": 20930 }, { "epoch": 0.3538865839128635, "grad_norm": 0.05563582852482796, "learning_rate": 9.820761465890882e-06, "loss": 0.0031, "step": 20940 }, { "epoch": 0.3540555841917139, "grad_norm": 0.04262761399149895, "learning_rate": 9.820369910997674e-06, "loss": 0.0031, "step": 20950 }, { "epoch": 0.3542245844705644, "grad_norm": 0.059218067675828934, "learning_rate": 9.819977936708931e-06, "loss": 0.0036, "step": 20960 }, { "epoch": 0.35439358474941485, "grad_norm": 0.16169999539852142, "learning_rate": 9.819585543058752e-06, "loss": 0.0011, "step": 20970 }, { "epoch": 0.3545625850282653, "grad_norm": 0.07854767888784409, "learning_rate": 9.81919273008128e-06, "loss": 0.0024, "step": 20980 }, { "epoch": 0.35473158530711574, "grad_norm": 0.33886852860450745, "learning_rate": 9.818799497810691e-06, "loss": 0.0053, "step": 20990 }, { "epoch": 0.3549005855859662, "grad_norm": 0.10044369101524353, "learning_rate": 9.818405846281196e-06, "loss": 0.0047, "step": 21000 }, { "epoch": 0.3550695858648167, "grad_norm": 0.05758042261004448, "learning_rate": 9.81801177552705e-06, "loss": 0.0018, "step": 21010 }, { "epoch": 0.35523858614366716, "grad_norm": 0.18058626353740692, "learning_rate": 9.817617285582534e-06, "loss": 0.0042, "step": 21020 }, { "epoch": 0.3554075864225176, "grad_norm": 0.2607985734939575, "learning_rate": 9.817222376481972e-06, "loss": 0.0043, "step": 21030 }, { "epoch": 0.35557658670136805, "grad_norm": 0.030365170910954475, "learning_rate": 9.816827048259724e-06, "loss": 0.002, "step": 21040 }, { "epoch": 0.3557455869802185, "grad_norm": 0.12924058735370636, "learning_rate": 9.816431300950184e-06, "loss": 0.0038, "step": 21050 }, { "epoch": 0.355914587259069, "grad_norm": 0.193020299077034, "learning_rate": 9.816035134587785e-06, "loss": 0.0034, "step": 21060 }, { "epoch": 0.3560835875379194, "grad_norm": 0.13917414844036102, "learning_rate": 9.815638549206997e-06, "loss": 0.0057, "step": 21070 }, { "epoch": 0.3562525878167699, "grad_norm": 0.11824040114879608, "learning_rate": 9.81524154484232e-06, "loss": 0.0033, "step": 21080 }, { "epoch": 0.35642158809562036, "grad_norm": 0.04004885256290436, "learning_rate": 9.8148441215283e-06, "loss": 0.0028, "step": 21090 }, { "epoch": 0.35659058837447083, "grad_norm": 0.08424954116344452, "learning_rate": 9.814446279299512e-06, "loss": 0.0044, "step": 21100 }, { "epoch": 0.3567595886533213, "grad_norm": 0.34200525283813477, "learning_rate": 9.814048018190572e-06, "loss": 0.0046, "step": 21110 }, { "epoch": 0.3569285889321717, "grad_norm": 0.09797846525907516, "learning_rate": 9.813649338236129e-06, "loss": 0.0057, "step": 21120 }, { "epoch": 0.3570975892110222, "grad_norm": 0.036891937255859375, "learning_rate": 9.81325023947087e-06, "loss": 0.0022, "step": 21130 }, { "epoch": 0.35726658948987267, "grad_norm": 0.2412370890378952, "learning_rate": 9.81285072192952e-06, "loss": 0.0034, "step": 21140 }, { "epoch": 0.35743558976872314, "grad_norm": 0.12464900314807892, "learning_rate": 9.812450785646841e-06, "loss": 0.002, "step": 21150 }, { "epoch": 0.35760459004757356, "grad_norm": 0.08267778158187866, "learning_rate": 9.812050430657624e-06, "loss": 0.0028, "step": 21160 }, { "epoch": 0.35777359032642403, "grad_norm": 0.09670425951480865, "learning_rate": 9.811649656996706e-06, "loss": 0.0048, "step": 21170 }, { "epoch": 0.3579425906052745, "grad_norm": 0.02184341289103031, "learning_rate": 9.811248464698954e-06, "loss": 0.0048, "step": 21180 }, { "epoch": 0.358111590884125, "grad_norm": 0.12013767659664154, "learning_rate": 9.810846853799275e-06, "loss": 0.0043, "step": 21190 }, { "epoch": 0.3582805911629754, "grad_norm": 0.10406313091516495, "learning_rate": 9.81044482433261e-06, "loss": 0.0021, "step": 21200 }, { "epoch": 0.35844959144182587, "grad_norm": 0.083363838493824, "learning_rate": 9.810042376333939e-06, "loss": 0.0038, "step": 21210 }, { "epoch": 0.35861859172067634, "grad_norm": 0.08557987213134766, "learning_rate": 9.809639509838276e-06, "loss": 0.0026, "step": 21220 }, { "epoch": 0.3587875919995268, "grad_norm": 0.02609414793550968, "learning_rate": 9.809236224880672e-06, "loss": 0.0011, "step": 21230 }, { "epoch": 0.3589565922783773, "grad_norm": 0.15575864911079407, "learning_rate": 9.808832521496214e-06, "loss": 0.0047, "step": 21240 }, { "epoch": 0.3591255925572277, "grad_norm": 0.2338981032371521, "learning_rate": 9.808428399720029e-06, "loss": 0.0029, "step": 21250 }, { "epoch": 0.3592945928360782, "grad_norm": 0.18020887672901154, "learning_rate": 9.808023859587276e-06, "loss": 0.004, "step": 21260 }, { "epoch": 0.35946359311492865, "grad_norm": 0.07131248712539673, "learning_rate": 9.807618901133152e-06, "loss": 0.0026, "step": 21270 }, { "epoch": 0.3596325933937791, "grad_norm": 0.09082590788602829, "learning_rate": 9.807213524392889e-06, "loss": 0.002, "step": 21280 }, { "epoch": 0.35980159367262954, "grad_norm": 0.17046932876110077, "learning_rate": 9.806807729401756e-06, "loss": 0.003, "step": 21290 }, { "epoch": 0.35997059395148, "grad_norm": 0.155225470662117, "learning_rate": 9.806401516195066e-06, "loss": 0.0029, "step": 21300 }, { "epoch": 0.3601395942303305, "grad_norm": 0.03638127073645592, "learning_rate": 9.805994884808153e-06, "loss": 0.004, "step": 21310 }, { "epoch": 0.36030859450918096, "grad_norm": 0.12270484119653702, "learning_rate": 9.8055878352764e-06, "loss": 0.0022, "step": 21320 }, { "epoch": 0.3604775947880314, "grad_norm": 0.1206061840057373, "learning_rate": 9.805180367635222e-06, "loss": 0.003, "step": 21330 }, { "epoch": 0.36064659506688185, "grad_norm": 0.021965859457850456, "learning_rate": 9.804772481920071e-06, "loss": 0.0033, "step": 21340 }, { "epoch": 0.3608155953457323, "grad_norm": 0.14499451220035553, "learning_rate": 9.804364178166432e-06, "loss": 0.0041, "step": 21350 }, { "epoch": 0.3609845956245828, "grad_norm": 0.3501626253128052, "learning_rate": 9.803955456409834e-06, "loss": 0.0029, "step": 21360 }, { "epoch": 0.36115359590343327, "grad_norm": 0.19067667424678802, "learning_rate": 9.803546316685835e-06, "loss": 0.0158, "step": 21370 }, { "epoch": 0.3613225961822837, "grad_norm": 0.017335981130599976, "learning_rate": 9.803136759030034e-06, "loss": 0.0045, "step": 21380 }, { "epoch": 0.36149159646113416, "grad_norm": 0.04304839298129082, "learning_rate": 9.802726783478062e-06, "loss": 0.0027, "step": 21390 }, { "epoch": 0.36166059673998463, "grad_norm": 0.17117752134799957, "learning_rate": 9.802316390065589e-06, "loss": 0.0031, "step": 21400 }, { "epoch": 0.3618295970188351, "grad_norm": 0.09694608300924301, "learning_rate": 9.801905578828325e-06, "loss": 0.0033, "step": 21410 }, { "epoch": 0.3619985972976855, "grad_norm": 0.15579736232757568, "learning_rate": 9.801494349802008e-06, "loss": 0.0036, "step": 21420 }, { "epoch": 0.362167597576536, "grad_norm": 0.28429025411605835, "learning_rate": 9.80108270302242e-06, "loss": 0.0035, "step": 21430 }, { "epoch": 0.36233659785538647, "grad_norm": 0.01713419519364834, "learning_rate": 9.800670638525374e-06, "loss": 0.0031, "step": 21440 }, { "epoch": 0.36250559813423694, "grad_norm": 0.07234758883714676, "learning_rate": 9.800258156346722e-06, "loss": 0.0042, "step": 21450 }, { "epoch": 0.36267459841308736, "grad_norm": 0.08492135256528854, "learning_rate": 9.799845256522353e-06, "loss": 0.004, "step": 21460 }, { "epoch": 0.36284359869193783, "grad_norm": 0.18330654501914978, "learning_rate": 9.799431939088193e-06, "loss": 0.0027, "step": 21470 }, { "epoch": 0.3630125989707883, "grad_norm": 0.07547181844711304, "learning_rate": 9.799018204080198e-06, "loss": 0.0011, "step": 21480 }, { "epoch": 0.3631815992496388, "grad_norm": 0.044233452528715134, "learning_rate": 9.798604051534368e-06, "loss": 0.0024, "step": 21490 }, { "epoch": 0.36335059952848925, "grad_norm": 0.13739806413650513, "learning_rate": 9.798189481486738e-06, "loss": 0.0034, "step": 21500 }, { "epoch": 0.36351959980733967, "grad_norm": 0.024653052911162376, "learning_rate": 9.797774493973372e-06, "loss": 0.0019, "step": 21510 }, { "epoch": 0.36368860008619014, "grad_norm": 0.10872801393270493, "learning_rate": 9.797359089030381e-06, "loss": 0.0037, "step": 21520 }, { "epoch": 0.3638576003650406, "grad_norm": 0.17600472271442413, "learning_rate": 9.796943266693906e-06, "loss": 0.0022, "step": 21530 }, { "epoch": 0.3640266006438911, "grad_norm": 0.18142275512218475, "learning_rate": 9.796527027000123e-06, "loss": 0.0035, "step": 21540 }, { "epoch": 0.3641956009227415, "grad_norm": 0.08506744354963303, "learning_rate": 9.796110369985252e-06, "loss": 0.005, "step": 21550 }, { "epoch": 0.364364601201592, "grad_norm": 0.05040203779935837, "learning_rate": 9.795693295685538e-06, "loss": 0.0015, "step": 21560 }, { "epoch": 0.36453360148044245, "grad_norm": 0.060695599764585495, "learning_rate": 9.795275804137273e-06, "loss": 0.0022, "step": 21570 }, { "epoch": 0.3647026017592929, "grad_norm": 0.30580392479896545, "learning_rate": 9.79485789537678e-06, "loss": 0.0025, "step": 21580 }, { "epoch": 0.36487160203814334, "grad_norm": 0.20160751044750214, "learning_rate": 9.794439569440417e-06, "loss": 0.0025, "step": 21590 }, { "epoch": 0.3650406023169938, "grad_norm": 0.1361100822687149, "learning_rate": 9.794020826364584e-06, "loss": 0.0036, "step": 21600 }, { "epoch": 0.3652096025958443, "grad_norm": 0.06608045846223831, "learning_rate": 9.79360166618571e-06, "loss": 0.0032, "step": 21610 }, { "epoch": 0.36537860287469476, "grad_norm": 0.010083463042974472, "learning_rate": 9.793182088940266e-06, "loss": 0.0044, "step": 21620 }, { "epoch": 0.36554760315354523, "grad_norm": 0.09272641688585281, "learning_rate": 9.792762094664756e-06, "loss": 0.0034, "step": 21630 }, { "epoch": 0.36571660343239565, "grad_norm": 0.0999511182308197, "learning_rate": 9.792341683395723e-06, "loss": 0.003, "step": 21640 }, { "epoch": 0.3658856037112461, "grad_norm": 0.07313133031129837, "learning_rate": 9.791920855169744e-06, "loss": 0.0031, "step": 21650 }, { "epoch": 0.3660546039900966, "grad_norm": 0.07365882396697998, "learning_rate": 9.791499610023433e-06, "loss": 0.0026, "step": 21660 }, { "epoch": 0.36622360426894707, "grad_norm": 0.05429621785879135, "learning_rate": 9.79107794799344e-06, "loss": 0.0024, "step": 21670 }, { "epoch": 0.3663926045477975, "grad_norm": 0.13175803422927856, "learning_rate": 9.790655869116454e-06, "loss": 0.0033, "step": 21680 }, { "epoch": 0.36656160482664796, "grad_norm": 0.043493472039699554, "learning_rate": 9.790233373429195e-06, "loss": 0.0028, "step": 21690 }, { "epoch": 0.36673060510549843, "grad_norm": 0.10219153761863708, "learning_rate": 9.789810460968423e-06, "loss": 0.0035, "step": 21700 }, { "epoch": 0.3668996053843489, "grad_norm": 0.13374842703342438, "learning_rate": 9.789387131770936e-06, "loss": 0.003, "step": 21710 }, { "epoch": 0.3670686056631993, "grad_norm": 0.10844600200653076, "learning_rate": 9.78896338587356e-06, "loss": 0.0037, "step": 21720 }, { "epoch": 0.3672376059420498, "grad_norm": 0.03692597150802612, "learning_rate": 9.788539223313168e-06, "loss": 0.002, "step": 21730 }, { "epoch": 0.36740660622090027, "grad_norm": 0.06032506749033928, "learning_rate": 9.788114644126662e-06, "loss": 0.006, "step": 21740 }, { "epoch": 0.36757560649975074, "grad_norm": 0.017542537301778793, "learning_rate": 9.787689648350984e-06, "loss": 0.0017, "step": 21750 }, { "epoch": 0.36774460677860116, "grad_norm": 0.11518267542123795, "learning_rate": 9.78726423602311e-06, "loss": 0.0032, "step": 21760 }, { "epoch": 0.36791360705745163, "grad_norm": 0.25193750858306885, "learning_rate": 9.786838407180052e-06, "loss": 0.004, "step": 21770 }, { "epoch": 0.3680826073363021, "grad_norm": 0.5318714380264282, "learning_rate": 9.78641216185886e-06, "loss": 0.004, "step": 21780 }, { "epoch": 0.3682516076151526, "grad_norm": 0.16579753160476685, "learning_rate": 9.785985500096617e-06, "loss": 0.0024, "step": 21790 }, { "epoch": 0.36842060789400305, "grad_norm": 0.2203885316848755, "learning_rate": 9.78555842193045e-06, "loss": 0.0018, "step": 21800 }, { "epoch": 0.36858960817285347, "grad_norm": 0.09116717427968979, "learning_rate": 9.785130927397513e-06, "loss": 0.0067, "step": 21810 }, { "epoch": 0.36875860845170394, "grad_norm": 0.0862962082028389, "learning_rate": 9.784703016535e-06, "loss": 0.0036, "step": 21820 }, { "epoch": 0.3689276087305544, "grad_norm": 0.0602206252515316, "learning_rate": 9.784274689380142e-06, "loss": 0.0015, "step": 21830 }, { "epoch": 0.3690966090094049, "grad_norm": 0.11045362055301666, "learning_rate": 9.783845945970205e-06, "loss": 0.003, "step": 21840 }, { "epoch": 0.3692656092882553, "grad_norm": 0.12604741752147675, "learning_rate": 9.783416786342495e-06, "loss": 0.0032, "step": 21850 }, { "epoch": 0.3694346095671058, "grad_norm": 0.06938070058822632, "learning_rate": 9.782987210534348e-06, "loss": 0.003, "step": 21860 }, { "epoch": 0.36960360984595625, "grad_norm": 0.11994098871946335, "learning_rate": 9.782557218583138e-06, "loss": 0.0026, "step": 21870 }, { "epoch": 0.3697726101248067, "grad_norm": 0.43083980679512024, "learning_rate": 9.782126810526278e-06, "loss": 0.0031, "step": 21880 }, { "epoch": 0.36994161040365714, "grad_norm": 0.1065066009759903, "learning_rate": 9.781695986401217e-06, "loss": 0.0028, "step": 21890 }, { "epoch": 0.3701106106825076, "grad_norm": 0.04962790757417679, "learning_rate": 9.781264746245434e-06, "loss": 0.0032, "step": 21900 }, { "epoch": 0.3702796109613581, "grad_norm": 0.071550652384758, "learning_rate": 9.780833090096455e-06, "loss": 0.0024, "step": 21910 }, { "epoch": 0.37044861124020856, "grad_norm": 0.17351196706295013, "learning_rate": 9.780401017991833e-06, "loss": 0.004, "step": 21920 }, { "epoch": 0.37061761151905903, "grad_norm": 0.16681891679763794, "learning_rate": 9.77996852996916e-06, "loss": 0.0022, "step": 21930 }, { "epoch": 0.37078661179790945, "grad_norm": 0.078296959400177, "learning_rate": 9.779535626066067e-06, "loss": 0.004, "step": 21940 }, { "epoch": 0.3709556120767599, "grad_norm": 0.1080959290266037, "learning_rate": 9.779102306320219e-06, "loss": 0.0028, "step": 21950 }, { "epoch": 0.3711246123556104, "grad_norm": 0.19295884668827057, "learning_rate": 9.778668570769312e-06, "loss": 0.0036, "step": 21960 }, { "epoch": 0.37129361263446087, "grad_norm": 0.06714986264705658, "learning_rate": 9.778234419451087e-06, "loss": 0.0029, "step": 21970 }, { "epoch": 0.3714626129133113, "grad_norm": 0.15599657595157623, "learning_rate": 9.777799852403316e-06, "loss": 0.0033, "step": 21980 }, { "epoch": 0.37163161319216176, "grad_norm": 0.160843625664711, "learning_rate": 9.777364869663808e-06, "loss": 0.0047, "step": 21990 }, { "epoch": 0.37180061347101223, "grad_norm": 0.12721359729766846, "learning_rate": 9.776929471270411e-06, "loss": 0.0018, "step": 22000 }, { "epoch": 0.3719696137498627, "grad_norm": 0.08139853179454803, "learning_rate": 9.776493657261005e-06, "loss": 0.0044, "step": 22010 }, { "epoch": 0.3721386140287131, "grad_norm": 0.23349185287952423, "learning_rate": 9.776057427673508e-06, "loss": 0.0039, "step": 22020 }, { "epoch": 0.3723076143075636, "grad_norm": 0.13932804763317108, "learning_rate": 9.775620782545874e-06, "loss": 0.0043, "step": 22030 }, { "epoch": 0.37247661458641407, "grad_norm": 0.14340350031852722, "learning_rate": 9.775183721916094e-06, "loss": 0.0036, "step": 22040 }, { "epoch": 0.37264561486526454, "grad_norm": 0.14557211101055145, "learning_rate": 9.774746245822193e-06, "loss": 0.0025, "step": 22050 }, { "epoch": 0.372814615144115, "grad_norm": 0.06957050412893295, "learning_rate": 9.774308354302236e-06, "loss": 0.0024, "step": 22060 }, { "epoch": 0.37298361542296543, "grad_norm": 0.059090156108140945, "learning_rate": 9.77387004739432e-06, "loss": 0.0028, "step": 22070 }, { "epoch": 0.3731526157018159, "grad_norm": 0.42358991503715515, "learning_rate": 9.773431325136577e-06, "loss": 0.0051, "step": 22080 }, { "epoch": 0.3733216159806664, "grad_norm": 0.0769067034125328, "learning_rate": 9.772992187567183e-06, "loss": 0.0047, "step": 22090 }, { "epoch": 0.37349061625951685, "grad_norm": 0.15835249423980713, "learning_rate": 9.772552634724345e-06, "loss": 0.0024, "step": 22100 }, { "epoch": 0.37365961653836727, "grad_norm": 0.04665343463420868, "learning_rate": 9.772112666646302e-06, "loss": 0.0029, "step": 22110 }, { "epoch": 0.37382861681721774, "grad_norm": 0.0746210366487503, "learning_rate": 9.771672283371337e-06, "loss": 0.0033, "step": 22120 }, { "epoch": 0.3739976170960682, "grad_norm": 0.022807767614722252, "learning_rate": 9.771231484937762e-06, "loss": 0.002, "step": 22130 }, { "epoch": 0.3741666173749187, "grad_norm": 0.014665382914245129, "learning_rate": 9.770790271383931e-06, "loss": 0.0027, "step": 22140 }, { "epoch": 0.3743356176537691, "grad_norm": 0.09234453737735748, "learning_rate": 9.770348642748231e-06, "loss": 0.0021, "step": 22150 }, { "epoch": 0.3745046179326196, "grad_norm": 0.0824594646692276, "learning_rate": 9.769906599069088e-06, "loss": 0.0033, "step": 22160 }, { "epoch": 0.37467361821147005, "grad_norm": 0.0873909667134285, "learning_rate": 9.76946414038496e-06, "loss": 0.0043, "step": 22170 }, { "epoch": 0.3748426184903205, "grad_norm": 0.10788635909557343, "learning_rate": 9.769021266734343e-06, "loss": 0.0015, "step": 22180 }, { "epoch": 0.375011618769171, "grad_norm": 0.10120099037885666, "learning_rate": 9.768577978155769e-06, "loss": 0.0029, "step": 22190 }, { "epoch": 0.3751806190480214, "grad_norm": 0.09097573161125183, "learning_rate": 9.768134274687806e-06, "loss": 0.0031, "step": 22200 }, { "epoch": 0.3753496193268719, "grad_norm": 0.11442361027002335, "learning_rate": 9.767690156369059e-06, "loss": 0.0026, "step": 22210 }, { "epoch": 0.37551861960572236, "grad_norm": 0.19592778384685516, "learning_rate": 9.767245623238169e-06, "loss": 0.0027, "step": 22220 }, { "epoch": 0.37568761988457283, "grad_norm": 0.09860577434301376, "learning_rate": 9.766800675333812e-06, "loss": 0.0033, "step": 22230 }, { "epoch": 0.37585662016342325, "grad_norm": 0.06487202644348145, "learning_rate": 9.7663553126947e-06, "loss": 0.0027, "step": 22240 }, { "epoch": 0.3760256204422737, "grad_norm": 0.24659186601638794, "learning_rate": 9.765909535359584e-06, "loss": 0.0025, "step": 22250 }, { "epoch": 0.3761946207211242, "grad_norm": 0.05501728877425194, "learning_rate": 9.765463343367245e-06, "loss": 0.0029, "step": 22260 }, { "epoch": 0.37636362099997467, "grad_norm": 0.04128387197852135, "learning_rate": 9.765016736756506e-06, "loss": 0.0024, "step": 22270 }, { "epoch": 0.3765326212788251, "grad_norm": 0.4396528899669647, "learning_rate": 9.764569715566224e-06, "loss": 0.0033, "step": 22280 }, { "epoch": 0.37670162155767556, "grad_norm": 0.09631740301847458, "learning_rate": 9.764122279835293e-06, "loss": 0.0037, "step": 22290 }, { "epoch": 0.37687062183652603, "grad_norm": 0.04098780080676079, "learning_rate": 9.76367442960264e-06, "loss": 0.0029, "step": 22300 }, { "epoch": 0.3770396221153765, "grad_norm": 0.03507139906287193, "learning_rate": 9.763226164907231e-06, "loss": 0.0034, "step": 22310 }, { "epoch": 0.377208622394227, "grad_norm": 0.06853745877742767, "learning_rate": 9.762777485788069e-06, "loss": 0.0017, "step": 22320 }, { "epoch": 0.3773776226730774, "grad_norm": 0.12593597173690796, "learning_rate": 9.762328392284188e-06, "loss": 0.0032, "step": 22330 }, { "epoch": 0.37754662295192787, "grad_norm": 0.12185325473546982, "learning_rate": 9.761878884434663e-06, "loss": 0.0023, "step": 22340 }, { "epoch": 0.37771562323077834, "grad_norm": 0.024328766390681267, "learning_rate": 9.761428962278603e-06, "loss": 0.0024, "step": 22350 }, { "epoch": 0.3778846235096288, "grad_norm": 0.23012223839759827, "learning_rate": 9.760978625855154e-06, "loss": 0.0019, "step": 22360 }, { "epoch": 0.37805362378847923, "grad_norm": 0.03638390824198723, "learning_rate": 9.760527875203498e-06, "loss": 0.0041, "step": 22370 }, { "epoch": 0.3782226240673297, "grad_norm": 0.10978236794471741, "learning_rate": 9.76007671036285e-06, "loss": 0.0025, "step": 22380 }, { "epoch": 0.3783916243461802, "grad_norm": 0.02126036025583744, "learning_rate": 9.759625131372466e-06, "loss": 0.0025, "step": 22390 }, { "epoch": 0.37856062462503065, "grad_norm": 0.04835692048072815, "learning_rate": 9.759173138271634e-06, "loss": 0.0027, "step": 22400 }, { "epoch": 0.37872962490388107, "grad_norm": 0.12906229496002197, "learning_rate": 9.75872073109968e-06, "loss": 0.0028, "step": 22410 }, { "epoch": 0.37889862518273154, "grad_norm": 0.3933924436569214, "learning_rate": 9.758267909895966e-06, "loss": 0.0021, "step": 22420 }, { "epoch": 0.379067625461582, "grad_norm": 0.08228648453950882, "learning_rate": 9.757814674699891e-06, "loss": 0.0037, "step": 22430 }, { "epoch": 0.3792366257404325, "grad_norm": 0.1131923496723175, "learning_rate": 9.757361025550887e-06, "loss": 0.0032, "step": 22440 }, { "epoch": 0.3794056260192829, "grad_norm": 0.11813594400882721, "learning_rate": 9.756906962488421e-06, "loss": 0.0029, "step": 22450 }, { "epoch": 0.3795746262981334, "grad_norm": 0.05745278298854828, "learning_rate": 9.756452485552005e-06, "loss": 0.0016, "step": 22460 }, { "epoch": 0.37974362657698385, "grad_norm": 0.07140239328145981, "learning_rate": 9.755997594781175e-06, "loss": 0.0034, "step": 22470 }, { "epoch": 0.3799126268558343, "grad_norm": 0.06921084225177765, "learning_rate": 9.755542290215512e-06, "loss": 0.003, "step": 22480 }, { "epoch": 0.3800816271346848, "grad_norm": 0.050568293780088425, "learning_rate": 9.755086571894629e-06, "loss": 0.0024, "step": 22490 }, { "epoch": 0.3802506274135352, "grad_norm": 0.19204530119895935, "learning_rate": 9.754630439858175e-06, "loss": 0.0035, "step": 22500 }, { "epoch": 0.3804196276923857, "grad_norm": 0.016198497265577316, "learning_rate": 9.754173894145835e-06, "loss": 0.0018, "step": 22510 }, { "epoch": 0.38058862797123616, "grad_norm": 0.14448107779026031, "learning_rate": 9.753716934797333e-06, "loss": 0.003, "step": 22520 }, { "epoch": 0.38075762825008663, "grad_norm": 0.11803316324949265, "learning_rate": 9.753259561852424e-06, "loss": 0.0044, "step": 22530 }, { "epoch": 0.38092662852893705, "grad_norm": 0.3361509144306183, "learning_rate": 9.752801775350904e-06, "loss": 0.0043, "step": 22540 }, { "epoch": 0.3810956288077875, "grad_norm": 0.09761517494916916, "learning_rate": 9.752343575332602e-06, "loss": 0.0036, "step": 22550 }, { "epoch": 0.381264629086638, "grad_norm": 0.05435268580913544, "learning_rate": 9.75188496183738e-06, "loss": 0.0015, "step": 22560 }, { "epoch": 0.38143362936548847, "grad_norm": 0.15113788843154907, "learning_rate": 9.751425934905148e-06, "loss": 0.0027, "step": 22570 }, { "epoch": 0.3816026296443389, "grad_norm": 0.21489611268043518, "learning_rate": 9.750966494575835e-06, "loss": 0.0021, "step": 22580 }, { "epoch": 0.38177162992318936, "grad_norm": 0.12411852180957794, "learning_rate": 9.750506640889418e-06, "loss": 0.0036, "step": 22590 }, { "epoch": 0.38194063020203983, "grad_norm": 0.06043042987585068, "learning_rate": 9.750046373885909e-06, "loss": 0.003, "step": 22600 }, { "epoch": 0.3821096304808903, "grad_norm": 0.04677782580256462, "learning_rate": 9.74958569360535e-06, "loss": 0.0026, "step": 22610 }, { "epoch": 0.3822786307597408, "grad_norm": 0.1353975385427475, "learning_rate": 9.749124600087822e-06, "loss": 0.0024, "step": 22620 }, { "epoch": 0.3824476310385912, "grad_norm": 0.19661273062229156, "learning_rate": 9.748663093373445e-06, "loss": 0.0042, "step": 22630 }, { "epoch": 0.38261663131744167, "grad_norm": 0.0980839729309082, "learning_rate": 9.748201173502372e-06, "loss": 0.0033, "step": 22640 }, { "epoch": 0.38278563159629214, "grad_norm": 0.17554502189159393, "learning_rate": 9.747738840514789e-06, "loss": 0.0024, "step": 22650 }, { "epoch": 0.3829546318751426, "grad_norm": 0.09802938252687454, "learning_rate": 9.747276094450924e-06, "loss": 0.0022, "step": 22660 }, { "epoch": 0.38312363215399303, "grad_norm": 0.11497267335653305, "learning_rate": 9.74681293535104e-06, "loss": 0.004, "step": 22670 }, { "epoch": 0.3832926324328435, "grad_norm": 0.17812427878379822, "learning_rate": 9.74634936325543e-06, "loss": 0.007, "step": 22680 }, { "epoch": 0.383461632711694, "grad_norm": 0.21699292957782745, "learning_rate": 9.745885378204427e-06, "loss": 0.002, "step": 22690 }, { "epoch": 0.38363063299054445, "grad_norm": 0.18406936526298523, "learning_rate": 9.745420980238405e-06, "loss": 0.0044, "step": 22700 }, { "epoch": 0.38379963326939487, "grad_norm": 0.039109811186790466, "learning_rate": 9.744956169397764e-06, "loss": 0.0017, "step": 22710 }, { "epoch": 0.38396863354824534, "grad_norm": 0.23905335366725922, "learning_rate": 9.744490945722947e-06, "loss": 0.003, "step": 22720 }, { "epoch": 0.3841376338270958, "grad_norm": 0.17291510105133057, "learning_rate": 9.74402530925443e-06, "loss": 0.0041, "step": 22730 }, { "epoch": 0.3843066341059463, "grad_norm": 0.21678727865219116, "learning_rate": 9.743559260032727e-06, "loss": 0.0031, "step": 22740 }, { "epoch": 0.38447563438479676, "grad_norm": 0.23328426480293274, "learning_rate": 9.743092798098383e-06, "loss": 0.0052, "step": 22750 }, { "epoch": 0.3846446346636472, "grad_norm": 0.19061240553855896, "learning_rate": 9.742625923491986e-06, "loss": 0.0027, "step": 22760 }, { "epoch": 0.38481363494249765, "grad_norm": 0.03589209169149399, "learning_rate": 9.742158636254155e-06, "loss": 0.003, "step": 22770 }, { "epoch": 0.3849826352213481, "grad_norm": 0.03044353425502777, "learning_rate": 9.741690936425545e-06, "loss": 0.0024, "step": 22780 }, { "epoch": 0.3851516355001986, "grad_norm": 0.09414897114038467, "learning_rate": 9.741222824046853e-06, "loss": 0.002, "step": 22790 }, { "epoch": 0.385320635779049, "grad_norm": 0.09415625780820847, "learning_rate": 9.740754299158799e-06, "loss": 0.0024, "step": 22800 }, { "epoch": 0.3854896360578995, "grad_norm": 0.1006363034248352, "learning_rate": 9.740285361802154e-06, "loss": 0.003, "step": 22810 }, { "epoch": 0.38565863633674996, "grad_norm": 0.21563363075256348, "learning_rate": 9.739816012017715e-06, "loss": 0.0042, "step": 22820 }, { "epoch": 0.38582763661560043, "grad_norm": 0.31482458114624023, "learning_rate": 9.73934624984632e-06, "loss": 0.0038, "step": 22830 }, { "epoch": 0.38599663689445085, "grad_norm": 0.13808608055114746, "learning_rate": 9.738876075328836e-06, "loss": 0.0033, "step": 22840 }, { "epoch": 0.3861656371733013, "grad_norm": 0.0663992166519165, "learning_rate": 9.738405488506173e-06, "loss": 0.0031, "step": 22850 }, { "epoch": 0.3863346374521518, "grad_norm": 0.0355796180665493, "learning_rate": 9.737934489419276e-06, "loss": 0.0029, "step": 22860 }, { "epoch": 0.38650363773100227, "grad_norm": 0.12239819765090942, "learning_rate": 9.737463078109121e-06, "loss": 0.003, "step": 22870 }, { "epoch": 0.38667263800985274, "grad_norm": 0.08600953966379166, "learning_rate": 9.736991254616725e-06, "loss": 0.0017, "step": 22880 }, { "epoch": 0.38684163828870316, "grad_norm": 0.08243556320667267, "learning_rate": 9.73651901898314e-06, "loss": 0.0027, "step": 22890 }, { "epoch": 0.38701063856755363, "grad_norm": 0.1894107311964035, "learning_rate": 9.73604637124945e-06, "loss": 0.0034, "step": 22900 }, { "epoch": 0.3871796388464041, "grad_norm": 0.09860440343618393, "learning_rate": 9.73557331145678e-06, "loss": 0.0029, "step": 22910 }, { "epoch": 0.3873486391252546, "grad_norm": 0.09986410290002823, "learning_rate": 9.735099839646286e-06, "loss": 0.0031, "step": 22920 }, { "epoch": 0.387517639404105, "grad_norm": 0.0639401376247406, "learning_rate": 9.734625955859164e-06, "loss": 0.0016, "step": 22930 }, { "epoch": 0.38768663968295547, "grad_norm": 0.06922442466020584, "learning_rate": 9.734151660136645e-06, "loss": 0.003, "step": 22940 }, { "epoch": 0.38785563996180594, "grad_norm": 0.020561840385198593, "learning_rate": 9.733676952519995e-06, "loss": 0.0044, "step": 22950 }, { "epoch": 0.3880246402406564, "grad_norm": 0.09675197303295135, "learning_rate": 9.733201833050513e-06, "loss": 0.0019, "step": 22960 }, { "epoch": 0.38819364051950683, "grad_norm": 0.0909353494644165, "learning_rate": 9.73272630176954e-06, "loss": 0.005, "step": 22970 }, { "epoch": 0.3883626407983573, "grad_norm": 0.07992875576019287, "learning_rate": 9.732250358718448e-06, "loss": 0.0026, "step": 22980 }, { "epoch": 0.3885316410772078, "grad_norm": 0.0822548121213913, "learning_rate": 9.731774003938645e-06, "loss": 0.0042, "step": 22990 }, { "epoch": 0.38870064135605825, "grad_norm": 0.09815090894699097, "learning_rate": 9.73129723747158e-06, "loss": 0.0039, "step": 23000 }, { "epoch": 0.3888696416349087, "grad_norm": 0.15999223291873932, "learning_rate": 9.73082005935873e-06, "loss": 0.0038, "step": 23010 }, { "epoch": 0.38903864191375914, "grad_norm": 0.14762641489505768, "learning_rate": 9.730342469641614e-06, "loss": 0.0036, "step": 23020 }, { "epoch": 0.3892076421926096, "grad_norm": 0.1708577424287796, "learning_rate": 9.729864468361786e-06, "loss": 0.0024, "step": 23030 }, { "epoch": 0.3893766424714601, "grad_norm": 0.12733496725559235, "learning_rate": 9.72938605556083e-06, "loss": 0.0033, "step": 23040 }, { "epoch": 0.38954564275031056, "grad_norm": 0.20779503881931305, "learning_rate": 9.728907231280373e-06, "loss": 0.003, "step": 23050 }, { "epoch": 0.389714643029161, "grad_norm": 0.17399100959300995, "learning_rate": 9.728427995562076e-06, "loss": 0.0016, "step": 23060 }, { "epoch": 0.38988364330801145, "grad_norm": 0.022260790690779686, "learning_rate": 9.727948348447632e-06, "loss": 0.0022, "step": 23070 }, { "epoch": 0.3900526435868619, "grad_norm": 0.1329025775194168, "learning_rate": 9.727468289978774e-06, "loss": 0.0035, "step": 23080 }, { "epoch": 0.3902216438657124, "grad_norm": 0.12162362039089203, "learning_rate": 9.726987820197271e-06, "loss": 0.0034, "step": 23090 }, { "epoch": 0.3903906441445628, "grad_norm": 0.12319975346326828, "learning_rate": 9.726506939144925e-06, "loss": 0.0037, "step": 23100 }, { "epoch": 0.3905596444234133, "grad_norm": 0.2242601066827774, "learning_rate": 9.726025646863574e-06, "loss": 0.0038, "step": 23110 }, { "epoch": 0.39072864470226376, "grad_norm": 0.23295743763446808, "learning_rate": 9.725543943395094e-06, "loss": 0.0023, "step": 23120 }, { "epoch": 0.39089764498111423, "grad_norm": 0.20486986637115479, "learning_rate": 9.725061828781396e-06, "loss": 0.0031, "step": 23130 }, { "epoch": 0.39106664525996465, "grad_norm": 0.10565141588449478, "learning_rate": 9.724579303064425e-06, "loss": 0.002, "step": 23140 }, { "epoch": 0.3912356455388151, "grad_norm": 0.07504316419363022, "learning_rate": 9.724096366286162e-06, "loss": 0.0038, "step": 23150 }, { "epoch": 0.3914046458176656, "grad_norm": 0.11156802624464035, "learning_rate": 9.723613018488629e-06, "loss": 0.0034, "step": 23160 }, { "epoch": 0.39157364609651607, "grad_norm": 0.09637878090143204, "learning_rate": 9.723129259713876e-06, "loss": 0.0029, "step": 23170 }, { "epoch": 0.39174264637536654, "grad_norm": 0.18587429821491241, "learning_rate": 9.722645090003992e-06, "loss": 0.0035, "step": 23180 }, { "epoch": 0.39191164665421696, "grad_norm": 0.08628827333450317, "learning_rate": 9.722160509401104e-06, "loss": 0.0035, "step": 23190 }, { "epoch": 0.39208064693306743, "grad_norm": 0.060949645936489105, "learning_rate": 9.721675517947373e-06, "loss": 0.002, "step": 23200 }, { "epoch": 0.3922496472119179, "grad_norm": 0.13335323333740234, "learning_rate": 9.721190115684994e-06, "loss": 0.0026, "step": 23210 }, { "epoch": 0.3924186474907684, "grad_norm": 0.10148081183433533, "learning_rate": 9.720704302656201e-06, "loss": 0.0044, "step": 23220 }, { "epoch": 0.3925876477696188, "grad_norm": 0.11652295291423798, "learning_rate": 9.72021807890326e-06, "loss": 0.0023, "step": 23230 }, { "epoch": 0.39275664804846927, "grad_norm": 0.16628910601139069, "learning_rate": 9.719731444468478e-06, "loss": 0.0063, "step": 23240 }, { "epoch": 0.39292564832731974, "grad_norm": 0.08759075403213501, "learning_rate": 9.719244399394192e-06, "loss": 0.0035, "step": 23250 }, { "epoch": 0.3930946486061702, "grad_norm": 0.20623010396957397, "learning_rate": 9.718756943722779e-06, "loss": 0.0031, "step": 23260 }, { "epoch": 0.39326364888502063, "grad_norm": 0.15852336585521698, "learning_rate": 9.718269077496645e-06, "loss": 0.0029, "step": 23270 }, { "epoch": 0.3934326491638711, "grad_norm": 0.1342359483242035, "learning_rate": 9.717780800758245e-06, "loss": 0.0033, "step": 23280 }, { "epoch": 0.3936016494427216, "grad_norm": 0.05740807205438614, "learning_rate": 9.717292113550055e-06, "loss": 0.0045, "step": 23290 }, { "epoch": 0.39377064972157205, "grad_norm": 0.08184656500816345, "learning_rate": 9.716803015914597e-06, "loss": 0.0026, "step": 23300 }, { "epoch": 0.3939396500004225, "grad_norm": 0.10684201121330261, "learning_rate": 9.716313507894422e-06, "loss": 0.0026, "step": 23310 }, { "epoch": 0.39410865027927294, "grad_norm": 0.19640758633613586, "learning_rate": 9.715823589532121e-06, "loss": 0.0034, "step": 23320 }, { "epoch": 0.3942776505581234, "grad_norm": 0.059934161603450775, "learning_rate": 9.715333260870319e-06, "loss": 0.0021, "step": 23330 }, { "epoch": 0.3944466508369739, "grad_norm": 0.08852091431617737, "learning_rate": 9.714842521951676e-06, "loss": 0.0022, "step": 23340 }, { "epoch": 0.39461565111582436, "grad_norm": 0.1293201893568039, "learning_rate": 9.714351372818891e-06, "loss": 0.002, "step": 23350 }, { "epoch": 0.3947846513946748, "grad_norm": 0.08041828870773315, "learning_rate": 9.713859813514695e-06, "loss": 0.0022, "step": 23360 }, { "epoch": 0.39495365167352525, "grad_norm": 0.0484473817050457, "learning_rate": 9.713367844081856e-06, "loss": 0.0042, "step": 23370 }, { "epoch": 0.3951226519523757, "grad_norm": 0.06702147424221039, "learning_rate": 9.712875464563177e-06, "loss": 0.0033, "step": 23380 }, { "epoch": 0.3952916522312262, "grad_norm": 0.08013001084327698, "learning_rate": 9.712382675001499e-06, "loss": 0.0028, "step": 23390 }, { "epoch": 0.3954606525100766, "grad_norm": 0.035971228033304214, "learning_rate": 9.711889475439696e-06, "loss": 0.0033, "step": 23400 }, { "epoch": 0.3956296527889271, "grad_norm": 0.060159262269735336, "learning_rate": 9.711395865920679e-06, "loss": 0.0028, "step": 23410 }, { "epoch": 0.39579865306777756, "grad_norm": 0.15951140224933624, "learning_rate": 9.710901846487394e-06, "loss": 0.0039, "step": 23420 }, { "epoch": 0.39596765334662803, "grad_norm": 0.18661275506019592, "learning_rate": 9.710407417182824e-06, "loss": 0.003, "step": 23430 }, { "epoch": 0.3961366536254785, "grad_norm": 0.19150663912296295, "learning_rate": 9.709912578049986e-06, "loss": 0.0048, "step": 23440 }, { "epoch": 0.3963056539043289, "grad_norm": 0.02610347792506218, "learning_rate": 9.709417329131933e-06, "loss": 0.0022, "step": 23450 }, { "epoch": 0.3964746541831794, "grad_norm": 0.16545186936855316, "learning_rate": 9.708921670471755e-06, "loss": 0.0039, "step": 23460 }, { "epoch": 0.39664365446202987, "grad_norm": 0.07641608268022537, "learning_rate": 9.708425602112576e-06, "loss": 0.002, "step": 23470 }, { "epoch": 0.39681265474088034, "grad_norm": 0.04412617161870003, "learning_rate": 9.707929124097559e-06, "loss": 0.002, "step": 23480 }, { "epoch": 0.39698165501973076, "grad_norm": 0.15296491980552673, "learning_rate": 9.707432236469897e-06, "loss": 0.0034, "step": 23490 }, { "epoch": 0.39715065529858123, "grad_norm": 0.07034008949995041, "learning_rate": 9.70693493927282e-06, "loss": 0.0038, "step": 23500 }, { "epoch": 0.3973196555774317, "grad_norm": 0.13851159811019897, "learning_rate": 9.706437232549601e-06, "loss": 0.0035, "step": 23510 }, { "epoch": 0.3974886558562822, "grad_norm": 0.13486424088478088, "learning_rate": 9.705939116343538e-06, "loss": 0.0028, "step": 23520 }, { "epoch": 0.3976576561351326, "grad_norm": 0.0902167558670044, "learning_rate": 9.70544059069797e-06, "loss": 0.0043, "step": 23530 }, { "epoch": 0.39782665641398307, "grad_norm": 0.0866253599524498, "learning_rate": 9.704941655656274e-06, "loss": 0.0035, "step": 23540 }, { "epoch": 0.39799565669283354, "grad_norm": 0.06707198917865753, "learning_rate": 9.704442311261856e-06, "loss": 0.0035, "step": 23550 }, { "epoch": 0.398164656971684, "grad_norm": 0.11433298140764236, "learning_rate": 9.703942557558166e-06, "loss": 0.0038, "step": 23560 }, { "epoch": 0.3983336572505345, "grad_norm": 0.14999189972877502, "learning_rate": 9.70344239458868e-06, "loss": 0.0045, "step": 23570 }, { "epoch": 0.3985026575293849, "grad_norm": 0.08040648698806763, "learning_rate": 9.702941822396918e-06, "loss": 0.0024, "step": 23580 }, { "epoch": 0.3986716578082354, "grad_norm": 0.05942732095718384, "learning_rate": 9.702440841026433e-06, "loss": 0.0024, "step": 23590 }, { "epoch": 0.39884065808708585, "grad_norm": 0.055566608905792236, "learning_rate": 9.701939450520808e-06, "loss": 0.0038, "step": 23600 }, { "epoch": 0.3990096583659363, "grad_norm": 0.10234656929969788, "learning_rate": 9.70143765092367e-06, "loss": 0.0047, "step": 23610 }, { "epoch": 0.39917865864478674, "grad_norm": 0.04624078422784805, "learning_rate": 9.700935442278678e-06, "loss": 0.0024, "step": 23620 }, { "epoch": 0.3993476589236372, "grad_norm": 0.09292671829462051, "learning_rate": 9.700432824629526e-06, "loss": 0.0026, "step": 23630 }, { "epoch": 0.3995166592024877, "grad_norm": 0.12578187882900238, "learning_rate": 9.699929798019944e-06, "loss": 0.0041, "step": 23640 }, { "epoch": 0.39968565948133816, "grad_norm": 0.08320990949869156, "learning_rate": 9.699426362493698e-06, "loss": 0.0033, "step": 23650 }, { "epoch": 0.3998546597601886, "grad_norm": 0.38111990690231323, "learning_rate": 9.698922518094588e-06, "loss": 0.0086, "step": 23660 }, { "epoch": 0.40002366003903905, "grad_norm": 0.10134875774383545, "learning_rate": 9.698418264866455e-06, "loss": 0.0041, "step": 23670 }, { "epoch": 0.4001926603178895, "grad_norm": 0.1464819759130478, "learning_rate": 9.697913602853165e-06, "loss": 0.0028, "step": 23680 }, { "epoch": 0.40036166059674, "grad_norm": 0.06998094916343689, "learning_rate": 9.697408532098633e-06, "loss": 0.0036, "step": 23690 }, { "epoch": 0.40053066087559047, "grad_norm": 0.14025792479515076, "learning_rate": 9.696903052646798e-06, "loss": 0.0035, "step": 23700 }, { "epoch": 0.4006996611544409, "grad_norm": 0.09845133870840073, "learning_rate": 9.69639716454164e-06, "loss": 0.0037, "step": 23710 }, { "epoch": 0.40086866143329136, "grad_norm": 0.11155934631824493, "learning_rate": 9.695890867827173e-06, "loss": 0.0038, "step": 23720 }, { "epoch": 0.40103766171214184, "grad_norm": 0.20432689785957336, "learning_rate": 9.69538416254745e-06, "loss": 0.0049, "step": 23730 }, { "epoch": 0.4012066619909923, "grad_norm": 0.011552118696272373, "learning_rate": 9.694877048746556e-06, "loss": 0.0036, "step": 23740 }, { "epoch": 0.4013756622698427, "grad_norm": 0.053301118314266205, "learning_rate": 9.69436952646861e-06, "loss": 0.0023, "step": 23750 }, { "epoch": 0.4015446625486932, "grad_norm": 0.09333498030900955, "learning_rate": 9.693861595757771e-06, "loss": 0.0019, "step": 23760 }, { "epoch": 0.40171366282754367, "grad_norm": 0.06840863823890686, "learning_rate": 9.693353256658231e-06, "loss": 0.0029, "step": 23770 }, { "epoch": 0.40188266310639414, "grad_norm": 0.02834627963602543, "learning_rate": 9.692844509214216e-06, "loss": 0.0012, "step": 23780 }, { "epoch": 0.40205166338524456, "grad_norm": 0.16074758768081665, "learning_rate": 9.692335353469994e-06, "loss": 0.0031, "step": 23790 }, { "epoch": 0.40222066366409504, "grad_norm": 0.0556454062461853, "learning_rate": 9.69182578946986e-06, "loss": 0.0035, "step": 23800 }, { "epoch": 0.4023896639429455, "grad_norm": 0.1124231368303299, "learning_rate": 9.691315817258151e-06, "loss": 0.0036, "step": 23810 }, { "epoch": 0.402558664221796, "grad_norm": 0.09787849336862564, "learning_rate": 9.690805436879234e-06, "loss": 0.0069, "step": 23820 }, { "epoch": 0.4027276645006464, "grad_norm": 0.06661677360534668, "learning_rate": 9.690294648377519e-06, "loss": 0.0029, "step": 23830 }, { "epoch": 0.40289666477949687, "grad_norm": 0.1677347719669342, "learning_rate": 9.689783451797444e-06, "loss": 0.0019, "step": 23840 }, { "epoch": 0.40306566505834734, "grad_norm": 0.09581337124109268, "learning_rate": 9.689271847183483e-06, "loss": 0.0016, "step": 23850 }, { "epoch": 0.4032346653371978, "grad_norm": 0.05288088321685791, "learning_rate": 9.688759834580154e-06, "loss": 0.0024, "step": 23860 }, { "epoch": 0.4034036656160483, "grad_norm": 0.192446768283844, "learning_rate": 9.688247414032002e-06, "loss": 0.0025, "step": 23870 }, { "epoch": 0.4035726658948987, "grad_norm": 0.11666693538427353, "learning_rate": 9.687734585583609e-06, "loss": 0.004, "step": 23880 }, { "epoch": 0.4037416661737492, "grad_norm": 0.10450201481580734, "learning_rate": 9.687221349279596e-06, "loss": 0.0038, "step": 23890 }, { "epoch": 0.40391066645259965, "grad_norm": 0.07050692290067673, "learning_rate": 9.686707705164613e-06, "loss": 0.0032, "step": 23900 }, { "epoch": 0.4040796667314501, "grad_norm": 0.16578009724617004, "learning_rate": 9.686193653283354e-06, "loss": 0.0034, "step": 23910 }, { "epoch": 0.40424866701030054, "grad_norm": 0.15062333643436432, "learning_rate": 9.685679193680542e-06, "loss": 0.0027, "step": 23920 }, { "epoch": 0.404417667289151, "grad_norm": 0.06264977157115936, "learning_rate": 9.685164326400936e-06, "loss": 0.0016, "step": 23930 }, { "epoch": 0.4045866675680015, "grad_norm": 0.32454973459243774, "learning_rate": 9.684649051489335e-06, "loss": 0.0028, "step": 23940 }, { "epoch": 0.40475566784685196, "grad_norm": 0.08096280694007874, "learning_rate": 9.684133368990567e-06, "loss": 0.0028, "step": 23950 }, { "epoch": 0.4049246681257024, "grad_norm": 0.21466168761253357, "learning_rate": 9.683617278949501e-06, "loss": 0.0031, "step": 23960 }, { "epoch": 0.40509366840455285, "grad_norm": 0.16156120598316193, "learning_rate": 9.68310078141104e-06, "loss": 0.0027, "step": 23970 }, { "epoch": 0.4052626686834033, "grad_norm": 0.09032265841960907, "learning_rate": 9.682583876420121e-06, "loss": 0.0028, "step": 23980 }, { "epoch": 0.4054316689622538, "grad_norm": 0.25376391410827637, "learning_rate": 9.68206656402172e-06, "loss": 0.0024, "step": 23990 }, { "epoch": 0.4056006692411043, "grad_norm": 0.23052674531936646, "learning_rate": 9.681548844260839e-06, "loss": 0.0041, "step": 24000 }, { "epoch": 0.4057696695199547, "grad_norm": 0.1080017164349556, "learning_rate": 9.681030717182527e-06, "loss": 0.0037, "step": 24010 }, { "epoch": 0.40593866979880516, "grad_norm": 0.07792221754789352, "learning_rate": 9.680512182831861e-06, "loss": 0.0033, "step": 24020 }, { "epoch": 0.40610767007765564, "grad_norm": 0.24158845841884613, "learning_rate": 9.67999324125396e-06, "loss": 0.0043, "step": 24030 }, { "epoch": 0.4062766703565061, "grad_norm": 0.135731503367424, "learning_rate": 9.67947389249397e-06, "loss": 0.003, "step": 24040 }, { "epoch": 0.4064456706353565, "grad_norm": 0.07237989455461502, "learning_rate": 9.678954136597079e-06, "loss": 0.0035, "step": 24050 }, { "epoch": 0.406614670914207, "grad_norm": 0.04045663774013519, "learning_rate": 9.678433973608508e-06, "loss": 0.0033, "step": 24060 }, { "epoch": 0.40678367119305747, "grad_norm": 0.8070462346076965, "learning_rate": 9.677913403573516e-06, "loss": 0.0027, "step": 24070 }, { "epoch": 0.40695267147190795, "grad_norm": 0.2314949482679367, "learning_rate": 9.677392426537391e-06, "loss": 0.0038, "step": 24080 }, { "epoch": 0.40712167175075836, "grad_norm": 0.04682336747646332, "learning_rate": 9.676871042545462e-06, "loss": 0.0018, "step": 24090 }, { "epoch": 0.40729067202960884, "grad_norm": 0.11390011757612228, "learning_rate": 9.676349251643094e-06, "loss": 0.0026, "step": 24100 }, { "epoch": 0.4074596723084593, "grad_norm": 0.07852821052074432, "learning_rate": 9.675827053875682e-06, "loss": 0.0028, "step": 24110 }, { "epoch": 0.4076286725873098, "grad_norm": 0.13662543892860413, "learning_rate": 9.675304449288662e-06, "loss": 0.0026, "step": 24120 }, { "epoch": 0.40779767286616025, "grad_norm": 0.6542212963104248, "learning_rate": 9.674781437927501e-06, "loss": 0.003, "step": 24130 }, { "epoch": 0.40796667314501067, "grad_norm": 0.15434002876281738, "learning_rate": 9.674258019837708e-06, "loss": 0.0035, "step": 24140 }, { "epoch": 0.40813567342386115, "grad_norm": 0.20408910512924194, "learning_rate": 9.673734195064818e-06, "loss": 0.0057, "step": 24150 }, { "epoch": 0.4083046737027116, "grad_norm": 0.15168999135494232, "learning_rate": 9.673209963654409e-06, "loss": 0.0039, "step": 24160 }, { "epoch": 0.4084736739815621, "grad_norm": 0.018376335501670837, "learning_rate": 9.672685325652091e-06, "loss": 0.0037, "step": 24170 }, { "epoch": 0.4086426742604125, "grad_norm": 0.1481887549161911, "learning_rate": 9.672160281103509e-06, "loss": 0.003, "step": 24180 }, { "epoch": 0.408811674539263, "grad_norm": 0.23799261450767517, "learning_rate": 9.671634830054347e-06, "loss": 0.0027, "step": 24190 }, { "epoch": 0.40898067481811345, "grad_norm": 0.05132792517542839, "learning_rate": 9.671108972550318e-06, "loss": 0.0026, "step": 24200 }, { "epoch": 0.4091496750969639, "grad_norm": 0.05585364252328873, "learning_rate": 9.670582708637179e-06, "loss": 0.0027, "step": 24210 }, { "epoch": 0.40931867537581434, "grad_norm": 0.11600292474031448, "learning_rate": 9.670056038360713e-06, "loss": 0.0019, "step": 24220 }, { "epoch": 0.4094876756546648, "grad_norm": 0.12043534219264984, "learning_rate": 9.669528961766746e-06, "loss": 0.0013, "step": 24230 }, { "epoch": 0.4096566759335153, "grad_norm": 0.0870441123843193, "learning_rate": 9.669001478901135e-06, "loss": 0.0021, "step": 24240 }, { "epoch": 0.40982567621236576, "grad_norm": 0.12131226807832718, "learning_rate": 9.668473589809772e-06, "loss": 0.0079, "step": 24250 }, { "epoch": 0.40999467649121624, "grad_norm": 0.05351598933339119, "learning_rate": 9.667945294538588e-06, "loss": 0.0033, "step": 24260 }, { "epoch": 0.41016367677006665, "grad_norm": 0.22449712455272675, "learning_rate": 9.667416593133546e-06, "loss": 0.0042, "step": 24270 }, { "epoch": 0.4103326770489171, "grad_norm": 0.03976152464747429, "learning_rate": 9.666887485640647e-06, "loss": 0.0025, "step": 24280 }, { "epoch": 0.4105016773277676, "grad_norm": 0.058581795543432236, "learning_rate": 9.666357972105926e-06, "loss": 0.0025, "step": 24290 }, { "epoch": 0.4106706776066181, "grad_norm": 0.15372496843338013, "learning_rate": 9.665828052575452e-06, "loss": 0.0049, "step": 24300 }, { "epoch": 0.4108396778854685, "grad_norm": 0.3144594132900238, "learning_rate": 9.665297727095329e-06, "loss": 0.002, "step": 24310 }, { "epoch": 0.41100867816431896, "grad_norm": 0.04498997703194618, "learning_rate": 9.664766995711702e-06, "loss": 0.0024, "step": 24320 }, { "epoch": 0.41117767844316944, "grad_norm": 0.12133470922708511, "learning_rate": 9.664235858470744e-06, "loss": 0.0028, "step": 24330 }, { "epoch": 0.4113466787220199, "grad_norm": 0.08657453209161758, "learning_rate": 9.663704315418669e-06, "loss": 0.002, "step": 24340 }, { "epoch": 0.4115156790008703, "grad_norm": 0.18983878195285797, "learning_rate": 9.66317236660172e-06, "loss": 0.0049, "step": 24350 }, { "epoch": 0.4116846792797208, "grad_norm": 0.11060302704572678, "learning_rate": 9.662640012066185e-06, "loss": 0.0032, "step": 24360 }, { "epoch": 0.4118536795585713, "grad_norm": 0.04116309806704521, "learning_rate": 9.662107251858375e-06, "loss": 0.0036, "step": 24370 }, { "epoch": 0.41202267983742175, "grad_norm": 0.11360263079404831, "learning_rate": 9.661574086024645e-06, "loss": 0.0042, "step": 24380 }, { "epoch": 0.4121916801162722, "grad_norm": 0.038537509739398956, "learning_rate": 9.661040514611386e-06, "loss": 0.003, "step": 24390 }, { "epoch": 0.41236068039512264, "grad_norm": 0.13446074724197388, "learning_rate": 9.660506537665019e-06, "loss": 0.0037, "step": 24400 }, { "epoch": 0.4125296806739731, "grad_norm": 0.05725783109664917, "learning_rate": 9.659972155232002e-06, "loss": 0.003, "step": 24410 }, { "epoch": 0.4126986809528236, "grad_norm": 0.09010022133588791, "learning_rate": 9.659437367358828e-06, "loss": 0.0035, "step": 24420 }, { "epoch": 0.41286768123167406, "grad_norm": 0.12016736716032028, "learning_rate": 9.658902174092029e-06, "loss": 0.0037, "step": 24430 }, { "epoch": 0.4130366815105245, "grad_norm": 0.5030607581138611, "learning_rate": 9.658366575478168e-06, "loss": 0.0026, "step": 24440 }, { "epoch": 0.41320568178937495, "grad_norm": 0.04519381374120712, "learning_rate": 9.657830571563845e-06, "loss": 0.003, "step": 24450 }, { "epoch": 0.4133746820682254, "grad_norm": 0.12689000368118286, "learning_rate": 9.657294162395693e-06, "loss": 0.0029, "step": 24460 }, { "epoch": 0.4135436823470759, "grad_norm": 0.17634360492229462, "learning_rate": 9.656757348020384e-06, "loss": 0.003, "step": 24470 }, { "epoch": 0.4137126826259263, "grad_norm": 0.06941474229097366, "learning_rate": 9.656220128484624e-06, "loss": 0.0037, "step": 24480 }, { "epoch": 0.4138816829047768, "grad_norm": 0.2054547667503357, "learning_rate": 9.655682503835154e-06, "loss": 0.0026, "step": 24490 }, { "epoch": 0.41405068318362725, "grad_norm": 0.028921693563461304, "learning_rate": 9.655144474118748e-06, "loss": 0.0031, "step": 24500 }, { "epoch": 0.41421968346247773, "grad_norm": 0.13475167751312256, "learning_rate": 9.654606039382216e-06, "loss": 0.0061, "step": 24510 }, { "epoch": 0.4143886837413282, "grad_norm": 0.08045545965433121, "learning_rate": 9.654067199672408e-06, "loss": 0.0033, "step": 24520 }, { "epoch": 0.4145576840201786, "grad_norm": 0.13410677015781403, "learning_rate": 9.653527955036205e-06, "loss": 0.0039, "step": 24530 }, { "epoch": 0.4147266842990291, "grad_norm": 0.15886163711547852, "learning_rate": 9.652988305520521e-06, "loss": 0.0034, "step": 24540 }, { "epoch": 0.41489568457787956, "grad_norm": 0.1001209244132042, "learning_rate": 9.652448251172314e-06, "loss": 0.0027, "step": 24550 }, { "epoch": 0.41506468485673004, "grad_norm": 0.14588508009910583, "learning_rate": 9.651907792038565e-06, "loss": 0.0028, "step": 24560 }, { "epoch": 0.41523368513558045, "grad_norm": 0.07480347901582718, "learning_rate": 9.651366928166297e-06, "loss": 0.0017, "step": 24570 }, { "epoch": 0.41540268541443093, "grad_norm": 0.19968412816524506, "learning_rate": 9.650825659602572e-06, "loss": 0.0037, "step": 24580 }, { "epoch": 0.4155716856932814, "grad_norm": 0.25834447145462036, "learning_rate": 9.650283986394482e-06, "loss": 0.0044, "step": 24590 }, { "epoch": 0.4157406859721319, "grad_norm": 0.23417113721370697, "learning_rate": 9.649741908589151e-06, "loss": 0.004, "step": 24600 }, { "epoch": 0.4159096862509823, "grad_norm": 0.06393170356750488, "learning_rate": 9.649199426233748e-06, "loss": 0.0022, "step": 24610 }, { "epoch": 0.41607868652983276, "grad_norm": 0.12674586474895477, "learning_rate": 9.648656539375469e-06, "loss": 0.003, "step": 24620 }, { "epoch": 0.41624768680868324, "grad_norm": 0.04374230280518532, "learning_rate": 9.648113248061548e-06, "loss": 0.0024, "step": 24630 }, { "epoch": 0.4164166870875337, "grad_norm": 0.14106465876102448, "learning_rate": 9.64756955233925e-06, "loss": 0.0034, "step": 24640 }, { "epoch": 0.4165856873663841, "grad_norm": 0.11494036763906479, "learning_rate": 9.647025452255888e-06, "loss": 0.0044, "step": 24650 }, { "epoch": 0.4167546876452346, "grad_norm": 0.07348564267158508, "learning_rate": 9.646480947858794e-06, "loss": 0.0026, "step": 24660 }, { "epoch": 0.4169236879240851, "grad_norm": 0.0473347082734108, "learning_rate": 9.645936039195346e-06, "loss": 0.0027, "step": 24670 }, { "epoch": 0.41709268820293555, "grad_norm": 0.1133309155702591, "learning_rate": 9.645390726312951e-06, "loss": 0.0037, "step": 24680 }, { "epoch": 0.417261688481786, "grad_norm": 0.05429339036345482, "learning_rate": 9.644845009259055e-06, "loss": 0.002, "step": 24690 }, { "epoch": 0.41743068876063644, "grad_norm": 0.08009226620197296, "learning_rate": 9.64429888808114e-06, "loss": 0.0034, "step": 24700 }, { "epoch": 0.4175996890394869, "grad_norm": 0.03397831693291664, "learning_rate": 9.643752362826718e-06, "loss": 0.0021, "step": 24710 }, { "epoch": 0.4177686893183374, "grad_norm": 0.06480947881937027, "learning_rate": 9.643205433543343e-06, "loss": 0.0041, "step": 24720 }, { "epoch": 0.41793768959718786, "grad_norm": 0.04352713003754616, "learning_rate": 9.642658100278598e-06, "loss": 0.0024, "step": 24730 }, { "epoch": 0.4181066898760383, "grad_norm": 0.12258494645357132, "learning_rate": 9.642110363080101e-06, "loss": 0.0019, "step": 24740 }, { "epoch": 0.41827569015488875, "grad_norm": 0.04337688535451889, "learning_rate": 9.641562221995515e-06, "loss": 0.0021, "step": 24750 }, { "epoch": 0.4184446904337392, "grad_norm": 0.010867961682379246, "learning_rate": 9.641013677072524e-06, "loss": 0.0024, "step": 24760 }, { "epoch": 0.4186136907125897, "grad_norm": 0.18787983059883118, "learning_rate": 9.640464728358858e-06, "loss": 0.003, "step": 24770 }, { "epoch": 0.4187826909914401, "grad_norm": 0.16595998406410217, "learning_rate": 9.639915375902277e-06, "loss": 0.0026, "step": 24780 }, { "epoch": 0.4189516912702906, "grad_norm": 0.058716196566820145, "learning_rate": 9.639365619750577e-06, "loss": 0.0043, "step": 24790 }, { "epoch": 0.41912069154914106, "grad_norm": 0.07995070517063141, "learning_rate": 9.63881545995159e-06, "loss": 0.003, "step": 24800 }, { "epoch": 0.41928969182799153, "grad_norm": 0.05202421545982361, "learning_rate": 9.638264896553182e-06, "loss": 0.0127, "step": 24810 }, { "epoch": 0.419458692106842, "grad_norm": 0.13160359859466553, "learning_rate": 9.637713929603257e-06, "loss": 0.0034, "step": 24820 }, { "epoch": 0.4196276923856924, "grad_norm": 0.08078698068857193, "learning_rate": 9.637162559149748e-06, "loss": 0.0038, "step": 24830 }, { "epoch": 0.4197966926645429, "grad_norm": 0.08044783771038055, "learning_rate": 9.63661078524063e-06, "loss": 0.0031, "step": 24840 }, { "epoch": 0.41996569294339336, "grad_norm": 0.11060716956853867, "learning_rate": 9.636058607923907e-06, "loss": 0.0046, "step": 24850 }, { "epoch": 0.42013469322224384, "grad_norm": 0.22302891314029694, "learning_rate": 9.635506027247624e-06, "loss": 0.0035, "step": 24860 }, { "epoch": 0.42030369350109426, "grad_norm": 0.19783833622932434, "learning_rate": 9.634953043259858e-06, "loss": 0.004, "step": 24870 }, { "epoch": 0.42047269377994473, "grad_norm": 0.05085299164056778, "learning_rate": 9.63439965600872e-06, "loss": 0.008, "step": 24880 }, { "epoch": 0.4206416940587952, "grad_norm": 0.11641545593738556, "learning_rate": 9.633845865542356e-06, "loss": 0.0039, "step": 24890 }, { "epoch": 0.4208106943376457, "grad_norm": 0.06848058104515076, "learning_rate": 9.633291671908952e-06, "loss": 0.0042, "step": 24900 }, { "epoch": 0.4209796946164961, "grad_norm": 0.1078142449259758, "learning_rate": 9.632737075156721e-06, "loss": 0.0034, "step": 24910 }, { "epoch": 0.42114869489534656, "grad_norm": 0.2299955040216446, "learning_rate": 9.632182075333923e-06, "loss": 0.0047, "step": 24920 }, { "epoch": 0.42131769517419704, "grad_norm": 0.036223720759153366, "learning_rate": 9.631626672488838e-06, "loss": 0.0024, "step": 24930 }, { "epoch": 0.4214866954530475, "grad_norm": 0.04823785647749901, "learning_rate": 9.631070866669791e-06, "loss": 0.0028, "step": 24940 }, { "epoch": 0.421655695731898, "grad_norm": 0.06334184855222702, "learning_rate": 9.630514657925143e-06, "loss": 0.0032, "step": 24950 }, { "epoch": 0.4218246960107484, "grad_norm": 0.14863380789756775, "learning_rate": 9.629958046303282e-06, "loss": 0.003, "step": 24960 }, { "epoch": 0.4219936962895989, "grad_norm": 0.30553850531578064, "learning_rate": 9.629401031852639e-06, "loss": 0.0042, "step": 24970 }, { "epoch": 0.42216269656844935, "grad_norm": 0.11199302971363068, "learning_rate": 9.628843614621678e-06, "loss": 0.0033, "step": 24980 }, { "epoch": 0.4223316968472998, "grad_norm": 0.057078536599874496, "learning_rate": 9.628285794658894e-06, "loss": 0.0017, "step": 24990 }, { "epoch": 0.42250069712615024, "grad_norm": 0.445557177066803, "learning_rate": 9.627727572012821e-06, "loss": 0.0032, "step": 25000 }, { "epoch": 0.4226696974050007, "grad_norm": 0.3197193145751953, "learning_rate": 9.627168946732028e-06, "loss": 0.0035, "step": 25010 }, { "epoch": 0.4228386976838512, "grad_norm": 0.020992321893572807, "learning_rate": 9.626609918865117e-06, "loss": 0.0027, "step": 25020 }, { "epoch": 0.42300769796270166, "grad_norm": 0.08679168671369553, "learning_rate": 9.626050488460727e-06, "loss": 0.0023, "step": 25030 }, { "epoch": 0.4231766982415521, "grad_norm": 0.08856311440467834, "learning_rate": 9.62549065556753e-06, "loss": 0.0033, "step": 25040 }, { "epoch": 0.42334569852040255, "grad_norm": 0.10782845318317413, "learning_rate": 9.624930420234235e-06, "loss": 0.0039, "step": 25050 }, { "epoch": 0.423514698799253, "grad_norm": 0.14478425681591034, "learning_rate": 9.624369782509586e-06, "loss": 0.002, "step": 25060 }, { "epoch": 0.4236836990781035, "grad_norm": 0.1290246993303299, "learning_rate": 9.62380874244236e-06, "loss": 0.003, "step": 25070 }, { "epoch": 0.42385269935695397, "grad_norm": 0.11611910164356232, "learning_rate": 9.62324730008137e-06, "loss": 0.0017, "step": 25080 }, { "epoch": 0.4240216996358044, "grad_norm": 0.08613353967666626, "learning_rate": 9.622685455475466e-06, "loss": 0.003, "step": 25090 }, { "epoch": 0.42419069991465486, "grad_norm": 0.06108185276389122, "learning_rate": 9.622123208673528e-06, "loss": 0.0019, "step": 25100 }, { "epoch": 0.42435970019350533, "grad_norm": 0.09997698664665222, "learning_rate": 9.62156055972448e-06, "loss": 0.0026, "step": 25110 }, { "epoch": 0.4245287004723558, "grad_norm": 0.11060816794633865, "learning_rate": 9.620997508677268e-06, "loss": 0.0029, "step": 25120 }, { "epoch": 0.4246977007512062, "grad_norm": 0.1168910413980484, "learning_rate": 9.620434055580882e-06, "loss": 0.0031, "step": 25130 }, { "epoch": 0.4248667010300567, "grad_norm": 0.08170294016599655, "learning_rate": 9.61987020048435e-06, "loss": 0.0025, "step": 25140 }, { "epoch": 0.42503570130890717, "grad_norm": 0.2618868052959442, "learning_rate": 9.619305943436725e-06, "loss": 0.0024, "step": 25150 }, { "epoch": 0.42520470158775764, "grad_norm": 0.028670979663729668, "learning_rate": 9.618741284487103e-06, "loss": 0.0038, "step": 25160 }, { "epoch": 0.42537370186660806, "grad_norm": 0.16440579295158386, "learning_rate": 9.61817622368461e-06, "loss": 0.0037, "step": 25170 }, { "epoch": 0.42554270214545853, "grad_norm": 0.11584070324897766, "learning_rate": 9.61761076107841e-06, "loss": 0.0032, "step": 25180 }, { "epoch": 0.425711702424309, "grad_norm": 0.2152673900127411, "learning_rate": 9.617044896717703e-06, "loss": 0.0038, "step": 25190 }, { "epoch": 0.4258807027031595, "grad_norm": 0.028066881000995636, "learning_rate": 9.616478630651718e-06, "loss": 0.0032, "step": 25200 }, { "epoch": 0.42604970298200995, "grad_norm": 0.19883617758750916, "learning_rate": 9.615911962929725e-06, "loss": 0.0037, "step": 25210 }, { "epoch": 0.42621870326086037, "grad_norm": 0.15827932953834534, "learning_rate": 9.615344893601026e-06, "loss": 0.0033, "step": 25220 }, { "epoch": 0.42638770353971084, "grad_norm": 0.07252488285303116, "learning_rate": 9.61477742271496e-06, "loss": 0.0037, "step": 25230 }, { "epoch": 0.4265567038185613, "grad_norm": 0.28482991456985474, "learning_rate": 9.614209550320899e-06, "loss": 0.0018, "step": 25240 }, { "epoch": 0.4267257040974118, "grad_norm": 0.016567599028348923, "learning_rate": 9.61364127646825e-06, "loss": 0.0037, "step": 25250 }, { "epoch": 0.4268947043762622, "grad_norm": 0.11594730615615845, "learning_rate": 9.613072601206459e-06, "loss": 0.0032, "step": 25260 }, { "epoch": 0.4270637046551127, "grad_norm": 0.12419804185628891, "learning_rate": 9.612503524584997e-06, "loss": 0.0054, "step": 25270 }, { "epoch": 0.42723270493396315, "grad_norm": 0.06168608367443085, "learning_rate": 9.611934046653384e-06, "loss": 0.0025, "step": 25280 }, { "epoch": 0.4274017052128136, "grad_norm": 0.0676799863576889, "learning_rate": 9.61136416746116e-06, "loss": 0.0018, "step": 25290 }, { "epoch": 0.42757070549166404, "grad_norm": 0.08644477277994156, "learning_rate": 9.610793887057914e-06, "loss": 0.0049, "step": 25300 }, { "epoch": 0.4277397057705145, "grad_norm": 0.12501251697540283, "learning_rate": 9.610223205493259e-06, "loss": 0.0042, "step": 25310 }, { "epoch": 0.427908706049365, "grad_norm": 0.022200902923941612, "learning_rate": 9.609652122816847e-06, "loss": 0.0026, "step": 25320 }, { "epoch": 0.42807770632821546, "grad_norm": 0.19189688563346863, "learning_rate": 9.609080639078367e-06, "loss": 0.0019, "step": 25330 }, { "epoch": 0.4282467066070659, "grad_norm": 0.08449879288673401, "learning_rate": 9.60850875432754e-06, "loss": 0.0027, "step": 25340 }, { "epoch": 0.42841570688591635, "grad_norm": 0.5087281465530396, "learning_rate": 9.607936468614122e-06, "loss": 0.0036, "step": 25350 }, { "epoch": 0.4285847071647668, "grad_norm": 0.0032684323377907276, "learning_rate": 9.607363781987906e-06, "loss": 0.0021, "step": 25360 }, { "epoch": 0.4287537074436173, "grad_norm": 0.15610577166080475, "learning_rate": 9.606790694498717e-06, "loss": 0.0037, "step": 25370 }, { "epoch": 0.42892270772246777, "grad_norm": 0.02182059921324253, "learning_rate": 9.606217206196418e-06, "loss": 0.0022, "step": 25380 }, { "epoch": 0.4290917080013182, "grad_norm": 0.08796156942844391, "learning_rate": 9.605643317130903e-06, "loss": 0.0043, "step": 25390 }, { "epoch": 0.42926070828016866, "grad_norm": 0.07977686822414398, "learning_rate": 9.605069027352108e-06, "loss": 0.0024, "step": 25400 }, { "epoch": 0.42942970855901913, "grad_norm": 0.08197636157274246, "learning_rate": 9.604494336909994e-06, "loss": 0.0029, "step": 25410 }, { "epoch": 0.4295987088378696, "grad_norm": 0.062118370085954666, "learning_rate": 9.60391924585456e-06, "loss": 0.0033, "step": 25420 }, { "epoch": 0.42976770911672, "grad_norm": 0.13398393988609314, "learning_rate": 9.603343754235849e-06, "loss": 0.0032, "step": 25430 }, { "epoch": 0.4299367093955705, "grad_norm": 0.1671101152896881, "learning_rate": 9.602767862103925e-06, "loss": 0.0041, "step": 25440 }, { "epoch": 0.43010570967442097, "grad_norm": 0.02333880588412285, "learning_rate": 9.602191569508899e-06, "loss": 0.003, "step": 25450 }, { "epoch": 0.43027470995327144, "grad_norm": 0.05902350693941116, "learning_rate": 9.601614876500906e-06, "loss": 0.0028, "step": 25460 }, { "epoch": 0.43044371023212186, "grad_norm": 0.09615401923656464, "learning_rate": 9.601037783130123e-06, "loss": 0.0021, "step": 25470 }, { "epoch": 0.43061271051097233, "grad_norm": 0.11068742722272873, "learning_rate": 9.600460289446762e-06, "loss": 0.0027, "step": 25480 }, { "epoch": 0.4307817107898228, "grad_norm": 0.08340872824192047, "learning_rate": 9.599882395501066e-06, "loss": 0.0025, "step": 25490 }, { "epoch": 0.4309507110686733, "grad_norm": 0.4351450800895691, "learning_rate": 9.599304101343314e-06, "loss": 0.005, "step": 25500 }, { "epoch": 0.43111971134752375, "grad_norm": 0.07268258184194565, "learning_rate": 9.59872540702382e-06, "loss": 0.0032, "step": 25510 }, { "epoch": 0.43128871162637417, "grad_norm": 0.15467266738414764, "learning_rate": 9.598146312592938e-06, "loss": 0.003, "step": 25520 }, { "epoch": 0.43145771190522464, "grad_norm": 0.158308744430542, "learning_rate": 9.597566818101046e-06, "loss": 0.0022, "step": 25530 }, { "epoch": 0.4316267121840751, "grad_norm": 0.06377403438091278, "learning_rate": 9.596986923598564e-06, "loss": 0.0033, "step": 25540 }, { "epoch": 0.4317957124629256, "grad_norm": 0.17815333604812622, "learning_rate": 9.596406629135947e-06, "loss": 0.003, "step": 25550 }, { "epoch": 0.431964712741776, "grad_norm": 0.10275933146476746, "learning_rate": 9.595825934763684e-06, "loss": 0.0037, "step": 25560 }, { "epoch": 0.4321337130206265, "grad_norm": 0.06398187577724457, "learning_rate": 9.595244840532296e-06, "loss": 0.0011, "step": 25570 }, { "epoch": 0.43230271329947695, "grad_norm": 0.06439651548862457, "learning_rate": 9.594663346492344e-06, "loss": 0.003, "step": 25580 }, { "epoch": 0.4324717135783274, "grad_norm": 0.019858231768012047, "learning_rate": 9.594081452694419e-06, "loss": 0.0027, "step": 25590 }, { "epoch": 0.43264071385717784, "grad_norm": 0.33359295129776, "learning_rate": 9.593499159189147e-06, "loss": 0.003, "step": 25600 }, { "epoch": 0.4328097141360283, "grad_norm": 0.13325978815555573, "learning_rate": 9.592916466027195e-06, "loss": 0.0043, "step": 25610 }, { "epoch": 0.4329787144148788, "grad_norm": 0.05250495672225952, "learning_rate": 9.592333373259256e-06, "loss": 0.0024, "step": 25620 }, { "epoch": 0.43314771469372926, "grad_norm": 0.09613601863384247, "learning_rate": 9.591749880936063e-06, "loss": 0.0012, "step": 25630 }, { "epoch": 0.43331671497257973, "grad_norm": 0.056316081434488297, "learning_rate": 9.591165989108384e-06, "loss": 0.0042, "step": 25640 }, { "epoch": 0.43348571525143015, "grad_norm": 0.08078259974718094, "learning_rate": 9.590581697827017e-06, "loss": 0.0015, "step": 25650 }, { "epoch": 0.4336547155302806, "grad_norm": 0.23672063648700714, "learning_rate": 9.589997007142802e-06, "loss": 0.0028, "step": 25660 }, { "epoch": 0.4338237158091311, "grad_norm": 0.028148150071501732, "learning_rate": 9.589411917106608e-06, "loss": 0.0018, "step": 25670 }, { "epoch": 0.43399271608798157, "grad_norm": 0.11672691255807877, "learning_rate": 9.58882642776934e-06, "loss": 0.0021, "step": 25680 }, { "epoch": 0.434161716366832, "grad_norm": 0.10163123905658722, "learning_rate": 9.588240539181942e-06, "loss": 0.0026, "step": 25690 }, { "epoch": 0.43433071664568246, "grad_norm": 0.06551125645637512, "learning_rate": 9.587654251395385e-06, "loss": 0.0093, "step": 25700 }, { "epoch": 0.43449971692453293, "grad_norm": 0.04481912776827812, "learning_rate": 9.587067564460679e-06, "loss": 0.0034, "step": 25710 }, { "epoch": 0.4346687172033834, "grad_norm": 0.13800600171089172, "learning_rate": 9.586480478428872e-06, "loss": 0.0048, "step": 25720 }, { "epoch": 0.4348377174822338, "grad_norm": 0.09977370500564575, "learning_rate": 9.58589299335104e-06, "loss": 0.0012, "step": 25730 }, { "epoch": 0.4350067177610843, "grad_norm": 0.0055341655388474464, "learning_rate": 9.585305109278299e-06, "loss": 0.0031, "step": 25740 }, { "epoch": 0.43517571803993477, "grad_norm": 0.16339772939682007, "learning_rate": 9.584716826261797e-06, "loss": 0.0051, "step": 25750 }, { "epoch": 0.43534471831878524, "grad_norm": 0.10466916114091873, "learning_rate": 9.584128144352717e-06, "loss": 0.0036, "step": 25760 }, { "epoch": 0.4355137185976357, "grad_norm": 0.13969078660011292, "learning_rate": 9.58353906360228e-06, "loss": 0.0018, "step": 25770 }, { "epoch": 0.43568271887648613, "grad_norm": 0.10388576984405518, "learning_rate": 9.582949584061736e-06, "loss": 0.0026, "step": 25780 }, { "epoch": 0.4358517191553366, "grad_norm": 0.031867023557424545, "learning_rate": 9.582359705782371e-06, "loss": 0.003, "step": 25790 }, { "epoch": 0.4360207194341871, "grad_norm": 0.2640696167945862, "learning_rate": 9.581769428815512e-06, "loss": 0.0039, "step": 25800 }, { "epoch": 0.43618971971303755, "grad_norm": 0.07732165604829788, "learning_rate": 9.581178753212514e-06, "loss": 0.0035, "step": 25810 }, { "epoch": 0.43635871999188797, "grad_norm": 0.0999775156378746, "learning_rate": 9.580587679024766e-06, "loss": 0.0025, "step": 25820 }, { "epoch": 0.43652772027073844, "grad_norm": 0.04713275283575058, "learning_rate": 9.579996206303696e-06, "loss": 0.0026, "step": 25830 }, { "epoch": 0.4366967205495889, "grad_norm": 0.11159471422433853, "learning_rate": 9.579404335100768e-06, "loss": 0.0033, "step": 25840 }, { "epoch": 0.4368657208284394, "grad_norm": 0.16928553581237793, "learning_rate": 9.578812065467475e-06, "loss": 0.0027, "step": 25850 }, { "epoch": 0.4370347211072898, "grad_norm": 0.03889685869216919, "learning_rate": 9.578219397455346e-06, "loss": 0.0054, "step": 25860 }, { "epoch": 0.4372037213861403, "grad_norm": 0.10739434510469437, "learning_rate": 9.577626331115947e-06, "loss": 0.0023, "step": 25870 }, { "epoch": 0.43737272166499075, "grad_norm": 0.23763492703437805, "learning_rate": 9.577032866500879e-06, "loss": 0.0027, "step": 25880 }, { "epoch": 0.4375417219438412, "grad_norm": 0.1545303910970688, "learning_rate": 9.576439003661776e-06, "loss": 0.0026, "step": 25890 }, { "epoch": 0.4377107222226917, "grad_norm": 0.08356676250696182, "learning_rate": 9.575844742650305e-06, "loss": 0.0023, "step": 25900 }, { "epoch": 0.4378797225015421, "grad_norm": 0.1397261917591095, "learning_rate": 9.575250083518173e-06, "loss": 0.0031, "step": 25910 }, { "epoch": 0.4380487227803926, "grad_norm": 0.122606061398983, "learning_rate": 9.574655026317114e-06, "loss": 0.0027, "step": 25920 }, { "epoch": 0.43821772305924306, "grad_norm": 0.11674268543720245, "learning_rate": 9.574059571098903e-06, "loss": 0.0022, "step": 25930 }, { "epoch": 0.43838672333809353, "grad_norm": 0.07671211659908295, "learning_rate": 9.573463717915349e-06, "loss": 0.0022, "step": 25940 }, { "epoch": 0.43855572361694395, "grad_norm": 0.1337328404188156, "learning_rate": 9.572867466818291e-06, "loss": 0.0019, "step": 25950 }, { "epoch": 0.4387247238957944, "grad_norm": 0.09576038271188736, "learning_rate": 9.572270817859607e-06, "loss": 0.0031, "step": 25960 }, { "epoch": 0.4388937241746449, "grad_norm": 0.021519066765904427, "learning_rate": 9.571673771091212e-06, "loss": 0.0014, "step": 25970 }, { "epoch": 0.43906272445349537, "grad_norm": 0.01436109934002161, "learning_rate": 9.571076326565045e-06, "loss": 0.0028, "step": 25980 }, { "epoch": 0.4392317247323458, "grad_norm": 0.1742318868637085, "learning_rate": 9.570478484333093e-06, "loss": 0.0024, "step": 25990 }, { "epoch": 0.43940072501119626, "grad_norm": 0.05830789729952812, "learning_rate": 9.569880244447368e-06, "loss": 0.0029, "step": 26000 }, { "epoch": 0.43956972529004673, "grad_norm": 0.12230129539966583, "learning_rate": 9.569281606959917e-06, "loss": 0.0023, "step": 26010 }, { "epoch": 0.4397387255688972, "grad_norm": 0.033528443425893784, "learning_rate": 9.56868257192283e-06, "loss": 0.0029, "step": 26020 }, { "epoch": 0.4399077258477476, "grad_norm": 0.18434548377990723, "learning_rate": 9.568083139388224e-06, "loss": 0.0036, "step": 26030 }, { "epoch": 0.4400767261265981, "grad_norm": 0.14409662783145905, "learning_rate": 9.56748330940825e-06, "loss": 0.0047, "step": 26040 }, { "epoch": 0.44024572640544857, "grad_norm": 0.06758307665586472, "learning_rate": 9.5668830820351e-06, "loss": 0.0027, "step": 26050 }, { "epoch": 0.44041472668429904, "grad_norm": 0.09830377250909805, "learning_rate": 9.566282457320994e-06, "loss": 0.0031, "step": 26060 }, { "epoch": 0.4405837269631495, "grad_norm": 0.09198874235153198, "learning_rate": 9.56568143531819e-06, "loss": 0.0026, "step": 26070 }, { "epoch": 0.44075272724199993, "grad_norm": 0.02963336929678917, "learning_rate": 9.56508001607898e-06, "loss": 0.0051, "step": 26080 }, { "epoch": 0.4409217275208504, "grad_norm": 0.07761721312999725, "learning_rate": 9.56447819965569e-06, "loss": 0.0038, "step": 26090 }, { "epoch": 0.4410907277997009, "grad_norm": 0.2935218811035156, "learning_rate": 9.563875986100682e-06, "loss": 0.0034, "step": 26100 }, { "epoch": 0.44125972807855135, "grad_norm": 0.22003482282161713, "learning_rate": 9.563273375466351e-06, "loss": 0.0027, "step": 26110 }, { "epoch": 0.44142872835740177, "grad_norm": 0.05516747757792473, "learning_rate": 9.562670367805127e-06, "loss": 0.0031, "step": 26120 }, { "epoch": 0.44159772863625224, "grad_norm": 0.06797683238983154, "learning_rate": 9.562066963169472e-06, "loss": 0.0033, "step": 26130 }, { "epoch": 0.4417667289151027, "grad_norm": 0.07337122410535812, "learning_rate": 9.56146316161189e-06, "loss": 0.0033, "step": 26140 }, { "epoch": 0.4419357291939532, "grad_norm": 0.14874686300754547, "learning_rate": 9.56085896318491e-06, "loss": 0.0035, "step": 26150 }, { "epoch": 0.4421047294728036, "grad_norm": 0.16632644832134247, "learning_rate": 9.560254367941104e-06, "loss": 0.0031, "step": 26160 }, { "epoch": 0.4422737297516541, "grad_norm": 0.08548545092344284, "learning_rate": 9.559649375933073e-06, "loss": 0.0023, "step": 26170 }, { "epoch": 0.44244273003050455, "grad_norm": 0.24352887272834778, "learning_rate": 9.559043987213451e-06, "loss": 0.0019, "step": 26180 }, { "epoch": 0.442611730309355, "grad_norm": 0.03823421150445938, "learning_rate": 9.558438201834918e-06, "loss": 0.0024, "step": 26190 }, { "epoch": 0.4427807305882055, "grad_norm": 0.2297850400209427, "learning_rate": 9.557832019850172e-06, "loss": 0.0041, "step": 26200 }, { "epoch": 0.4429497308670559, "grad_norm": 0.06309180706739426, "learning_rate": 9.557225441311959e-06, "loss": 0.0024, "step": 26210 }, { "epoch": 0.4431187311459064, "grad_norm": 0.1206027939915657, "learning_rate": 9.55661846627305e-06, "loss": 0.0026, "step": 26220 }, { "epoch": 0.44328773142475686, "grad_norm": 0.052939776331186295, "learning_rate": 9.55601109478626e-06, "loss": 0.0029, "step": 26230 }, { "epoch": 0.44345673170360733, "grad_norm": 0.05884641408920288, "learning_rate": 9.55540332690443e-06, "loss": 0.0037, "step": 26240 }, { "epoch": 0.44362573198245775, "grad_norm": 0.0946325957775116, "learning_rate": 9.554795162680436e-06, "loss": 0.0048, "step": 26250 }, { "epoch": 0.4437947322613082, "grad_norm": 0.09997012466192245, "learning_rate": 9.554186602167197e-06, "loss": 0.0026, "step": 26260 }, { "epoch": 0.4439637325401587, "grad_norm": 0.023280398920178413, "learning_rate": 9.553577645417657e-06, "loss": 0.0028, "step": 26270 }, { "epoch": 0.44413273281900917, "grad_norm": 0.16902558505535126, "learning_rate": 9.5529682924848e-06, "loss": 0.0019, "step": 26280 }, { "epoch": 0.4443017330978596, "grad_norm": 0.06520242989063263, "learning_rate": 9.552358543421643e-06, "loss": 0.0029, "step": 26290 }, { "epoch": 0.44447073337671006, "grad_norm": 0.10236934572458267, "learning_rate": 9.551748398281233e-06, "loss": 0.002, "step": 26300 }, { "epoch": 0.44463973365556053, "grad_norm": 0.039478711783885956, "learning_rate": 9.551137857116661e-06, "loss": 0.0023, "step": 26310 }, { "epoch": 0.444808733934411, "grad_norm": 0.05753898248076439, "learning_rate": 9.550526919981045e-06, "loss": 0.0024, "step": 26320 }, { "epoch": 0.4449777342132615, "grad_norm": 0.06465510278940201, "learning_rate": 9.549915586927536e-06, "loss": 0.0017, "step": 26330 }, { "epoch": 0.4451467344921119, "grad_norm": 0.322773277759552, "learning_rate": 9.549303858009329e-06, "loss": 0.002, "step": 26340 }, { "epoch": 0.44531573477096237, "grad_norm": 0.16484954953193665, "learning_rate": 9.548691733279644e-06, "loss": 0.0029, "step": 26350 }, { "epoch": 0.44548473504981284, "grad_norm": 0.09908819943666458, "learning_rate": 9.548079212791739e-06, "loss": 0.0019, "step": 26360 }, { "epoch": 0.4456537353286633, "grad_norm": 0.08926418423652649, "learning_rate": 9.547466296598907e-06, "loss": 0.0018, "step": 26370 }, { "epoch": 0.44582273560751373, "grad_norm": 0.14144323766231537, "learning_rate": 9.546852984754474e-06, "loss": 0.0024, "step": 26380 }, { "epoch": 0.4459917358863642, "grad_norm": 0.09844768047332764, "learning_rate": 9.546239277311799e-06, "loss": 0.0023, "step": 26390 }, { "epoch": 0.4461607361652147, "grad_norm": 0.02975483052432537, "learning_rate": 9.545625174324282e-06, "loss": 0.0023, "step": 26400 }, { "epoch": 0.44632973644406515, "grad_norm": 0.11824537813663483, "learning_rate": 9.545010675845352e-06, "loss": 0.0035, "step": 26410 }, { "epoch": 0.44649873672291557, "grad_norm": 0.20971493422985077, "learning_rate": 9.544395781928471e-06, "loss": 0.0035, "step": 26420 }, { "epoch": 0.44666773700176604, "grad_norm": 0.06645579636096954, "learning_rate": 9.543780492627137e-06, "loss": 0.0026, "step": 26430 }, { "epoch": 0.4468367372806165, "grad_norm": 0.08542334288358688, "learning_rate": 9.543164807994886e-06, "loss": 0.0018, "step": 26440 }, { "epoch": 0.447005737559467, "grad_norm": 0.2023836076259613, "learning_rate": 9.542548728085286e-06, "loss": 0.0034, "step": 26450 }, { "epoch": 0.44717473783831746, "grad_norm": 0.052794910967350006, "learning_rate": 9.541932252951938e-06, "loss": 0.0011, "step": 26460 }, { "epoch": 0.4473437381171679, "grad_norm": 0.22299179434776306, "learning_rate": 9.541315382648476e-06, "loss": 0.0035, "step": 26470 }, { "epoch": 0.44751273839601835, "grad_norm": 0.06551665812730789, "learning_rate": 9.540698117228573e-06, "loss": 0.0021, "step": 26480 }, { "epoch": 0.4476817386748688, "grad_norm": 0.12201642245054245, "learning_rate": 9.540080456745934e-06, "loss": 0.0016, "step": 26490 }, { "epoch": 0.4478507389537193, "grad_norm": 0.08223738521337509, "learning_rate": 9.539462401254298e-06, "loss": 0.0019, "step": 26500 }, { "epoch": 0.4480197392325697, "grad_norm": 0.2150903344154358, "learning_rate": 9.538843950807438e-06, "loss": 0.0041, "step": 26510 }, { "epoch": 0.4481887395114202, "grad_norm": 0.1299905776977539, "learning_rate": 9.538225105459164e-06, "loss": 0.0036, "step": 26520 }, { "epoch": 0.44835773979027066, "grad_norm": 0.020779894664883614, "learning_rate": 9.537605865263318e-06, "loss": 0.0021, "step": 26530 }, { "epoch": 0.44852674006912113, "grad_norm": 0.04517320543527603, "learning_rate": 9.536986230273774e-06, "loss": 0.0034, "step": 26540 }, { "epoch": 0.44869574034797155, "grad_norm": 0.12831257283687592, "learning_rate": 9.536366200544448e-06, "loss": 0.0033, "step": 26550 }, { "epoch": 0.448864740626822, "grad_norm": 0.13628539443016052, "learning_rate": 9.535745776129284e-06, "loss": 0.0031, "step": 26560 }, { "epoch": 0.4490337409056725, "grad_norm": 0.09149462729692459, "learning_rate": 9.53512495708226e-06, "loss": 0.0023, "step": 26570 }, { "epoch": 0.44920274118452297, "grad_norm": 0.07784470915794373, "learning_rate": 9.534503743457392e-06, "loss": 0.0025, "step": 26580 }, { "epoch": 0.44937174146337344, "grad_norm": 0.15072597563266754, "learning_rate": 9.533882135308727e-06, "loss": 0.0039, "step": 26590 }, { "epoch": 0.44954074174222386, "grad_norm": 0.10618377476930618, "learning_rate": 9.53326013269035e-06, "loss": 0.0043, "step": 26600 }, { "epoch": 0.44970974202107433, "grad_norm": 0.0775023028254509, "learning_rate": 9.532637735656379e-06, "loss": 0.0025, "step": 26610 }, { "epoch": 0.4498787422999248, "grad_norm": 0.10800741612911224, "learning_rate": 9.532014944260962e-06, "loss": 0.0028, "step": 26620 }, { "epoch": 0.4500477425787753, "grad_norm": 0.04061252623796463, "learning_rate": 9.531391758558286e-06, "loss": 0.0034, "step": 26630 }, { "epoch": 0.4502167428576257, "grad_norm": 0.004851445555686951, "learning_rate": 9.530768178602573e-06, "loss": 0.0032, "step": 26640 }, { "epoch": 0.45038574313647617, "grad_norm": 0.0552886426448822, "learning_rate": 9.530144204448076e-06, "loss": 0.0018, "step": 26650 }, { "epoch": 0.45055474341532664, "grad_norm": 0.14879478514194489, "learning_rate": 9.529519836149083e-06, "loss": 0.0021, "step": 26660 }, { "epoch": 0.4507237436941771, "grad_norm": 0.06371227651834488, "learning_rate": 9.528895073759921e-06, "loss": 0.0025, "step": 26670 }, { "epoch": 0.45089274397302753, "grad_norm": 1.3603676557540894, "learning_rate": 9.528269917334942e-06, "loss": 0.0047, "step": 26680 }, { "epoch": 0.451061744251878, "grad_norm": 0.04017437994480133, "learning_rate": 9.527644366928542e-06, "loss": 0.0027, "step": 26690 }, { "epoch": 0.4512307445307285, "grad_norm": 0.12146834284067154, "learning_rate": 9.527018422595144e-06, "loss": 0.005, "step": 26700 }, { "epoch": 0.45139974480957895, "grad_norm": 0.0878925770521164, "learning_rate": 9.52639208438921e-06, "loss": 0.0032, "step": 26710 }, { "epoch": 0.45156874508842937, "grad_norm": 0.04248873144388199, "learning_rate": 9.525765352365236e-06, "loss": 0.0024, "step": 26720 }, { "epoch": 0.45173774536727984, "grad_norm": 0.11376402527093887, "learning_rate": 9.525138226577744e-06, "loss": 0.0036, "step": 26730 }, { "epoch": 0.4519067456461303, "grad_norm": 0.1192072182893753, "learning_rate": 9.524510707081304e-06, "loss": 0.0023, "step": 26740 }, { "epoch": 0.4520757459249808, "grad_norm": 0.049709390848875046, "learning_rate": 9.52388279393051e-06, "loss": 0.0011, "step": 26750 }, { "epoch": 0.45224474620383126, "grad_norm": 0.1261884570121765, "learning_rate": 9.523254487179997e-06, "loss": 0.0028, "step": 26760 }, { "epoch": 0.4524137464826817, "grad_norm": 0.07239934056997299, "learning_rate": 9.522625786884424e-06, "loss": 0.0022, "step": 26770 }, { "epoch": 0.45258274676153215, "grad_norm": 0.010885490104556084, "learning_rate": 9.521996693098496e-06, "loss": 0.0028, "step": 26780 }, { "epoch": 0.4527517470403826, "grad_norm": 0.21607480943202972, "learning_rate": 9.521367205876946e-06, "loss": 0.002, "step": 26790 }, { "epoch": 0.4529207473192331, "grad_norm": 0.21466338634490967, "learning_rate": 9.520737325274544e-06, "loss": 0.0046, "step": 26800 }, { "epoch": 0.4530897475980835, "grad_norm": 0.21052201092243195, "learning_rate": 9.52010705134609e-06, "loss": 0.0019, "step": 26810 }, { "epoch": 0.453258747876934, "grad_norm": 0.07716619968414307, "learning_rate": 9.519476384146421e-06, "loss": 0.0051, "step": 26820 }, { "epoch": 0.45342774815578446, "grad_norm": 0.22380301356315613, "learning_rate": 9.518845323730413e-06, "loss": 0.0033, "step": 26830 }, { "epoch": 0.45359674843463493, "grad_norm": 0.035338010638952255, "learning_rate": 9.518213870152964e-06, "loss": 0.0024, "step": 26840 }, { "epoch": 0.45376574871348535, "grad_norm": 0.17700858414173126, "learning_rate": 9.517582023469019e-06, "loss": 0.0021, "step": 26850 }, { "epoch": 0.4539347489923358, "grad_norm": 0.08837666362524033, "learning_rate": 9.51694978373355e-06, "loss": 0.0033, "step": 26860 }, { "epoch": 0.4541037492711863, "grad_norm": 0.08255530148744583, "learning_rate": 9.516317151001562e-06, "loss": 0.0048, "step": 26870 }, { "epoch": 0.45427274955003677, "grad_norm": 0.18214929103851318, "learning_rate": 9.515684125328102e-06, "loss": 0.0029, "step": 26880 }, { "epoch": 0.45444174982888724, "grad_norm": 0.16345560550689697, "learning_rate": 9.515050706768243e-06, "loss": 0.0018, "step": 26890 }, { "epoch": 0.45461075010773766, "grad_norm": 0.05697440356016159, "learning_rate": 9.514416895377097e-06, "loss": 0.0017, "step": 26900 }, { "epoch": 0.45477975038658813, "grad_norm": 0.04156593605875969, "learning_rate": 9.513782691209808e-06, "loss": 0.0048, "step": 26910 }, { "epoch": 0.4549487506654386, "grad_norm": 0.10291267931461334, "learning_rate": 9.513148094321556e-06, "loss": 0.0028, "step": 26920 }, { "epoch": 0.4551177509442891, "grad_norm": 0.11824605613946915, "learning_rate": 9.512513104767553e-06, "loss": 0.0068, "step": 26930 }, { "epoch": 0.4552867512231395, "grad_norm": 0.14371328055858612, "learning_rate": 9.511877722603045e-06, "loss": 0.002, "step": 26940 }, { "epoch": 0.45545575150198997, "grad_norm": 0.054665789008140564, "learning_rate": 9.511241947883316e-06, "loss": 0.0025, "step": 26950 }, { "epoch": 0.45562475178084044, "grad_norm": 0.1597243696451187, "learning_rate": 9.51060578066368e-06, "loss": 0.0031, "step": 26960 }, { "epoch": 0.4557937520596909, "grad_norm": 0.027206890285015106, "learning_rate": 9.509969220999485e-06, "loss": 0.0033, "step": 26970 }, { "epoch": 0.45596275233854133, "grad_norm": 0.21192127466201782, "learning_rate": 9.509332268946118e-06, "loss": 0.0027, "step": 26980 }, { "epoch": 0.4561317526173918, "grad_norm": 0.17130592465400696, "learning_rate": 9.508694924558996e-06, "loss": 0.004, "step": 26990 }, { "epoch": 0.4563007528962423, "grad_norm": 0.08280270546674728, "learning_rate": 9.508057187893568e-06, "loss": 0.0024, "step": 27000 }, { "epoch": 0.45646975317509275, "grad_norm": 0.10956969857215881, "learning_rate": 9.507419059005325e-06, "loss": 0.0033, "step": 27010 }, { "epoch": 0.4566387534539432, "grad_norm": 0.07973030209541321, "learning_rate": 9.506780537949785e-06, "loss": 0.0024, "step": 27020 }, { "epoch": 0.45680775373279364, "grad_norm": 0.04599049314856529, "learning_rate": 9.5061416247825e-06, "loss": 0.003, "step": 27030 }, { "epoch": 0.4569767540116441, "grad_norm": 0.0669611468911171, "learning_rate": 9.505502319559062e-06, "loss": 0.0023, "step": 27040 }, { "epoch": 0.4571457542904946, "grad_norm": 0.18551360070705414, "learning_rate": 9.504862622335093e-06, "loss": 0.0031, "step": 27050 }, { "epoch": 0.45731475456934506, "grad_norm": 0.23695415258407593, "learning_rate": 9.50422253316625e-06, "loss": 0.0025, "step": 27060 }, { "epoch": 0.4574837548481955, "grad_norm": 0.23178884387016296, "learning_rate": 9.503582052108222e-06, "loss": 0.0039, "step": 27070 }, { "epoch": 0.45765275512704595, "grad_norm": 0.04792691767215729, "learning_rate": 9.502941179216734e-06, "loss": 0.002, "step": 27080 }, { "epoch": 0.4578217554058964, "grad_norm": 0.051211390644311905, "learning_rate": 9.502299914547547e-06, "loss": 0.0027, "step": 27090 }, { "epoch": 0.4579907556847469, "grad_norm": 0.08183260262012482, "learning_rate": 9.501658258156455e-06, "loss": 0.0027, "step": 27100 }, { "epoch": 0.4581597559635973, "grad_norm": 0.10788262635469437, "learning_rate": 9.501016210099284e-06, "loss": 0.0023, "step": 27110 }, { "epoch": 0.4583287562424478, "grad_norm": 0.05273312330245972, "learning_rate": 9.500373770431893e-06, "loss": 0.0018, "step": 27120 }, { "epoch": 0.45849775652129826, "grad_norm": 0.13591207563877106, "learning_rate": 9.499730939210178e-06, "loss": 0.0029, "step": 27130 }, { "epoch": 0.45866675680014873, "grad_norm": 0.13457539677619934, "learning_rate": 9.49908771649007e-06, "loss": 0.0041, "step": 27140 }, { "epoch": 0.4588357570789992, "grad_norm": 0.04396795853972435, "learning_rate": 9.498444102327534e-06, "loss": 0.0031, "step": 27150 }, { "epoch": 0.4590047573578496, "grad_norm": 0.03095605969429016, "learning_rate": 9.497800096778565e-06, "loss": 0.0015, "step": 27160 }, { "epoch": 0.4591737576367001, "grad_norm": 0.10350055247545242, "learning_rate": 9.497155699899194e-06, "loss": 0.0019, "step": 27170 }, { "epoch": 0.45934275791555057, "grad_norm": 0.07381950318813324, "learning_rate": 9.496510911745489e-06, "loss": 0.0023, "step": 27180 }, { "epoch": 0.45951175819440104, "grad_norm": 0.07345439493656158, "learning_rate": 9.495865732373549e-06, "loss": 0.0026, "step": 27190 }, { "epoch": 0.45968075847325146, "grad_norm": 0.10080543160438538, "learning_rate": 9.495220161839505e-06, "loss": 0.0029, "step": 27200 }, { "epoch": 0.45984975875210193, "grad_norm": 0.07151289284229279, "learning_rate": 9.49457420019953e-06, "loss": 0.0018, "step": 27210 }, { "epoch": 0.4600187590309524, "grad_norm": 0.08651597052812576, "learning_rate": 9.493927847509821e-06, "loss": 0.0028, "step": 27220 }, { "epoch": 0.4601877593098029, "grad_norm": 0.013782293535768986, "learning_rate": 9.493281103826614e-06, "loss": 0.0028, "step": 27230 }, { "epoch": 0.4603567595886533, "grad_norm": 0.14195019006729126, "learning_rate": 9.492633969206184e-06, "loss": 0.0032, "step": 27240 }, { "epoch": 0.46052575986750377, "grad_norm": 0.2378387153148651, "learning_rate": 9.49198644370483e-06, "loss": 0.0042, "step": 27250 }, { "epoch": 0.46069476014635424, "grad_norm": 0.06230916827917099, "learning_rate": 9.491338527378892e-06, "loss": 0.003, "step": 27260 }, { "epoch": 0.4608637604252047, "grad_norm": 0.10277201235294342, "learning_rate": 9.49069022028474e-06, "loss": 0.0029, "step": 27270 }, { "epoch": 0.4610327607040552, "grad_norm": 0.2792683243751526, "learning_rate": 9.490041522478781e-06, "loss": 0.0034, "step": 27280 }, { "epoch": 0.4612017609829056, "grad_norm": 0.07100880146026611, "learning_rate": 9.489392434017455e-06, "loss": 0.0015, "step": 27290 }, { "epoch": 0.4613707612617561, "grad_norm": 0.11998149007558823, "learning_rate": 9.488742954957236e-06, "loss": 0.0029, "step": 27300 }, { "epoch": 0.46153976154060655, "grad_norm": 0.09208308160305023, "learning_rate": 9.488093085354632e-06, "loss": 0.0025, "step": 27310 }, { "epoch": 0.461708761819457, "grad_norm": 0.11172691732645035, "learning_rate": 9.487442825266182e-06, "loss": 0.0019, "step": 27320 }, { "epoch": 0.46187776209830744, "grad_norm": 0.06742815673351288, "learning_rate": 9.486792174748467e-06, "loss": 0.002, "step": 27330 }, { "epoch": 0.4620467623771579, "grad_norm": 0.1170259416103363, "learning_rate": 9.486141133858092e-06, "loss": 0.0022, "step": 27340 }, { "epoch": 0.4622157626560084, "grad_norm": 0.02968103438615799, "learning_rate": 9.485489702651703e-06, "loss": 0.0011, "step": 27350 }, { "epoch": 0.46238476293485886, "grad_norm": 0.08396390825510025, "learning_rate": 9.484837881185977e-06, "loss": 0.0019, "step": 27360 }, { "epoch": 0.4625537632137093, "grad_norm": 0.120454341173172, "learning_rate": 9.484185669517625e-06, "loss": 0.0023, "step": 27370 }, { "epoch": 0.46272276349255975, "grad_norm": 0.04382321983575821, "learning_rate": 9.483533067703394e-06, "loss": 0.0015, "step": 27380 }, { "epoch": 0.4628917637714102, "grad_norm": 0.12909847497940063, "learning_rate": 9.482880075800064e-06, "loss": 0.0055, "step": 27390 }, { "epoch": 0.4630607640502607, "grad_norm": 0.17938898503780365, "learning_rate": 9.482226693864445e-06, "loss": 0.0042, "step": 27400 }, { "epoch": 0.4632297643291111, "grad_norm": 0.05244031921029091, "learning_rate": 9.481572921953388e-06, "loss": 0.0022, "step": 27410 }, { "epoch": 0.4633987646079616, "grad_norm": 0.02196231298148632, "learning_rate": 9.480918760123774e-06, "loss": 0.0025, "step": 27420 }, { "epoch": 0.46356776488681206, "grad_norm": 0.1432347148656845, "learning_rate": 9.480264208432517e-06, "loss": 0.0028, "step": 27430 }, { "epoch": 0.46373676516566253, "grad_norm": 0.1046581119298935, "learning_rate": 9.479609266936566e-06, "loss": 0.0035, "step": 27440 }, { "epoch": 0.463905765444513, "grad_norm": 0.018662603572010994, "learning_rate": 9.478953935692906e-06, "loss": 0.0032, "step": 27450 }, { "epoch": 0.4640747657233634, "grad_norm": 0.020825045183300972, "learning_rate": 9.47829821475855e-06, "loss": 0.0025, "step": 27460 }, { "epoch": 0.4642437660022139, "grad_norm": 0.07902387529611588, "learning_rate": 9.477642104190552e-06, "loss": 0.0023, "step": 27470 }, { "epoch": 0.46441276628106437, "grad_norm": 0.01250784657895565, "learning_rate": 9.476985604045998e-06, "loss": 0.0029, "step": 27480 }, { "epoch": 0.46458176655991484, "grad_norm": 0.039467018097639084, "learning_rate": 9.476328714382003e-06, "loss": 0.0025, "step": 27490 }, { "epoch": 0.46475076683876526, "grad_norm": 0.16227799654006958, "learning_rate": 9.475671435255722e-06, "loss": 0.0028, "step": 27500 }, { "epoch": 0.46491976711761573, "grad_norm": 0.21521276235580444, "learning_rate": 9.475013766724341e-06, "loss": 0.0033, "step": 27510 }, { "epoch": 0.4650887673964662, "grad_norm": 0.17984019219875336, "learning_rate": 9.474355708845082e-06, "loss": 0.0066, "step": 27520 }, { "epoch": 0.4652577676753167, "grad_norm": 0.1133570596575737, "learning_rate": 9.473697261675195e-06, "loss": 0.0034, "step": 27530 }, { "epoch": 0.4654267679541671, "grad_norm": 0.03497983515262604, "learning_rate": 9.473038425271973e-06, "loss": 0.0042, "step": 27540 }, { "epoch": 0.46559576823301757, "grad_norm": 0.03304870426654816, "learning_rate": 9.472379199692734e-06, "loss": 0.0032, "step": 27550 }, { "epoch": 0.46576476851186804, "grad_norm": 0.019517891108989716, "learning_rate": 9.471719584994836e-06, "loss": 0.0016, "step": 27560 }, { "epoch": 0.4659337687907185, "grad_norm": 0.05855069309473038, "learning_rate": 9.471059581235668e-06, "loss": 0.0018, "step": 27570 }, { "epoch": 0.466102769069569, "grad_norm": 0.13207753002643585, "learning_rate": 9.470399188472655e-06, "loss": 0.0032, "step": 27580 }, { "epoch": 0.4662717693484194, "grad_norm": 0.23710985481739044, "learning_rate": 9.469738406763252e-06, "loss": 0.0032, "step": 27590 }, { "epoch": 0.4664407696272699, "grad_norm": 0.0779089406132698, "learning_rate": 9.46907723616495e-06, "loss": 0.0031, "step": 27600 }, { "epoch": 0.46660976990612035, "grad_norm": 0.030405018478631973, "learning_rate": 9.468415676735276e-06, "loss": 0.0027, "step": 27610 }, { "epoch": 0.4667787701849708, "grad_norm": 0.062397025525569916, "learning_rate": 9.467753728531789e-06, "loss": 0.0019, "step": 27620 }, { "epoch": 0.46694777046382124, "grad_norm": 0.058743417263031006, "learning_rate": 9.46709139161208e-06, "loss": 0.0016, "step": 27630 }, { "epoch": 0.4671167707426717, "grad_norm": 0.13921846449375153, "learning_rate": 9.466428666033778e-06, "loss": 0.0027, "step": 27640 }, { "epoch": 0.4672857710215222, "grad_norm": 0.025780148804187775, "learning_rate": 9.46576555185454e-06, "loss": 0.0032, "step": 27650 }, { "epoch": 0.46745477130037266, "grad_norm": 0.02071690745651722, "learning_rate": 9.465102049132062e-06, "loss": 0.0023, "step": 27660 }, { "epoch": 0.4676237715792231, "grad_norm": 0.19302938878536224, "learning_rate": 9.464438157924071e-06, "loss": 0.0039, "step": 27670 }, { "epoch": 0.46779277185807355, "grad_norm": 0.05434525012969971, "learning_rate": 9.463773878288331e-06, "loss": 0.0023, "step": 27680 }, { "epoch": 0.467961772136924, "grad_norm": 0.0356968455016613, "learning_rate": 9.463109210282637e-06, "loss": 0.0019, "step": 27690 }, { "epoch": 0.4681307724157745, "grad_norm": 0.08952018618583679, "learning_rate": 9.462444153964816e-06, "loss": 0.0024, "step": 27700 }, { "epoch": 0.46829977269462497, "grad_norm": 0.10880987346172333, "learning_rate": 9.461778709392732e-06, "loss": 0.0024, "step": 27710 }, { "epoch": 0.4684687729734754, "grad_norm": 0.009157329797744751, "learning_rate": 9.461112876624283e-06, "loss": 0.0031, "step": 27720 }, { "epoch": 0.46863777325232586, "grad_norm": 0.06396792829036713, "learning_rate": 9.460446655717401e-06, "loss": 0.0025, "step": 27730 }, { "epoch": 0.46880677353117634, "grad_norm": 0.0792291983962059, "learning_rate": 9.459780046730046e-06, "loss": 0.0029, "step": 27740 }, { "epoch": 0.4689757738100268, "grad_norm": 0.17137271165847778, "learning_rate": 9.45911304972022e-06, "loss": 0.0023, "step": 27750 }, { "epoch": 0.4691447740888772, "grad_norm": 0.16492405533790588, "learning_rate": 9.458445664745954e-06, "loss": 0.0041, "step": 27760 }, { "epoch": 0.4693137743677277, "grad_norm": 0.08875801414251328, "learning_rate": 9.457777891865312e-06, "loss": 0.0019, "step": 27770 }, { "epoch": 0.46948277464657817, "grad_norm": 0.035520289093256, "learning_rate": 9.457109731136396e-06, "loss": 0.0023, "step": 27780 }, { "epoch": 0.46965177492542864, "grad_norm": 0.03769254311919212, "learning_rate": 9.456441182617339e-06, "loss": 0.0032, "step": 27790 }, { "epoch": 0.46982077520427906, "grad_norm": 0.022853732109069824, "learning_rate": 9.455772246366306e-06, "loss": 0.0029, "step": 27800 }, { "epoch": 0.46998977548312953, "grad_norm": 0.08009804040193558, "learning_rate": 9.4551029224415e-06, "loss": 0.0017, "step": 27810 }, { "epoch": 0.47015877576198, "grad_norm": 0.1740979701280594, "learning_rate": 9.454433210901152e-06, "loss": 0.0039, "step": 27820 }, { "epoch": 0.4703277760408305, "grad_norm": 0.014562606811523438, "learning_rate": 9.453763111803536e-06, "loss": 0.0036, "step": 27830 }, { "epoch": 0.47049677631968095, "grad_norm": 0.1988329291343689, "learning_rate": 9.453092625206947e-06, "loss": 0.0039, "step": 27840 }, { "epoch": 0.47066577659853137, "grad_norm": 0.09475607424974442, "learning_rate": 9.452421751169724e-06, "loss": 0.0031, "step": 27850 }, { "epoch": 0.47083477687738184, "grad_norm": 0.05441892519593239, "learning_rate": 9.451750489750238e-06, "loss": 0.0031, "step": 27860 }, { "epoch": 0.4710037771562323, "grad_norm": 0.1322525590658188, "learning_rate": 9.45107884100689e-06, "loss": 0.0019, "step": 27870 }, { "epoch": 0.4711727774350828, "grad_norm": 0.23945532739162445, "learning_rate": 9.450406804998116e-06, "loss": 0.0034, "step": 27880 }, { "epoch": 0.4713417777139332, "grad_norm": 0.09466557949781418, "learning_rate": 9.449734381782388e-06, "loss": 0.0018, "step": 27890 }, { "epoch": 0.4715107779927837, "grad_norm": 0.10149737447500229, "learning_rate": 9.449061571418208e-06, "loss": 0.0021, "step": 27900 }, { "epoch": 0.47167977827163415, "grad_norm": 0.11542762815952301, "learning_rate": 9.448388373964118e-06, "loss": 0.0027, "step": 27910 }, { "epoch": 0.4718487785504846, "grad_norm": 0.09331060945987701, "learning_rate": 9.447714789478684e-06, "loss": 0.0028, "step": 27920 }, { "epoch": 0.47201777882933504, "grad_norm": 0.21507014334201813, "learning_rate": 9.447040818020514e-06, "loss": 0.0031, "step": 27930 }, { "epoch": 0.4721867791081855, "grad_norm": 0.18056412041187286, "learning_rate": 9.446366459648246e-06, "loss": 0.0024, "step": 27940 }, { "epoch": 0.472355779387036, "grad_norm": 0.17236560583114624, "learning_rate": 9.445691714420553e-06, "loss": 0.0059, "step": 27950 }, { "epoch": 0.47252477966588646, "grad_norm": 0.06868729740381241, "learning_rate": 9.445016582396143e-06, "loss": 0.0023, "step": 27960 }, { "epoch": 0.47269377994473694, "grad_norm": 0.04553517326712608, "learning_rate": 9.44434106363375e-06, "loss": 0.0024, "step": 27970 }, { "epoch": 0.47286278022358735, "grad_norm": 0.14323654770851135, "learning_rate": 9.443665158192154e-06, "loss": 0.0034, "step": 27980 }, { "epoch": 0.4730317805024378, "grad_norm": 0.08195087313652039, "learning_rate": 9.442988866130159e-06, "loss": 0.0029, "step": 27990 }, { "epoch": 0.4732007807812883, "grad_norm": 0.04136926308274269, "learning_rate": 9.442312187506605e-06, "loss": 0.0026, "step": 28000 }, { "epoch": 0.4733697810601388, "grad_norm": 0.09844597429037094, "learning_rate": 9.441635122380367e-06, "loss": 0.003, "step": 28010 }, { "epoch": 0.4735387813389892, "grad_norm": 0.02087082341313362, "learning_rate": 9.440957670810354e-06, "loss": 0.0026, "step": 28020 }, { "epoch": 0.47370778161783966, "grad_norm": 0.14074547588825226, "learning_rate": 9.440279832855507e-06, "loss": 0.0018, "step": 28030 }, { "epoch": 0.47387678189669014, "grad_norm": 0.11037290841341019, "learning_rate": 9.4396016085748e-06, "loss": 0.0035, "step": 28040 }, { "epoch": 0.4740457821755406, "grad_norm": 0.13119235634803772, "learning_rate": 9.438922998027242e-06, "loss": 0.0043, "step": 28050 }, { "epoch": 0.474214782454391, "grad_norm": 0.27193567156791687, "learning_rate": 9.438244001271878e-06, "loss": 0.0019, "step": 28060 }, { "epoch": 0.4743837827332415, "grad_norm": 0.061256974935531616, "learning_rate": 9.437564618367781e-06, "loss": 0.0023, "step": 28070 }, { "epoch": 0.47455278301209197, "grad_norm": 0.06362022459506989, "learning_rate": 9.436884849374062e-06, "loss": 0.004, "step": 28080 }, { "epoch": 0.47472178329094245, "grad_norm": 0.09642446041107178, "learning_rate": 9.436204694349863e-06, "loss": 0.0019, "step": 28090 }, { "epoch": 0.47489078356979286, "grad_norm": 0.06854920089244843, "learning_rate": 9.435524153354363e-06, "loss": 0.0068, "step": 28100 }, { "epoch": 0.47505978384864334, "grad_norm": 0.1118801087141037, "learning_rate": 9.43484322644677e-06, "loss": 0.0048, "step": 28110 }, { "epoch": 0.4752287841274938, "grad_norm": 0.10855915397405624, "learning_rate": 9.434161913686331e-06, "loss": 0.0021, "step": 28120 }, { "epoch": 0.4753977844063443, "grad_norm": 0.1947670429944992, "learning_rate": 9.433480215132319e-06, "loss": 0.002, "step": 28130 }, { "epoch": 0.47556678468519475, "grad_norm": 0.2410636693239212, "learning_rate": 9.43279813084405e-06, "loss": 0.0034, "step": 28140 }, { "epoch": 0.47573578496404517, "grad_norm": 0.1882900595664978, "learning_rate": 9.432115660880862e-06, "loss": 0.0022, "step": 28150 }, { "epoch": 0.47590478524289564, "grad_norm": 0.17083005607128143, "learning_rate": 9.431432805302141e-06, "loss": 0.0023, "step": 28160 }, { "epoch": 0.4760737855217461, "grad_norm": 0.15488651394844055, "learning_rate": 9.430749564167294e-06, "loss": 0.0041, "step": 28170 }, { "epoch": 0.4762427858005966, "grad_norm": 0.05121308192610741, "learning_rate": 9.430065937535769e-06, "loss": 0.0025, "step": 28180 }, { "epoch": 0.476411786079447, "grad_norm": 0.07457858324050903, "learning_rate": 9.429381925467042e-06, "loss": 0.0018, "step": 28190 }, { "epoch": 0.4765807863582975, "grad_norm": 0.16733551025390625, "learning_rate": 9.428697528020626e-06, "loss": 0.0018, "step": 28200 }, { "epoch": 0.47674978663714795, "grad_norm": 0.05104765668511391, "learning_rate": 9.428012745256068e-06, "loss": 0.0031, "step": 28210 }, { "epoch": 0.4769187869159984, "grad_norm": 0.1416759192943573, "learning_rate": 9.427327577232948e-06, "loss": 0.0013, "step": 28220 }, { "epoch": 0.47708778719484884, "grad_norm": 0.08597423881292343, "learning_rate": 9.426642024010877e-06, "loss": 0.0013, "step": 28230 }, { "epoch": 0.4772567874736993, "grad_norm": 0.11198081821203232, "learning_rate": 9.425956085649502e-06, "loss": 0.0034, "step": 28240 }, { "epoch": 0.4774257877525498, "grad_norm": 0.14100314676761627, "learning_rate": 9.425269762208504e-06, "loss": 0.0023, "step": 28250 }, { "epoch": 0.47759478803140026, "grad_norm": 0.041467033326625824, "learning_rate": 9.424583053747596e-06, "loss": 0.0034, "step": 28260 }, { "epoch": 0.47776378831025074, "grad_norm": 0.020220177248120308, "learning_rate": 9.423895960326524e-06, "loss": 0.0036, "step": 28270 }, { "epoch": 0.47793278858910115, "grad_norm": 0.030264656990766525, "learning_rate": 9.423208482005068e-06, "loss": 0.0018, "step": 28280 }, { "epoch": 0.4781017888679516, "grad_norm": 0.05260373651981354, "learning_rate": 9.422520618843045e-06, "loss": 0.003, "step": 28290 }, { "epoch": 0.4782707891468021, "grad_norm": 0.05647336319088936, "learning_rate": 9.4218323709003e-06, "loss": 0.0044, "step": 28300 }, { "epoch": 0.4784397894256526, "grad_norm": 0.2026774287223816, "learning_rate": 9.421143738236715e-06, "loss": 0.0034, "step": 28310 }, { "epoch": 0.478608789704503, "grad_norm": 0.07637016475200653, "learning_rate": 9.420454720912203e-06, "loss": 0.0046, "step": 28320 }, { "epoch": 0.47877778998335346, "grad_norm": 0.3951351046562195, "learning_rate": 9.419765318986713e-06, "loss": 0.0024, "step": 28330 }, { "epoch": 0.47894679026220394, "grad_norm": 0.03452563285827637, "learning_rate": 9.419075532520225e-06, "loss": 0.0034, "step": 28340 }, { "epoch": 0.4791157905410544, "grad_norm": 0.017014149576425552, "learning_rate": 9.418385361572758e-06, "loss": 0.0025, "step": 28350 }, { "epoch": 0.4792847908199048, "grad_norm": 0.1143660619854927, "learning_rate": 9.417694806204353e-06, "loss": 0.002, "step": 28360 }, { "epoch": 0.4794537910987553, "grad_norm": 0.1327093243598938, "learning_rate": 9.417003866475099e-06, "loss": 0.0025, "step": 28370 }, { "epoch": 0.4796227913776058, "grad_norm": 0.06873395293951035, "learning_rate": 9.416312542445105e-06, "loss": 0.0031, "step": 28380 }, { "epoch": 0.47979179165645625, "grad_norm": 0.18769247829914093, "learning_rate": 9.415620834174524e-06, "loss": 0.0032, "step": 28390 }, { "epoch": 0.4799607919353067, "grad_norm": 0.05309249460697174, "learning_rate": 9.414928741723535e-06, "loss": 0.0018, "step": 28400 }, { "epoch": 0.48012979221415714, "grad_norm": 0.07335963100194931, "learning_rate": 9.414236265152355e-06, "loss": 0.0026, "step": 28410 }, { "epoch": 0.4802987924930076, "grad_norm": 0.02062637358903885, "learning_rate": 9.413543404521233e-06, "loss": 0.0012, "step": 28420 }, { "epoch": 0.4804677927718581, "grad_norm": 0.2966996431350708, "learning_rate": 9.41285015989045e-06, "loss": 0.0034, "step": 28430 }, { "epoch": 0.48063679305070856, "grad_norm": 0.11095491051673889, "learning_rate": 9.412156531320323e-06, "loss": 0.0026, "step": 28440 }, { "epoch": 0.480805793329559, "grad_norm": 0.09372120350599289, "learning_rate": 9.4114625188712e-06, "loss": 0.0023, "step": 28450 }, { "epoch": 0.48097479360840945, "grad_norm": 0.05419611930847168, "learning_rate": 9.410768122603464e-06, "loss": 0.0043, "step": 28460 }, { "epoch": 0.4811437938872599, "grad_norm": 0.08735162019729614, "learning_rate": 9.410073342577532e-06, "loss": 0.002, "step": 28470 }, { "epoch": 0.4813127941661104, "grad_norm": 0.1224854364991188, "learning_rate": 9.40937817885385e-06, "loss": 0.0026, "step": 28480 }, { "epoch": 0.4814817944449608, "grad_norm": 0.03866802901029587, "learning_rate": 9.408682631492902e-06, "loss": 0.0019, "step": 28490 }, { "epoch": 0.4816507947238113, "grad_norm": 0.06974063068628311, "learning_rate": 9.407986700555206e-06, "loss": 0.0043, "step": 28500 }, { "epoch": 0.48181979500266175, "grad_norm": 0.06814968585968018, "learning_rate": 9.40729038610131e-06, "loss": 0.0022, "step": 28510 }, { "epoch": 0.48198879528151223, "grad_norm": 0.022219954058527946, "learning_rate": 9.406593688191796e-06, "loss": 0.0019, "step": 28520 }, { "epoch": 0.4821577955603627, "grad_norm": 0.1143420934677124, "learning_rate": 9.405896606887282e-06, "loss": 0.0036, "step": 28530 }, { "epoch": 0.4823267958392131, "grad_norm": 0.12258830666542053, "learning_rate": 9.405199142248414e-06, "loss": 0.006, "step": 28540 }, { "epoch": 0.4824957961180636, "grad_norm": 0.3413972556591034, "learning_rate": 9.404501294335878e-06, "loss": 0.0025, "step": 28550 }, { "epoch": 0.48266479639691406, "grad_norm": 0.15560147166252136, "learning_rate": 9.403803063210389e-06, "loss": 0.0015, "step": 28560 }, { "epoch": 0.48283379667576454, "grad_norm": 0.04303400591015816, "learning_rate": 9.403104448932696e-06, "loss": 0.0023, "step": 28570 }, { "epoch": 0.48300279695461495, "grad_norm": 0.10534984618425369, "learning_rate": 9.402405451563583e-06, "loss": 0.0028, "step": 28580 }, { "epoch": 0.4831717972334654, "grad_norm": 0.0458071269094944, "learning_rate": 9.401706071163866e-06, "loss": 0.0027, "step": 28590 }, { "epoch": 0.4833407975123159, "grad_norm": 0.017634080722928047, "learning_rate": 9.401006307794394e-06, "loss": 0.0014, "step": 28600 }, { "epoch": 0.4835097977911664, "grad_norm": 0.07521199434995651, "learning_rate": 9.400306161516049e-06, "loss": 0.0019, "step": 28610 }, { "epoch": 0.4836787980700168, "grad_norm": 0.10925111174583435, "learning_rate": 9.399605632389748e-06, "loss": 0.0033, "step": 28620 }, { "epoch": 0.48384779834886726, "grad_norm": 0.01852216199040413, "learning_rate": 9.39890472047644e-06, "loss": 0.0021, "step": 28630 }, { "epoch": 0.48401679862771774, "grad_norm": 0.09024424850940704, "learning_rate": 9.39820342583711e-06, "loss": 0.0028, "step": 28640 }, { "epoch": 0.4841857989065682, "grad_norm": 0.08193477988243103, "learning_rate": 9.39750174853277e-06, "loss": 0.0017, "step": 28650 }, { "epoch": 0.4843547991854187, "grad_norm": 0.08656947314739227, "learning_rate": 9.396799688624473e-06, "loss": 0.0023, "step": 28660 }, { "epoch": 0.4845237994642691, "grad_norm": 0.08420667797327042, "learning_rate": 9.396097246173299e-06, "loss": 0.0016, "step": 28670 }, { "epoch": 0.4846927997431196, "grad_norm": 0.13190427422523499, "learning_rate": 9.395394421240366e-06, "loss": 0.0016, "step": 28680 }, { "epoch": 0.48486180002197005, "grad_norm": 0.13366137444972992, "learning_rate": 9.394691213886823e-06, "loss": 0.0014, "step": 28690 }, { "epoch": 0.4850308003008205, "grad_norm": 0.0756131261587143, "learning_rate": 9.393987624173849e-06, "loss": 0.0032, "step": 28700 }, { "epoch": 0.48519980057967094, "grad_norm": 0.034523122012615204, "learning_rate": 9.393283652162664e-06, "loss": 0.0032, "step": 28710 }, { "epoch": 0.4853688008585214, "grad_norm": 0.05466236546635628, "learning_rate": 9.392579297914515e-06, "loss": 0.0028, "step": 28720 }, { "epoch": 0.4855378011373719, "grad_norm": 0.16884277760982513, "learning_rate": 9.391874561490685e-06, "loss": 0.0026, "step": 28730 }, { "epoch": 0.48570680141622236, "grad_norm": 0.11400584131479263, "learning_rate": 9.391169442952488e-06, "loss": 0.0038, "step": 28740 }, { "epoch": 0.4858758016950728, "grad_norm": 0.04285375028848648, "learning_rate": 9.390463942361276e-06, "loss": 0.0019, "step": 28750 }, { "epoch": 0.48604480197392325, "grad_norm": 0.6482422947883606, "learning_rate": 9.389758059778427e-06, "loss": 0.0043, "step": 28760 }, { "epoch": 0.4862138022527737, "grad_norm": 0.04816224426031113, "learning_rate": 9.389051795265359e-06, "loss": 0.0029, "step": 28770 }, { "epoch": 0.4863828025316242, "grad_norm": 0.031575001776218414, "learning_rate": 9.388345148883517e-06, "loss": 0.0013, "step": 28780 }, { "epoch": 0.4865518028104746, "grad_norm": 0.06918296962976456, "learning_rate": 9.387638120694387e-06, "loss": 0.0042, "step": 28790 }, { "epoch": 0.4867208030893251, "grad_norm": 0.0319158211350441, "learning_rate": 9.386930710759482e-06, "loss": 0.0048, "step": 28800 }, { "epoch": 0.48688980336817556, "grad_norm": 0.11078489571809769, "learning_rate": 9.386222919140349e-06, "loss": 0.0024, "step": 28810 }, { "epoch": 0.48705880364702603, "grad_norm": 0.16688930988311768, "learning_rate": 9.385514745898569e-06, "loss": 0.0033, "step": 28820 }, { "epoch": 0.4872278039258765, "grad_norm": 0.11452493071556091, "learning_rate": 9.384806191095761e-06, "loss": 0.0021, "step": 28830 }, { "epoch": 0.4873968042047269, "grad_norm": 0.1142348200082779, "learning_rate": 9.384097254793567e-06, "loss": 0.0038, "step": 28840 }, { "epoch": 0.4875658044835774, "grad_norm": 0.08110999315977097, "learning_rate": 9.383387937053671e-06, "loss": 0.0029, "step": 28850 }, { "epoch": 0.48773480476242786, "grad_norm": 0.08010591566562653, "learning_rate": 9.382678237937788e-06, "loss": 0.0028, "step": 28860 }, { "epoch": 0.48790380504127834, "grad_norm": 0.025130346417427063, "learning_rate": 9.381968157507662e-06, "loss": 0.0015, "step": 28870 }, { "epoch": 0.48807280532012876, "grad_norm": 0.08091887086629868, "learning_rate": 9.381257695825076e-06, "loss": 0.0049, "step": 28880 }, { "epoch": 0.48824180559897923, "grad_norm": 0.07320184260606766, "learning_rate": 9.380546852951841e-06, "loss": 0.0026, "step": 28890 }, { "epoch": 0.4884108058778297, "grad_norm": 0.08067914098501205, "learning_rate": 9.379835628949809e-06, "loss": 0.0031, "step": 28900 }, { "epoch": 0.4885798061566802, "grad_norm": 0.03999374806880951, "learning_rate": 9.379124023880854e-06, "loss": 0.0018, "step": 28910 }, { "epoch": 0.4887488064355306, "grad_norm": 0.07922464609146118, "learning_rate": 9.378412037806891e-06, "loss": 0.0034, "step": 28920 }, { "epoch": 0.48891780671438106, "grad_norm": 0.13327257335186005, "learning_rate": 9.377699670789869e-06, "loss": 0.0021, "step": 28930 }, { "epoch": 0.48908680699323154, "grad_norm": 0.10939807444810867, "learning_rate": 9.376986922891765e-06, "loss": 0.0018, "step": 28940 }, { "epoch": 0.489255807272082, "grad_norm": 0.07308842241764069, "learning_rate": 9.376273794174591e-06, "loss": 0.0038, "step": 28950 }, { "epoch": 0.4894248075509325, "grad_norm": 0.1312398463487625, "learning_rate": 9.375560284700394e-06, "loss": 0.0043, "step": 28960 }, { "epoch": 0.4895938078297829, "grad_norm": 0.05857406184077263, "learning_rate": 9.37484639453125e-06, "loss": 0.0023, "step": 28970 }, { "epoch": 0.4897628081086334, "grad_norm": 0.15974996984004974, "learning_rate": 9.374132123729274e-06, "loss": 0.0019, "step": 28980 }, { "epoch": 0.48993180838748385, "grad_norm": 0.17144306004047394, "learning_rate": 9.373417472356612e-06, "loss": 0.004, "step": 28990 }, { "epoch": 0.4901008086663343, "grad_norm": 0.08801712095737457, "learning_rate": 9.372702440475438e-06, "loss": 0.0021, "step": 29000 }, { "epoch": 0.49026980894518474, "grad_norm": 0.07644796371459961, "learning_rate": 9.371987028147966e-06, "loss": 0.0019, "step": 29010 }, { "epoch": 0.4904388092240352, "grad_norm": 0.11791327595710754, "learning_rate": 9.37127123543644e-06, "loss": 0.0021, "step": 29020 }, { "epoch": 0.4906078095028857, "grad_norm": 0.11407289654016495, "learning_rate": 9.370555062403136e-06, "loss": 0.0033, "step": 29030 }, { "epoch": 0.49077680978173616, "grad_norm": 0.12483616918325424, "learning_rate": 9.369838509110368e-06, "loss": 0.0031, "step": 29040 }, { "epoch": 0.4909458100605866, "grad_norm": 0.031986385583877563, "learning_rate": 9.369121575620476e-06, "loss": 0.0029, "step": 29050 }, { "epoch": 0.49111481033943705, "grad_norm": 0.18724696338176727, "learning_rate": 9.368404261995838e-06, "loss": 0.0047, "step": 29060 }, { "epoch": 0.4912838106182875, "grad_norm": 0.09730450809001923, "learning_rate": 9.367686568298867e-06, "loss": 0.0054, "step": 29070 }, { "epoch": 0.491452810897138, "grad_norm": 0.10546234995126724, "learning_rate": 9.366968494591997e-06, "loss": 0.0044, "step": 29080 }, { "epoch": 0.49162181117598847, "grad_norm": 0.025785325095057487, "learning_rate": 9.366250040937713e-06, "loss": 0.0025, "step": 29090 }, { "epoch": 0.4917908114548389, "grad_norm": 0.06503026932477951, "learning_rate": 9.36553120739852e-06, "loss": 0.0029, "step": 29100 }, { "epoch": 0.49195981173368936, "grad_norm": 0.047761380672454834, "learning_rate": 9.36481199403696e-06, "loss": 0.0027, "step": 29110 }, { "epoch": 0.49212881201253983, "grad_norm": 0.05150453746318817, "learning_rate": 9.364092400915609e-06, "loss": 0.0016, "step": 29120 }, { "epoch": 0.4922978122913903, "grad_norm": 0.011763407848775387, "learning_rate": 9.363372428097075e-06, "loss": 0.0021, "step": 29130 }, { "epoch": 0.4924668125702407, "grad_norm": 0.16468170285224915, "learning_rate": 9.362652075643998e-06, "loss": 0.0031, "step": 29140 }, { "epoch": 0.4926358128490912, "grad_norm": 0.12033814936876297, "learning_rate": 9.361931343619053e-06, "loss": 0.002, "step": 29150 }, { "epoch": 0.49280481312794167, "grad_norm": 0.10892493277788162, "learning_rate": 9.361210232084946e-06, "loss": 0.0045, "step": 29160 }, { "epoch": 0.49297381340679214, "grad_norm": 0.03691662847995758, "learning_rate": 9.36048874110442e-06, "loss": 0.0037, "step": 29170 }, { "epoch": 0.49314281368564256, "grad_norm": 0.05275307223200798, "learning_rate": 9.359766870740246e-06, "loss": 0.0015, "step": 29180 }, { "epoch": 0.49331181396449303, "grad_norm": 0.02527184970676899, "learning_rate": 9.35904462105523e-06, "loss": 0.0017, "step": 29190 }, { "epoch": 0.4934808142433435, "grad_norm": 0.12803104519844055, "learning_rate": 9.358321992112212e-06, "loss": 0.0027, "step": 29200 }, { "epoch": 0.493649814522194, "grad_norm": 0.11234404146671295, "learning_rate": 9.357598983974066e-06, "loss": 0.0015, "step": 29210 }, { "epoch": 0.49381881480104445, "grad_norm": 0.04649844393134117, "learning_rate": 9.356875596703693e-06, "loss": 0.0029, "step": 29220 }, { "epoch": 0.49398781507989487, "grad_norm": 0.11385609209537506, "learning_rate": 9.356151830364035e-06, "loss": 0.0019, "step": 29230 }, { "epoch": 0.49415681535874534, "grad_norm": 0.12828443944454193, "learning_rate": 9.355427685018061e-06, "loss": 0.0043, "step": 29240 }, { "epoch": 0.4943258156375958, "grad_norm": 0.4393269121646881, "learning_rate": 9.354703160728774e-06, "loss": 0.0046, "step": 29250 }, { "epoch": 0.4944948159164463, "grad_norm": 0.2788711190223694, "learning_rate": 9.353978257559216e-06, "loss": 0.0027, "step": 29260 }, { "epoch": 0.4946638161952967, "grad_norm": 0.0992347002029419, "learning_rate": 9.353252975572453e-06, "loss": 0.0026, "step": 29270 }, { "epoch": 0.4948328164741472, "grad_norm": 0.08816232532262802, "learning_rate": 9.35252731483159e-06, "loss": 0.0026, "step": 29280 }, { "epoch": 0.49500181675299765, "grad_norm": 0.05659950524568558, "learning_rate": 9.351801275399761e-06, "loss": 0.0024, "step": 29290 }, { "epoch": 0.4951708170318481, "grad_norm": 0.1687520444393158, "learning_rate": 9.351074857340137e-06, "loss": 0.0039, "step": 29300 }, { "epoch": 0.49533981731069854, "grad_norm": 0.07601363956928253, "learning_rate": 9.350348060715917e-06, "loss": 0.0019, "step": 29310 }, { "epoch": 0.495508817589549, "grad_norm": 0.09154291450977325, "learning_rate": 9.34962088559034e-06, "loss": 0.0014, "step": 29320 }, { "epoch": 0.4956778178683995, "grad_norm": 0.10395083576440811, "learning_rate": 9.34889333202667e-06, "loss": 0.0026, "step": 29330 }, { "epoch": 0.49584681814724996, "grad_norm": 0.07902251183986664, "learning_rate": 9.34816540008821e-06, "loss": 0.0026, "step": 29340 }, { "epoch": 0.49601581842610043, "grad_norm": 0.18695111572742462, "learning_rate": 9.347437089838294e-06, "loss": 0.0039, "step": 29350 }, { "epoch": 0.49618481870495085, "grad_norm": 0.10042164474725723, "learning_rate": 9.346708401340285e-06, "loss": 0.0033, "step": 29360 }, { "epoch": 0.4963538189838013, "grad_norm": 0.0964827910065651, "learning_rate": 9.345979334657587e-06, "loss": 0.0028, "step": 29370 }, { "epoch": 0.4965228192626518, "grad_norm": 0.05580337718129158, "learning_rate": 9.34524988985363e-06, "loss": 0.0026, "step": 29380 }, { "epoch": 0.49669181954150227, "grad_norm": 0.06996355950832367, "learning_rate": 9.344520066991878e-06, "loss": 0.0017, "step": 29390 }, { "epoch": 0.4968608198203527, "grad_norm": 0.14571979641914368, "learning_rate": 9.34378986613583e-06, "loss": 0.0033, "step": 29400 }, { "epoch": 0.49702982009920316, "grad_norm": 0.04337066411972046, "learning_rate": 9.343059287349019e-06, "loss": 0.0015, "step": 29410 }, { "epoch": 0.49719882037805363, "grad_norm": 0.06206294894218445, "learning_rate": 9.342328330695005e-06, "loss": 0.003, "step": 29420 }, { "epoch": 0.4973678206569041, "grad_norm": 0.036904092878103256, "learning_rate": 9.34159699623739e-06, "loss": 0.0025, "step": 29430 }, { "epoch": 0.4975368209357545, "grad_norm": 0.24361184239387512, "learning_rate": 9.3408652840398e-06, "loss": 0.0031, "step": 29440 }, { "epoch": 0.497705821214605, "grad_norm": 0.045649971812963486, "learning_rate": 9.340133194165899e-06, "loss": 0.0034, "step": 29450 }, { "epoch": 0.49787482149345547, "grad_norm": 0.23143626749515533, "learning_rate": 9.33940072667938e-06, "loss": 0.0046, "step": 29460 }, { "epoch": 0.49804382177230594, "grad_norm": 0.012344293296337128, "learning_rate": 9.338667881643973e-06, "loss": 0.0021, "step": 29470 }, { "epoch": 0.4982128220511564, "grad_norm": 0.08606468141078949, "learning_rate": 9.33793465912344e-06, "loss": 0.0026, "step": 29480 }, { "epoch": 0.49838182233000683, "grad_norm": 0.15866480767726898, "learning_rate": 9.337201059181572e-06, "loss": 0.0019, "step": 29490 }, { "epoch": 0.4985508226088573, "grad_norm": 0.10740579664707184, "learning_rate": 9.336467081882202e-06, "loss": 0.0031, "step": 29500 }, { "epoch": 0.4987198228877078, "grad_norm": 0.06434217095375061, "learning_rate": 9.335732727289182e-06, "loss": 0.003, "step": 29510 }, { "epoch": 0.49888882316655825, "grad_norm": 0.1749943196773529, "learning_rate": 9.334997995466408e-06, "loss": 0.0027, "step": 29520 }, { "epoch": 0.49905782344540867, "grad_norm": 0.16625481843948364, "learning_rate": 9.334262886477804e-06, "loss": 0.0016, "step": 29530 }, { "epoch": 0.49922682372425914, "grad_norm": 0.19593088328838348, "learning_rate": 9.33352740038733e-06, "loss": 0.0015, "step": 29540 }, { "epoch": 0.4993958240031096, "grad_norm": 0.07355517894029617, "learning_rate": 9.332791537258977e-06, "loss": 0.002, "step": 29550 }, { "epoch": 0.4995648242819601, "grad_norm": 0.07157208025455475, "learning_rate": 9.332055297156763e-06, "loss": 0.0027, "step": 29560 }, { "epoch": 0.4997338245608105, "grad_norm": 0.10493667423725128, "learning_rate": 9.331318680144754e-06, "loss": 0.0043, "step": 29570 }, { "epoch": 0.499902824839661, "grad_norm": 0.10629831999540329, "learning_rate": 9.33058168628703e-06, "loss": 0.0026, "step": 29580 }, { "epoch": 0.5000718251185114, "grad_norm": 0.115446075797081, "learning_rate": 9.329844315647721e-06, "loss": 0.0026, "step": 29590 }, { "epoch": 0.5002408253973619, "grad_norm": 0.08104785531759262, "learning_rate": 9.329106568290976e-06, "loss": 0.0032, "step": 29600 }, { "epoch": 0.5004098256762124, "grad_norm": 0.07581508904695511, "learning_rate": 9.328368444280983e-06, "loss": 0.0025, "step": 29610 }, { "epoch": 0.5005788259550629, "grad_norm": 0.1760362833738327, "learning_rate": 9.327629943681966e-06, "loss": 0.0041, "step": 29620 }, { "epoch": 0.5007478262339133, "grad_norm": 0.0963960513472557, "learning_rate": 9.326891066558174e-06, "loss": 0.0047, "step": 29630 }, { "epoch": 0.5009168265127637, "grad_norm": 0.14027276635169983, "learning_rate": 9.326151812973898e-06, "loss": 0.0018, "step": 29640 }, { "epoch": 0.5010858267916142, "grad_norm": 0.2154625505208969, "learning_rate": 9.325412182993452e-06, "loss": 0.0023, "step": 29650 }, { "epoch": 0.5012548270704646, "grad_norm": 0.06819602102041245, "learning_rate": 9.324672176681189e-06, "loss": 0.0023, "step": 29660 }, { "epoch": 0.5014238273493151, "grad_norm": 0.047595296055078506, "learning_rate": 9.323931794101493e-06, "loss": 0.0014, "step": 29670 }, { "epoch": 0.5015928276281656, "grad_norm": 0.14946642518043518, "learning_rate": 9.32319103531878e-06, "loss": 0.0019, "step": 29680 }, { "epoch": 0.5017618279070161, "grad_norm": 0.10646338760852814, "learning_rate": 9.322449900397502e-06, "loss": 0.0024, "step": 29690 }, { "epoch": 0.5019308281858665, "grad_norm": 0.10761136561632156, "learning_rate": 9.32170838940214e-06, "loss": 0.003, "step": 29700 }, { "epoch": 0.502099828464717, "grad_norm": 0.15118083357810974, "learning_rate": 9.320966502397208e-06, "loss": 0.0034, "step": 29710 }, { "epoch": 0.5022688287435674, "grad_norm": 0.04990677163004875, "learning_rate": 9.320224239447256e-06, "loss": 0.0024, "step": 29720 }, { "epoch": 0.5024378290224178, "grad_norm": 0.10470911860466003, "learning_rate": 9.319481600616862e-06, "loss": 0.0022, "step": 29730 }, { "epoch": 0.5026068293012683, "grad_norm": 0.08429449051618576, "learning_rate": 9.318738585970642e-06, "loss": 0.0038, "step": 29740 }, { "epoch": 0.5027758295801188, "grad_norm": 0.056416355073451996, "learning_rate": 9.317995195573237e-06, "loss": 0.0027, "step": 29750 }, { "epoch": 0.5029448298589693, "grad_norm": 0.1960950344800949, "learning_rate": 9.31725142948933e-06, "loss": 0.0027, "step": 29760 }, { "epoch": 0.5031138301378197, "grad_norm": 0.06585361063480377, "learning_rate": 9.316507287783634e-06, "loss": 0.0029, "step": 29770 }, { "epoch": 0.5032828304166702, "grad_norm": 0.03832526504993439, "learning_rate": 9.315762770520887e-06, "loss": 0.0023, "step": 29780 }, { "epoch": 0.5034518306955207, "grad_norm": 0.07613620162010193, "learning_rate": 9.31501787776587e-06, "loss": 0.0015, "step": 29790 }, { "epoch": 0.5036208309743712, "grad_norm": 0.0848083421587944, "learning_rate": 9.31427260958339e-06, "loss": 0.0025, "step": 29800 }, { "epoch": 0.5037898312532215, "grad_norm": 0.12784676253795624, "learning_rate": 9.31352696603829e-06, "loss": 0.0025, "step": 29810 }, { "epoch": 0.503958831532072, "grad_norm": 0.05923425406217575, "learning_rate": 9.312780947195446e-06, "loss": 0.002, "step": 29820 }, { "epoch": 0.5041278318109225, "grad_norm": 0.0461299754679203, "learning_rate": 9.312034553119761e-06, "loss": 0.0031, "step": 29830 }, { "epoch": 0.5042968320897729, "grad_norm": 0.19783492386341095, "learning_rate": 9.311287783876181e-06, "loss": 0.0025, "step": 29840 }, { "epoch": 0.5044658323686234, "grad_norm": 0.02259715087711811, "learning_rate": 9.310540639529673e-06, "loss": 0.002, "step": 29850 }, { "epoch": 0.5046348326474739, "grad_norm": 0.11579112708568573, "learning_rate": 9.309793120145245e-06, "loss": 0.0024, "step": 29860 }, { "epoch": 0.5048038329263244, "grad_norm": 0.35851532220840454, "learning_rate": 9.309045225787933e-06, "loss": 0.0031, "step": 29870 }, { "epoch": 0.5049728332051748, "grad_norm": 0.09024239331483841, "learning_rate": 9.308296956522809e-06, "loss": 0.0035, "step": 29880 }, { "epoch": 0.5051418334840252, "grad_norm": 0.04510599747300148, "learning_rate": 9.307548312414975e-06, "loss": 0.0026, "step": 29890 }, { "epoch": 0.5053108337628757, "grad_norm": 0.013859073631465435, "learning_rate": 9.306799293529569e-06, "loss": 0.0019, "step": 29900 }, { "epoch": 0.5054798340417261, "grad_norm": 0.018246594816446304, "learning_rate": 9.306049899931755e-06, "loss": 0.0015, "step": 29910 }, { "epoch": 0.5056488343205766, "grad_norm": 0.02640846185386181, "learning_rate": 9.305300131686739e-06, "loss": 0.0023, "step": 29920 }, { "epoch": 0.5058178345994271, "grad_norm": 0.05299566313624382, "learning_rate": 9.30454998885975e-06, "loss": 0.0018, "step": 29930 }, { "epoch": 0.5059868348782776, "grad_norm": 0.06415662169456482, "learning_rate": 9.303799471516057e-06, "loss": 0.0021, "step": 29940 }, { "epoch": 0.506155835157128, "grad_norm": 0.05302102863788605, "learning_rate": 9.303048579720959e-06, "loss": 0.0026, "step": 29950 }, { "epoch": 0.5063248354359785, "grad_norm": 0.06410418450832367, "learning_rate": 9.302297313539783e-06, "loss": 0.0024, "step": 29960 }, { "epoch": 0.506493835714829, "grad_norm": 0.02581281214952469, "learning_rate": 9.301545673037898e-06, "loss": 0.0041, "step": 29970 }, { "epoch": 0.5066628359936793, "grad_norm": 0.11411970108747482, "learning_rate": 9.300793658280696e-06, "loss": 0.0023, "step": 29980 }, { "epoch": 0.5068318362725298, "grad_norm": 0.10526223480701447, "learning_rate": 9.300041269333609e-06, "loss": 0.0025, "step": 29990 }, { "epoch": 0.5070008365513803, "grad_norm": 0.06469008326530457, "learning_rate": 9.299288506262097e-06, "loss": 0.0032, "step": 30000 }, { "epoch": 0.5071698368302308, "grad_norm": 0.10594971477985382, "learning_rate": 9.298535369131654e-06, "loss": 0.0024, "step": 30010 }, { "epoch": 0.5073388371090812, "grad_norm": 0.12533098459243774, "learning_rate": 9.297781858007808e-06, "loss": 0.0032, "step": 30020 }, { "epoch": 0.5075078373879317, "grad_norm": 0.12551948428153992, "learning_rate": 9.297027972956116e-06, "loss": 0.0018, "step": 30030 }, { "epoch": 0.5076768376667822, "grad_norm": 0.035782407969236374, "learning_rate": 9.296273714042172e-06, "loss": 0.0037, "step": 30040 }, { "epoch": 0.5078458379456326, "grad_norm": 0.17860619723796844, "learning_rate": 9.295519081331598e-06, "loss": 0.0038, "step": 30050 }, { "epoch": 0.5080148382244831, "grad_norm": 0.1742449700832367, "learning_rate": 9.294764074890051e-06, "loss": 0.0031, "step": 30060 }, { "epoch": 0.5081838385033335, "grad_norm": 0.09593408554792404, "learning_rate": 9.294008694783223e-06, "loss": 0.0019, "step": 30070 }, { "epoch": 0.508352838782184, "grad_norm": 0.014105924405157566, "learning_rate": 9.293252941076832e-06, "loss": 0.0016, "step": 30080 }, { "epoch": 0.5085218390610344, "grad_norm": 0.1020912230014801, "learning_rate": 9.292496813836634e-06, "loss": 0.0022, "step": 30090 }, { "epoch": 0.5086908393398849, "grad_norm": 0.12688863277435303, "learning_rate": 9.291740313128416e-06, "loss": 0.0033, "step": 30100 }, { "epoch": 0.5088598396187354, "grad_norm": 0.0973917618393898, "learning_rate": 9.290983439017998e-06, "loss": 0.0024, "step": 30110 }, { "epoch": 0.5090288398975858, "grad_norm": 0.14230580627918243, "learning_rate": 9.290226191571228e-06, "loss": 0.0023, "step": 30120 }, { "epoch": 0.5091978401764363, "grad_norm": 0.13736513257026672, "learning_rate": 9.289468570853995e-06, "loss": 0.0063, "step": 30130 }, { "epoch": 0.5093668404552868, "grad_norm": 0.3065262734889984, "learning_rate": 9.288710576932211e-06, "loss": 0.0029, "step": 30140 }, { "epoch": 0.5095358407341372, "grad_norm": 0.03719579428434372, "learning_rate": 9.287952209871829e-06, "loss": 0.0034, "step": 30150 }, { "epoch": 0.5097048410129876, "grad_norm": 0.12979014217853546, "learning_rate": 9.287193469738828e-06, "loss": 0.0032, "step": 30160 }, { "epoch": 0.5098738412918381, "grad_norm": 0.025296248495578766, "learning_rate": 9.286434356599225e-06, "loss": 0.003, "step": 30170 }, { "epoch": 0.5100428415706886, "grad_norm": 0.02095586247742176, "learning_rate": 9.285674870519064e-06, "loss": 0.0024, "step": 30180 }, { "epoch": 0.510211841849539, "grad_norm": 0.0356566496193409, "learning_rate": 9.284915011564423e-06, "loss": 0.0015, "step": 30190 }, { "epoch": 0.5103808421283895, "grad_norm": 0.10899782180786133, "learning_rate": 9.284154779801417e-06, "loss": 0.0021, "step": 30200 }, { "epoch": 0.51054984240724, "grad_norm": 0.11044955253601074, "learning_rate": 9.28339417529619e-06, "loss": 0.0026, "step": 30210 }, { "epoch": 0.5107188426860905, "grad_norm": 0.05205506831407547, "learning_rate": 9.282633198114913e-06, "loss": 0.0023, "step": 30220 }, { "epoch": 0.5108878429649409, "grad_norm": 0.09147424250841141, "learning_rate": 9.281871848323797e-06, "loss": 0.0021, "step": 30230 }, { "epoch": 0.5110568432437913, "grad_norm": 0.07484003156423569, "learning_rate": 9.281110125989087e-06, "loss": 0.002, "step": 30240 }, { "epoch": 0.5112258435226418, "grad_norm": 0.09416460990905762, "learning_rate": 9.280348031177053e-06, "loss": 0.0025, "step": 30250 }, { "epoch": 0.5113948438014922, "grad_norm": 0.13577759265899658, "learning_rate": 9.279585563954002e-06, "loss": 0.0023, "step": 30260 }, { "epoch": 0.5115638440803427, "grad_norm": 0.08064518868923187, "learning_rate": 9.27882272438627e-06, "loss": 0.0035, "step": 30270 }, { "epoch": 0.5117328443591932, "grad_norm": 0.032194700092077255, "learning_rate": 9.278059512540229e-06, "loss": 0.0021, "step": 30280 }, { "epoch": 0.5119018446380437, "grad_norm": 0.032202497124671936, "learning_rate": 9.277295928482285e-06, "loss": 0.0027, "step": 30290 }, { "epoch": 0.5120708449168941, "grad_norm": 0.05126439407467842, "learning_rate": 9.27653197227887e-06, "loss": 0.0022, "step": 30300 }, { "epoch": 0.5122398451957446, "grad_norm": 0.06987911462783813, "learning_rate": 9.275767643996454e-06, "loss": 0.0029, "step": 30310 }, { "epoch": 0.5124088454745951, "grad_norm": 0.3274784982204437, "learning_rate": 9.275002943701537e-06, "loss": 0.0028, "step": 30320 }, { "epoch": 0.5125778457534454, "grad_norm": 0.10538214445114136, "learning_rate": 9.27423787146065e-06, "loss": 0.0025, "step": 30330 }, { "epoch": 0.5127468460322959, "grad_norm": 0.12387062609195709, "learning_rate": 9.273472427340357e-06, "loss": 0.0011, "step": 30340 }, { "epoch": 0.5129158463111464, "grad_norm": 0.09943099319934845, "learning_rate": 9.27270661140726e-06, "loss": 0.0022, "step": 30350 }, { "epoch": 0.5130848465899969, "grad_norm": 0.16701321303844452, "learning_rate": 9.271940423727986e-06, "loss": 0.0034, "step": 30360 }, { "epoch": 0.5132538468688473, "grad_norm": 0.16028109192848206, "learning_rate": 9.271173864369196e-06, "loss": 0.0031, "step": 30370 }, { "epoch": 0.5134228471476978, "grad_norm": 0.050224725157022476, "learning_rate": 9.270406933397587e-06, "loss": 0.0063, "step": 30380 }, { "epoch": 0.5135918474265483, "grad_norm": 0.06589037925004959, "learning_rate": 9.269639630879884e-06, "loss": 0.002, "step": 30390 }, { "epoch": 0.5137608477053988, "grad_norm": 0.24337688088417053, "learning_rate": 9.268871956882844e-06, "loss": 0.0051, "step": 30400 }, { "epoch": 0.5139298479842491, "grad_norm": 0.08122189342975616, "learning_rate": 9.268103911473262e-06, "loss": 0.0019, "step": 30410 }, { "epoch": 0.5140988482630996, "grad_norm": 0.14038895070552826, "learning_rate": 9.267335494717959e-06, "loss": 0.0034, "step": 30420 }, { "epoch": 0.5142678485419501, "grad_norm": 0.19843675196170807, "learning_rate": 9.266566706683795e-06, "loss": 0.0021, "step": 30430 }, { "epoch": 0.5144368488208005, "grad_norm": 0.09629879146814346, "learning_rate": 9.265797547437653e-06, "loss": 0.0037, "step": 30440 }, { "epoch": 0.514605849099651, "grad_norm": 0.13716882467269897, "learning_rate": 9.265028017046459e-06, "loss": 0.0029, "step": 30450 }, { "epoch": 0.5147748493785015, "grad_norm": 0.09429722279310226, "learning_rate": 9.26425811557716e-06, "loss": 0.0033, "step": 30460 }, { "epoch": 0.514943849657352, "grad_norm": 0.07236813008785248, "learning_rate": 9.263487843096746e-06, "loss": 0.0031, "step": 30470 }, { "epoch": 0.5151128499362024, "grad_norm": 0.07731340080499649, "learning_rate": 9.26271719967223e-06, "loss": 0.0013, "step": 30480 }, { "epoch": 0.5152818502150529, "grad_norm": 0.1405019909143448, "learning_rate": 9.261946185370668e-06, "loss": 0.0042, "step": 30490 }, { "epoch": 0.5154508504939033, "grad_norm": 0.04756263270974159, "learning_rate": 9.261174800259137e-06, "loss": 0.0042, "step": 30500 }, { "epoch": 0.5156198507727537, "grad_norm": 0.054839082062244415, "learning_rate": 9.26040304440475e-06, "loss": 0.0026, "step": 30510 }, { "epoch": 0.5157888510516042, "grad_norm": 0.01461437065154314, "learning_rate": 9.259630917874659e-06, "loss": 0.0021, "step": 30520 }, { "epoch": 0.5159578513304547, "grad_norm": 0.1616632491350174, "learning_rate": 9.25885842073604e-06, "loss": 0.0029, "step": 30530 }, { "epoch": 0.5161268516093052, "grad_norm": 0.07244772464036942, "learning_rate": 9.258085553056101e-06, "loss": 0.002, "step": 30540 }, { "epoch": 0.5162958518881556, "grad_norm": 0.023395681753754616, "learning_rate": 9.257312314902089e-06, "loss": 0.0019, "step": 30550 }, { "epoch": 0.5164648521670061, "grad_norm": 0.1325421929359436, "learning_rate": 9.256538706341279e-06, "loss": 0.0034, "step": 30560 }, { "epoch": 0.5166338524458566, "grad_norm": 0.0765732079744339, "learning_rate": 9.255764727440977e-06, "loss": 0.0029, "step": 30570 }, { "epoch": 0.5168028527247069, "grad_norm": 0.056597087532281876, "learning_rate": 9.254990378268527e-06, "loss": 0.0015, "step": 30580 }, { "epoch": 0.5169718530035574, "grad_norm": 0.13077285885810852, "learning_rate": 9.254215658891296e-06, "loss": 0.0063, "step": 30590 }, { "epoch": 0.5171408532824079, "grad_norm": 0.12355636805295944, "learning_rate": 9.25344056937669e-06, "loss": 0.0042, "step": 30600 }, { "epoch": 0.5173098535612584, "grad_norm": 0.17257574200630188, "learning_rate": 9.252665109792148e-06, "loss": 0.0047, "step": 30610 }, { "epoch": 0.5174788538401088, "grad_norm": 0.06446636468172073, "learning_rate": 9.251889280205135e-06, "loss": 0.0023, "step": 30620 }, { "epoch": 0.5176478541189593, "grad_norm": 0.032882850617170334, "learning_rate": 9.251113080683154e-06, "loss": 0.0034, "step": 30630 }, { "epoch": 0.5178168543978098, "grad_norm": 0.2541007399559021, "learning_rate": 9.250336511293738e-06, "loss": 0.0025, "step": 30640 }, { "epoch": 0.5179858546766603, "grad_norm": 0.07943384349346161, "learning_rate": 9.249559572104453e-06, "loss": 0.0023, "step": 30650 }, { "epoch": 0.5181548549555107, "grad_norm": 0.1908251792192459, "learning_rate": 9.248782263182896e-06, "loss": 0.0034, "step": 30660 }, { "epoch": 0.5183238552343611, "grad_norm": 0.0682080015540123, "learning_rate": 9.248004584596695e-06, "loss": 0.0019, "step": 30670 }, { "epoch": 0.5184928555132116, "grad_norm": 0.05401146411895752, "learning_rate": 9.247226536413514e-06, "loss": 0.0018, "step": 30680 }, { "epoch": 0.518661855792062, "grad_norm": 0.09396155923604965, "learning_rate": 9.246448118701044e-06, "loss": 0.0027, "step": 30690 }, { "epoch": 0.5188308560709125, "grad_norm": 0.11234533041715622, "learning_rate": 9.245669331527016e-06, "loss": 0.0018, "step": 30700 }, { "epoch": 0.518999856349763, "grad_norm": 0.11173015832901001, "learning_rate": 9.244890174959183e-06, "loss": 0.0031, "step": 30710 }, { "epoch": 0.5191688566286135, "grad_norm": 0.04353360831737518, "learning_rate": 9.24411064906534e-06, "loss": 0.0013, "step": 30720 }, { "epoch": 0.5193378569074639, "grad_norm": 0.0787418782711029, "learning_rate": 9.243330753913305e-06, "loss": 0.0034, "step": 30730 }, { "epoch": 0.5195068571863144, "grad_norm": 0.17237931489944458, "learning_rate": 9.242550489570935e-06, "loss": 0.0019, "step": 30740 }, { "epoch": 0.5196758574651649, "grad_norm": 0.08603113889694214, "learning_rate": 9.241769856106119e-06, "loss": 0.0026, "step": 30750 }, { "epoch": 0.5198448577440152, "grad_norm": 0.1021459698677063, "learning_rate": 9.240988853586774e-06, "loss": 0.0025, "step": 30760 }, { "epoch": 0.5200138580228657, "grad_norm": 0.14208252727985382, "learning_rate": 9.24020748208085e-06, "loss": 0.0032, "step": 30770 }, { "epoch": 0.5201828583017162, "grad_norm": 0.04353468865156174, "learning_rate": 9.23942574165633e-06, "loss": 0.0022, "step": 30780 }, { "epoch": 0.5203518585805667, "grad_norm": 0.053181033581495285, "learning_rate": 9.23864363238123e-06, "loss": 0.0013, "step": 30790 }, { "epoch": 0.5205208588594171, "grad_norm": 0.14026151597499847, "learning_rate": 9.237861154323596e-06, "loss": 0.0029, "step": 30800 }, { "epoch": 0.5206898591382676, "grad_norm": 0.006315328646451235, "learning_rate": 9.23707830755151e-06, "loss": 0.0037, "step": 30810 }, { "epoch": 0.5208588594171181, "grad_norm": 0.04784843325614929, "learning_rate": 9.236295092133083e-06, "loss": 0.004, "step": 30820 }, { "epoch": 0.5210278596959685, "grad_norm": 0.06517542898654938, "learning_rate": 9.235511508136456e-06, "loss": 0.0024, "step": 30830 }, { "epoch": 0.5211968599748189, "grad_norm": 0.38778600096702576, "learning_rate": 9.234727555629807e-06, "loss": 0.0029, "step": 30840 }, { "epoch": 0.5213658602536694, "grad_norm": 0.23829753696918488, "learning_rate": 9.233943234681345e-06, "loss": 0.0028, "step": 30850 }, { "epoch": 0.5215348605325199, "grad_norm": 0.06499862670898438, "learning_rate": 9.233158545359304e-06, "loss": 0.0025, "step": 30860 }, { "epoch": 0.5217038608113703, "grad_norm": 0.170320063829422, "learning_rate": 9.232373487731962e-06, "loss": 0.0045, "step": 30870 }, { "epoch": 0.5218728610902208, "grad_norm": 0.07646413147449493, "learning_rate": 9.231588061867619e-06, "loss": 0.0022, "step": 30880 }, { "epoch": 0.5220418613690713, "grad_norm": 0.010970628820359707, "learning_rate": 9.230802267834613e-06, "loss": 0.0032, "step": 30890 }, { "epoch": 0.5222108616479217, "grad_norm": 0.061763424426317215, "learning_rate": 9.23001610570131e-06, "loss": 0.0035, "step": 30900 }, { "epoch": 0.5223798619267722, "grad_norm": 0.050179943442344666, "learning_rate": 9.229229575536112e-06, "loss": 0.0063, "step": 30910 }, { "epoch": 0.5225488622056227, "grad_norm": 0.09505718946456909, "learning_rate": 9.228442677407448e-06, "loss": 0.0026, "step": 30920 }, { "epoch": 0.522717862484473, "grad_norm": 0.05893668159842491, "learning_rate": 9.227655411383785e-06, "loss": 0.0014, "step": 30930 }, { "epoch": 0.5228868627633235, "grad_norm": 0.040056515485048294, "learning_rate": 9.226867777533619e-06, "loss": 0.0031, "step": 30940 }, { "epoch": 0.523055863042174, "grad_norm": 0.406801700592041, "learning_rate": 9.226079775925475e-06, "loss": 0.003, "step": 30950 }, { "epoch": 0.5232248633210245, "grad_norm": 0.15354953706264496, "learning_rate": 9.225291406627914e-06, "loss": 0.0036, "step": 30960 }, { "epoch": 0.5233938635998749, "grad_norm": 0.026319092139601707, "learning_rate": 9.22450266970953e-06, "loss": 0.0024, "step": 30970 }, { "epoch": 0.5235628638787254, "grad_norm": 0.08271504193544388, "learning_rate": 9.223713565238947e-06, "loss": 0.003, "step": 30980 }, { "epoch": 0.5237318641575759, "grad_norm": 0.054755691438913345, "learning_rate": 9.222924093284818e-06, "loss": 0.0032, "step": 30990 }, { "epoch": 0.5239008644364264, "grad_norm": 0.05949968844652176, "learning_rate": 9.222134253915831e-06, "loss": 0.0026, "step": 31000 }, { "epoch": 0.5240698647152768, "grad_norm": 0.03801442310214043, "learning_rate": 9.221344047200709e-06, "loss": 0.0031, "step": 31010 }, { "epoch": 0.5242388649941272, "grad_norm": 0.027336781844496727, "learning_rate": 9.220553473208201e-06, "loss": 0.0044, "step": 31020 }, { "epoch": 0.5244078652729777, "grad_norm": 0.03937138244509697, "learning_rate": 9.219762532007093e-06, "loss": 0.002, "step": 31030 }, { "epoch": 0.5245768655518281, "grad_norm": 0.015671947970986366, "learning_rate": 9.218971223666197e-06, "loss": 0.0018, "step": 31040 }, { "epoch": 0.5247458658306786, "grad_norm": 0.05549074336886406, "learning_rate": 9.218179548254364e-06, "loss": 0.0046, "step": 31050 }, { "epoch": 0.5249148661095291, "grad_norm": 0.14241427183151245, "learning_rate": 9.217387505840473e-06, "loss": 0.0024, "step": 31060 }, { "epoch": 0.5250838663883796, "grad_norm": 0.04867669567465782, "learning_rate": 9.216595096493436e-06, "loss": 0.0028, "step": 31070 }, { "epoch": 0.52525286666723, "grad_norm": 0.09113096445798874, "learning_rate": 9.215802320282197e-06, "loss": 0.0031, "step": 31080 }, { "epoch": 0.5254218669460805, "grad_norm": 0.027196591719985008, "learning_rate": 9.215009177275729e-06, "loss": 0.0029, "step": 31090 }, { "epoch": 0.5255908672249309, "grad_norm": 0.06898143142461777, "learning_rate": 9.214215667543039e-06, "loss": 0.0032, "step": 31100 }, { "epoch": 0.5257598675037813, "grad_norm": 0.07773678749799728, "learning_rate": 9.21342179115317e-06, "loss": 0.0026, "step": 31110 }, { "epoch": 0.5259288677826318, "grad_norm": 0.12087377160787582, "learning_rate": 9.212627548175188e-06, "loss": 0.0028, "step": 31120 }, { "epoch": 0.5260978680614823, "grad_norm": 0.047073110938072205, "learning_rate": 9.2118329386782e-06, "loss": 0.0024, "step": 31130 }, { "epoch": 0.5262668683403328, "grad_norm": 0.05412263050675392, "learning_rate": 9.211037962731339e-06, "loss": 0.0022, "step": 31140 }, { "epoch": 0.5264358686191832, "grad_norm": 0.1352180540561676, "learning_rate": 9.210242620403773e-06, "loss": 0.0029, "step": 31150 }, { "epoch": 0.5266048688980337, "grad_norm": 0.11758013814687729, "learning_rate": 9.2094469117647e-06, "loss": 0.0023, "step": 31160 }, { "epoch": 0.5267738691768842, "grad_norm": 0.02222493104636669, "learning_rate": 9.20865083688335e-06, "loss": 0.0017, "step": 31170 }, { "epoch": 0.5269428694557347, "grad_norm": 0.10861939936876297, "learning_rate": 9.207854395828985e-06, "loss": 0.0024, "step": 31180 }, { "epoch": 0.527111869734585, "grad_norm": 0.11261676996946335, "learning_rate": 9.207057588670903e-06, "loss": 0.0026, "step": 31190 }, { "epoch": 0.5272808700134355, "grad_norm": 0.04202567785978317, "learning_rate": 9.206260415478425e-06, "loss": 0.0016, "step": 31200 }, { "epoch": 0.527449870292286, "grad_norm": 0.03397662192583084, "learning_rate": 9.20546287632091e-06, "loss": 0.0021, "step": 31210 }, { "epoch": 0.5276188705711364, "grad_norm": 0.004616164602339268, "learning_rate": 9.20466497126775e-06, "loss": 0.0017, "step": 31220 }, { "epoch": 0.5277878708499869, "grad_norm": 0.09319368749856949, "learning_rate": 9.203866700388366e-06, "loss": 0.0016, "step": 31230 }, { "epoch": 0.5279568711288374, "grad_norm": 0.16816814243793488, "learning_rate": 9.20306806375221e-06, "loss": 0.0027, "step": 31240 }, { "epoch": 0.5281258714076879, "grad_norm": 0.04165370762348175, "learning_rate": 9.202269061428768e-06, "loss": 0.0022, "step": 31250 }, { "epoch": 0.5282948716865383, "grad_norm": 0.0498676560819149, "learning_rate": 9.201469693487558e-06, "loss": 0.0022, "step": 31260 }, { "epoch": 0.5284638719653888, "grad_norm": 0.2199142873287201, "learning_rate": 9.200669959998128e-06, "loss": 0.0054, "step": 31270 }, { "epoch": 0.5286328722442392, "grad_norm": 0.08191534131765366, "learning_rate": 9.199869861030057e-06, "loss": 0.0017, "step": 31280 }, { "epoch": 0.5288018725230896, "grad_norm": 0.03749081864953041, "learning_rate": 9.199069396652963e-06, "loss": 0.0015, "step": 31290 }, { "epoch": 0.5289708728019401, "grad_norm": 0.4414420425891876, "learning_rate": 9.198268566936484e-06, "loss": 0.0022, "step": 31300 }, { "epoch": 0.5291398730807906, "grad_norm": 0.035784609615802765, "learning_rate": 9.197467371950296e-06, "loss": 0.0019, "step": 31310 }, { "epoch": 0.529308873359641, "grad_norm": 0.17147906124591827, "learning_rate": 9.196665811764114e-06, "loss": 0.0032, "step": 31320 }, { "epoch": 0.5294778736384915, "grad_norm": 0.17997874319553375, "learning_rate": 9.19586388644767e-06, "loss": 0.0016, "step": 31330 }, { "epoch": 0.529646873917342, "grad_norm": 0.13828697800636292, "learning_rate": 9.19506159607074e-06, "loss": 0.0024, "step": 31340 }, { "epoch": 0.5298158741961925, "grad_norm": 0.14012043178081512, "learning_rate": 9.194258940703125e-06, "loss": 0.0025, "step": 31350 }, { "epoch": 0.5299848744750428, "grad_norm": 0.18596386909484863, "learning_rate": 9.193455920414662e-06, "loss": 0.0017, "step": 31360 }, { "epoch": 0.5301538747538933, "grad_norm": 0.06577977538108826, "learning_rate": 9.192652535275215e-06, "loss": 0.0023, "step": 31370 }, { "epoch": 0.5303228750327438, "grad_norm": 0.12237042933702469, "learning_rate": 9.191848785354683e-06, "loss": 0.0028, "step": 31380 }, { "epoch": 0.5304918753115943, "grad_norm": 0.05757032707333565, "learning_rate": 9.191044670722998e-06, "loss": 0.0035, "step": 31390 }, { "epoch": 0.5306608755904447, "grad_norm": 0.0789322629570961, "learning_rate": 9.190240191450119e-06, "loss": 0.0046, "step": 31400 }, { "epoch": 0.5308298758692952, "grad_norm": 0.12790855765342712, "learning_rate": 9.189435347606043e-06, "loss": 0.0033, "step": 31410 }, { "epoch": 0.5309988761481457, "grad_norm": 0.05954355373978615, "learning_rate": 9.188630139260793e-06, "loss": 0.0019, "step": 31420 }, { "epoch": 0.5311678764269961, "grad_norm": 0.0388394370675087, "learning_rate": 9.187824566484425e-06, "loss": 0.0026, "step": 31430 }, { "epoch": 0.5313368767058466, "grad_norm": 0.16026939451694489, "learning_rate": 9.18701862934703e-06, "loss": 0.0037, "step": 31440 }, { "epoch": 0.531505876984697, "grad_norm": 0.0926368236541748, "learning_rate": 9.186212327918729e-06, "loss": 0.0043, "step": 31450 }, { "epoch": 0.5316748772635475, "grad_norm": 0.17913147807121277, "learning_rate": 9.185405662269671e-06, "loss": 0.0018, "step": 31460 }, { "epoch": 0.5318438775423979, "grad_norm": 0.03901561349630356, "learning_rate": 9.184598632470042e-06, "loss": 0.0026, "step": 31470 }, { "epoch": 0.5320128778212484, "grad_norm": 0.08872038125991821, "learning_rate": 9.183791238590057e-06, "loss": 0.002, "step": 31480 }, { "epoch": 0.5321818781000989, "grad_norm": 0.20491473376750946, "learning_rate": 9.182983480699965e-06, "loss": 0.0029, "step": 31490 }, { "epoch": 0.5323508783789493, "grad_norm": 0.04137842357158661, "learning_rate": 9.182175358870041e-06, "loss": 0.0014, "step": 31500 }, { "epoch": 0.5325198786577998, "grad_norm": 0.15952403843402863, "learning_rate": 9.181366873170599e-06, "loss": 0.0027, "step": 31510 }, { "epoch": 0.5326888789366503, "grad_norm": 0.10556403547525406, "learning_rate": 9.180558023671979e-06, "loss": 0.0028, "step": 31520 }, { "epoch": 0.5328578792155007, "grad_norm": 0.0331096388399601, "learning_rate": 9.179748810444557e-06, "loss": 0.0019, "step": 31530 }, { "epoch": 0.5330268794943511, "grad_norm": 0.07562128454446793, "learning_rate": 9.178939233558735e-06, "loss": 0.0017, "step": 31540 }, { "epoch": 0.5331958797732016, "grad_norm": 0.14268942177295685, "learning_rate": 9.178129293084954e-06, "loss": 0.0022, "step": 31550 }, { "epoch": 0.5333648800520521, "grad_norm": 0.12696851789951324, "learning_rate": 9.177318989093681e-06, "loss": 0.0026, "step": 31560 }, { "epoch": 0.5335338803309025, "grad_norm": 0.015331567265093327, "learning_rate": 9.176508321655415e-06, "loss": 0.0016, "step": 31570 }, { "epoch": 0.533702880609753, "grad_norm": 0.060741398483514786, "learning_rate": 9.17569729084069e-06, "loss": 0.0017, "step": 31580 }, { "epoch": 0.5338718808886035, "grad_norm": 0.007240627892315388, "learning_rate": 9.17488589672007e-06, "loss": 0.002, "step": 31590 }, { "epoch": 0.534040881167454, "grad_norm": 0.19343307614326477, "learning_rate": 9.174074139364148e-06, "loss": 0.0033, "step": 31600 }, { "epoch": 0.5342098814463044, "grad_norm": 0.009958441369235516, "learning_rate": 9.173262018843552e-06, "loss": 0.0031, "step": 31610 }, { "epoch": 0.5343788817251548, "grad_norm": 0.04688011482357979, "learning_rate": 9.172449535228941e-06, "loss": 0.0017, "step": 31620 }, { "epoch": 0.5345478820040053, "grad_norm": 0.11201368272304535, "learning_rate": 9.171636688591005e-06, "loss": 0.0017, "step": 31630 }, { "epoch": 0.5347168822828557, "grad_norm": 0.13468189537525177, "learning_rate": 9.170823479000464e-06, "loss": 0.0035, "step": 31640 }, { "epoch": 0.5348858825617062, "grad_norm": 0.05479159206151962, "learning_rate": 9.170009906528074e-06, "loss": 0.0021, "step": 31650 }, { "epoch": 0.5350548828405567, "grad_norm": 0.10649165511131287, "learning_rate": 9.169195971244615e-06, "loss": 0.0031, "step": 31660 }, { "epoch": 0.5352238831194072, "grad_norm": 0.08019914478063583, "learning_rate": 9.168381673220909e-06, "loss": 0.0021, "step": 31670 }, { "epoch": 0.5353928833982576, "grad_norm": 0.03295615315437317, "learning_rate": 9.1675670125278e-06, "loss": 0.0025, "step": 31680 }, { "epoch": 0.5355618836771081, "grad_norm": 0.10095320641994476, "learning_rate": 9.166751989236168e-06, "loss": 0.0028, "step": 31690 }, { "epoch": 0.5357308839559586, "grad_norm": 0.16818812489509583, "learning_rate": 9.165936603416925e-06, "loss": 0.0013, "step": 31700 }, { "epoch": 0.5358998842348089, "grad_norm": 0.16022954881191254, "learning_rate": 9.165120855141014e-06, "loss": 0.0049, "step": 31710 }, { "epoch": 0.5360688845136594, "grad_norm": 0.09274117648601532, "learning_rate": 9.164304744479406e-06, "loss": 0.0027, "step": 31720 }, { "epoch": 0.5362378847925099, "grad_norm": 0.04743310436606407, "learning_rate": 9.163488271503111e-06, "loss": 0.002, "step": 31730 }, { "epoch": 0.5364068850713604, "grad_norm": 0.12824346125125885, "learning_rate": 9.162671436283164e-06, "loss": 0.0022, "step": 31740 }, { "epoch": 0.5365758853502108, "grad_norm": 0.11546172946691513, "learning_rate": 9.16185423889063e-06, "loss": 0.0028, "step": 31750 }, { "epoch": 0.5367448856290613, "grad_norm": 0.28843066096305847, "learning_rate": 9.161036679396615e-06, "loss": 0.0032, "step": 31760 }, { "epoch": 0.5369138859079118, "grad_norm": 0.09800495207309723, "learning_rate": 9.160218757872248e-06, "loss": 0.0018, "step": 31770 }, { "epoch": 0.5370828861867623, "grad_norm": 0.05644332244992256, "learning_rate": 9.159400474388694e-06, "loss": 0.0022, "step": 31780 }, { "epoch": 0.5372518864656126, "grad_norm": 0.20425622165203094, "learning_rate": 9.158581829017143e-06, "loss": 0.0031, "step": 31790 }, { "epoch": 0.5374208867444631, "grad_norm": 0.026468642055988312, "learning_rate": 9.157762821828826e-06, "loss": 0.0026, "step": 31800 }, { "epoch": 0.5375898870233136, "grad_norm": 0.08805709332227707, "learning_rate": 9.156943452894998e-06, "loss": 0.0045, "step": 31810 }, { "epoch": 0.537758887302164, "grad_norm": 0.09152763336896896, "learning_rate": 9.156123722286949e-06, "loss": 0.0043, "step": 31820 }, { "epoch": 0.5379278875810145, "grad_norm": 0.10086562484502792, "learning_rate": 9.155303630076e-06, "loss": 0.0031, "step": 31830 }, { "epoch": 0.538096887859865, "grad_norm": 0.1953403353691101, "learning_rate": 9.154483176333502e-06, "loss": 0.0027, "step": 31840 }, { "epoch": 0.5382658881387155, "grad_norm": 0.05798371136188507, "learning_rate": 9.153662361130838e-06, "loss": 0.0029, "step": 31850 }, { "epoch": 0.5384348884175659, "grad_norm": 0.23227271437644958, "learning_rate": 9.152841184539423e-06, "loss": 0.0016, "step": 31860 }, { "epoch": 0.5386038886964164, "grad_norm": 0.03487158566713333, "learning_rate": 9.152019646630705e-06, "loss": 0.0031, "step": 31870 }, { "epoch": 0.5387728889752668, "grad_norm": 0.12952357530593872, "learning_rate": 9.15119774747616e-06, "loss": 0.0015, "step": 31880 }, { "epoch": 0.5389418892541172, "grad_norm": 0.049278002232313156, "learning_rate": 9.1503754871473e-06, "loss": 0.0025, "step": 31890 }, { "epoch": 0.5391108895329677, "grad_norm": 0.13247987627983093, "learning_rate": 9.14955286571566e-06, "loss": 0.003, "step": 31900 }, { "epoch": 0.5392798898118182, "grad_norm": 0.02803332731127739, "learning_rate": 9.148729883252818e-06, "loss": 0.0024, "step": 31910 }, { "epoch": 0.5394488900906687, "grad_norm": 0.17933359742164612, "learning_rate": 9.147906539830376e-06, "loss": 0.0027, "step": 31920 }, { "epoch": 0.5396178903695191, "grad_norm": 0.05666980519890785, "learning_rate": 9.147082835519967e-06, "loss": 0.0024, "step": 31930 }, { "epoch": 0.5397868906483696, "grad_norm": 0.02192831039428711, "learning_rate": 9.146258770393256e-06, "loss": 0.003, "step": 31940 }, { "epoch": 0.5399558909272201, "grad_norm": 0.058615997433662415, "learning_rate": 9.145434344521946e-06, "loss": 0.002, "step": 31950 }, { "epoch": 0.5401248912060705, "grad_norm": 0.09989965707063675, "learning_rate": 9.144609557977762e-06, "loss": 0.0026, "step": 31960 }, { "epoch": 0.5402938914849209, "grad_norm": 0.11751866340637207, "learning_rate": 9.143784410832465e-06, "loss": 0.0031, "step": 31970 }, { "epoch": 0.5404628917637714, "grad_norm": 0.07591240853071213, "learning_rate": 9.142958903157849e-06, "loss": 0.002, "step": 31980 }, { "epoch": 0.5406318920426219, "grad_norm": 0.145902618765831, "learning_rate": 9.142133035025733e-06, "loss": 0.0017, "step": 31990 }, { "epoch": 0.5408008923214723, "grad_norm": 0.16412098705768585, "learning_rate": 9.141306806507974e-06, "loss": 0.0031, "step": 32000 }, { "epoch": 0.5409698926003228, "grad_norm": 0.14947715401649475, "learning_rate": 9.140480217676458e-06, "loss": 0.003, "step": 32010 }, { "epoch": 0.5411388928791733, "grad_norm": 0.0731707289814949, "learning_rate": 9.139653268603102e-06, "loss": 0.0029, "step": 32020 }, { "epoch": 0.5413078931580237, "grad_norm": 0.15577909350395203, "learning_rate": 9.138825959359855e-06, "loss": 0.0021, "step": 32030 }, { "epoch": 0.5414768934368742, "grad_norm": 0.035175248980522156, "learning_rate": 9.137998290018697e-06, "loss": 0.0022, "step": 32040 }, { "epoch": 0.5416458937157246, "grad_norm": 0.08923418819904327, "learning_rate": 9.137170260651637e-06, "loss": 0.0024, "step": 32050 }, { "epoch": 0.541814893994575, "grad_norm": 0.0423198901116848, "learning_rate": 9.13634187133072e-06, "loss": 0.0012, "step": 32060 }, { "epoch": 0.5419838942734255, "grad_norm": 0.08710048347711563, "learning_rate": 9.13551312212802e-06, "loss": 0.0024, "step": 32070 }, { "epoch": 0.542152894552276, "grad_norm": 0.07039597630500793, "learning_rate": 9.13468401311564e-06, "loss": 0.0023, "step": 32080 }, { "epoch": 0.5423218948311265, "grad_norm": 0.03258507326245308, "learning_rate": 9.133854544365719e-06, "loss": 0.0018, "step": 32090 }, { "epoch": 0.542490895109977, "grad_norm": 0.04773247241973877, "learning_rate": 9.133024715950422e-06, "loss": 0.002, "step": 32100 }, { "epoch": 0.5426598953888274, "grad_norm": 0.013408832252025604, "learning_rate": 9.132194527941952e-06, "loss": 0.0012, "step": 32110 }, { "epoch": 0.5428288956676779, "grad_norm": 0.16576145589351654, "learning_rate": 9.131363980412536e-06, "loss": 0.002, "step": 32120 }, { "epoch": 0.5429978959465284, "grad_norm": 0.023107144981622696, "learning_rate": 9.130533073434438e-06, "loss": 0.0012, "step": 32130 }, { "epoch": 0.5431668962253787, "grad_norm": 0.09261453151702881, "learning_rate": 9.129701807079948e-06, "loss": 0.0018, "step": 32140 }, { "epoch": 0.5433358965042292, "grad_norm": 0.0984678715467453, "learning_rate": 9.128870181421392e-06, "loss": 0.0019, "step": 32150 }, { "epoch": 0.5435048967830797, "grad_norm": 0.1755990982055664, "learning_rate": 9.128038196531126e-06, "loss": 0.0031, "step": 32160 }, { "epoch": 0.5436738970619301, "grad_norm": 0.07609538733959198, "learning_rate": 9.127205852481536e-06, "loss": 0.0029, "step": 32170 }, { "epoch": 0.5438428973407806, "grad_norm": 0.07591988146305084, "learning_rate": 9.12637314934504e-06, "loss": 0.0029, "step": 32180 }, { "epoch": 0.5440118976196311, "grad_norm": 0.08974188566207886, "learning_rate": 9.125540087194089e-06, "loss": 0.004, "step": 32190 }, { "epoch": 0.5441808978984816, "grad_norm": 0.08013894408941269, "learning_rate": 9.124706666101159e-06, "loss": 0.0034, "step": 32200 }, { "epoch": 0.544349898177332, "grad_norm": 0.2126358449459076, "learning_rate": 9.123872886138765e-06, "loss": 0.0029, "step": 32210 }, { "epoch": 0.5445188984561824, "grad_norm": 0.19131335616111755, "learning_rate": 9.12303874737945e-06, "loss": 0.0036, "step": 32220 }, { "epoch": 0.5446878987350329, "grad_norm": 0.08860641717910767, "learning_rate": 9.122204249895787e-06, "loss": 0.0017, "step": 32230 }, { "epoch": 0.5448568990138833, "grad_norm": 0.045733124017715454, "learning_rate": 9.121369393760381e-06, "loss": 0.0016, "step": 32240 }, { "epoch": 0.5450258992927338, "grad_norm": 0.031239159405231476, "learning_rate": 9.120534179045872e-06, "loss": 0.0024, "step": 32250 }, { "epoch": 0.5451948995715843, "grad_norm": 0.07091531157493591, "learning_rate": 9.119698605824923e-06, "loss": 0.001, "step": 32260 }, { "epoch": 0.5453638998504348, "grad_norm": 0.06964509934186935, "learning_rate": 9.118862674170234e-06, "loss": 0.0044, "step": 32270 }, { "epoch": 0.5455329001292852, "grad_norm": 0.13213202357292175, "learning_rate": 9.118026384154537e-06, "loss": 0.0025, "step": 32280 }, { "epoch": 0.5457019004081357, "grad_norm": 0.13697420060634613, "learning_rate": 9.117189735850592e-06, "loss": 0.0026, "step": 32290 }, { "epoch": 0.5458709006869862, "grad_norm": 0.05169123411178589, "learning_rate": 9.116352729331192e-06, "loss": 0.0026, "step": 32300 }, { "epoch": 0.5460399009658365, "grad_norm": 0.0705161914229393, "learning_rate": 9.115515364669162e-06, "loss": 0.0014, "step": 32310 }, { "epoch": 0.546208901244687, "grad_norm": 0.19334140419960022, "learning_rate": 9.114677641937353e-06, "loss": 0.0024, "step": 32320 }, { "epoch": 0.5463779015235375, "grad_norm": 0.07870922982692719, "learning_rate": 9.113839561208653e-06, "loss": 0.0018, "step": 32330 }, { "epoch": 0.546546901802388, "grad_norm": 0.1923825740814209, "learning_rate": 9.113001122555981e-06, "loss": 0.0027, "step": 32340 }, { "epoch": 0.5467159020812384, "grad_norm": 0.06787336617708206, "learning_rate": 9.11216232605228e-06, "loss": 0.0016, "step": 32350 }, { "epoch": 0.5468849023600889, "grad_norm": 0.03022446669638157, "learning_rate": 9.111323171770535e-06, "loss": 0.0019, "step": 32360 }, { "epoch": 0.5470539026389394, "grad_norm": 0.1334880292415619, "learning_rate": 9.110483659783755e-06, "loss": 0.0025, "step": 32370 }, { "epoch": 0.5472229029177899, "grad_norm": 0.04222896322607994, "learning_rate": 9.109643790164979e-06, "loss": 0.0022, "step": 32380 }, { "epoch": 0.5473919031966403, "grad_norm": 0.11605576425790787, "learning_rate": 9.108803562987282e-06, "loss": 0.006, "step": 32390 }, { "epoch": 0.5475609034754907, "grad_norm": 0.09948180615901947, "learning_rate": 9.107962978323768e-06, "loss": 0.002, "step": 32400 }, { "epoch": 0.5477299037543412, "grad_norm": 0.06817879527807236, "learning_rate": 9.107122036247572e-06, "loss": 0.0028, "step": 32410 }, { "epoch": 0.5478989040331916, "grad_norm": 0.11131305992603302, "learning_rate": 9.106280736831858e-06, "loss": 0.0015, "step": 32420 }, { "epoch": 0.5480679043120421, "grad_norm": 0.05916478484869003, "learning_rate": 9.105439080149826e-06, "loss": 0.003, "step": 32430 }, { "epoch": 0.5482369045908926, "grad_norm": 0.10021656006574631, "learning_rate": 9.104597066274701e-06, "loss": 0.002, "step": 32440 }, { "epoch": 0.548405904869743, "grad_norm": 0.1359965056180954, "learning_rate": 9.103754695279746e-06, "loss": 0.0024, "step": 32450 }, { "epoch": 0.5485749051485935, "grad_norm": 0.0387590229511261, "learning_rate": 9.102911967238248e-06, "loss": 0.0029, "step": 32460 }, { "epoch": 0.548743905427444, "grad_norm": 0.10812094807624817, "learning_rate": 9.10206888222353e-06, "loss": 0.0037, "step": 32470 }, { "epoch": 0.5489129057062944, "grad_norm": 0.05625665932893753, "learning_rate": 9.101225440308944e-06, "loss": 0.0034, "step": 32480 }, { "epoch": 0.5490819059851448, "grad_norm": 0.06848200410604477, "learning_rate": 9.100381641567875e-06, "loss": 0.0019, "step": 32490 }, { "epoch": 0.5492509062639953, "grad_norm": 0.12069488316774368, "learning_rate": 9.099537486073736e-06, "loss": 0.0091, "step": 32500 }, { "epoch": 0.5494199065428458, "grad_norm": 0.044651955366134644, "learning_rate": 9.098692973899971e-06, "loss": 0.002, "step": 32510 }, { "epoch": 0.5495889068216963, "grad_norm": 0.1285020112991333, "learning_rate": 9.097848105120061e-06, "loss": 0.0029, "step": 32520 }, { "epoch": 0.5497579071005467, "grad_norm": 0.09652730077505112, "learning_rate": 9.09700287980751e-06, "loss": 0.0018, "step": 32530 }, { "epoch": 0.5499269073793972, "grad_norm": 0.06952265650033951, "learning_rate": 9.09615729803586e-06, "loss": 0.0015, "step": 32540 }, { "epoch": 0.5500959076582477, "grad_norm": 0.214644655585289, "learning_rate": 9.095311359878676e-06, "loss": 0.0023, "step": 32550 }, { "epoch": 0.5502649079370981, "grad_norm": 0.06943915039300919, "learning_rate": 9.094465065409563e-06, "loss": 0.0049, "step": 32560 }, { "epoch": 0.5504339082159485, "grad_norm": 0.06609531491994858, "learning_rate": 9.09361841470215e-06, "loss": 0.0019, "step": 32570 }, { "epoch": 0.550602908494799, "grad_norm": 0.13681089878082275, "learning_rate": 9.0927714078301e-06, "loss": 0.0021, "step": 32580 }, { "epoch": 0.5507719087736495, "grad_norm": 0.06775745004415512, "learning_rate": 9.09192404486711e-06, "loss": 0.003, "step": 32590 }, { "epoch": 0.5509409090524999, "grad_norm": 0.051594868302345276, "learning_rate": 9.091076325886903e-06, "loss": 0.0035, "step": 32600 }, { "epoch": 0.5511099093313504, "grad_norm": 0.018013061955571175, "learning_rate": 9.090228250963233e-06, "loss": 0.0026, "step": 32610 }, { "epoch": 0.5512789096102009, "grad_norm": 0.06551416218280792, "learning_rate": 9.089379820169886e-06, "loss": 0.0025, "step": 32620 }, { "epoch": 0.5514479098890513, "grad_norm": 0.25022977590560913, "learning_rate": 9.088531033580681e-06, "loss": 0.0041, "step": 32630 }, { "epoch": 0.5516169101679018, "grad_norm": 0.16517165303230286, "learning_rate": 9.087681891269469e-06, "loss": 0.0035, "step": 32640 }, { "epoch": 0.5517859104467523, "grad_norm": 0.24520257115364075, "learning_rate": 9.086832393310125e-06, "loss": 0.0021, "step": 32650 }, { "epoch": 0.5519549107256027, "grad_norm": 0.47518280148506165, "learning_rate": 9.085982539776563e-06, "loss": 0.001, "step": 32660 }, { "epoch": 0.5521239110044531, "grad_norm": 0.013922463171184063, "learning_rate": 9.085132330742723e-06, "loss": 0.003, "step": 32670 }, { "epoch": 0.5522929112833036, "grad_norm": 0.07399224489927292, "learning_rate": 9.084281766282578e-06, "loss": 0.0014, "step": 32680 }, { "epoch": 0.5524619115621541, "grad_norm": 0.08443097025156021, "learning_rate": 9.08343084647013e-06, "loss": 0.0023, "step": 32690 }, { "epoch": 0.5526309118410045, "grad_norm": 0.0952787771821022, "learning_rate": 9.082579571379413e-06, "loss": 0.0018, "step": 32700 }, { "epoch": 0.552799912119855, "grad_norm": 0.1248018890619278, "learning_rate": 9.081727941084493e-06, "loss": 0.0019, "step": 32710 }, { "epoch": 0.5529689123987055, "grad_norm": 0.12016724795103073, "learning_rate": 9.080875955659466e-06, "loss": 0.0022, "step": 32720 }, { "epoch": 0.553137912677556, "grad_norm": 0.04242364317178726, "learning_rate": 9.080023615178456e-06, "loss": 0.002, "step": 32730 }, { "epoch": 0.5533069129564063, "grad_norm": 0.05166156217455864, "learning_rate": 9.079170919715627e-06, "loss": 0.0038, "step": 32740 }, { "epoch": 0.5534759132352568, "grad_norm": 0.1383553296327591, "learning_rate": 9.078317869345161e-06, "loss": 0.0024, "step": 32750 }, { "epoch": 0.5536449135141073, "grad_norm": 0.022725990042090416, "learning_rate": 9.077464464141284e-06, "loss": 0.0024, "step": 32760 }, { "epoch": 0.5538139137929577, "grad_norm": 0.06411861628293991, "learning_rate": 9.07661070417824e-06, "loss": 0.0038, "step": 32770 }, { "epoch": 0.5539829140718082, "grad_norm": 0.00547699024900794, "learning_rate": 9.075756589530313e-06, "loss": 0.0019, "step": 32780 }, { "epoch": 0.5541519143506587, "grad_norm": 0.03633767366409302, "learning_rate": 9.074902120271816e-06, "loss": 0.0028, "step": 32790 }, { "epoch": 0.5543209146295092, "grad_norm": 0.1565227508544922, "learning_rate": 9.074047296477092e-06, "loss": 0.0027, "step": 32800 }, { "epoch": 0.5544899149083596, "grad_norm": 0.12397000938653946, "learning_rate": 9.073192118220513e-06, "loss": 0.0021, "step": 32810 }, { "epoch": 0.5546589151872101, "grad_norm": 0.0234407689422369, "learning_rate": 9.072336585576485e-06, "loss": 0.0032, "step": 32820 }, { "epoch": 0.5548279154660605, "grad_norm": 0.06488706916570663, "learning_rate": 9.071480698619442e-06, "loss": 0.0014, "step": 32830 }, { "epoch": 0.554996915744911, "grad_norm": 0.09103720635175705, "learning_rate": 9.070624457423853e-06, "loss": 0.0027, "step": 32840 }, { "epoch": 0.5551659160237614, "grad_norm": 0.09234441816806793, "learning_rate": 9.069767862064213e-06, "loss": 0.0029, "step": 32850 }, { "epoch": 0.5553349163026119, "grad_norm": 0.85475093126297, "learning_rate": 9.06891091261505e-06, "loss": 0.0029, "step": 32860 }, { "epoch": 0.5555039165814624, "grad_norm": 0.05765415355563164, "learning_rate": 9.068053609150924e-06, "loss": 0.0013, "step": 32870 }, { "epoch": 0.5556729168603128, "grad_norm": 0.05210142582654953, "learning_rate": 9.067195951746423e-06, "loss": 0.0034, "step": 32880 }, { "epoch": 0.5558419171391633, "grad_norm": 0.029150711372494698, "learning_rate": 9.066337940476171e-06, "loss": 0.0027, "step": 32890 }, { "epoch": 0.5560109174180138, "grad_norm": 0.33490970730781555, "learning_rate": 9.065479575414813e-06, "loss": 0.0023, "step": 32900 }, { "epoch": 0.5561799176968641, "grad_norm": 0.019613586366176605, "learning_rate": 9.064620856637035e-06, "loss": 0.0009, "step": 32910 }, { "epoch": 0.5563489179757146, "grad_norm": 0.06637836247682571, "learning_rate": 9.063761784217551e-06, "loss": 0.0027, "step": 32920 }, { "epoch": 0.5565179182545651, "grad_norm": 0.07676636427640915, "learning_rate": 9.0629023582311e-06, "loss": 0.0013, "step": 32930 }, { "epoch": 0.5566869185334156, "grad_norm": 0.04984048381447792, "learning_rate": 9.06204257875246e-06, "loss": 0.0021, "step": 32940 }, { "epoch": 0.556855918812266, "grad_norm": 0.11220373213291168, "learning_rate": 9.061182445856434e-06, "loss": 0.0024, "step": 32950 }, { "epoch": 0.5570249190911165, "grad_norm": 0.11472593247890472, "learning_rate": 9.060321959617857e-06, "loss": 0.0021, "step": 32960 }, { "epoch": 0.557193919369967, "grad_norm": 0.07053294032812119, "learning_rate": 9.059461120111598e-06, "loss": 0.0025, "step": 32970 }, { "epoch": 0.5573629196488175, "grad_norm": 0.03769908845424652, "learning_rate": 9.058599927412553e-06, "loss": 0.0015, "step": 32980 }, { "epoch": 0.5575319199276679, "grad_norm": 0.0317186638712883, "learning_rate": 9.057738381595651e-06, "loss": 0.0023, "step": 32990 }, { "epoch": 0.5577009202065183, "grad_norm": 0.04581945016980171, "learning_rate": 9.056876482735848e-06, "loss": 0.0021, "step": 33000 }, { "epoch": 0.5578699204853688, "grad_norm": 0.04762342572212219, "learning_rate": 9.056014230908135e-06, "loss": 0.0022, "step": 33010 }, { "epoch": 0.5580389207642192, "grad_norm": 0.026327649131417274, "learning_rate": 9.055151626187533e-06, "loss": 0.0034, "step": 33020 }, { "epoch": 0.5582079210430697, "grad_norm": 0.05236377194523811, "learning_rate": 9.054288668649092e-06, "loss": 0.0021, "step": 33030 }, { "epoch": 0.5583769213219202, "grad_norm": 0.014177908189594746, "learning_rate": 9.053425358367894e-06, "loss": 0.0026, "step": 33040 }, { "epoch": 0.5585459216007707, "grad_norm": 0.014622579328715801, "learning_rate": 9.05256169541905e-06, "loss": 0.0014, "step": 33050 }, { "epoch": 0.5587149218796211, "grad_norm": 0.06770174205303192, "learning_rate": 9.0516976798777e-06, "loss": 0.0023, "step": 33060 }, { "epoch": 0.5588839221584716, "grad_norm": 0.0714285597205162, "learning_rate": 9.050833311819025e-06, "loss": 0.001, "step": 33070 }, { "epoch": 0.5590529224373221, "grad_norm": 0.12493888288736343, "learning_rate": 9.049968591318224e-06, "loss": 0.0032, "step": 33080 }, { "epoch": 0.5592219227161724, "grad_norm": 0.24726036190986633, "learning_rate": 9.049103518450533e-06, "loss": 0.0018, "step": 33090 }, { "epoch": 0.5593909229950229, "grad_norm": 0.06651870161294937, "learning_rate": 9.048238093291218e-06, "loss": 0.0021, "step": 33100 }, { "epoch": 0.5595599232738734, "grad_norm": 0.03772980347275734, "learning_rate": 9.047372315915572e-06, "loss": 0.0025, "step": 33110 }, { "epoch": 0.5597289235527239, "grad_norm": 0.028655195608735085, "learning_rate": 9.046506186398926e-06, "loss": 0.0013, "step": 33120 }, { "epoch": 0.5598979238315743, "grad_norm": 0.1510440707206726, "learning_rate": 9.045639704816637e-06, "loss": 0.0032, "step": 33130 }, { "epoch": 0.5600669241104248, "grad_norm": 0.03013589419424534, "learning_rate": 9.044772871244093e-06, "loss": 0.0019, "step": 33140 }, { "epoch": 0.5602359243892753, "grad_norm": 0.01859009638428688, "learning_rate": 9.04390568575671e-06, "loss": 0.002, "step": 33150 }, { "epoch": 0.5604049246681257, "grad_norm": 0.1631273627281189, "learning_rate": 9.04303814842994e-06, "loss": 0.0036, "step": 33160 }, { "epoch": 0.5605739249469761, "grad_norm": 0.134274423122406, "learning_rate": 9.042170259339263e-06, "loss": 0.0019, "step": 33170 }, { "epoch": 0.5607429252258266, "grad_norm": 0.038943417370319366, "learning_rate": 9.041302018560187e-06, "loss": 0.0019, "step": 33180 }, { "epoch": 0.5609119255046771, "grad_norm": 0.10986550152301788, "learning_rate": 9.040433426168255e-06, "loss": 0.0015, "step": 33190 }, { "epoch": 0.5610809257835275, "grad_norm": 0.13235639035701752, "learning_rate": 9.039564482239039e-06, "loss": 0.0011, "step": 33200 }, { "epoch": 0.561249926062378, "grad_norm": 0.06512071937322617, "learning_rate": 9.038695186848141e-06, "loss": 0.0021, "step": 33210 }, { "epoch": 0.5614189263412285, "grad_norm": 0.14054960012435913, "learning_rate": 9.037825540071194e-06, "loss": 0.0025, "step": 33220 }, { "epoch": 0.561587926620079, "grad_norm": 0.07978338748216629, "learning_rate": 9.03695554198386e-06, "loss": 0.0044, "step": 33230 }, { "epoch": 0.5617569268989294, "grad_norm": 0.058655884116888046, "learning_rate": 9.036085192661837e-06, "loss": 0.004, "step": 33240 }, { "epoch": 0.5619259271777799, "grad_norm": 0.06768335402011871, "learning_rate": 9.035214492180843e-06, "loss": 0.0015, "step": 33250 }, { "epoch": 0.5620949274566303, "grad_norm": 0.16143356263637543, "learning_rate": 9.034343440616641e-06, "loss": 0.0025, "step": 33260 }, { "epoch": 0.5622639277354807, "grad_norm": 0.06286460161209106, "learning_rate": 9.033472038045011e-06, "loss": 0.0026, "step": 33270 }, { "epoch": 0.5624329280143312, "grad_norm": 0.05834538862109184, "learning_rate": 9.032600284541771e-06, "loss": 0.0028, "step": 33280 }, { "epoch": 0.5626019282931817, "grad_norm": 0.09082058072090149, "learning_rate": 9.031728180182768e-06, "loss": 0.003, "step": 33290 }, { "epoch": 0.5627709285720321, "grad_norm": 0.07078733295202255, "learning_rate": 9.03085572504388e-06, "loss": 0.0023, "step": 33300 }, { "epoch": 0.5629399288508826, "grad_norm": 0.078410305082798, "learning_rate": 9.029982919201012e-06, "loss": 0.0022, "step": 33310 }, { "epoch": 0.5631089291297331, "grad_norm": 0.04747273400425911, "learning_rate": 9.029109762730105e-06, "loss": 0.0024, "step": 33320 }, { "epoch": 0.5632779294085836, "grad_norm": 0.07112249732017517, "learning_rate": 9.02823625570713e-06, "loss": 0.0023, "step": 33330 }, { "epoch": 0.563446929687434, "grad_norm": 0.10954030603170395, "learning_rate": 9.02736239820808e-06, "loss": 0.0025, "step": 33340 }, { "epoch": 0.5636159299662844, "grad_norm": 0.13121449947357178, "learning_rate": 9.026488190308989e-06, "loss": 0.0015, "step": 33350 }, { "epoch": 0.5637849302451349, "grad_norm": 0.03465167433023453, "learning_rate": 9.025613632085919e-06, "loss": 0.0021, "step": 33360 }, { "epoch": 0.5639539305239853, "grad_norm": 0.055887263268232346, "learning_rate": 9.024738723614957e-06, "loss": 0.002, "step": 33370 }, { "epoch": 0.5641229308028358, "grad_norm": 0.25826144218444824, "learning_rate": 9.023863464972225e-06, "loss": 0.0025, "step": 33380 }, { "epoch": 0.5642919310816863, "grad_norm": 0.07254608720541, "learning_rate": 9.022987856233876e-06, "loss": 0.0093, "step": 33390 }, { "epoch": 0.5644609313605368, "grad_norm": 0.18888425827026367, "learning_rate": 9.022111897476092e-06, "loss": 0.0042, "step": 33400 }, { "epoch": 0.5646299316393872, "grad_norm": 0.11029987782239914, "learning_rate": 9.021235588775083e-06, "loss": 0.0026, "step": 33410 }, { "epoch": 0.5647989319182377, "grad_norm": 0.1506171077489853, "learning_rate": 9.020358930207098e-06, "loss": 0.0027, "step": 33420 }, { "epoch": 0.5649679321970881, "grad_norm": 0.007704961579293013, "learning_rate": 9.019481921848405e-06, "loss": 0.0023, "step": 33430 }, { "epoch": 0.5651369324759385, "grad_norm": 0.05094357952475548, "learning_rate": 9.01860456377531e-06, "loss": 0.0012, "step": 33440 }, { "epoch": 0.565305932754789, "grad_norm": 0.04711581766605377, "learning_rate": 9.017726856064148e-06, "loss": 0.0019, "step": 33450 }, { "epoch": 0.5654749330336395, "grad_norm": 0.14686250686645508, "learning_rate": 9.016848798791283e-06, "loss": 0.0022, "step": 33460 }, { "epoch": 0.56564393331249, "grad_norm": 0.0039303540252149105, "learning_rate": 9.01597039203311e-06, "loss": 0.0034, "step": 33470 }, { "epoch": 0.5658129335913404, "grad_norm": 0.01116950437426567, "learning_rate": 9.015091635866056e-06, "loss": 0.0018, "step": 33480 }, { "epoch": 0.5659819338701909, "grad_norm": 0.12378732860088348, "learning_rate": 9.014212530366575e-06, "loss": 0.0032, "step": 33490 }, { "epoch": 0.5661509341490414, "grad_norm": 0.3053177297115326, "learning_rate": 9.013333075611154e-06, "loss": 0.0037, "step": 33500 }, { "epoch": 0.5663199344278919, "grad_norm": 0.052048180252313614, "learning_rate": 9.012453271676311e-06, "loss": 0.0022, "step": 33510 }, { "epoch": 0.5664889347067422, "grad_norm": 0.14100271463394165, "learning_rate": 9.011573118638595e-06, "loss": 0.0034, "step": 33520 }, { "epoch": 0.5666579349855927, "grad_norm": 0.08364363014698029, "learning_rate": 9.01069261657458e-06, "loss": 0.0022, "step": 33530 }, { "epoch": 0.5668269352644432, "grad_norm": 0.09455575048923492, "learning_rate": 9.009811765560871e-06, "loss": 0.0028, "step": 33540 }, { "epoch": 0.5669959355432936, "grad_norm": 0.1075403094291687, "learning_rate": 9.008930565674115e-06, "loss": 0.0031, "step": 33550 }, { "epoch": 0.5671649358221441, "grad_norm": 0.08358623832464218, "learning_rate": 9.008049016990974e-06, "loss": 0.0021, "step": 33560 }, { "epoch": 0.5673339361009946, "grad_norm": 0.09312465786933899, "learning_rate": 9.00716711958815e-06, "loss": 0.0022, "step": 33570 }, { "epoch": 0.5675029363798451, "grad_norm": 0.23224703967571259, "learning_rate": 9.006284873542373e-06, "loss": 0.0019, "step": 33580 }, { "epoch": 0.5676719366586955, "grad_norm": 0.08911000937223434, "learning_rate": 9.005402278930398e-06, "loss": 0.0022, "step": 33590 }, { "epoch": 0.5678409369375459, "grad_norm": 0.04529863968491554, "learning_rate": 9.00451933582902e-06, "loss": 0.0016, "step": 33600 }, { "epoch": 0.5680099372163964, "grad_norm": 0.11283257603645325, "learning_rate": 9.003636044315056e-06, "loss": 0.0031, "step": 33610 }, { "epoch": 0.5681789374952468, "grad_norm": 0.04146464169025421, "learning_rate": 9.00275240446536e-06, "loss": 0.0029, "step": 33620 }, { "epoch": 0.5683479377740973, "grad_norm": 0.2886311113834381, "learning_rate": 9.00186841635681e-06, "loss": 0.0026, "step": 33630 }, { "epoch": 0.5685169380529478, "grad_norm": 0.03935188800096512, "learning_rate": 9.000984080066316e-06, "loss": 0.0031, "step": 33640 }, { "epoch": 0.5686859383317983, "grad_norm": 0.09145606309175491, "learning_rate": 9.000099395670826e-06, "loss": 0.0047, "step": 33650 }, { "epoch": 0.5688549386106487, "grad_norm": 0.11949055641889572, "learning_rate": 8.999214363247306e-06, "loss": 0.0031, "step": 33660 }, { "epoch": 0.5690239388894992, "grad_norm": 0.043543096631765366, "learning_rate": 8.99832898287276e-06, "loss": 0.0029, "step": 33670 }, { "epoch": 0.5691929391683497, "grad_norm": 0.08801903575658798, "learning_rate": 8.997443254624218e-06, "loss": 0.0028, "step": 33680 }, { "epoch": 0.5693619394472, "grad_norm": 0.2695542275905609, "learning_rate": 8.996557178578747e-06, "loss": 0.005, "step": 33690 }, { "epoch": 0.5695309397260505, "grad_norm": 0.06001931428909302, "learning_rate": 8.995670754813437e-06, "loss": 0.0024, "step": 33700 }, { "epoch": 0.569699940004901, "grad_norm": 0.031208530068397522, "learning_rate": 8.994783983405414e-06, "loss": 0.004, "step": 33710 }, { "epoch": 0.5698689402837515, "grad_norm": 0.03699713572859764, "learning_rate": 8.993896864431825e-06, "loss": 0.0039, "step": 33720 }, { "epoch": 0.5700379405626019, "grad_norm": 0.06721201539039612, "learning_rate": 8.993009397969861e-06, "loss": 0.0016, "step": 33730 }, { "epoch": 0.5702069408414524, "grad_norm": 0.13690564036369324, "learning_rate": 8.992121584096731e-06, "loss": 0.0017, "step": 33740 }, { "epoch": 0.5703759411203029, "grad_norm": 0.06848353147506714, "learning_rate": 8.991233422889683e-06, "loss": 0.0025, "step": 33750 }, { "epoch": 0.5705449413991533, "grad_norm": 0.06637344509363174, "learning_rate": 8.990344914425988e-06, "loss": 0.003, "step": 33760 }, { "epoch": 0.5707139416780038, "grad_norm": 0.07482563704252243, "learning_rate": 8.989456058782952e-06, "loss": 0.0019, "step": 33770 }, { "epoch": 0.5708829419568542, "grad_norm": 0.02254422754049301, "learning_rate": 8.988566856037908e-06, "loss": 0.0008, "step": 33780 }, { "epoch": 0.5710519422357047, "grad_norm": 0.1611790657043457, "learning_rate": 8.987677306268226e-06, "loss": 0.0023, "step": 33790 }, { "epoch": 0.5712209425145551, "grad_norm": 0.07324439287185669, "learning_rate": 8.986787409551294e-06, "loss": 0.0015, "step": 33800 }, { "epoch": 0.5713899427934056, "grad_norm": 0.36423519253730774, "learning_rate": 8.985897165964544e-06, "loss": 0.0025, "step": 33810 }, { "epoch": 0.5715589430722561, "grad_norm": 0.09435511380434036, "learning_rate": 8.985006575585426e-06, "loss": 0.002, "step": 33820 }, { "epoch": 0.5717279433511065, "grad_norm": 0.1214037612080574, "learning_rate": 8.984115638491428e-06, "loss": 0.0023, "step": 33830 }, { "epoch": 0.571896943629957, "grad_norm": 0.1913721114397049, "learning_rate": 8.983224354760068e-06, "loss": 0.0029, "step": 33840 }, { "epoch": 0.5720659439088075, "grad_norm": 0.026578059419989586, "learning_rate": 8.98233272446889e-06, "loss": 0.0027, "step": 33850 }, { "epoch": 0.5722349441876579, "grad_norm": 0.19247399270534515, "learning_rate": 8.981440747695467e-06, "loss": 0.0023, "step": 33860 }, { "epoch": 0.5724039444665083, "grad_norm": 0.1015344113111496, "learning_rate": 8.980548424517412e-06, "loss": 0.0046, "step": 33870 }, { "epoch": 0.5725729447453588, "grad_norm": 0.0901235044002533, "learning_rate": 8.979655755012355e-06, "loss": 0.0026, "step": 33880 }, { "epoch": 0.5727419450242093, "grad_norm": 0.21112436056137085, "learning_rate": 8.978762739257966e-06, "loss": 0.0032, "step": 33890 }, { "epoch": 0.5729109453030597, "grad_norm": 0.3893822431564331, "learning_rate": 8.977869377331944e-06, "loss": 0.0022, "step": 33900 }, { "epoch": 0.5730799455819102, "grad_norm": 0.19143308699131012, "learning_rate": 8.976975669312012e-06, "loss": 0.0022, "step": 33910 }, { "epoch": 0.5732489458607607, "grad_norm": 0.06548358500003815, "learning_rate": 8.976081615275924e-06, "loss": 0.007, "step": 33920 }, { "epoch": 0.5734179461396112, "grad_norm": 0.04744194447994232, "learning_rate": 8.975187215301475e-06, "loss": 0.0019, "step": 33930 }, { "epoch": 0.5735869464184616, "grad_norm": 0.07531792670488358, "learning_rate": 8.974292469466477e-06, "loss": 0.0022, "step": 33940 }, { "epoch": 0.573755946697312, "grad_norm": 0.05589604005217552, "learning_rate": 8.973397377848778e-06, "loss": 0.0023, "step": 33950 }, { "epoch": 0.5739249469761625, "grad_norm": 0.07739026844501495, "learning_rate": 8.972501940526256e-06, "loss": 0.0021, "step": 33960 }, { "epoch": 0.574093947255013, "grad_norm": 0.027860552072525024, "learning_rate": 8.971606157576818e-06, "loss": 0.0024, "step": 33970 }, { "epoch": 0.5742629475338634, "grad_norm": 0.034402504563331604, "learning_rate": 8.9707100290784e-06, "loss": 0.0018, "step": 33980 }, { "epoch": 0.5744319478127139, "grad_norm": 0.12433133274316788, "learning_rate": 8.969813555108972e-06, "loss": 0.0022, "step": 33990 }, { "epoch": 0.5746009480915644, "grad_norm": 0.0021321179810911417, "learning_rate": 8.96891673574653e-06, "loss": 0.0024, "step": 34000 }, { "epoch": 0.5747699483704148, "grad_norm": 0.09629344195127487, "learning_rate": 8.968019571069101e-06, "loss": 0.0029, "step": 34010 }, { "epoch": 0.5749389486492653, "grad_norm": 0.05528600513935089, "learning_rate": 8.967122061154747e-06, "loss": 0.002, "step": 34020 }, { "epoch": 0.5751079489281158, "grad_norm": 0.05657871440052986, "learning_rate": 8.96622420608155e-06, "loss": 0.0015, "step": 34030 }, { "epoch": 0.5752769492069661, "grad_norm": 0.03395133092999458, "learning_rate": 8.965326005927633e-06, "loss": 0.0018, "step": 34040 }, { "epoch": 0.5754459494858166, "grad_norm": 0.08920653909444809, "learning_rate": 8.96442746077114e-06, "loss": 0.0022, "step": 34050 }, { "epoch": 0.5756149497646671, "grad_norm": 0.06838784366846085, "learning_rate": 8.963528570690247e-06, "loss": 0.0017, "step": 34060 }, { "epoch": 0.5757839500435176, "grad_norm": 0.0616769976913929, "learning_rate": 8.962629335763166e-06, "loss": 0.0044, "step": 34070 }, { "epoch": 0.575952950322368, "grad_norm": 0.05100078135728836, "learning_rate": 8.961729756068135e-06, "loss": 0.003, "step": 34080 }, { "epoch": 0.5761219506012185, "grad_norm": 0.08292622119188309, "learning_rate": 8.96082983168342e-06, "loss": 0.0039, "step": 34090 }, { "epoch": 0.576290950880069, "grad_norm": 0.02434578351676464, "learning_rate": 8.959929562687317e-06, "loss": 0.0038, "step": 34100 }, { "epoch": 0.5764599511589195, "grad_norm": 0.10119115561246872, "learning_rate": 8.959028949158158e-06, "loss": 0.0018, "step": 34110 }, { "epoch": 0.5766289514377698, "grad_norm": 0.13875305652618408, "learning_rate": 8.958127991174298e-06, "loss": 0.0024, "step": 34120 }, { "epoch": 0.5767979517166203, "grad_norm": 0.15823498368263245, "learning_rate": 8.957226688814124e-06, "loss": 0.0014, "step": 34130 }, { "epoch": 0.5769669519954708, "grad_norm": 0.15111730992794037, "learning_rate": 8.956325042156055e-06, "loss": 0.0023, "step": 34140 }, { "epoch": 0.5771359522743212, "grad_norm": 0.1382988840341568, "learning_rate": 8.95542305127854e-06, "loss": 0.0045, "step": 34150 }, { "epoch": 0.5773049525531717, "grad_norm": 0.011094818823039532, "learning_rate": 8.954520716260054e-06, "loss": 0.0038, "step": 34160 }, { "epoch": 0.5774739528320222, "grad_norm": 0.043708279728889465, "learning_rate": 8.953618037179105e-06, "loss": 0.0016, "step": 34170 }, { "epoch": 0.5776429531108727, "grad_norm": 0.07289635390043259, "learning_rate": 8.952715014114231e-06, "loss": 0.0026, "step": 34180 }, { "epoch": 0.5778119533897231, "grad_norm": 0.03761967271566391, "learning_rate": 8.951811647144002e-06, "loss": 0.0019, "step": 34190 }, { "epoch": 0.5779809536685736, "grad_norm": 0.15979503095149994, "learning_rate": 8.95090793634701e-06, "loss": 0.0022, "step": 34200 }, { "epoch": 0.578149953947424, "grad_norm": 0.10363156348466873, "learning_rate": 8.950003881801885e-06, "loss": 0.0035, "step": 34210 }, { "epoch": 0.5783189542262744, "grad_norm": 0.1256801337003708, "learning_rate": 8.949099483587286e-06, "loss": 0.0016, "step": 34220 }, { "epoch": 0.5784879545051249, "grad_norm": 0.11050673574209213, "learning_rate": 8.948194741781897e-06, "loss": 0.0022, "step": 34230 }, { "epoch": 0.5786569547839754, "grad_norm": 0.06849426031112671, "learning_rate": 8.947289656464437e-06, "loss": 0.0023, "step": 34240 }, { "epoch": 0.5788259550628259, "grad_norm": 0.20660637319087982, "learning_rate": 8.94638422771365e-06, "loss": 0.0035, "step": 34250 }, { "epoch": 0.5789949553416763, "grad_norm": 0.13702276349067688, "learning_rate": 8.945478455608313e-06, "loss": 0.0029, "step": 34260 }, { "epoch": 0.5791639556205268, "grad_norm": 0.19022051990032196, "learning_rate": 8.944572340227239e-06, "loss": 0.0026, "step": 34270 }, { "epoch": 0.5793329558993773, "grad_norm": 0.01711823046207428, "learning_rate": 8.943665881649257e-06, "loss": 0.002, "step": 34280 }, { "epoch": 0.5795019561782276, "grad_norm": 0.03700932487845421, "learning_rate": 8.942759079953235e-06, "loss": 0.0019, "step": 34290 }, { "epoch": 0.5796709564570781, "grad_norm": 0.1668715626001358, "learning_rate": 8.941851935218072e-06, "loss": 0.0019, "step": 34300 }, { "epoch": 0.5798399567359286, "grad_norm": 0.0773654580116272, "learning_rate": 8.94094444752269e-06, "loss": 0.0025, "step": 34310 }, { "epoch": 0.5800089570147791, "grad_norm": 0.38987037539482117, "learning_rate": 8.940036616946047e-06, "loss": 0.0051, "step": 34320 }, { "epoch": 0.5801779572936295, "grad_norm": 0.03946467489004135, "learning_rate": 8.939128443567128e-06, "loss": 0.0013, "step": 34330 }, { "epoch": 0.58034695757248, "grad_norm": 0.036311425268650055, "learning_rate": 8.93821992746495e-06, "loss": 0.0021, "step": 34340 }, { "epoch": 0.5805159578513305, "grad_norm": 0.07707275450229645, "learning_rate": 8.937311068718557e-06, "loss": 0.002, "step": 34350 }, { "epoch": 0.580684958130181, "grad_norm": 0.0606757290661335, "learning_rate": 8.936401867407025e-06, "loss": 0.0022, "step": 34360 }, { "epoch": 0.5808539584090314, "grad_norm": 0.029825543984770775, "learning_rate": 8.935492323609457e-06, "loss": 0.0025, "step": 34370 }, { "epoch": 0.5810229586878818, "grad_norm": 0.09910440444946289, "learning_rate": 8.93458243740499e-06, "loss": 0.0018, "step": 34380 }, { "epoch": 0.5811919589667323, "grad_norm": 0.01993374340236187, "learning_rate": 8.933672208872786e-06, "loss": 0.0035, "step": 34390 }, { "epoch": 0.5813609592455827, "grad_norm": 0.2147493213415146, "learning_rate": 8.932761638092042e-06, "loss": 0.0027, "step": 34400 }, { "epoch": 0.5815299595244332, "grad_norm": 0.03388592600822449, "learning_rate": 8.931850725141979e-06, "loss": 0.0041, "step": 34410 }, { "epoch": 0.5816989598032837, "grad_norm": 0.18381360173225403, "learning_rate": 8.930939470101855e-06, "loss": 0.0034, "step": 34420 }, { "epoch": 0.5818679600821342, "grad_norm": 0.24191924929618835, "learning_rate": 8.93002787305095e-06, "loss": 0.0028, "step": 34430 }, { "epoch": 0.5820369603609846, "grad_norm": 0.19341941177845, "learning_rate": 8.92911593406858e-06, "loss": 0.003, "step": 34440 }, { "epoch": 0.5822059606398351, "grad_norm": 0.11159269511699677, "learning_rate": 8.928203653234084e-06, "loss": 0.002, "step": 34450 }, { "epoch": 0.5823749609186856, "grad_norm": 0.0494178868830204, "learning_rate": 8.927291030626842e-06, "loss": 0.002, "step": 34460 }, { "epoch": 0.5825439611975359, "grad_norm": 0.1472940593957901, "learning_rate": 8.926378066326247e-06, "loss": 0.003, "step": 34470 }, { "epoch": 0.5827129614763864, "grad_norm": 0.09544549882411957, "learning_rate": 8.925464760411739e-06, "loss": 0.0035, "step": 34480 }, { "epoch": 0.5828819617552369, "grad_norm": 0.09508192539215088, "learning_rate": 8.924551112962779e-06, "loss": 0.0026, "step": 34490 }, { "epoch": 0.5830509620340874, "grad_norm": 0.0262440275400877, "learning_rate": 8.923637124058854e-06, "loss": 0.0035, "step": 34500 }, { "epoch": 0.5832199623129378, "grad_norm": 0.07030729204416275, "learning_rate": 8.922722793779492e-06, "loss": 0.0027, "step": 34510 }, { "epoch": 0.5833889625917883, "grad_norm": 0.006284997798502445, "learning_rate": 8.921808122204239e-06, "loss": 0.0026, "step": 34520 }, { "epoch": 0.5835579628706388, "grad_norm": 0.03774796798825264, "learning_rate": 8.920893109412678e-06, "loss": 0.0027, "step": 34530 }, { "epoch": 0.5837269631494892, "grad_norm": 0.09865015000104904, "learning_rate": 8.91997775548442e-06, "loss": 0.003, "step": 34540 }, { "epoch": 0.5838959634283396, "grad_norm": 0.28293201327323914, "learning_rate": 8.919062060499105e-06, "loss": 0.0025, "step": 34550 }, { "epoch": 0.5840649637071901, "grad_norm": 0.06183357164263725, "learning_rate": 8.918146024536401e-06, "loss": 0.0015, "step": 34560 }, { "epoch": 0.5842339639860405, "grad_norm": 0.0637751892209053, "learning_rate": 8.917229647676009e-06, "loss": 0.0026, "step": 34570 }, { "epoch": 0.584402964264891, "grad_norm": 0.10728228837251663, "learning_rate": 8.916312929997659e-06, "loss": 0.0046, "step": 34580 }, { "epoch": 0.5845719645437415, "grad_norm": 0.13044388592243195, "learning_rate": 8.915395871581108e-06, "loss": 0.0018, "step": 34590 }, { "epoch": 0.584740964822592, "grad_norm": 0.18705812096595764, "learning_rate": 8.914478472506146e-06, "loss": 0.0022, "step": 34600 }, { "epoch": 0.5849099651014424, "grad_norm": 0.3285347819328308, "learning_rate": 8.913560732852592e-06, "loss": 0.0014, "step": 34610 }, { "epoch": 0.5850789653802929, "grad_norm": 0.2867709696292877, "learning_rate": 8.912642652700292e-06, "loss": 0.0028, "step": 34620 }, { "epoch": 0.5852479656591434, "grad_norm": 0.024372834712266922, "learning_rate": 8.911724232129124e-06, "loss": 0.0021, "step": 34630 }, { "epoch": 0.5854169659379937, "grad_norm": 0.031246446073055267, "learning_rate": 8.910805471218994e-06, "loss": 0.0015, "step": 34640 }, { "epoch": 0.5855859662168442, "grad_norm": 0.15418541431427002, "learning_rate": 8.90988637004984e-06, "loss": 0.0021, "step": 34650 }, { "epoch": 0.5857549664956947, "grad_norm": 0.21680156886577606, "learning_rate": 8.90896692870163e-06, "loss": 0.0018, "step": 34660 }, { "epoch": 0.5859239667745452, "grad_norm": 0.10197052359580994, "learning_rate": 8.908047147254356e-06, "loss": 0.0014, "step": 34670 }, { "epoch": 0.5860929670533956, "grad_norm": 0.1428227722644806, "learning_rate": 8.907127025788045e-06, "loss": 0.004, "step": 34680 }, { "epoch": 0.5862619673322461, "grad_norm": 0.02672131545841694, "learning_rate": 8.906206564382753e-06, "loss": 0.0021, "step": 34690 }, { "epoch": 0.5864309676110966, "grad_norm": 0.06392728537321091, "learning_rate": 8.905285763118565e-06, "loss": 0.002, "step": 34700 }, { "epoch": 0.5865999678899471, "grad_norm": 0.08085457235574722, "learning_rate": 8.904364622075593e-06, "loss": 0.0034, "step": 34710 }, { "epoch": 0.5867689681687975, "grad_norm": 0.1856919229030609, "learning_rate": 8.903443141333982e-06, "loss": 0.0027, "step": 34720 }, { "epoch": 0.5869379684476479, "grad_norm": 0.12695810198783875, "learning_rate": 8.902521320973905e-06, "loss": 0.0039, "step": 34730 }, { "epoch": 0.5871069687264984, "grad_norm": 0.043028876185417175, "learning_rate": 8.901599161075565e-06, "loss": 0.0015, "step": 34740 }, { "epoch": 0.5872759690053488, "grad_norm": 0.04045486077666283, "learning_rate": 8.900676661719195e-06, "loss": 0.002, "step": 34750 }, { "epoch": 0.5874449692841993, "grad_norm": 0.10769476741552353, "learning_rate": 8.899753822985054e-06, "loss": 0.0032, "step": 34760 }, { "epoch": 0.5876139695630498, "grad_norm": 0.0701104998588562, "learning_rate": 8.898830644953436e-06, "loss": 0.0017, "step": 34770 }, { "epoch": 0.5877829698419003, "grad_norm": 0.13843099772930145, "learning_rate": 8.897907127704663e-06, "loss": 0.0023, "step": 34780 }, { "epoch": 0.5879519701207507, "grad_norm": 0.12599579989910126, "learning_rate": 8.896983271319085e-06, "loss": 0.0022, "step": 34790 }, { "epoch": 0.5881209703996012, "grad_norm": 0.16534028947353363, "learning_rate": 8.89605907587708e-06, "loss": 0.0029, "step": 34800 }, { "epoch": 0.5882899706784516, "grad_norm": 0.06567218899726868, "learning_rate": 8.895134541459058e-06, "loss": 0.0019, "step": 34810 }, { "epoch": 0.588458970957302, "grad_norm": 0.283370703458786, "learning_rate": 8.894209668145459e-06, "loss": 0.002, "step": 34820 }, { "epoch": 0.5886279712361525, "grad_norm": 0.026961514726281166, "learning_rate": 8.89328445601675e-06, "loss": 0.0021, "step": 34830 }, { "epoch": 0.588796971515003, "grad_norm": 0.1643812507390976, "learning_rate": 8.89235890515343e-06, "loss": 0.002, "step": 34840 }, { "epoch": 0.5889659717938535, "grad_norm": 0.06348308175802231, "learning_rate": 8.891433015636028e-06, "loss": 0.0027, "step": 34850 }, { "epoch": 0.5891349720727039, "grad_norm": 0.042579248547554016, "learning_rate": 8.890506787545099e-06, "loss": 0.0018, "step": 34860 }, { "epoch": 0.5893039723515544, "grad_norm": 0.06588687747716904, "learning_rate": 8.889580220961228e-06, "loss": 0.0026, "step": 34870 }, { "epoch": 0.5894729726304049, "grad_norm": 0.13126246631145477, "learning_rate": 8.888653315965033e-06, "loss": 0.0015, "step": 34880 }, { "epoch": 0.5896419729092554, "grad_norm": 0.1396174430847168, "learning_rate": 8.887726072637159e-06, "loss": 0.0025, "step": 34890 }, { "epoch": 0.5898109731881057, "grad_norm": 0.1821875274181366, "learning_rate": 8.88679849105828e-06, "loss": 0.0028, "step": 34900 }, { "epoch": 0.5899799734669562, "grad_norm": 0.017610182985663414, "learning_rate": 8.8858705713091e-06, "loss": 0.0019, "step": 34910 }, { "epoch": 0.5901489737458067, "grad_norm": 0.14475102722644806, "learning_rate": 8.884942313470353e-06, "loss": 0.0019, "step": 34920 }, { "epoch": 0.5903179740246571, "grad_norm": 0.07700800150632858, "learning_rate": 8.884013717622802e-06, "loss": 0.0021, "step": 34930 }, { "epoch": 0.5904869743035076, "grad_norm": 0.08157818764448166, "learning_rate": 8.88308478384724e-06, "loss": 0.0021, "step": 34940 }, { "epoch": 0.5906559745823581, "grad_norm": 0.08447717875242233, "learning_rate": 8.882155512224486e-06, "loss": 0.0022, "step": 34950 }, { "epoch": 0.5908249748612086, "grad_norm": 0.10168961435556412, "learning_rate": 8.881225902835394e-06, "loss": 0.0019, "step": 34960 }, { "epoch": 0.590993975140059, "grad_norm": 0.160582035779953, "learning_rate": 8.880295955760842e-06, "loss": 0.0019, "step": 34970 }, { "epoch": 0.5911629754189094, "grad_norm": 0.11416268348693848, "learning_rate": 8.879365671081743e-06, "loss": 0.0021, "step": 34980 }, { "epoch": 0.5913319756977599, "grad_norm": 0.5007433891296387, "learning_rate": 8.878435048879034e-06, "loss": 0.005, "step": 34990 }, { "epoch": 0.5915009759766103, "grad_norm": 0.0692245215177536, "learning_rate": 8.877504089233685e-06, "loss": 0.0023, "step": 35000 }, { "epoch": 0.5916699762554608, "grad_norm": 0.06784098595380783, "learning_rate": 8.876572792226693e-06, "loss": 0.002, "step": 35010 }, { "epoch": 0.5918389765343113, "grad_norm": 0.10995849967002869, "learning_rate": 8.875641157939085e-06, "loss": 0.0018, "step": 35020 }, { "epoch": 0.5920079768131618, "grad_norm": 0.03497052937746048, "learning_rate": 8.87470918645192e-06, "loss": 0.0025, "step": 35030 }, { "epoch": 0.5921769770920122, "grad_norm": 0.07584590464830399, "learning_rate": 8.87377687784628e-06, "loss": 0.0015, "step": 35040 }, { "epoch": 0.5923459773708627, "grad_norm": 0.05822953209280968, "learning_rate": 8.872844232203284e-06, "loss": 0.0024, "step": 35050 }, { "epoch": 0.5925149776497132, "grad_norm": 0.09714366495609283, "learning_rate": 8.871911249604076e-06, "loss": 0.0032, "step": 35060 }, { "epoch": 0.5926839779285635, "grad_norm": 0.045151591300964355, "learning_rate": 8.870977930129828e-06, "loss": 0.0025, "step": 35070 }, { "epoch": 0.592852978207414, "grad_norm": 0.0487397275865078, "learning_rate": 8.870044273861746e-06, "loss": 0.0025, "step": 35080 }, { "epoch": 0.5930219784862645, "grad_norm": 0.03342911973595619, "learning_rate": 8.869110280881062e-06, "loss": 0.002, "step": 35090 }, { "epoch": 0.593190978765115, "grad_norm": 0.012296173721551895, "learning_rate": 8.868175951269036e-06, "loss": 0.0018, "step": 35100 }, { "epoch": 0.5933599790439654, "grad_norm": 0.12815845012664795, "learning_rate": 8.86724128510696e-06, "loss": 0.0038, "step": 35110 }, { "epoch": 0.5935289793228159, "grad_norm": 0.07406541705131531, "learning_rate": 8.866306282476155e-06, "loss": 0.0018, "step": 35120 }, { "epoch": 0.5936979796016664, "grad_norm": 0.0447167307138443, "learning_rate": 8.865370943457973e-06, "loss": 0.0027, "step": 35130 }, { "epoch": 0.5938669798805168, "grad_norm": 0.11929440498352051, "learning_rate": 8.864435268133789e-06, "loss": 0.0019, "step": 35140 }, { "epoch": 0.5940359801593673, "grad_norm": 0.11725620925426483, "learning_rate": 8.863499256585012e-06, "loss": 0.0017, "step": 35150 }, { "epoch": 0.5942049804382177, "grad_norm": 0.02146654948592186, "learning_rate": 8.862562908893081e-06, "loss": 0.0033, "step": 35160 }, { "epoch": 0.5943739807170682, "grad_norm": 0.027879882603883743, "learning_rate": 8.861626225139465e-06, "loss": 0.002, "step": 35170 }, { "epoch": 0.5945429809959186, "grad_norm": 0.04303164780139923, "learning_rate": 8.860689205405655e-06, "loss": 0.0024, "step": 35180 }, { "epoch": 0.5947119812747691, "grad_norm": 0.08739340305328369, "learning_rate": 8.859751849773179e-06, "loss": 0.0033, "step": 35190 }, { "epoch": 0.5948809815536196, "grad_norm": 0.08744668960571289, "learning_rate": 8.858814158323593e-06, "loss": 0.002, "step": 35200 }, { "epoch": 0.59504998183247, "grad_norm": 0.07492510974407196, "learning_rate": 8.857876131138476e-06, "loss": 0.003, "step": 35210 }, { "epoch": 0.5952189821113205, "grad_norm": 0.08802341669797897, "learning_rate": 8.856937768299445e-06, "loss": 0.0015, "step": 35220 }, { "epoch": 0.595387982390171, "grad_norm": 0.11602721363306046, "learning_rate": 8.855999069888141e-06, "loss": 0.0025, "step": 35230 }, { "epoch": 0.5955569826690214, "grad_norm": 0.07800030708312988, "learning_rate": 8.855060035986235e-06, "loss": 0.0012, "step": 35240 }, { "epoch": 0.5957259829478718, "grad_norm": 0.052270859479904175, "learning_rate": 8.85412066667543e-06, "loss": 0.0015, "step": 35250 }, { "epoch": 0.5958949832267223, "grad_norm": 0.04862003028392792, "learning_rate": 8.85318096203745e-06, "loss": 0.0024, "step": 35260 }, { "epoch": 0.5960639835055728, "grad_norm": 0.7330689430236816, "learning_rate": 8.852240922154059e-06, "loss": 0.0024, "step": 35270 }, { "epoch": 0.5962329837844232, "grad_norm": 0.056989800184965134, "learning_rate": 8.851300547107042e-06, "loss": 0.0022, "step": 35280 }, { "epoch": 0.5964019840632737, "grad_norm": 0.09635084867477417, "learning_rate": 8.850359836978218e-06, "loss": 0.0023, "step": 35290 }, { "epoch": 0.5965709843421242, "grad_norm": 0.10977847874164581, "learning_rate": 8.849418791849433e-06, "loss": 0.002, "step": 35300 }, { "epoch": 0.5967399846209747, "grad_norm": 0.03430016711354256, "learning_rate": 8.84847741180256e-06, "loss": 0.0021, "step": 35310 }, { "epoch": 0.5969089848998251, "grad_norm": 0.026696454733610153, "learning_rate": 8.847535696919509e-06, "loss": 0.0018, "step": 35320 }, { "epoch": 0.5970779851786755, "grad_norm": 0.15002593398094177, "learning_rate": 8.84659364728221e-06, "loss": 0.0024, "step": 35330 }, { "epoch": 0.597246985457526, "grad_norm": 0.05427820608019829, "learning_rate": 8.845651262972625e-06, "loss": 0.0024, "step": 35340 }, { "epoch": 0.5974159857363764, "grad_norm": 0.10184963792562485, "learning_rate": 8.844708544072749e-06, "loss": 0.0015, "step": 35350 }, { "epoch": 0.5975849860152269, "grad_norm": 0.07781493663787842, "learning_rate": 8.843765490664601e-06, "loss": 0.0029, "step": 35360 }, { "epoch": 0.5977539862940774, "grad_norm": 0.11765439808368683, "learning_rate": 8.842822102830233e-06, "loss": 0.0042, "step": 35370 }, { "epoch": 0.5979229865729279, "grad_norm": 0.29873672127723694, "learning_rate": 8.841878380651721e-06, "loss": 0.0016, "step": 35380 }, { "epoch": 0.5980919868517783, "grad_norm": 0.059736013412475586, "learning_rate": 8.840934324211178e-06, "loss": 0.0019, "step": 35390 }, { "epoch": 0.5982609871306288, "grad_norm": 0.08907829970121384, "learning_rate": 8.839989933590738e-06, "loss": 0.0029, "step": 35400 }, { "epoch": 0.5984299874094793, "grad_norm": 0.05392741784453392, "learning_rate": 8.839045208872568e-06, "loss": 0.0032, "step": 35410 }, { "epoch": 0.5985989876883296, "grad_norm": 0.033009592443704605, "learning_rate": 8.838100150138864e-06, "loss": 0.0016, "step": 35420 }, { "epoch": 0.5987679879671801, "grad_norm": 0.07848300039768219, "learning_rate": 8.83715475747185e-06, "loss": 0.0028, "step": 35430 }, { "epoch": 0.5989369882460306, "grad_norm": 0.08762569725513458, "learning_rate": 8.836209030953784e-06, "loss": 0.0029, "step": 35440 }, { "epoch": 0.5991059885248811, "grad_norm": 0.03356378898024559, "learning_rate": 8.835262970666943e-06, "loss": 0.0021, "step": 35450 }, { "epoch": 0.5992749888037315, "grad_norm": 0.09711670875549316, "learning_rate": 8.834316576693642e-06, "loss": 0.0026, "step": 35460 }, { "epoch": 0.599443989082582, "grad_norm": 0.05829253047704697, "learning_rate": 8.83336984911622e-06, "loss": 0.0022, "step": 35470 }, { "epoch": 0.5996129893614325, "grad_norm": 0.09206686168909073, "learning_rate": 8.83242278801705e-06, "loss": 0.0022, "step": 35480 }, { "epoch": 0.599781989640283, "grad_norm": 0.05280447378754616, "learning_rate": 8.831475393478529e-06, "loss": 0.0018, "step": 35490 }, { "epoch": 0.5999509899191333, "grad_norm": 0.1475137323141098, "learning_rate": 8.830527665583083e-06, "loss": 0.0032, "step": 35500 }, { "epoch": 0.6001199901979838, "grad_norm": 0.04320276901125908, "learning_rate": 8.829579604413172e-06, "loss": 0.0028, "step": 35510 }, { "epoch": 0.6002889904768343, "grad_norm": 0.07844381034374237, "learning_rate": 8.82863121005128e-06, "loss": 0.0019, "step": 35520 }, { "epoch": 0.6004579907556847, "grad_norm": 0.07419198751449585, "learning_rate": 8.827682482579923e-06, "loss": 0.0019, "step": 35530 }, { "epoch": 0.6006269910345352, "grad_norm": 0.09416607767343521, "learning_rate": 8.826733422081644e-06, "loss": 0.0023, "step": 35540 }, { "epoch": 0.6007959913133857, "grad_norm": 0.020704537630081177, "learning_rate": 8.825784028639016e-06, "loss": 0.002, "step": 35550 }, { "epoch": 0.6009649915922362, "grad_norm": 0.15058861672878265, "learning_rate": 8.824834302334641e-06, "loss": 0.002, "step": 35560 }, { "epoch": 0.6011339918710866, "grad_norm": 0.32956650853157043, "learning_rate": 8.823884243251152e-06, "loss": 0.0032, "step": 35570 }, { "epoch": 0.6013029921499371, "grad_norm": 0.24377502501010895, "learning_rate": 8.822933851471205e-06, "loss": 0.0047, "step": 35580 }, { "epoch": 0.6014719924287875, "grad_norm": 0.11893951892852783, "learning_rate": 8.821983127077492e-06, "loss": 0.0018, "step": 35590 }, { "epoch": 0.6016409927076379, "grad_norm": 0.1588331013917923, "learning_rate": 8.821032070152726e-06, "loss": 0.0021, "step": 35600 }, { "epoch": 0.6018099929864884, "grad_norm": 0.04255605861544609, "learning_rate": 8.820080680779659e-06, "loss": 0.0031, "step": 35610 }, { "epoch": 0.6019789932653389, "grad_norm": 0.09891010075807571, "learning_rate": 8.819128959041064e-06, "loss": 0.002, "step": 35620 }, { "epoch": 0.6021479935441894, "grad_norm": 0.16634584963321686, "learning_rate": 8.818176905019744e-06, "loss": 0.0045, "step": 35630 }, { "epoch": 0.6023169938230398, "grad_norm": 0.16242848336696625, "learning_rate": 8.817224518798535e-06, "loss": 0.003, "step": 35640 }, { "epoch": 0.6024859941018903, "grad_norm": 0.028616510331630707, "learning_rate": 8.816271800460297e-06, "loss": 0.0044, "step": 35650 }, { "epoch": 0.6026549943807408, "grad_norm": 0.02689216285943985, "learning_rate": 8.815318750087923e-06, "loss": 0.0032, "step": 35660 }, { "epoch": 0.6028239946595911, "grad_norm": 0.044815417379140854, "learning_rate": 8.814365367764332e-06, "loss": 0.0016, "step": 35670 }, { "epoch": 0.6029929949384416, "grad_norm": 0.06668156385421753, "learning_rate": 8.813411653572473e-06, "loss": 0.0014, "step": 35680 }, { "epoch": 0.6031619952172921, "grad_norm": 0.10036733001470566, "learning_rate": 8.812457607595324e-06, "loss": 0.002, "step": 35690 }, { "epoch": 0.6033309954961426, "grad_norm": 0.1022668406367302, "learning_rate": 8.811503229915889e-06, "loss": 0.0024, "step": 35700 }, { "epoch": 0.603499995774993, "grad_norm": 0.04607870429754257, "learning_rate": 8.81054852061721e-06, "loss": 0.0033, "step": 35710 }, { "epoch": 0.6036689960538435, "grad_norm": 0.04547217860817909, "learning_rate": 8.809593479782343e-06, "loss": 0.0022, "step": 35720 }, { "epoch": 0.603837996332694, "grad_norm": 0.16747385263442993, "learning_rate": 8.808638107494388e-06, "loss": 0.0034, "step": 35730 }, { "epoch": 0.6040069966115444, "grad_norm": 0.10530160367488861, "learning_rate": 8.807682403836464e-06, "loss": 0.0081, "step": 35740 }, { "epoch": 0.6041759968903949, "grad_norm": 0.12396273016929626, "learning_rate": 8.806726368891723e-06, "loss": 0.0028, "step": 35750 }, { "epoch": 0.6043449971692453, "grad_norm": 0.13143059611320496, "learning_rate": 8.80577000274334e-06, "loss": 0.0032, "step": 35760 }, { "epoch": 0.6045139974480958, "grad_norm": 0.17417097091674805, "learning_rate": 8.804813305474533e-06, "loss": 0.0022, "step": 35770 }, { "epoch": 0.6046829977269462, "grad_norm": 0.24094782769680023, "learning_rate": 8.80385627716853e-06, "loss": 0.0025, "step": 35780 }, { "epoch": 0.6048519980057967, "grad_norm": 0.16232720017433167, "learning_rate": 8.802898917908601e-06, "loss": 0.0037, "step": 35790 }, { "epoch": 0.6050209982846472, "grad_norm": 0.040980543941259384, "learning_rate": 8.801941227778042e-06, "loss": 0.0019, "step": 35800 }, { "epoch": 0.6051899985634976, "grad_norm": 0.08733880519866943, "learning_rate": 8.800983206860173e-06, "loss": 0.0037, "step": 35810 }, { "epoch": 0.6053589988423481, "grad_norm": 0.0999969094991684, "learning_rate": 8.800024855238351e-06, "loss": 0.0022, "step": 35820 }, { "epoch": 0.6055279991211986, "grad_norm": 0.2854050099849701, "learning_rate": 8.799066172995955e-06, "loss": 0.0032, "step": 35830 }, { "epoch": 0.6056969994000491, "grad_norm": 0.06074146181344986, "learning_rate": 8.798107160216394e-06, "loss": 0.0016, "step": 35840 }, { "epoch": 0.6058659996788994, "grad_norm": 0.27209043502807617, "learning_rate": 8.79714781698311e-06, "loss": 0.0023, "step": 35850 }, { "epoch": 0.6060349999577499, "grad_norm": 0.1067693680524826, "learning_rate": 8.796188143379565e-06, "loss": 0.0028, "step": 35860 }, { "epoch": 0.6062040002366004, "grad_norm": 0.12572214007377625, "learning_rate": 8.79522813948926e-06, "loss": 0.0015, "step": 35870 }, { "epoch": 0.6063730005154508, "grad_norm": 0.08308408409357071, "learning_rate": 8.79426780539572e-06, "loss": 0.0026, "step": 35880 }, { "epoch": 0.6065420007943013, "grad_norm": 0.053814616054296494, "learning_rate": 8.793307141182496e-06, "loss": 0.0037, "step": 35890 }, { "epoch": 0.6067110010731518, "grad_norm": 0.15410888195037842, "learning_rate": 8.792346146933172e-06, "loss": 0.0011, "step": 35900 }, { "epoch": 0.6068800013520023, "grad_norm": 0.02498161606490612, "learning_rate": 8.791384822731358e-06, "loss": 0.0027, "step": 35910 }, { "epoch": 0.6070490016308527, "grad_norm": 0.10394150763750076, "learning_rate": 8.790423168660695e-06, "loss": 0.0032, "step": 35920 }, { "epoch": 0.6072180019097031, "grad_norm": 0.02300155907869339, "learning_rate": 8.789461184804853e-06, "loss": 0.0021, "step": 35930 }, { "epoch": 0.6073870021885536, "grad_norm": 0.061770204454660416, "learning_rate": 8.788498871247526e-06, "loss": 0.0034, "step": 35940 }, { "epoch": 0.607556002467404, "grad_norm": 0.07594380527734756, "learning_rate": 8.787536228072442e-06, "loss": 0.0021, "step": 35950 }, { "epoch": 0.6077250027462545, "grad_norm": 0.10542141646146774, "learning_rate": 8.786573255363355e-06, "loss": 0.0016, "step": 35960 }, { "epoch": 0.607894003025105, "grad_norm": 0.14784705638885498, "learning_rate": 8.785609953204047e-06, "loss": 0.003, "step": 35970 }, { "epoch": 0.6080630033039555, "grad_norm": 0.034316789358854294, "learning_rate": 8.784646321678332e-06, "loss": 0.0016, "step": 35980 }, { "epoch": 0.6082320035828059, "grad_norm": 0.10311736911535263, "learning_rate": 8.783682360870052e-06, "loss": 0.0018, "step": 35990 }, { "epoch": 0.6084010038616564, "grad_norm": 0.11101002246141434, "learning_rate": 8.782718070863072e-06, "loss": 0.0023, "step": 36000 }, { "epoch": 0.6085700041405069, "grad_norm": 0.029666326940059662, "learning_rate": 8.781753451741295e-06, "loss": 0.0023, "step": 36010 }, { "epoch": 0.6087390044193572, "grad_norm": 0.07296469062566757, "learning_rate": 8.780788503588642e-06, "loss": 0.0028, "step": 36020 }, { "epoch": 0.6089080046982077, "grad_norm": 0.0563102550804615, "learning_rate": 8.77982322648907e-06, "loss": 0.0008, "step": 36030 }, { "epoch": 0.6090770049770582, "grad_norm": 0.0819505974650383, "learning_rate": 8.778857620526566e-06, "loss": 0.0026, "step": 36040 }, { "epoch": 0.6092460052559087, "grad_norm": 0.021597327664494514, "learning_rate": 8.77789168578514e-06, "loss": 0.0014, "step": 36050 }, { "epoch": 0.6094150055347591, "grad_norm": 0.07086276262998581, "learning_rate": 8.776925422348833e-06, "loss": 0.0016, "step": 36060 }, { "epoch": 0.6095840058136096, "grad_norm": 0.06087581813335419, "learning_rate": 8.775958830301713e-06, "loss": 0.0024, "step": 36070 }, { "epoch": 0.6097530060924601, "grad_norm": 0.264974981546402, "learning_rate": 8.77499190972788e-06, "loss": 0.005, "step": 36080 }, { "epoch": 0.6099220063713106, "grad_norm": 0.29691457748413086, "learning_rate": 8.774024660711462e-06, "loss": 0.0027, "step": 36090 }, { "epoch": 0.610091006650161, "grad_norm": 0.1828441023826599, "learning_rate": 8.773057083336612e-06, "loss": 0.0026, "step": 36100 }, { "epoch": 0.6102600069290114, "grad_norm": 0.06337442249059677, "learning_rate": 8.772089177687516e-06, "loss": 0.0033, "step": 36110 }, { "epoch": 0.6104290072078619, "grad_norm": 0.11669375747442245, "learning_rate": 8.771120943848384e-06, "loss": 0.0013, "step": 36120 }, { "epoch": 0.6105980074867123, "grad_norm": 0.20372651517391205, "learning_rate": 8.77015238190346e-06, "loss": 0.0023, "step": 36130 }, { "epoch": 0.6107670077655628, "grad_norm": 0.24548251926898956, "learning_rate": 8.76918349193701e-06, "loss": 0.005, "step": 36140 }, { "epoch": 0.6109360080444133, "grad_norm": 0.12593404948711395, "learning_rate": 8.768214274033334e-06, "loss": 0.0025, "step": 36150 }, { "epoch": 0.6111050083232638, "grad_norm": 0.1450646072626114, "learning_rate": 8.76724472827676e-06, "loss": 0.0019, "step": 36160 }, { "epoch": 0.6112740086021142, "grad_norm": 0.11664585769176483, "learning_rate": 8.766274854751642e-06, "loss": 0.0019, "step": 36170 }, { "epoch": 0.6114430088809647, "grad_norm": 0.20358210802078247, "learning_rate": 8.765304653542362e-06, "loss": 0.0031, "step": 36180 }, { "epoch": 0.6116120091598151, "grad_norm": 0.08079025894403458, "learning_rate": 8.764334124733336e-06, "loss": 0.0043, "step": 36190 }, { "epoch": 0.6117810094386655, "grad_norm": 0.18731054663658142, "learning_rate": 8.763363268409002e-06, "loss": 0.003, "step": 36200 }, { "epoch": 0.611950009717516, "grad_norm": 0.023311221972107887, "learning_rate": 8.762392084653829e-06, "loss": 0.004, "step": 36210 }, { "epoch": 0.6121190099963665, "grad_norm": 0.029529789462685585, "learning_rate": 8.761420573552314e-06, "loss": 0.0028, "step": 36220 }, { "epoch": 0.612288010275217, "grad_norm": 0.05020727589726448, "learning_rate": 8.760448735188987e-06, "loss": 0.0056, "step": 36230 }, { "epoch": 0.6124570105540674, "grad_norm": 0.04046209901571274, "learning_rate": 8.7594765696484e-06, "loss": 0.0025, "step": 36240 }, { "epoch": 0.6126260108329179, "grad_norm": 0.04337368533015251, "learning_rate": 8.758504077015136e-06, "loss": 0.0034, "step": 36250 }, { "epoch": 0.6127950111117684, "grad_norm": 0.05420010909438133, "learning_rate": 8.757531257373806e-06, "loss": 0.0022, "step": 36260 }, { "epoch": 0.6129640113906188, "grad_norm": 0.016965752467513084, "learning_rate": 8.756558110809052e-06, "loss": 0.0043, "step": 36270 }, { "epoch": 0.6131330116694692, "grad_norm": 0.08924896270036697, "learning_rate": 8.755584637405541e-06, "loss": 0.0031, "step": 36280 }, { "epoch": 0.6133020119483197, "grad_norm": 0.017866497859358788, "learning_rate": 8.75461083724797e-06, "loss": 0.0023, "step": 36290 }, { "epoch": 0.6134710122271702, "grad_norm": 0.10966131091117859, "learning_rate": 8.753636710421067e-06, "loss": 0.0036, "step": 36300 }, { "epoch": 0.6136400125060206, "grad_norm": 0.022142933681607246, "learning_rate": 8.75266225700958e-06, "loss": 0.0024, "step": 36310 }, { "epoch": 0.6138090127848711, "grad_norm": 0.11732402443885803, "learning_rate": 8.751687477098296e-06, "loss": 0.002, "step": 36320 }, { "epoch": 0.6139780130637216, "grad_norm": 0.07567041367292404, "learning_rate": 8.750712370772025e-06, "loss": 0.0013, "step": 36330 }, { "epoch": 0.614147013342572, "grad_norm": 0.07736122608184814, "learning_rate": 8.749736938115605e-06, "loss": 0.0032, "step": 36340 }, { "epoch": 0.6143160136214225, "grad_norm": 0.023870807141065598, "learning_rate": 8.748761179213903e-06, "loss": 0.0017, "step": 36350 }, { "epoch": 0.6144850139002729, "grad_norm": 0.16306602954864502, "learning_rate": 8.747785094151815e-06, "loss": 0.0043, "step": 36360 }, { "epoch": 0.6146540141791234, "grad_norm": 0.08596381545066833, "learning_rate": 8.746808683014266e-06, "loss": 0.005, "step": 36370 }, { "epoch": 0.6148230144579738, "grad_norm": 0.16764451563358307, "learning_rate": 8.745831945886207e-06, "loss": 0.0028, "step": 36380 }, { "epoch": 0.6149920147368243, "grad_norm": 0.054501231759786606, "learning_rate": 8.74485488285262e-06, "loss": 0.0017, "step": 36390 }, { "epoch": 0.6151610150156748, "grad_norm": 0.07855256646871567, "learning_rate": 8.743877493998514e-06, "loss": 0.0025, "step": 36400 }, { "epoch": 0.6153300152945252, "grad_norm": 0.14877623319625854, "learning_rate": 8.742899779408926e-06, "loss": 0.0018, "step": 36410 }, { "epoch": 0.6154990155733757, "grad_norm": 0.03570941835641861, "learning_rate": 8.741921739168923e-06, "loss": 0.0049, "step": 36420 }, { "epoch": 0.6156680158522262, "grad_norm": 0.11335615813732147, "learning_rate": 8.7409433733636e-06, "loss": 0.0023, "step": 36430 }, { "epoch": 0.6158370161310767, "grad_norm": 0.23981893062591553, "learning_rate": 8.739964682078076e-06, "loss": 0.0026, "step": 36440 }, { "epoch": 0.616006016409927, "grad_norm": 0.09835696965456009, "learning_rate": 8.738985665397505e-06, "loss": 0.0034, "step": 36450 }, { "epoch": 0.6161750166887775, "grad_norm": 0.08674298226833344, "learning_rate": 8.738006323407064e-06, "loss": 0.0084, "step": 36460 }, { "epoch": 0.616344016967628, "grad_norm": 0.04812869057059288, "learning_rate": 8.737026656191964e-06, "loss": 0.0019, "step": 36470 }, { "epoch": 0.6165130172464784, "grad_norm": 0.0043332562781870365, "learning_rate": 8.736046663837439e-06, "loss": 0.0029, "step": 36480 }, { "epoch": 0.6166820175253289, "grad_norm": 0.10359267890453339, "learning_rate": 8.735066346428751e-06, "loss": 0.0017, "step": 36490 }, { "epoch": 0.6168510178041794, "grad_norm": 0.06938648223876953, "learning_rate": 8.734085704051194e-06, "loss": 0.0025, "step": 36500 }, { "epoch": 0.6170200180830299, "grad_norm": 0.09565950185060501, "learning_rate": 8.733104736790088e-06, "loss": 0.0028, "step": 36510 }, { "epoch": 0.6171890183618803, "grad_norm": 0.19001342356204987, "learning_rate": 8.732123444730785e-06, "loss": 0.0013, "step": 36520 }, { "epoch": 0.6173580186407308, "grad_norm": 0.22249983251094818, "learning_rate": 8.731141827958659e-06, "loss": 0.0024, "step": 36530 }, { "epoch": 0.6175270189195812, "grad_norm": 0.06101768836379051, "learning_rate": 8.730159886559116e-06, "loss": 0.0018, "step": 36540 }, { "epoch": 0.6176960191984316, "grad_norm": 0.05670422315597534, "learning_rate": 8.729177620617588e-06, "loss": 0.0024, "step": 36550 }, { "epoch": 0.6178650194772821, "grad_norm": 0.10868832468986511, "learning_rate": 8.72819503021954e-06, "loss": 0.0022, "step": 36560 }, { "epoch": 0.6180340197561326, "grad_norm": 0.1556289792060852, "learning_rate": 8.727212115450462e-06, "loss": 0.0029, "step": 36570 }, { "epoch": 0.6182030200349831, "grad_norm": 0.06256671994924545, "learning_rate": 8.72622887639587e-06, "loss": 0.0023, "step": 36580 }, { "epoch": 0.6183720203138335, "grad_norm": 0.04170137643814087, "learning_rate": 8.725245313141313e-06, "loss": 0.0015, "step": 36590 }, { "epoch": 0.618541020592684, "grad_norm": 0.07276473194360733, "learning_rate": 8.724261425772362e-06, "loss": 0.0034, "step": 36600 }, { "epoch": 0.6187100208715345, "grad_norm": 0.08967496454715729, "learning_rate": 8.723277214374625e-06, "loss": 0.0022, "step": 36610 }, { "epoch": 0.6188790211503848, "grad_norm": 0.051659129559993744, "learning_rate": 8.722292679033731e-06, "loss": 0.0023, "step": 36620 }, { "epoch": 0.6190480214292353, "grad_norm": 0.04656399413943291, "learning_rate": 8.721307819835336e-06, "loss": 0.0017, "step": 36630 }, { "epoch": 0.6192170217080858, "grad_norm": 0.025773653760552406, "learning_rate": 8.720322636865132e-06, "loss": 0.0016, "step": 36640 }, { "epoch": 0.6193860219869363, "grad_norm": 0.08894651383161545, "learning_rate": 8.719337130208833e-06, "loss": 0.0028, "step": 36650 }, { "epoch": 0.6195550222657867, "grad_norm": 0.05207120627164841, "learning_rate": 8.718351299952185e-06, "loss": 0.0017, "step": 36660 }, { "epoch": 0.6197240225446372, "grad_norm": 0.1420729160308838, "learning_rate": 8.717365146180956e-06, "loss": 0.0025, "step": 36670 }, { "epoch": 0.6198930228234877, "grad_norm": 0.105345718562603, "learning_rate": 8.71637866898095e-06, "loss": 0.0026, "step": 36680 }, { "epoch": 0.6200620231023382, "grad_norm": 0.16853417456150055, "learning_rate": 8.715391868437992e-06, "loss": 0.0017, "step": 36690 }, { "epoch": 0.6202310233811886, "grad_norm": 0.07919266819953918, "learning_rate": 8.714404744637938e-06, "loss": 0.0018, "step": 36700 }, { "epoch": 0.620400023660039, "grad_norm": 0.05420541763305664, "learning_rate": 8.713417297666678e-06, "loss": 0.0035, "step": 36710 }, { "epoch": 0.6205690239388895, "grad_norm": 0.12405819445848465, "learning_rate": 8.71242952761012e-06, "loss": 0.0014, "step": 36720 }, { "epoch": 0.6207380242177399, "grad_norm": 0.14918090403079987, "learning_rate": 8.711441434554207e-06, "loss": 0.0017, "step": 36730 }, { "epoch": 0.6209070244965904, "grad_norm": 0.1024555116891861, "learning_rate": 8.710453018584906e-06, "loss": 0.0017, "step": 36740 }, { "epoch": 0.6210760247754409, "grad_norm": 0.0839439108967781, "learning_rate": 8.709464279788213e-06, "loss": 0.0018, "step": 36750 }, { "epoch": 0.6212450250542914, "grad_norm": 0.11152900755405426, "learning_rate": 8.708475218250158e-06, "loss": 0.0024, "step": 36760 }, { "epoch": 0.6214140253331418, "grad_norm": 0.0031347458716481924, "learning_rate": 8.707485834056789e-06, "loss": 0.0015, "step": 36770 }, { "epoch": 0.6215830256119923, "grad_norm": 0.1972360461950302, "learning_rate": 8.706496127294191e-06, "loss": 0.0034, "step": 36780 }, { "epoch": 0.6217520258908428, "grad_norm": 0.057392384856939316, "learning_rate": 8.705506098048468e-06, "loss": 0.0021, "step": 36790 }, { "epoch": 0.6219210261696931, "grad_norm": 0.2517576813697815, "learning_rate": 8.704515746405764e-06, "loss": 0.0029, "step": 36800 }, { "epoch": 0.6220900264485436, "grad_norm": 0.08012565225362778, "learning_rate": 8.703525072452241e-06, "loss": 0.0024, "step": 36810 }, { "epoch": 0.6222590267273941, "grad_norm": 0.023475486785173416, "learning_rate": 8.702534076274092e-06, "loss": 0.0027, "step": 36820 }, { "epoch": 0.6224280270062446, "grad_norm": 0.04848853126168251, "learning_rate": 8.701542757957539e-06, "loss": 0.0025, "step": 36830 }, { "epoch": 0.622597027285095, "grad_norm": 0.02437884360551834, "learning_rate": 8.700551117588834e-06, "loss": 0.0022, "step": 36840 }, { "epoch": 0.6227660275639455, "grad_norm": 0.07362344115972519, "learning_rate": 8.69955915525425e-06, "loss": 0.0043, "step": 36850 }, { "epoch": 0.622935027842796, "grad_norm": 0.06642599403858185, "learning_rate": 8.698566871040094e-06, "loss": 0.0022, "step": 36860 }, { "epoch": 0.6231040281216464, "grad_norm": 0.07961343228816986, "learning_rate": 8.697574265032701e-06, "loss": 0.0024, "step": 36870 }, { "epoch": 0.6232730284004968, "grad_norm": 0.05048443377017975, "learning_rate": 8.696581337318432e-06, "loss": 0.0011, "step": 36880 }, { "epoch": 0.6234420286793473, "grad_norm": 0.12879827618598938, "learning_rate": 8.695588087983676e-06, "loss": 0.0016, "step": 36890 }, { "epoch": 0.6236110289581978, "grad_norm": 0.20827510952949524, "learning_rate": 8.694594517114851e-06, "loss": 0.0023, "step": 36900 }, { "epoch": 0.6237800292370482, "grad_norm": 0.11347052454948425, "learning_rate": 8.693600624798403e-06, "loss": 0.0018, "step": 36910 }, { "epoch": 0.6239490295158987, "grad_norm": 0.17409923672676086, "learning_rate": 8.692606411120804e-06, "loss": 0.0021, "step": 36920 }, { "epoch": 0.6241180297947492, "grad_norm": 0.06025707721710205, "learning_rate": 8.691611876168556e-06, "loss": 0.0017, "step": 36930 }, { "epoch": 0.6242870300735996, "grad_norm": 0.07102333009243011, "learning_rate": 8.69061702002819e-06, "loss": 0.0019, "step": 36940 }, { "epoch": 0.6244560303524501, "grad_norm": 0.16395600140094757, "learning_rate": 8.68962184278626e-06, "loss": 0.0035, "step": 36950 }, { "epoch": 0.6246250306313006, "grad_norm": 0.09424368292093277, "learning_rate": 8.688626344529353e-06, "loss": 0.001, "step": 36960 }, { "epoch": 0.624794030910151, "grad_norm": 0.04473784193396568, "learning_rate": 8.687630525344084e-06, "loss": 0.0013, "step": 36970 }, { "epoch": 0.6249630311890014, "grad_norm": 0.06145351380109787, "learning_rate": 8.686634385317089e-06, "loss": 0.0028, "step": 36980 }, { "epoch": 0.6251320314678519, "grad_norm": 0.08815675973892212, "learning_rate": 8.685637924535044e-06, "loss": 0.0028, "step": 36990 }, { "epoch": 0.6253010317467024, "grad_norm": 0.12052954733371735, "learning_rate": 8.68464114308464e-06, "loss": 0.0023, "step": 37000 }, { "epoch": 0.6254700320255528, "grad_norm": 0.09614243358373642, "learning_rate": 8.683644041052605e-06, "loss": 0.0022, "step": 37010 }, { "epoch": 0.6256390323044033, "grad_norm": 0.07599113136529922, "learning_rate": 8.682646618525692e-06, "loss": 0.0031, "step": 37020 }, { "epoch": 0.6258080325832538, "grad_norm": 0.04539747163653374, "learning_rate": 8.681648875590678e-06, "loss": 0.001, "step": 37030 }, { "epoch": 0.6259770328621043, "grad_norm": 0.15155501663684845, "learning_rate": 8.680650812334374e-06, "loss": 0.0043, "step": 37040 }, { "epoch": 0.6261460331409546, "grad_norm": 0.012156975455582142, "learning_rate": 8.679652428843618e-06, "loss": 0.0012, "step": 37050 }, { "epoch": 0.6263150334198051, "grad_norm": 0.028095301240682602, "learning_rate": 8.678653725205271e-06, "loss": 0.0025, "step": 37060 }, { "epoch": 0.6264840336986556, "grad_norm": 0.11469139158725739, "learning_rate": 8.677654701506227e-06, "loss": 0.0028, "step": 37070 }, { "epoch": 0.626653033977506, "grad_norm": 0.09888265281915665, "learning_rate": 8.676655357833405e-06, "loss": 0.0031, "step": 37080 }, { "epoch": 0.6268220342563565, "grad_norm": 0.041696254163980484, "learning_rate": 8.675655694273753e-06, "loss": 0.0013, "step": 37090 }, { "epoch": 0.626991034535207, "grad_norm": 0.08160720765590668, "learning_rate": 8.674655710914246e-06, "loss": 0.0018, "step": 37100 }, { "epoch": 0.6271600348140575, "grad_norm": 0.050693582743406296, "learning_rate": 8.673655407841888e-06, "loss": 0.0022, "step": 37110 }, { "epoch": 0.6273290350929079, "grad_norm": 0.058973561972379684, "learning_rate": 8.672654785143712e-06, "loss": 0.0022, "step": 37120 }, { "epoch": 0.6274980353717584, "grad_norm": 0.05065792426466942, "learning_rate": 8.671653842906774e-06, "loss": 0.0022, "step": 37130 }, { "epoch": 0.6276670356506088, "grad_norm": 0.17974597215652466, "learning_rate": 8.670652581218162e-06, "loss": 0.0019, "step": 37140 }, { "epoch": 0.6278360359294592, "grad_norm": 0.0987791046500206, "learning_rate": 8.669651000164992e-06, "loss": 0.0032, "step": 37150 }, { "epoch": 0.6280050362083097, "grad_norm": 0.08784514665603638, "learning_rate": 8.668649099834404e-06, "loss": 0.0016, "step": 37160 }, { "epoch": 0.6281740364871602, "grad_norm": 0.1562742441892624, "learning_rate": 8.667646880313569e-06, "loss": 0.0037, "step": 37170 }, { "epoch": 0.6283430367660107, "grad_norm": 0.0067938147112727165, "learning_rate": 8.666644341689686e-06, "loss": 0.0016, "step": 37180 }, { "epoch": 0.6285120370448611, "grad_norm": 0.07273447513580322, "learning_rate": 8.66564148404998e-06, "loss": 0.0029, "step": 37190 }, { "epoch": 0.6286810373237116, "grad_norm": 0.11638252437114716, "learning_rate": 8.664638307481704e-06, "loss": 0.0017, "step": 37200 }, { "epoch": 0.6288500376025621, "grad_norm": 0.18697509169578552, "learning_rate": 8.66363481207214e-06, "loss": 0.0032, "step": 37210 }, { "epoch": 0.6290190378814126, "grad_norm": 0.16037853062152863, "learning_rate": 8.662630997908597e-06, "loss": 0.0024, "step": 37220 }, { "epoch": 0.6291880381602629, "grad_norm": 0.07233048975467682, "learning_rate": 8.66162686507841e-06, "loss": 0.0019, "step": 37230 }, { "epoch": 0.6293570384391134, "grad_norm": 0.1072436049580574, "learning_rate": 8.660622413668945e-06, "loss": 0.0023, "step": 37240 }, { "epoch": 0.6295260387179639, "grad_norm": 0.09999535977840424, "learning_rate": 8.659617643767595e-06, "loss": 0.0022, "step": 37250 }, { "epoch": 0.6296950389968143, "grad_norm": 0.06610429286956787, "learning_rate": 8.658612555461779e-06, "loss": 0.0023, "step": 37260 }, { "epoch": 0.6298640392756648, "grad_norm": 0.05709943547844887, "learning_rate": 8.657607148838943e-06, "loss": 0.0023, "step": 37270 }, { "epoch": 0.6300330395545153, "grad_norm": 0.040123891085386276, "learning_rate": 8.656601423986564e-06, "loss": 0.0019, "step": 37280 }, { "epoch": 0.6302020398333658, "grad_norm": 0.04873102530837059, "learning_rate": 8.655595380992144e-06, "loss": 0.0017, "step": 37290 }, { "epoch": 0.6303710401122162, "grad_norm": 0.13486704230308533, "learning_rate": 8.654589019943215e-06, "loss": 0.0027, "step": 37300 }, { "epoch": 0.6305400403910666, "grad_norm": 0.03334526717662811, "learning_rate": 8.653582340927333e-06, "loss": 0.0019, "step": 37310 }, { "epoch": 0.6307090406699171, "grad_norm": 0.2030312567949295, "learning_rate": 8.652575344032085e-06, "loss": 0.002, "step": 37320 }, { "epoch": 0.6308780409487675, "grad_norm": 0.06566629558801651, "learning_rate": 8.651568029345088e-06, "loss": 0.0031, "step": 37330 }, { "epoch": 0.631047041227618, "grad_norm": 0.13266269862651825, "learning_rate": 8.650560396953978e-06, "loss": 0.0019, "step": 37340 }, { "epoch": 0.6312160415064685, "grad_norm": 0.16177508234977722, "learning_rate": 8.649552446946426e-06, "loss": 0.0029, "step": 37350 }, { "epoch": 0.631385041785319, "grad_norm": 0.02120721898972988, "learning_rate": 8.64854417941013e-06, "loss": 0.0017, "step": 37360 }, { "epoch": 0.6315540420641694, "grad_norm": 0.06453455984592438, "learning_rate": 8.647535594432812e-06, "loss": 0.0013, "step": 37370 }, { "epoch": 0.6317230423430199, "grad_norm": 0.01956728659570217, "learning_rate": 8.646526692102224e-06, "loss": 0.0023, "step": 37380 }, { "epoch": 0.6318920426218704, "grad_norm": 0.09439927339553833, "learning_rate": 8.645517472506146e-06, "loss": 0.0019, "step": 37390 }, { "epoch": 0.6320610429007207, "grad_norm": 0.17421653866767883, "learning_rate": 8.644507935732385e-06, "loss": 0.003, "step": 37400 }, { "epoch": 0.6322300431795712, "grad_norm": 0.02730964496731758, "learning_rate": 8.643498081868778e-06, "loss": 0.0047, "step": 37410 }, { "epoch": 0.6323990434584217, "grad_norm": 0.043696578592061996, "learning_rate": 8.642487911003181e-06, "loss": 0.0024, "step": 37420 }, { "epoch": 0.6325680437372722, "grad_norm": 0.019823472946882248, "learning_rate": 8.64147742322349e-06, "loss": 0.0018, "step": 37430 }, { "epoch": 0.6327370440161226, "grad_norm": 0.013545677065849304, "learning_rate": 8.640466618617618e-06, "loss": 0.0017, "step": 37440 }, { "epoch": 0.6329060442949731, "grad_norm": 0.06836103647947311, "learning_rate": 8.639455497273512e-06, "loss": 0.0038, "step": 37450 }, { "epoch": 0.6330750445738236, "grad_norm": 0.028289545327425003, "learning_rate": 8.638444059279146e-06, "loss": 0.0014, "step": 37460 }, { "epoch": 0.633244044852674, "grad_norm": 0.04600983485579491, "learning_rate": 8.637432304722517e-06, "loss": 0.002, "step": 37470 }, { "epoch": 0.6334130451315245, "grad_norm": 0.08732181042432785, "learning_rate": 8.636420233691654e-06, "loss": 0.002, "step": 37480 }, { "epoch": 0.6335820454103749, "grad_norm": 0.045885950326919556, "learning_rate": 8.63540784627461e-06, "loss": 0.0037, "step": 37490 }, { "epoch": 0.6337510456892254, "grad_norm": 0.09269632399082184, "learning_rate": 8.634395142559471e-06, "loss": 0.0021, "step": 37500 }, { "epoch": 0.6339200459680758, "grad_norm": 0.1494239717721939, "learning_rate": 8.633382122634347e-06, "loss": 0.003, "step": 37510 }, { "epoch": 0.6340890462469263, "grad_norm": 0.05048111826181412, "learning_rate": 8.632368786587371e-06, "loss": 0.0025, "step": 37520 }, { "epoch": 0.6342580465257768, "grad_norm": 0.04324718937277794, "learning_rate": 8.631355134506713e-06, "loss": 0.0021, "step": 37530 }, { "epoch": 0.6344270468046272, "grad_norm": 0.039146389812231064, "learning_rate": 8.630341166480565e-06, "loss": 0.0012, "step": 37540 }, { "epoch": 0.6345960470834777, "grad_norm": 0.0289743822067976, "learning_rate": 8.629326882597145e-06, "loss": 0.0021, "step": 37550 }, { "epoch": 0.6347650473623282, "grad_norm": 0.08949195593595505, "learning_rate": 8.628312282944701e-06, "loss": 0.0036, "step": 37560 }, { "epoch": 0.6349340476411786, "grad_norm": 0.16662773489952087, "learning_rate": 8.62729736761151e-06, "loss": 0.0019, "step": 37570 }, { "epoch": 0.635103047920029, "grad_norm": 0.06934000551700592, "learning_rate": 8.626282136685873e-06, "loss": 0.002, "step": 37580 }, { "epoch": 0.6352720481988795, "grad_norm": 0.005404521245509386, "learning_rate": 8.625266590256121e-06, "loss": 0.0016, "step": 37590 }, { "epoch": 0.63544104847773, "grad_norm": 0.029986826702952385, "learning_rate": 8.62425072841061e-06, "loss": 0.0022, "step": 37600 }, { "epoch": 0.6356100487565804, "grad_norm": 0.06996724754571915, "learning_rate": 8.623234551237724e-06, "loss": 0.002, "step": 37610 }, { "epoch": 0.6357790490354309, "grad_norm": 0.046281080693006516, "learning_rate": 8.62221805882588e-06, "loss": 0.0014, "step": 37620 }, { "epoch": 0.6359480493142814, "grad_norm": 0.03734259307384491, "learning_rate": 8.621201251263514e-06, "loss": 0.0011, "step": 37630 }, { "epoch": 0.6361170495931319, "grad_norm": 0.04263751208782196, "learning_rate": 8.620184128639094e-06, "loss": 0.0027, "step": 37640 }, { "epoch": 0.6362860498719823, "grad_norm": 0.05749150365591049, "learning_rate": 8.619166691041114e-06, "loss": 0.0032, "step": 37650 }, { "epoch": 0.6364550501508327, "grad_norm": 0.148232102394104, "learning_rate": 8.618148938558097e-06, "loss": 0.0035, "step": 37660 }, { "epoch": 0.6366240504296832, "grad_norm": 0.01768629066646099, "learning_rate": 8.617130871278592e-06, "loss": 0.0014, "step": 37670 }, { "epoch": 0.6367930507085336, "grad_norm": 0.02394348382949829, "learning_rate": 8.616112489291177e-06, "loss": 0.0032, "step": 37680 }, { "epoch": 0.6369620509873841, "grad_norm": 0.03687208145856857, "learning_rate": 8.615093792684453e-06, "loss": 0.0016, "step": 37690 }, { "epoch": 0.6371310512662346, "grad_norm": 0.07785465568304062, "learning_rate": 8.614074781547055e-06, "loss": 0.0024, "step": 37700 }, { "epoch": 0.6373000515450851, "grad_norm": 0.067860446870327, "learning_rate": 8.61305545596764e-06, "loss": 0.0012, "step": 37710 }, { "epoch": 0.6374690518239355, "grad_norm": 0.05987097695469856, "learning_rate": 8.612035816034895e-06, "loss": 0.0023, "step": 37720 }, { "epoch": 0.637638052102786, "grad_norm": 0.06388407200574875, "learning_rate": 8.611015861837532e-06, "loss": 0.0019, "step": 37730 }, { "epoch": 0.6378070523816364, "grad_norm": 0.05277885124087334, "learning_rate": 8.609995593464294e-06, "loss": 0.0022, "step": 37740 }, { "epoch": 0.6379760526604868, "grad_norm": 0.04513770341873169, "learning_rate": 8.608975011003949e-06, "loss": 0.0017, "step": 37750 }, { "epoch": 0.6381450529393373, "grad_norm": 0.03364074230194092, "learning_rate": 8.60795411454529e-06, "loss": 0.0026, "step": 37760 }, { "epoch": 0.6383140532181878, "grad_norm": 0.09462929517030716, "learning_rate": 8.606932904177144e-06, "loss": 0.0019, "step": 37770 }, { "epoch": 0.6384830534970383, "grad_norm": 0.07569252699613571, "learning_rate": 8.605911379988359e-06, "loss": 0.0023, "step": 37780 }, { "epoch": 0.6386520537758887, "grad_norm": 0.06281714886426926, "learning_rate": 8.604889542067814e-06, "loss": 0.0017, "step": 37790 }, { "epoch": 0.6388210540547392, "grad_norm": 0.06001376360654831, "learning_rate": 8.60386739050441e-06, "loss": 0.0034, "step": 37800 }, { "epoch": 0.6389900543335897, "grad_norm": 0.02849130518734455, "learning_rate": 8.602844925387083e-06, "loss": 0.0035, "step": 37810 }, { "epoch": 0.6391590546124402, "grad_norm": 0.15602053701877594, "learning_rate": 8.601822146804792e-06, "loss": 0.0011, "step": 37820 }, { "epoch": 0.6393280548912905, "grad_norm": 0.19311657547950745, "learning_rate": 8.600799054846525e-06, "loss": 0.0024, "step": 37830 }, { "epoch": 0.639497055170141, "grad_norm": 0.05133889615535736, "learning_rate": 8.599775649601292e-06, "loss": 0.0029, "step": 37840 }, { "epoch": 0.6396660554489915, "grad_norm": 0.09188152849674225, "learning_rate": 8.598751931158136e-06, "loss": 0.0015, "step": 37850 }, { "epoch": 0.6398350557278419, "grad_norm": 0.05758042261004448, "learning_rate": 8.597727899606125e-06, "loss": 0.0029, "step": 37860 }, { "epoch": 0.6400040560066924, "grad_norm": 0.040008142590522766, "learning_rate": 8.596703555034356e-06, "loss": 0.0021, "step": 37870 }, { "epoch": 0.6401730562855429, "grad_norm": 0.06656382232904434, "learning_rate": 8.595678897531952e-06, "loss": 0.002, "step": 37880 }, { "epoch": 0.6403420565643934, "grad_norm": 0.03702574595808983, "learning_rate": 8.594653927188062e-06, "loss": 0.0017, "step": 37890 }, { "epoch": 0.6405110568432438, "grad_norm": 0.0633983165025711, "learning_rate": 8.593628644091863e-06, "loss": 0.0018, "step": 37900 }, { "epoch": 0.6406800571220943, "grad_norm": 0.0489104762673378, "learning_rate": 8.59260304833256e-06, "loss": 0.0024, "step": 37910 }, { "epoch": 0.6408490574009447, "grad_norm": 0.024015096947550774, "learning_rate": 8.591577139999387e-06, "loss": 0.0016, "step": 37920 }, { "epoch": 0.6410180576797951, "grad_norm": 0.1253378987312317, "learning_rate": 8.590550919181601e-06, "loss": 0.0025, "step": 37930 }, { "epoch": 0.6411870579586456, "grad_norm": 0.049682728946208954, "learning_rate": 8.589524385968486e-06, "loss": 0.0016, "step": 37940 }, { "epoch": 0.6413560582374961, "grad_norm": 0.04882590472698212, "learning_rate": 8.588497540449361e-06, "loss": 0.0016, "step": 37950 }, { "epoch": 0.6415250585163466, "grad_norm": 0.19600225985050201, "learning_rate": 8.587470382713562e-06, "loss": 0.0024, "step": 37960 }, { "epoch": 0.641694058795197, "grad_norm": 0.06281403452157974, "learning_rate": 8.586442912850456e-06, "loss": 0.0037, "step": 37970 }, { "epoch": 0.6418630590740475, "grad_norm": 0.2796551287174225, "learning_rate": 8.585415130949444e-06, "loss": 0.0014, "step": 37980 }, { "epoch": 0.642032059352898, "grad_norm": 0.1508016139268875, "learning_rate": 8.584387037099941e-06, "loss": 0.0036, "step": 37990 }, { "epoch": 0.6422010596317483, "grad_norm": 0.13536739349365234, "learning_rate": 8.5833586313914e-06, "loss": 0.0021, "step": 38000 }, { "epoch": 0.6423700599105988, "grad_norm": 0.024195270612835884, "learning_rate": 8.582329913913297e-06, "loss": 0.0052, "step": 38010 }, { "epoch": 0.6425390601894493, "grad_norm": 0.07989949733018875, "learning_rate": 8.581300884755133e-06, "loss": 0.0025, "step": 38020 }, { "epoch": 0.6427080604682998, "grad_norm": 0.051182933151721954, "learning_rate": 8.58027154400644e-06, "loss": 0.0026, "step": 38030 }, { "epoch": 0.6428770607471502, "grad_norm": 0.04734310135245323, "learning_rate": 8.579241891756778e-06, "loss": 0.0017, "step": 38040 }, { "epoch": 0.6430460610260007, "grad_norm": 0.03388788551092148, "learning_rate": 8.578211928095727e-06, "loss": 0.0009, "step": 38050 }, { "epoch": 0.6432150613048512, "grad_norm": 0.04434414207935333, "learning_rate": 8.577181653112904e-06, "loss": 0.0018, "step": 38060 }, { "epoch": 0.6433840615837016, "grad_norm": 0.05806152522563934, "learning_rate": 8.576151066897944e-06, "loss": 0.0019, "step": 38070 }, { "epoch": 0.6435530618625521, "grad_norm": 0.059645067900419235, "learning_rate": 8.575120169540514e-06, "loss": 0.0038, "step": 38080 }, { "epoch": 0.6437220621414025, "grad_norm": 0.05072898417711258, "learning_rate": 8.574088961130308e-06, "loss": 0.0029, "step": 38090 }, { "epoch": 0.643891062420253, "grad_norm": 0.14522813260555267, "learning_rate": 8.573057441757045e-06, "loss": 0.0019, "step": 38100 }, { "epoch": 0.6440600626991034, "grad_norm": 0.041980892419815063, "learning_rate": 8.572025611510473e-06, "loss": 0.0031, "step": 38110 }, { "epoch": 0.6442290629779539, "grad_norm": 0.09430034458637238, "learning_rate": 8.570993470480367e-06, "loss": 0.0025, "step": 38120 }, { "epoch": 0.6443980632568044, "grad_norm": 0.1635764241218567, "learning_rate": 8.569961018756526e-06, "loss": 0.0016, "step": 38130 }, { "epoch": 0.6445670635356548, "grad_norm": 0.08032064884901047, "learning_rate": 8.56892825642878e-06, "loss": 0.0042, "step": 38140 }, { "epoch": 0.6447360638145053, "grad_norm": 0.1031501516699791, "learning_rate": 8.567895183586983e-06, "loss": 0.0024, "step": 38150 }, { "epoch": 0.6449050640933558, "grad_norm": 0.09169802814722061, "learning_rate": 8.566861800321019e-06, "loss": 0.0024, "step": 38160 }, { "epoch": 0.6450740643722063, "grad_norm": 0.059827886521816254, "learning_rate": 8.565828106720797e-06, "loss": 0.0024, "step": 38170 }, { "epoch": 0.6452430646510566, "grad_norm": 0.04389607533812523, "learning_rate": 8.564794102876251e-06, "loss": 0.0013, "step": 38180 }, { "epoch": 0.6454120649299071, "grad_norm": 0.07341597229242325, "learning_rate": 8.563759788877348e-06, "loss": 0.0017, "step": 38190 }, { "epoch": 0.6455810652087576, "grad_norm": 0.062065500766038895, "learning_rate": 8.562725164814077e-06, "loss": 0.0029, "step": 38200 }, { "epoch": 0.645750065487608, "grad_norm": 0.0988144800066948, "learning_rate": 8.561690230776451e-06, "loss": 0.0023, "step": 38210 }, { "epoch": 0.6459190657664585, "grad_norm": 0.032794203609228134, "learning_rate": 8.560654986854522e-06, "loss": 0.0015, "step": 38220 }, { "epoch": 0.646088066045309, "grad_norm": 0.06741363555192947, "learning_rate": 8.559619433138357e-06, "loss": 0.0037, "step": 38230 }, { "epoch": 0.6462570663241595, "grad_norm": 0.5312454700469971, "learning_rate": 8.558583569718053e-06, "loss": 0.0019, "step": 38240 }, { "epoch": 0.6464260666030099, "grad_norm": 0.27058297395706177, "learning_rate": 8.557547396683738e-06, "loss": 0.0015, "step": 38250 }, { "epoch": 0.6465950668818603, "grad_norm": 0.0890686959028244, "learning_rate": 8.556510914125562e-06, "loss": 0.0023, "step": 38260 }, { "epoch": 0.6467640671607108, "grad_norm": 0.081411212682724, "learning_rate": 8.555474122133706e-06, "loss": 0.0028, "step": 38270 }, { "epoch": 0.6469330674395612, "grad_norm": 0.017591990530490875, "learning_rate": 8.554437020798374e-06, "loss": 0.001, "step": 38280 }, { "epoch": 0.6471020677184117, "grad_norm": 0.042150210589170456, "learning_rate": 8.553399610209798e-06, "loss": 0.0012, "step": 38290 }, { "epoch": 0.6472710679972622, "grad_norm": 0.09767927974462509, "learning_rate": 8.552361890458242e-06, "loss": 0.0016, "step": 38300 }, { "epoch": 0.6474400682761127, "grad_norm": 0.27567175030708313, "learning_rate": 8.55132386163399e-06, "loss": 0.002, "step": 38310 }, { "epoch": 0.6476090685549631, "grad_norm": 0.026824578642845154, "learning_rate": 8.550285523827352e-06, "loss": 0.0023, "step": 38320 }, { "epoch": 0.6477780688338136, "grad_norm": 0.14262062311172485, "learning_rate": 8.549246877128674e-06, "loss": 0.0033, "step": 38330 }, { "epoch": 0.6479470691126641, "grad_norm": 0.08674229681491852, "learning_rate": 8.548207921628321e-06, "loss": 0.0018, "step": 38340 }, { "epoch": 0.6481160693915144, "grad_norm": 0.03979768604040146, "learning_rate": 8.547168657416688e-06, "loss": 0.0018, "step": 38350 }, { "epoch": 0.6482850696703649, "grad_norm": 0.02881024219095707, "learning_rate": 8.546129084584196e-06, "loss": 0.0015, "step": 38360 }, { "epoch": 0.6484540699492154, "grad_norm": 0.03815798461437225, "learning_rate": 8.54508920322129e-06, "loss": 0.0019, "step": 38370 }, { "epoch": 0.6486230702280659, "grad_norm": 0.04694780707359314, "learning_rate": 8.544049013418447e-06, "loss": 0.0017, "step": 38380 }, { "epoch": 0.6487920705069163, "grad_norm": 0.12879303097724915, "learning_rate": 8.54300851526617e-06, "loss": 0.002, "step": 38390 }, { "epoch": 0.6489610707857668, "grad_norm": 0.0995473712682724, "learning_rate": 8.541967708854986e-06, "loss": 0.0015, "step": 38400 }, { "epoch": 0.6491300710646173, "grad_norm": 0.17174296081066132, "learning_rate": 8.54092659427545e-06, "loss": 0.0035, "step": 38410 }, { "epoch": 0.6492990713434678, "grad_norm": 0.017600564286112785, "learning_rate": 8.539885171618143e-06, "loss": 0.0019, "step": 38420 }, { "epoch": 0.6494680716223181, "grad_norm": 0.1489916890859604, "learning_rate": 8.538843440973677e-06, "loss": 0.0023, "step": 38430 }, { "epoch": 0.6496370719011686, "grad_norm": 0.008243445307016373, "learning_rate": 8.537801402432684e-06, "loss": 0.002, "step": 38440 }, { "epoch": 0.6498060721800191, "grad_norm": 0.10142067819833755, "learning_rate": 8.536759056085828e-06, "loss": 0.002, "step": 38450 }, { "epoch": 0.6499750724588695, "grad_norm": 0.07499175518751144, "learning_rate": 8.535716402023798e-06, "loss": 0.0038, "step": 38460 }, { "epoch": 0.65014407273772, "grad_norm": 0.026488911360502243, "learning_rate": 8.53467344033731e-06, "loss": 0.0025, "step": 38470 }, { "epoch": 0.6503130730165705, "grad_norm": 0.05331513658165932, "learning_rate": 8.533630171117108e-06, "loss": 0.0022, "step": 38480 }, { "epoch": 0.650482073295421, "grad_norm": 0.09920284897089005, "learning_rate": 8.53258659445396e-06, "loss": 0.0019, "step": 38490 }, { "epoch": 0.6506510735742714, "grad_norm": 0.03672245889902115, "learning_rate": 8.531542710438662e-06, "loss": 0.0028, "step": 38500 }, { "epoch": 0.6508200738531219, "grad_norm": 0.16491785645484924, "learning_rate": 8.530498519162037e-06, "loss": 0.0082, "step": 38510 }, { "epoch": 0.6509890741319723, "grad_norm": 0.11625287681818008, "learning_rate": 8.529454020714936e-06, "loss": 0.0019, "step": 38520 }, { "epoch": 0.6511580744108227, "grad_norm": 0.09387943893671036, "learning_rate": 8.528409215188233e-06, "loss": 0.0018, "step": 38530 }, { "epoch": 0.6513270746896732, "grad_norm": 0.12414376437664032, "learning_rate": 8.527364102672835e-06, "loss": 0.005, "step": 38540 }, { "epoch": 0.6514960749685237, "grad_norm": 0.10484426468610764, "learning_rate": 8.526318683259668e-06, "loss": 0.0026, "step": 38550 }, { "epoch": 0.6516650752473742, "grad_norm": 0.035917624831199646, "learning_rate": 8.525272957039692e-06, "loss": 0.0025, "step": 38560 }, { "epoch": 0.6518340755262246, "grad_norm": 0.0756058618426323, "learning_rate": 8.524226924103887e-06, "loss": 0.0024, "step": 38570 }, { "epoch": 0.6520030758050751, "grad_norm": 0.04967297613620758, "learning_rate": 8.523180584543265e-06, "loss": 0.0022, "step": 38580 }, { "epoch": 0.6521720760839256, "grad_norm": 0.0803271010518074, "learning_rate": 8.52213393844886e-06, "loss": 0.003, "step": 38590 }, { "epoch": 0.652341076362776, "grad_norm": 0.0883374959230423, "learning_rate": 8.52108698591174e-06, "loss": 0.0023, "step": 38600 }, { "epoch": 0.6525100766416264, "grad_norm": 0.05122964084148407, "learning_rate": 8.52003972702299e-06, "loss": 0.0021, "step": 38610 }, { "epoch": 0.6526790769204769, "grad_norm": 0.02245727740228176, "learning_rate": 8.51899216187373e-06, "loss": 0.0012, "step": 38620 }, { "epoch": 0.6528480771993274, "grad_norm": 0.21799735724925995, "learning_rate": 8.517944290555102e-06, "loss": 0.0026, "step": 38630 }, { "epoch": 0.6530170774781778, "grad_norm": 0.020439540967345238, "learning_rate": 8.516896113158274e-06, "loss": 0.0016, "step": 38640 }, { "epoch": 0.6531860777570283, "grad_norm": 0.029102226719260216, "learning_rate": 8.515847629774445e-06, "loss": 0.0025, "step": 38650 }, { "epoch": 0.6533550780358788, "grad_norm": 0.049179356545209885, "learning_rate": 8.51479884049484e-06, "loss": 0.0024, "step": 38660 }, { "epoch": 0.6535240783147293, "grad_norm": 0.22065278887748718, "learning_rate": 8.513749745410705e-06, "loss": 0.0038, "step": 38670 }, { "epoch": 0.6536930785935797, "grad_norm": 0.046514589339494705, "learning_rate": 8.512700344613316e-06, "loss": 0.0025, "step": 38680 }, { "epoch": 0.6538620788724301, "grad_norm": 0.022628309205174446, "learning_rate": 8.51165063819398e-06, "loss": 0.0017, "step": 38690 }, { "epoch": 0.6540310791512806, "grad_norm": 0.150055930018425, "learning_rate": 8.510600626244024e-06, "loss": 0.0017, "step": 38700 }, { "epoch": 0.654200079430131, "grad_norm": 0.1109904795885086, "learning_rate": 8.509550308854801e-06, "loss": 0.0022, "step": 38710 }, { "epoch": 0.6543690797089815, "grad_norm": 0.0615401454269886, "learning_rate": 8.5084996861177e-06, "loss": 0.0013, "step": 38720 }, { "epoch": 0.654538079987832, "grad_norm": 0.01564824767410755, "learning_rate": 8.507448758124126e-06, "loss": 0.0019, "step": 38730 }, { "epoch": 0.6547070802666825, "grad_norm": 0.07921988517045975, "learning_rate": 8.506397524965517e-06, "loss": 0.0022, "step": 38740 }, { "epoch": 0.6548760805455329, "grad_norm": 0.11442530155181885, "learning_rate": 8.505345986733335e-06, "loss": 0.0018, "step": 38750 }, { "epoch": 0.6550450808243834, "grad_norm": 0.1136259213089943, "learning_rate": 8.504294143519067e-06, "loss": 0.0033, "step": 38760 }, { "epoch": 0.6552140811032339, "grad_norm": 0.1569293588399887, "learning_rate": 8.50324199541423e-06, "loss": 0.0033, "step": 38770 }, { "epoch": 0.6553830813820842, "grad_norm": 0.044599708169698715, "learning_rate": 8.502189542510365e-06, "loss": 0.0029, "step": 38780 }, { "epoch": 0.6555520816609347, "grad_norm": 0.09630095213651657, "learning_rate": 8.501136784899043e-06, "loss": 0.0021, "step": 38790 }, { "epoch": 0.6557210819397852, "grad_norm": 0.030088450759649277, "learning_rate": 8.500083722671857e-06, "loss": 0.002, "step": 38800 }, { "epoch": 0.6558900822186357, "grad_norm": 0.075492262840271, "learning_rate": 8.499030355920429e-06, "loss": 0.0021, "step": 38810 }, { "epoch": 0.6560590824974861, "grad_norm": 0.012088959105312824, "learning_rate": 8.497976684736407e-06, "loss": 0.0009, "step": 38820 }, { "epoch": 0.6562280827763366, "grad_norm": 0.06977924704551697, "learning_rate": 8.496922709211464e-06, "loss": 0.0077, "step": 38830 }, { "epoch": 0.6563970830551871, "grad_norm": 0.06464297324419022, "learning_rate": 8.495868429437302e-06, "loss": 0.0021, "step": 38840 }, { "epoch": 0.6565660833340375, "grad_norm": 0.13068783283233643, "learning_rate": 8.49481384550565e-06, "loss": 0.0034, "step": 38850 }, { "epoch": 0.656735083612888, "grad_norm": 0.0852997675538063, "learning_rate": 8.49375895750826e-06, "loss": 0.0024, "step": 38860 }, { "epoch": 0.6569040838917384, "grad_norm": 0.08365332335233688, "learning_rate": 8.492703765536913e-06, "loss": 0.0013, "step": 38870 }, { "epoch": 0.6570730841705889, "grad_norm": 0.028204182162880898, "learning_rate": 8.491648269683416e-06, "loss": 0.0023, "step": 38880 }, { "epoch": 0.6572420844494393, "grad_norm": 0.13570831716060638, "learning_rate": 8.490592470039605e-06, "loss": 0.0024, "step": 38890 }, { "epoch": 0.6574110847282898, "grad_norm": 0.07442338764667511, "learning_rate": 8.489536366697333e-06, "loss": 0.0037, "step": 38900 }, { "epoch": 0.6575800850071403, "grad_norm": 0.2090086191892624, "learning_rate": 8.48847995974849e-06, "loss": 0.0027, "step": 38910 }, { "epoch": 0.6577490852859907, "grad_norm": 0.06681293994188309, "learning_rate": 8.487423249284989e-06, "loss": 0.0014, "step": 38920 }, { "epoch": 0.6579180855648412, "grad_norm": 0.10129017382860184, "learning_rate": 8.486366235398771e-06, "loss": 0.0032, "step": 38930 }, { "epoch": 0.6580870858436917, "grad_norm": 0.09688436985015869, "learning_rate": 8.485308918181796e-06, "loss": 0.0026, "step": 38940 }, { "epoch": 0.658256086122542, "grad_norm": 0.08046122640371323, "learning_rate": 8.484251297726059e-06, "loss": 0.0017, "step": 38950 }, { "epoch": 0.6584250864013925, "grad_norm": 0.04880860820412636, "learning_rate": 8.483193374123576e-06, "loss": 0.0013, "step": 38960 }, { "epoch": 0.658594086680243, "grad_norm": 0.07618697732686996, "learning_rate": 8.482135147466395e-06, "loss": 0.001, "step": 38970 }, { "epoch": 0.6587630869590935, "grad_norm": 0.022066382691264153, "learning_rate": 8.481076617846586e-06, "loss": 0.0015, "step": 38980 }, { "epoch": 0.6589320872379439, "grad_norm": 0.030474252998828888, "learning_rate": 8.480017785356243e-06, "loss": 0.0024, "step": 38990 }, { "epoch": 0.6591010875167944, "grad_norm": 0.057772617787122726, "learning_rate": 8.478958650087492e-06, "loss": 0.0032, "step": 39000 }, { "epoch": 0.6592700877956449, "grad_norm": 0.023182455450296402, "learning_rate": 8.477899212132483e-06, "loss": 0.0018, "step": 39010 }, { "epoch": 0.6594390880744954, "grad_norm": 0.03691640868782997, "learning_rate": 8.476839471583391e-06, "loss": 0.0011, "step": 39020 }, { "epoch": 0.6596080883533458, "grad_norm": 0.08493918925523758, "learning_rate": 8.475779428532418e-06, "loss": 0.0027, "step": 39030 }, { "epoch": 0.6597770886321962, "grad_norm": 0.05697249621152878, "learning_rate": 8.474719083071796e-06, "loss": 0.0014, "step": 39040 }, { "epoch": 0.6599460889110467, "grad_norm": 0.0171417985111475, "learning_rate": 8.473658435293779e-06, "loss": 0.0031, "step": 39050 }, { "epoch": 0.6601150891898971, "grad_norm": 0.3440181314945221, "learning_rate": 8.472597485290647e-06, "loss": 0.0017, "step": 39060 }, { "epoch": 0.6602840894687476, "grad_norm": 0.03360786661505699, "learning_rate": 8.47153623315471e-06, "loss": 0.0023, "step": 39070 }, { "epoch": 0.6604530897475981, "grad_norm": 0.037310730665922165, "learning_rate": 8.470474678978297e-06, "loss": 0.0024, "step": 39080 }, { "epoch": 0.6606220900264486, "grad_norm": 0.06788239628076553, "learning_rate": 8.469412822853775e-06, "loss": 0.0027, "step": 39090 }, { "epoch": 0.660791090305299, "grad_norm": 0.11595677584409714, "learning_rate": 8.468350664873526e-06, "loss": 0.0029, "step": 39100 }, { "epoch": 0.6609600905841495, "grad_norm": 0.020321834832429886, "learning_rate": 8.467288205129967e-06, "loss": 0.0025, "step": 39110 }, { "epoch": 0.6611290908629999, "grad_norm": 0.008805109187960625, "learning_rate": 8.466225443715535e-06, "loss": 0.0025, "step": 39120 }, { "epoch": 0.6612980911418503, "grad_norm": 0.08032660186290741, "learning_rate": 8.465162380722693e-06, "loss": 0.002, "step": 39130 }, { "epoch": 0.6614670914207008, "grad_norm": 0.1384652853012085, "learning_rate": 8.464099016243933e-06, "loss": 0.0025, "step": 39140 }, { "epoch": 0.6616360916995513, "grad_norm": 0.06193441525101662, "learning_rate": 8.46303535037178e-06, "loss": 0.0022, "step": 39150 }, { "epoch": 0.6618050919784018, "grad_norm": 0.035007353872060776, "learning_rate": 8.461971383198768e-06, "loss": 0.0031, "step": 39160 }, { "epoch": 0.6619740922572522, "grad_norm": 0.0404779352247715, "learning_rate": 8.460907114817474e-06, "loss": 0.0026, "step": 39170 }, { "epoch": 0.6621430925361027, "grad_norm": 0.055109139531850815, "learning_rate": 8.459842545320492e-06, "loss": 0.0017, "step": 39180 }, { "epoch": 0.6623120928149532, "grad_norm": 0.013876045122742653, "learning_rate": 8.458777674800446e-06, "loss": 0.0016, "step": 39190 }, { "epoch": 0.6624810930938037, "grad_norm": 0.11018216609954834, "learning_rate": 8.457712503349984e-06, "loss": 0.0047, "step": 39200 }, { "epoch": 0.662650093372654, "grad_norm": 0.15153026580810547, "learning_rate": 8.45664703106178e-06, "loss": 0.0027, "step": 39210 }, { "epoch": 0.6628190936515045, "grad_norm": 0.08052218705415726, "learning_rate": 8.455581258028539e-06, "loss": 0.0029, "step": 39220 }, { "epoch": 0.662988093930355, "grad_norm": 0.05759026110172272, "learning_rate": 8.454515184342983e-06, "loss": 0.0022, "step": 39230 }, { "epoch": 0.6631570942092054, "grad_norm": 0.08084557205438614, "learning_rate": 8.453448810097871e-06, "loss": 0.002, "step": 39240 }, { "epoch": 0.6633260944880559, "grad_norm": 0.10249501466751099, "learning_rate": 8.452382135385978e-06, "loss": 0.0024, "step": 39250 }, { "epoch": 0.6634950947669064, "grad_norm": 0.13480144739151, "learning_rate": 8.451315160300114e-06, "loss": 0.002, "step": 39260 }, { "epoch": 0.6636640950457569, "grad_norm": 0.013405361212790012, "learning_rate": 8.450247884933107e-06, "loss": 0.0019, "step": 39270 }, { "epoch": 0.6638330953246073, "grad_norm": 0.0013330960646271706, "learning_rate": 8.449180309377817e-06, "loss": 0.0009, "step": 39280 }, { "epoch": 0.6640020956034578, "grad_norm": 0.17702990770339966, "learning_rate": 8.44811243372713e-06, "loss": 0.0023, "step": 39290 }, { "epoch": 0.6641710958823082, "grad_norm": 0.15253756940364838, "learning_rate": 8.447044258073955e-06, "loss": 0.0034, "step": 39300 }, { "epoch": 0.6643400961611586, "grad_norm": 0.15157701075077057, "learning_rate": 8.445975782511227e-06, "loss": 0.0015, "step": 39310 }, { "epoch": 0.6645090964400091, "grad_norm": 0.08215749263763428, "learning_rate": 8.444907007131911e-06, "loss": 0.0026, "step": 39320 }, { "epoch": 0.6646780967188596, "grad_norm": 0.13002920150756836, "learning_rate": 8.443837932028995e-06, "loss": 0.0014, "step": 39330 }, { "epoch": 0.66484709699771, "grad_norm": 0.09273547679185867, "learning_rate": 8.442768557295491e-06, "loss": 0.0012, "step": 39340 }, { "epoch": 0.6650160972765605, "grad_norm": 0.059002723544836044, "learning_rate": 8.441698883024443e-06, "loss": 0.0041, "step": 39350 }, { "epoch": 0.665185097555411, "grad_norm": 0.030401628464460373, "learning_rate": 8.440628909308917e-06, "loss": 0.0038, "step": 39360 }, { "epoch": 0.6653540978342615, "grad_norm": 0.06252733618021011, "learning_rate": 8.439558636242005e-06, "loss": 0.0027, "step": 39370 }, { "epoch": 0.6655230981131118, "grad_norm": 0.04471417888998985, "learning_rate": 8.438488063916826e-06, "loss": 0.0013, "step": 39380 }, { "epoch": 0.6656920983919623, "grad_norm": 0.12027346342802048, "learning_rate": 8.437417192426527e-06, "loss": 0.0021, "step": 39390 }, { "epoch": 0.6658610986708128, "grad_norm": 0.21675223112106323, "learning_rate": 8.436346021864277e-06, "loss": 0.0024, "step": 39400 }, { "epoch": 0.6660300989496633, "grad_norm": 0.14156877994537354, "learning_rate": 8.435274552323274e-06, "loss": 0.0026, "step": 39410 }, { "epoch": 0.6661990992285137, "grad_norm": 0.0528615340590477, "learning_rate": 8.43420278389674e-06, "loss": 0.0023, "step": 39420 }, { "epoch": 0.6663680995073642, "grad_norm": 0.040200572460889816, "learning_rate": 8.433130716677923e-06, "loss": 0.0021, "step": 39430 }, { "epoch": 0.6665370997862147, "grad_norm": 0.04540213569998741, "learning_rate": 8.432058350760103e-06, "loss": 0.0008, "step": 39440 }, { "epoch": 0.6667061000650651, "grad_norm": 0.06027652695775032, "learning_rate": 8.430985686236577e-06, "loss": 0.0039, "step": 39450 }, { "epoch": 0.6668751003439156, "grad_norm": 0.060570668429136276, "learning_rate": 8.429912723200672e-06, "loss": 0.0016, "step": 39460 }, { "epoch": 0.667044100622766, "grad_norm": 0.05765363201498985, "learning_rate": 8.428839461745742e-06, "loss": 0.002, "step": 39470 }, { "epoch": 0.6672131009016165, "grad_norm": 0.07702869176864624, "learning_rate": 8.427765901965165e-06, "loss": 0.0019, "step": 39480 }, { "epoch": 0.6673821011804669, "grad_norm": 0.10805605351924896, "learning_rate": 8.42669204395235e-06, "loss": 0.0022, "step": 39490 }, { "epoch": 0.6675511014593174, "grad_norm": 0.06277460604906082, "learning_rate": 8.425617887800722e-06, "loss": 0.0018, "step": 39500 }, { "epoch": 0.6677201017381679, "grad_norm": 0.0981348529458046, "learning_rate": 8.42454343360374e-06, "loss": 0.0032, "step": 39510 }, { "epoch": 0.6678891020170183, "grad_norm": 0.06740453839302063, "learning_rate": 8.423468681454886e-06, "loss": 0.0024, "step": 39520 }, { "epoch": 0.6680581022958688, "grad_norm": 0.029674388468265533, "learning_rate": 8.42239363144767e-06, "loss": 0.0024, "step": 39530 }, { "epoch": 0.6682271025747193, "grad_norm": 0.3581937551498413, "learning_rate": 8.421318283675628e-06, "loss": 0.0019, "step": 39540 }, { "epoch": 0.6683961028535698, "grad_norm": 0.13029687106609344, "learning_rate": 8.420242638232318e-06, "loss": 0.0018, "step": 39550 }, { "epoch": 0.6685651031324201, "grad_norm": 0.0804767906665802, "learning_rate": 8.419166695211325e-06, "loss": 0.0011, "step": 39560 }, { "epoch": 0.6687341034112706, "grad_norm": 0.0854073241353035, "learning_rate": 8.418090454706267e-06, "loss": 0.0018, "step": 39570 }, { "epoch": 0.6689031036901211, "grad_norm": 0.1427302360534668, "learning_rate": 8.417013916810774e-06, "loss": 0.0017, "step": 39580 }, { "epoch": 0.6690721039689715, "grad_norm": 0.13676531612873077, "learning_rate": 8.415937081618519e-06, "loss": 0.0013, "step": 39590 }, { "epoch": 0.669241104247822, "grad_norm": 0.03680437058210373, "learning_rate": 8.414859949223184e-06, "loss": 0.0014, "step": 39600 }, { "epoch": 0.6694101045266725, "grad_norm": 0.23977738618850708, "learning_rate": 8.41378251971849e-06, "loss": 0.0037, "step": 39610 }, { "epoch": 0.669579104805523, "grad_norm": 0.18916615843772888, "learning_rate": 8.412704793198175e-06, "loss": 0.0029, "step": 39620 }, { "epoch": 0.6697481050843734, "grad_norm": 0.10192802548408508, "learning_rate": 8.41162676975601e-06, "loss": 0.0025, "step": 39630 }, { "epoch": 0.6699171053632238, "grad_norm": 0.07449067384004593, "learning_rate": 8.410548449485785e-06, "loss": 0.0027, "step": 39640 }, { "epoch": 0.6700861056420743, "grad_norm": 0.03700610250234604, "learning_rate": 8.40946983248132e-06, "loss": 0.0029, "step": 39650 }, { "epoch": 0.6702551059209247, "grad_norm": 0.07545451074838638, "learning_rate": 8.40839091883646e-06, "loss": 0.0015, "step": 39660 }, { "epoch": 0.6704241061997752, "grad_norm": 0.01644122786819935, "learning_rate": 8.407311708645075e-06, "loss": 0.0016, "step": 39670 }, { "epoch": 0.6705931064786257, "grad_norm": 0.044411513954401016, "learning_rate": 8.406232202001066e-06, "loss": 0.002, "step": 39680 }, { "epoch": 0.6707621067574762, "grad_norm": 0.04547111690044403, "learning_rate": 8.40515239899835e-06, "loss": 0.0026, "step": 39690 }, { "epoch": 0.6709311070363266, "grad_norm": 0.14344197511672974, "learning_rate": 8.404072299730877e-06, "loss": 0.0024, "step": 39700 }, { "epoch": 0.6711001073151771, "grad_norm": 0.032576240599155426, "learning_rate": 8.402991904292621e-06, "loss": 0.001, "step": 39710 }, { "epoch": 0.6712691075940276, "grad_norm": 0.11407008767127991, "learning_rate": 8.401911212777583e-06, "loss": 0.0022, "step": 39720 }, { "epoch": 0.6714381078728779, "grad_norm": 0.02802203968167305, "learning_rate": 8.400830225279784e-06, "loss": 0.0026, "step": 39730 }, { "epoch": 0.6716071081517284, "grad_norm": 0.16928566992282867, "learning_rate": 8.39974894189328e-06, "loss": 0.0019, "step": 39740 }, { "epoch": 0.6717761084305789, "grad_norm": 0.08227849751710892, "learning_rate": 8.398667362712145e-06, "loss": 0.0026, "step": 39750 }, { "epoch": 0.6719451087094294, "grad_norm": 0.06275780498981476, "learning_rate": 8.397585487830482e-06, "loss": 0.0027, "step": 39760 }, { "epoch": 0.6721141089882798, "grad_norm": 0.14453285932540894, "learning_rate": 8.396503317342422e-06, "loss": 0.0021, "step": 39770 }, { "epoch": 0.6722831092671303, "grad_norm": 0.09584707766771317, "learning_rate": 8.395420851342115e-06, "loss": 0.0027, "step": 39780 }, { "epoch": 0.6724521095459808, "grad_norm": 0.07883055508136749, "learning_rate": 8.394338089923744e-06, "loss": 0.002, "step": 39790 }, { "epoch": 0.6726211098248313, "grad_norm": 0.12426584213972092, "learning_rate": 8.393255033181511e-06, "loss": 0.0021, "step": 39800 }, { "epoch": 0.6727901101036816, "grad_norm": 0.16997219622135162, "learning_rate": 8.39217168120965e-06, "loss": 0.001, "step": 39810 }, { "epoch": 0.6729591103825321, "grad_norm": 0.09920654445886612, "learning_rate": 8.39108803410242e-06, "loss": 0.0019, "step": 39820 }, { "epoch": 0.6731281106613826, "grad_norm": 0.06657091528177261, "learning_rate": 8.390004091954099e-06, "loss": 0.0022, "step": 39830 }, { "epoch": 0.673297110940233, "grad_norm": 0.087304025888443, "learning_rate": 8.388919854858997e-06, "loss": 0.0015, "step": 39840 }, { "epoch": 0.6734661112190835, "grad_norm": 0.09787190705537796, "learning_rate": 8.387835322911449e-06, "loss": 0.0026, "step": 39850 }, { "epoch": 0.673635111497934, "grad_norm": 0.08263973146677017, "learning_rate": 8.386750496205811e-06, "loss": 0.0018, "step": 39860 }, { "epoch": 0.6738041117767845, "grad_norm": 0.12377431243658066, "learning_rate": 8.385665374836473e-06, "loss": 0.0014, "step": 39870 }, { "epoch": 0.6739731120556349, "grad_norm": 0.06167079880833626, "learning_rate": 8.384579958897843e-06, "loss": 0.0016, "step": 39880 }, { "epoch": 0.6741421123344854, "grad_norm": 0.03726949170231819, "learning_rate": 8.383494248484356e-06, "loss": 0.002, "step": 39890 }, { "epoch": 0.6743111126133358, "grad_norm": 0.19137848913669586, "learning_rate": 8.38240824369048e-06, "loss": 0.0027, "step": 39900 }, { "epoch": 0.6744801128921862, "grad_norm": 0.033898577094078064, "learning_rate": 8.381321944610693e-06, "loss": 0.0024, "step": 39910 }, { "epoch": 0.6746491131710367, "grad_norm": 0.07841331511735916, "learning_rate": 8.380235351339518e-06, "loss": 0.0022, "step": 39920 }, { "epoch": 0.6748181134498872, "grad_norm": 0.08964890986680984, "learning_rate": 8.37914846397149e-06, "loss": 0.002, "step": 39930 }, { "epoch": 0.6749871137287377, "grad_norm": 0.013514422811567783, "learning_rate": 8.378061282601171e-06, "loss": 0.0007, "step": 39940 }, { "epoch": 0.6751561140075881, "grad_norm": 0.144585520029068, "learning_rate": 8.376973807323154e-06, "loss": 0.0025, "step": 39950 }, { "epoch": 0.6753251142864386, "grad_norm": 0.14480367302894592, "learning_rate": 8.375886038232056e-06, "loss": 0.0012, "step": 39960 }, { "epoch": 0.6754941145652891, "grad_norm": 0.07895231246948242, "learning_rate": 8.374797975422513e-06, "loss": 0.0019, "step": 39970 }, { "epoch": 0.6756631148441395, "grad_norm": 0.08148206025362015, "learning_rate": 8.373709618989196e-06, "loss": 0.0031, "step": 39980 }, { "epoch": 0.6758321151229899, "grad_norm": 0.040260862559080124, "learning_rate": 8.372620969026798e-06, "loss": 0.0027, "step": 39990 }, { "epoch": 0.6760011154018404, "grad_norm": 0.12580999732017517, "learning_rate": 8.371532025630033e-06, "loss": 0.0032, "step": 40000 }, { "epoch": 0.6761701156806909, "grad_norm": 0.0271195899695158, "learning_rate": 8.370442788893646e-06, "loss": 0.0027, "step": 40010 }, { "epoch": 0.6763391159595413, "grad_norm": 0.18015502393245697, "learning_rate": 8.369353258912408e-06, "loss": 0.0036, "step": 40020 }, { "epoch": 0.6765081162383918, "grad_norm": 0.06827221810817719, "learning_rate": 8.36826343578111e-06, "loss": 0.0054, "step": 40030 }, { "epoch": 0.6766771165172423, "grad_norm": 0.00251463963650167, "learning_rate": 8.367173319594575e-06, "loss": 0.0017, "step": 40040 }, { "epoch": 0.6768461167960927, "grad_norm": 0.025977712124586105, "learning_rate": 8.366082910447646e-06, "loss": 0.0023, "step": 40050 }, { "epoch": 0.6770151170749432, "grad_norm": 0.04994544759392738, "learning_rate": 8.364992208435195e-06, "loss": 0.0019, "step": 40060 }, { "epoch": 0.6771841173537936, "grad_norm": 0.45482775568962097, "learning_rate": 8.363901213652119e-06, "loss": 0.0025, "step": 40070 }, { "epoch": 0.677353117632644, "grad_norm": 0.060207583010196686, "learning_rate": 8.362809926193337e-06, "loss": 0.0013, "step": 40080 }, { "epoch": 0.6775221179114945, "grad_norm": 0.13387027382850647, "learning_rate": 8.3617183461538e-06, "loss": 0.003, "step": 40090 }, { "epoch": 0.677691118190345, "grad_norm": 0.09276372194290161, "learning_rate": 8.360626473628476e-06, "loss": 0.0019, "step": 40100 }, { "epoch": 0.6778601184691955, "grad_norm": 0.05634180083870888, "learning_rate": 8.359534308712369e-06, "loss": 0.0018, "step": 40110 }, { "epoch": 0.678029118748046, "grad_norm": 0.07601413130760193, "learning_rate": 8.358441851500499e-06, "loss": 0.002, "step": 40120 }, { "epoch": 0.6781981190268964, "grad_norm": 0.04013429954648018, "learning_rate": 8.357349102087915e-06, "loss": 0.0013, "step": 40130 }, { "epoch": 0.6783671193057469, "grad_norm": 0.13309259712696075, "learning_rate": 8.356256060569694e-06, "loss": 0.0012, "step": 40140 }, { "epoch": 0.6785361195845974, "grad_norm": 0.05572674050927162, "learning_rate": 8.355162727040934e-06, "loss": 0.0022, "step": 40150 }, { "epoch": 0.6787051198634477, "grad_norm": 0.0784897431731224, "learning_rate": 8.35406910159676e-06, "loss": 0.004, "step": 40160 }, { "epoch": 0.6788741201422982, "grad_norm": 0.06939193606376648, "learning_rate": 8.352975184332324e-06, "loss": 0.0026, "step": 40170 }, { "epoch": 0.6790431204211487, "grad_norm": 0.11883770674467087, "learning_rate": 8.351880975342802e-06, "loss": 0.0019, "step": 40180 }, { "epoch": 0.6792121206999991, "grad_norm": 0.04164041578769684, "learning_rate": 8.350786474723393e-06, "loss": 0.0009, "step": 40190 }, { "epoch": 0.6793811209788496, "grad_norm": 0.06501810997724533, "learning_rate": 8.349691682569325e-06, "loss": 0.0019, "step": 40200 }, { "epoch": 0.6795501212577001, "grad_norm": 0.1461230218410492, "learning_rate": 8.348596598975853e-06, "loss": 0.0015, "step": 40210 }, { "epoch": 0.6797191215365506, "grad_norm": 0.05555432662367821, "learning_rate": 8.347501224038253e-06, "loss": 0.0014, "step": 40220 }, { "epoch": 0.679888121815401, "grad_norm": 0.028255827724933624, "learning_rate": 8.346405557851827e-06, "loss": 0.0011, "step": 40230 }, { "epoch": 0.6800571220942515, "grad_norm": 0.06110149621963501, "learning_rate": 8.345309600511903e-06, "loss": 0.0032, "step": 40240 }, { "epoch": 0.6802261223731019, "grad_norm": 0.06957794725894928, "learning_rate": 8.344213352113835e-06, "loss": 0.0025, "step": 40250 }, { "epoch": 0.6803951226519523, "grad_norm": 0.027158791199326515, "learning_rate": 8.343116812753004e-06, "loss": 0.0027, "step": 40260 }, { "epoch": 0.6805641229308028, "grad_norm": 0.07340021431446075, "learning_rate": 8.342019982524811e-06, "loss": 0.0019, "step": 40270 }, { "epoch": 0.6807331232096533, "grad_norm": 0.06970573961734772, "learning_rate": 8.340922861524687e-06, "loss": 0.0013, "step": 40280 }, { "epoch": 0.6809021234885038, "grad_norm": 0.4278048574924469, "learning_rate": 8.33982544984809e-06, "loss": 0.0011, "step": 40290 }, { "epoch": 0.6810711237673542, "grad_norm": 0.10096825659275055, "learning_rate": 8.338727747590494e-06, "loss": 0.0017, "step": 40300 }, { "epoch": 0.6812401240462047, "grad_norm": 0.027060270309448242, "learning_rate": 8.337629754847408e-06, "loss": 0.0014, "step": 40310 }, { "epoch": 0.6814091243250552, "grad_norm": 0.0714111328125, "learning_rate": 8.336531471714361e-06, "loss": 0.0021, "step": 40320 }, { "epoch": 0.6815781246039055, "grad_norm": 0.07323069870471954, "learning_rate": 8.335432898286913e-06, "loss": 0.0027, "step": 40330 }, { "epoch": 0.681747124882756, "grad_norm": 0.04784776642918587, "learning_rate": 8.33433403466064e-06, "loss": 0.0023, "step": 40340 }, { "epoch": 0.6819161251616065, "grad_norm": 0.03423130884766579, "learning_rate": 8.333234880931151e-06, "loss": 0.0022, "step": 40350 }, { "epoch": 0.682085125440457, "grad_norm": 0.03832382336258888, "learning_rate": 8.332135437194077e-06, "loss": 0.0013, "step": 40360 }, { "epoch": 0.6822541257193074, "grad_norm": 0.07870305329561234, "learning_rate": 8.331035703545076e-06, "loss": 0.0022, "step": 40370 }, { "epoch": 0.6824231259981579, "grad_norm": 0.06901619583368301, "learning_rate": 8.32993568007983e-06, "loss": 0.0023, "step": 40380 }, { "epoch": 0.6825921262770084, "grad_norm": 0.0540190152823925, "learning_rate": 8.328835366894045e-06, "loss": 0.0013, "step": 40390 }, { "epoch": 0.6827611265558589, "grad_norm": 0.17458319664001465, "learning_rate": 8.327734764083458e-06, "loss": 0.0017, "step": 40400 }, { "epoch": 0.6829301268347093, "grad_norm": 0.03551612049341202, "learning_rate": 8.326633871743818e-06, "loss": 0.0024, "step": 40410 }, { "epoch": 0.6830991271135597, "grad_norm": 0.07492489367723465, "learning_rate": 8.325532689970917e-06, "loss": 0.0026, "step": 40420 }, { "epoch": 0.6832681273924102, "grad_norm": 0.048070840537548065, "learning_rate": 8.324431218860558e-06, "loss": 0.0013, "step": 40430 }, { "epoch": 0.6834371276712606, "grad_norm": 0.00637691980227828, "learning_rate": 8.323329458508575e-06, "loss": 0.0019, "step": 40440 }, { "epoch": 0.6836061279501111, "grad_norm": 0.07666812092065811, "learning_rate": 8.322227409010828e-06, "loss": 0.0023, "step": 40450 }, { "epoch": 0.6837751282289616, "grad_norm": 0.055106133222579956, "learning_rate": 8.3211250704632e-06, "loss": 0.0019, "step": 40460 }, { "epoch": 0.683944128507812, "grad_norm": 0.09392855316400528, "learning_rate": 8.320022442961599e-06, "loss": 0.0019, "step": 40470 }, { "epoch": 0.6841131287866625, "grad_norm": 0.1125345304608345, "learning_rate": 8.31891952660196e-06, "loss": 0.002, "step": 40480 }, { "epoch": 0.684282129065513, "grad_norm": 0.18436716496944427, "learning_rate": 8.317816321480243e-06, "loss": 0.0017, "step": 40490 }, { "epoch": 0.6844511293443635, "grad_norm": 0.03023306466639042, "learning_rate": 8.31671282769243e-06, "loss": 0.0017, "step": 40500 }, { "epoch": 0.6846201296232138, "grad_norm": 0.10193860530853271, "learning_rate": 8.31560904533453e-06, "loss": 0.0018, "step": 40510 }, { "epoch": 0.6847891299020643, "grad_norm": 0.10619567334651947, "learning_rate": 8.31450497450258e-06, "loss": 0.0021, "step": 40520 }, { "epoch": 0.6849581301809148, "grad_norm": 0.06131768599152565, "learning_rate": 8.313400615292636e-06, "loss": 0.0012, "step": 40530 }, { "epoch": 0.6851271304597653, "grad_norm": 0.047531671822071075, "learning_rate": 8.312295967800787e-06, "loss": 0.002, "step": 40540 }, { "epoch": 0.6852961307386157, "grad_norm": 0.0296910610049963, "learning_rate": 8.31119103212314e-06, "loss": 0.0015, "step": 40550 }, { "epoch": 0.6854651310174662, "grad_norm": 0.005351419560611248, "learning_rate": 8.31008580835583e-06, "loss": 0.002, "step": 40560 }, { "epoch": 0.6856341312963167, "grad_norm": 0.032045409083366394, "learning_rate": 8.308980296595015e-06, "loss": 0.0018, "step": 40570 }, { "epoch": 0.6858031315751671, "grad_norm": 0.11112460494041443, "learning_rate": 8.307874496936882e-06, "loss": 0.0028, "step": 40580 }, { "epoch": 0.6859721318540175, "grad_norm": 0.12762129306793213, "learning_rate": 8.306768409477643e-06, "loss": 0.0016, "step": 40590 }, { "epoch": 0.686141132132868, "grad_norm": 0.07278034836053848, "learning_rate": 8.30566203431353e-06, "loss": 0.0016, "step": 40600 }, { "epoch": 0.6863101324117185, "grad_norm": 0.022609353065490723, "learning_rate": 8.304555371540803e-06, "loss": 0.0015, "step": 40610 }, { "epoch": 0.6864791326905689, "grad_norm": 0.1110212653875351, "learning_rate": 8.303448421255748e-06, "loss": 0.0021, "step": 40620 }, { "epoch": 0.6866481329694194, "grad_norm": 0.03907875344157219, "learning_rate": 8.302341183554676e-06, "loss": 0.0018, "step": 40630 }, { "epoch": 0.6868171332482699, "grad_norm": 0.09183419495820999, "learning_rate": 8.30123365853392e-06, "loss": 0.0023, "step": 40640 }, { "epoch": 0.6869861335271203, "grad_norm": 0.0381489135324955, "learning_rate": 8.30012584628984e-06, "loss": 0.0015, "step": 40650 }, { "epoch": 0.6871551338059708, "grad_norm": 0.02272479236125946, "learning_rate": 8.299017746918823e-06, "loss": 0.0025, "step": 40660 }, { "epoch": 0.6873241340848213, "grad_norm": 0.13928182423114777, "learning_rate": 8.297909360517279e-06, "loss": 0.0022, "step": 40670 }, { "epoch": 0.6874931343636717, "grad_norm": 0.051701080054044724, "learning_rate": 8.296800687181638e-06, "loss": 0.0019, "step": 40680 }, { "epoch": 0.6876621346425221, "grad_norm": 0.06480807065963745, "learning_rate": 8.295691727008366e-06, "loss": 0.0027, "step": 40690 }, { "epoch": 0.6878311349213726, "grad_norm": 0.030916791409254074, "learning_rate": 8.294582480093947e-06, "loss": 0.0026, "step": 40700 }, { "epoch": 0.6880001352002231, "grad_norm": 0.024703755974769592, "learning_rate": 8.293472946534888e-06, "loss": 0.0022, "step": 40710 }, { "epoch": 0.6881691354790735, "grad_norm": 0.028602372854948044, "learning_rate": 8.292363126427725e-06, "loss": 0.0015, "step": 40720 }, { "epoch": 0.688338135757924, "grad_norm": 0.23439855873584747, "learning_rate": 8.29125301986902e-06, "loss": 0.0015, "step": 40730 }, { "epoch": 0.6885071360367745, "grad_norm": 0.10362856090068817, "learning_rate": 8.290142626955355e-06, "loss": 0.0016, "step": 40740 }, { "epoch": 0.688676136315625, "grad_norm": 0.07249779999256134, "learning_rate": 8.28903194778334e-06, "loss": 0.0017, "step": 40750 }, { "epoch": 0.6888451365944753, "grad_norm": 0.10326391458511353, "learning_rate": 8.287920982449611e-06, "loss": 0.0016, "step": 40760 }, { "epoch": 0.6890141368733258, "grad_norm": 0.1046016737818718, "learning_rate": 8.286809731050824e-06, "loss": 0.0031, "step": 40770 }, { "epoch": 0.6891831371521763, "grad_norm": 0.06414433568716049, "learning_rate": 8.285698193683667e-06, "loss": 0.0019, "step": 40780 }, { "epoch": 0.6893521374310267, "grad_norm": 0.0254384633153677, "learning_rate": 8.284586370444847e-06, "loss": 0.0015, "step": 40790 }, { "epoch": 0.6895211377098772, "grad_norm": 0.11462149024009705, "learning_rate": 8.2834742614311e-06, "loss": 0.0018, "step": 40800 }, { "epoch": 0.6896901379887277, "grad_norm": 0.07652153074741364, "learning_rate": 8.282361866739181e-06, "loss": 0.0017, "step": 40810 }, { "epoch": 0.6898591382675782, "grad_norm": 0.290982186794281, "learning_rate": 8.281249186465879e-06, "loss": 0.0021, "step": 40820 }, { "epoch": 0.6900281385464286, "grad_norm": 0.101005919277668, "learning_rate": 8.280136220707998e-06, "loss": 0.0044, "step": 40830 }, { "epoch": 0.6901971388252791, "grad_norm": 0.01687195710837841, "learning_rate": 8.279022969562374e-06, "loss": 0.0023, "step": 40840 }, { "epoch": 0.6903661391041295, "grad_norm": 0.038489919155836105, "learning_rate": 8.277909433125863e-06, "loss": 0.0027, "step": 40850 }, { "epoch": 0.69053513938298, "grad_norm": 0.05620067939162254, "learning_rate": 8.276795611495351e-06, "loss": 0.0021, "step": 40860 }, { "epoch": 0.6907041396618304, "grad_norm": 0.05303415283560753, "learning_rate": 8.275681504767742e-06, "loss": 0.0016, "step": 40870 }, { "epoch": 0.6908731399406809, "grad_norm": 0.03398628905415535, "learning_rate": 8.274567113039974e-06, "loss": 0.0012, "step": 40880 }, { "epoch": 0.6910421402195314, "grad_norm": 0.04521585628390312, "learning_rate": 8.273452436409e-06, "loss": 0.0027, "step": 40890 }, { "epoch": 0.6912111404983818, "grad_norm": 0.1029910147190094, "learning_rate": 8.272337474971804e-06, "loss": 0.0057, "step": 40900 }, { "epoch": 0.6913801407772323, "grad_norm": 0.09532245248556137, "learning_rate": 8.271222228825393e-06, "loss": 0.0013, "step": 40910 }, { "epoch": 0.6915491410560828, "grad_norm": 0.12107133120298386, "learning_rate": 8.270106698066798e-06, "loss": 0.003, "step": 40920 }, { "epoch": 0.6917181413349333, "grad_norm": 0.054512280970811844, "learning_rate": 8.268990882793078e-06, "loss": 0.0031, "step": 40930 }, { "epoch": 0.6918871416137836, "grad_norm": 0.0330926850438118, "learning_rate": 8.26787478310131e-06, "loss": 0.01, "step": 40940 }, { "epoch": 0.6920561418926341, "grad_norm": 0.044844236224889755, "learning_rate": 8.266758399088603e-06, "loss": 0.0019, "step": 40950 }, { "epoch": 0.6922251421714846, "grad_norm": 0.05058744177222252, "learning_rate": 8.26564173085209e-06, "loss": 0.0018, "step": 40960 }, { "epoch": 0.692394142450335, "grad_norm": 0.04564560204744339, "learning_rate": 8.264524778488923e-06, "loss": 0.0014, "step": 40970 }, { "epoch": 0.6925631427291855, "grad_norm": 0.008947601541876793, "learning_rate": 8.263407542096282e-06, "loss": 0.0015, "step": 40980 }, { "epoch": 0.692732143008036, "grad_norm": 0.13389982283115387, "learning_rate": 8.262290021771374e-06, "loss": 0.003, "step": 40990 }, { "epoch": 0.6929011432868865, "grad_norm": 0.08067138493061066, "learning_rate": 8.261172217611429e-06, "loss": 0.0025, "step": 41000 }, { "epoch": 0.6930701435657369, "grad_norm": 0.026305221021175385, "learning_rate": 8.260054129713699e-06, "loss": 0.0016, "step": 41010 }, { "epoch": 0.6932391438445873, "grad_norm": 0.1137649267911911, "learning_rate": 8.258935758175463e-06, "loss": 0.0025, "step": 41020 }, { "epoch": 0.6934081441234378, "grad_norm": 0.07438289374113083, "learning_rate": 8.257817103094027e-06, "loss": 0.0017, "step": 41030 }, { "epoch": 0.6935771444022882, "grad_norm": 0.029260028153657913, "learning_rate": 8.25669816456672e-06, "loss": 0.0021, "step": 41040 }, { "epoch": 0.6937461446811387, "grad_norm": 0.1459071934223175, "learning_rate": 8.255578942690895e-06, "loss": 0.0057, "step": 41050 }, { "epoch": 0.6939151449599892, "grad_norm": 0.07163554430007935, "learning_rate": 8.254459437563927e-06, "loss": 0.0019, "step": 41060 }, { "epoch": 0.6940841452388397, "grad_norm": 0.11349696666002274, "learning_rate": 8.253339649283217e-06, "loss": 0.0019, "step": 41070 }, { "epoch": 0.6942531455176901, "grad_norm": 0.15381790697574615, "learning_rate": 8.252219577946196e-06, "loss": 0.0027, "step": 41080 }, { "epoch": 0.6944221457965406, "grad_norm": 0.11119570583105087, "learning_rate": 8.251099223650317e-06, "loss": 0.0021, "step": 41090 }, { "epoch": 0.6945911460753911, "grad_norm": 0.08238446712493896, "learning_rate": 8.249978586493052e-06, "loss": 0.002, "step": 41100 }, { "epoch": 0.6947601463542414, "grad_norm": 0.02086692675948143, "learning_rate": 8.248857666571903e-06, "loss": 0.0019, "step": 41110 }, { "epoch": 0.6949291466330919, "grad_norm": 0.20051395893096924, "learning_rate": 8.247736463984395e-06, "loss": 0.0019, "step": 41120 }, { "epoch": 0.6950981469119424, "grad_norm": 0.12535539269447327, "learning_rate": 8.24661497882808e-06, "loss": 0.0022, "step": 41130 }, { "epoch": 0.6952671471907929, "grad_norm": 0.14362570643424988, "learning_rate": 8.245493211200532e-06, "loss": 0.0026, "step": 41140 }, { "epoch": 0.6954361474696433, "grad_norm": 0.21121813356876373, "learning_rate": 8.244371161199351e-06, "loss": 0.0062, "step": 41150 }, { "epoch": 0.6956051477484938, "grad_norm": 0.21628457307815552, "learning_rate": 8.243248828922157e-06, "loss": 0.002, "step": 41160 }, { "epoch": 0.6957741480273443, "grad_norm": 0.07503283023834229, "learning_rate": 8.242126214466602e-06, "loss": 0.0019, "step": 41170 }, { "epoch": 0.6959431483061947, "grad_norm": 0.11699981987476349, "learning_rate": 8.241003317930359e-06, "loss": 0.001, "step": 41180 }, { "epoch": 0.6961121485850452, "grad_norm": 0.2051926851272583, "learning_rate": 8.239880139411122e-06, "loss": 0.0019, "step": 41190 }, { "epoch": 0.6962811488638956, "grad_norm": 0.1709057241678238, "learning_rate": 8.238756679006618e-06, "loss": 0.004, "step": 41200 }, { "epoch": 0.6964501491427461, "grad_norm": 0.10599850118160248, "learning_rate": 8.237632936814589e-06, "loss": 0.0032, "step": 41210 }, { "epoch": 0.6966191494215965, "grad_norm": 0.12379378080368042, "learning_rate": 8.236508912932809e-06, "loss": 0.0011, "step": 41220 }, { "epoch": 0.696788149700447, "grad_norm": 0.03391696885228157, "learning_rate": 8.23538460745907e-06, "loss": 0.0016, "step": 41230 }, { "epoch": 0.6969571499792975, "grad_norm": 0.026621650904417038, "learning_rate": 8.234260020491196e-06, "loss": 0.0024, "step": 41240 }, { "epoch": 0.697126150258148, "grad_norm": 0.10871417820453644, "learning_rate": 8.233135152127029e-06, "loss": 0.0019, "step": 41250 }, { "epoch": 0.6972951505369984, "grad_norm": 0.03548885136842728, "learning_rate": 8.232010002464441e-06, "loss": 0.0015, "step": 41260 }, { "epoch": 0.6974641508158489, "grad_norm": 0.06144818663597107, "learning_rate": 8.230884571601322e-06, "loss": 0.001, "step": 41270 }, { "epoch": 0.6976331510946993, "grad_norm": 0.06657146662473679, "learning_rate": 8.229758859635592e-06, "loss": 0.0025, "step": 41280 }, { "epoch": 0.6978021513735497, "grad_norm": 0.03593665733933449, "learning_rate": 8.228632866665191e-06, "loss": 0.002, "step": 41290 }, { "epoch": 0.6979711516524002, "grad_norm": 0.05789535865187645, "learning_rate": 8.227506592788089e-06, "loss": 0.0023, "step": 41300 }, { "epoch": 0.6981401519312507, "grad_norm": 0.09060919284820557, "learning_rate": 8.226380038102274e-06, "loss": 0.0017, "step": 41310 }, { "epoch": 0.6983091522101011, "grad_norm": 0.04523943364620209, "learning_rate": 8.225253202705766e-06, "loss": 0.002, "step": 41320 }, { "epoch": 0.6984781524889516, "grad_norm": 0.01592904143035412, "learning_rate": 8.224126086696603e-06, "loss": 0.0018, "step": 41330 }, { "epoch": 0.6986471527678021, "grad_norm": 0.055878762155771255, "learning_rate": 8.222998690172847e-06, "loss": 0.0017, "step": 41340 }, { "epoch": 0.6988161530466526, "grad_norm": 0.028551537543535233, "learning_rate": 8.22187101323259e-06, "loss": 0.0028, "step": 41350 }, { "epoch": 0.698985153325503, "grad_norm": 0.013488137163221836, "learning_rate": 8.220743055973943e-06, "loss": 0.0017, "step": 41360 }, { "epoch": 0.6991541536043534, "grad_norm": 0.011277561075985432, "learning_rate": 8.219614818495047e-06, "loss": 0.0013, "step": 41370 }, { "epoch": 0.6993231538832039, "grad_norm": 0.039525020867586136, "learning_rate": 8.218486300894061e-06, "loss": 0.0025, "step": 41380 }, { "epoch": 0.6994921541620543, "grad_norm": 0.027259863913059235, "learning_rate": 8.217357503269175e-06, "loss": 0.0017, "step": 41390 }, { "epoch": 0.6996611544409048, "grad_norm": 0.050522465258836746, "learning_rate": 8.216228425718596e-06, "loss": 0.0021, "step": 41400 }, { "epoch": 0.6998301547197553, "grad_norm": 0.04927310720086098, "learning_rate": 8.215099068340562e-06, "loss": 0.0011, "step": 41410 }, { "epoch": 0.6999991549986058, "grad_norm": 0.03349223732948303, "learning_rate": 8.213969431233332e-06, "loss": 0.0011, "step": 41420 }, { "epoch": 0.7001681552774562, "grad_norm": 0.0998213067650795, "learning_rate": 8.212839514495189e-06, "loss": 0.0018, "step": 41430 }, { "epoch": 0.7003371555563067, "grad_norm": 0.12260545790195465, "learning_rate": 8.211709318224442e-06, "loss": 0.0018, "step": 41440 }, { "epoch": 0.7005061558351571, "grad_norm": 0.16003373265266418, "learning_rate": 8.21057884251942e-06, "loss": 0.0017, "step": 41450 }, { "epoch": 0.7006751561140075, "grad_norm": 0.07196174561977386, "learning_rate": 8.209448087478486e-06, "loss": 0.0036, "step": 41460 }, { "epoch": 0.700844156392858, "grad_norm": 0.05935538932681084, "learning_rate": 8.208317053200017e-06, "loss": 0.0019, "step": 41470 }, { "epoch": 0.7010131566717085, "grad_norm": 0.1089865043759346, "learning_rate": 8.207185739782422e-06, "loss": 0.0014, "step": 41480 }, { "epoch": 0.701182156950559, "grad_norm": 0.10229632258415222, "learning_rate": 8.206054147324127e-06, "loss": 0.0013, "step": 41490 }, { "epoch": 0.7013511572294094, "grad_norm": 0.053856879472732544, "learning_rate": 8.204922275923587e-06, "loss": 0.0019, "step": 41500 }, { "epoch": 0.7015201575082599, "grad_norm": 0.07578303664922714, "learning_rate": 8.203790125679281e-06, "loss": 0.0022, "step": 41510 }, { "epoch": 0.7016891577871104, "grad_norm": 0.38969898223876953, "learning_rate": 8.202657696689713e-06, "loss": 0.0024, "step": 41520 }, { "epoch": 0.7018581580659609, "grad_norm": 0.04809711128473282, "learning_rate": 8.201524989053406e-06, "loss": 0.0017, "step": 41530 }, { "epoch": 0.7020271583448112, "grad_norm": 0.0871298536658287, "learning_rate": 8.200392002868914e-06, "loss": 0.0028, "step": 41540 }, { "epoch": 0.7021961586236617, "grad_norm": 0.07256130129098892, "learning_rate": 8.199258738234812e-06, "loss": 0.0012, "step": 41550 }, { "epoch": 0.7023651589025122, "grad_norm": 0.04088551551103592, "learning_rate": 8.198125195249697e-06, "loss": 0.0022, "step": 41560 }, { "epoch": 0.7025341591813626, "grad_norm": 0.06312688440084457, "learning_rate": 8.196991374012197e-06, "loss": 0.0018, "step": 41570 }, { "epoch": 0.7027031594602131, "grad_norm": 0.08025973290205002, "learning_rate": 8.195857274620958e-06, "loss": 0.0018, "step": 41580 }, { "epoch": 0.7028721597390636, "grad_norm": 0.0011194974649697542, "learning_rate": 8.19472289717465e-06, "loss": 0.0035, "step": 41590 }, { "epoch": 0.7030411600179141, "grad_norm": 0.13875657320022583, "learning_rate": 8.193588241771973e-06, "loss": 0.0048, "step": 41600 }, { "epoch": 0.7032101602967645, "grad_norm": 0.244681254029274, "learning_rate": 8.192453308511644e-06, "loss": 0.0031, "step": 41610 }, { "epoch": 0.703379160575615, "grad_norm": 0.09452541172504425, "learning_rate": 8.19131809749241e-06, "loss": 0.0033, "step": 41620 }, { "epoch": 0.7035481608544654, "grad_norm": 0.034787777811288834, "learning_rate": 8.19018260881304e-06, "loss": 0.0029, "step": 41630 }, { "epoch": 0.7037171611333158, "grad_norm": 0.04553893581032753, "learning_rate": 8.189046842572324e-06, "loss": 0.0027, "step": 41640 }, { "epoch": 0.7038861614121663, "grad_norm": 0.05115741491317749, "learning_rate": 8.187910798869082e-06, "loss": 0.0021, "step": 41650 }, { "epoch": 0.7040551616910168, "grad_norm": 0.026538031175732613, "learning_rate": 8.186774477802158e-06, "loss": 0.0018, "step": 41660 }, { "epoch": 0.7042241619698673, "grad_norm": 0.10690360516309738, "learning_rate": 8.185637879470412e-06, "loss": 0.0016, "step": 41670 }, { "epoch": 0.7043931622487177, "grad_norm": 0.04989476874470711, "learning_rate": 8.184501003972734e-06, "loss": 0.0024, "step": 41680 }, { "epoch": 0.7045621625275682, "grad_norm": 0.045074157416820526, "learning_rate": 8.183363851408041e-06, "loss": 0.0014, "step": 41690 }, { "epoch": 0.7047311628064187, "grad_norm": 0.06971005350351334, "learning_rate": 8.18222642187527e-06, "loss": 0.002, "step": 41700 }, { "epoch": 0.704900163085269, "grad_norm": 0.08040384203195572, "learning_rate": 8.181088715473381e-06, "loss": 0.0022, "step": 41710 }, { "epoch": 0.7050691633641195, "grad_norm": 0.08214668184518814, "learning_rate": 8.17995073230136e-06, "loss": 0.0016, "step": 41720 }, { "epoch": 0.70523816364297, "grad_norm": 0.008188508450984955, "learning_rate": 8.17881247245822e-06, "loss": 0.0008, "step": 41730 }, { "epoch": 0.7054071639218205, "grad_norm": 0.036291953176259995, "learning_rate": 8.177673936042992e-06, "loss": 0.0023, "step": 41740 }, { "epoch": 0.7055761642006709, "grad_norm": 0.08641888946294785, "learning_rate": 8.176535123154736e-06, "loss": 0.002, "step": 41750 }, { "epoch": 0.7057451644795214, "grad_norm": 0.025727005675435066, "learning_rate": 8.175396033892533e-06, "loss": 0.0023, "step": 41760 }, { "epoch": 0.7059141647583719, "grad_norm": 0.07313386350870132, "learning_rate": 8.174256668355491e-06, "loss": 0.0024, "step": 41770 }, { "epoch": 0.7060831650372223, "grad_norm": 0.326588898897171, "learning_rate": 8.173117026642738e-06, "loss": 0.0016, "step": 41780 }, { "epoch": 0.7062521653160728, "grad_norm": 0.10961807519197464, "learning_rate": 8.17197710885343e-06, "loss": 0.0021, "step": 41790 }, { "epoch": 0.7064211655949232, "grad_norm": 0.033026713877916336, "learning_rate": 8.170836915086744e-06, "loss": 0.0022, "step": 41800 }, { "epoch": 0.7065901658737737, "grad_norm": 0.07057605683803558, "learning_rate": 8.169696445441886e-06, "loss": 0.0018, "step": 41810 }, { "epoch": 0.7067591661526241, "grad_norm": 0.17566239833831787, "learning_rate": 8.168555700018077e-06, "loss": 0.0017, "step": 41820 }, { "epoch": 0.7069281664314746, "grad_norm": 0.1394510418176651, "learning_rate": 8.167414678914572e-06, "loss": 0.0024, "step": 41830 }, { "epoch": 0.7070971667103251, "grad_norm": 0.03784767538309097, "learning_rate": 8.166273382230642e-06, "loss": 0.003, "step": 41840 }, { "epoch": 0.7072661669891755, "grad_norm": 0.10037712007761002, "learning_rate": 8.165131810065587e-06, "loss": 0.0032, "step": 41850 }, { "epoch": 0.707435167268026, "grad_norm": 0.04333595931529999, "learning_rate": 8.16398996251873e-06, "loss": 0.004, "step": 41860 }, { "epoch": 0.7076041675468765, "grad_norm": 0.20991283655166626, "learning_rate": 8.162847839689417e-06, "loss": 0.0024, "step": 41870 }, { "epoch": 0.707773167825727, "grad_norm": 0.1015588790178299, "learning_rate": 8.161705441677015e-06, "loss": 0.0032, "step": 41880 }, { "epoch": 0.7079421681045773, "grad_norm": 0.007563309278339148, "learning_rate": 8.160562768580922e-06, "loss": 0.0025, "step": 41890 }, { "epoch": 0.7081111683834278, "grad_norm": 0.05943568795919418, "learning_rate": 8.159419820500555e-06, "loss": 0.0021, "step": 41900 }, { "epoch": 0.7082801686622783, "grad_norm": 0.03560509532690048, "learning_rate": 8.158276597535358e-06, "loss": 0.0029, "step": 41910 }, { "epoch": 0.7084491689411287, "grad_norm": 0.09235816448926926, "learning_rate": 8.157133099784791e-06, "loss": 0.0019, "step": 41920 }, { "epoch": 0.7086181692199792, "grad_norm": 0.3876766264438629, "learning_rate": 8.15598932734835e-06, "loss": 0.0021, "step": 41930 }, { "epoch": 0.7087871694988297, "grad_norm": 0.077243372797966, "learning_rate": 8.154845280325545e-06, "loss": 0.0018, "step": 41940 }, { "epoch": 0.7089561697776802, "grad_norm": 0.10611037909984589, "learning_rate": 8.153700958815917e-06, "loss": 0.0013, "step": 41950 }, { "epoch": 0.7091251700565306, "grad_norm": 0.1063697338104248, "learning_rate": 8.152556362919024e-06, "loss": 0.0012, "step": 41960 }, { "epoch": 0.709294170335381, "grad_norm": 0.08025922626256943, "learning_rate": 8.151411492734454e-06, "loss": 0.0011, "step": 41970 }, { "epoch": 0.7094631706142315, "grad_norm": 0.027767019346356392, "learning_rate": 8.150266348361814e-06, "loss": 0.0018, "step": 41980 }, { "epoch": 0.709632170893082, "grad_norm": 0.08533176779747009, "learning_rate": 8.149120929900738e-06, "loss": 0.0026, "step": 41990 }, { "epoch": 0.7098011711719324, "grad_norm": 0.060593731701374054, "learning_rate": 8.147975237450885e-06, "loss": 0.0012, "step": 42000 }, { "epoch": 0.7099701714507829, "grad_norm": 0.08707921206951141, "learning_rate": 8.146829271111933e-06, "loss": 0.0018, "step": 42010 }, { "epoch": 0.7101391717296334, "grad_norm": 0.05134487897157669, "learning_rate": 8.145683030983588e-06, "loss": 0.0016, "step": 42020 }, { "epoch": 0.7103081720084838, "grad_norm": 0.06112463027238846, "learning_rate": 8.144536517165578e-06, "loss": 0.0026, "step": 42030 }, { "epoch": 0.7104771722873343, "grad_norm": 0.1436644345521927, "learning_rate": 8.143389729757655e-06, "loss": 0.0019, "step": 42040 }, { "epoch": 0.7106461725661848, "grad_norm": 0.07123779505491257, "learning_rate": 8.142242668859597e-06, "loss": 0.0019, "step": 42050 }, { "epoch": 0.7108151728450351, "grad_norm": 0.03608975559473038, "learning_rate": 8.141095334571201e-06, "loss": 0.0014, "step": 42060 }, { "epoch": 0.7109841731238856, "grad_norm": 0.047314874827861786, "learning_rate": 8.139947726992292e-06, "loss": 0.0014, "step": 42070 }, { "epoch": 0.7111531734027361, "grad_norm": 0.027236802503466606, "learning_rate": 8.138799846222716e-06, "loss": 0.0011, "step": 42080 }, { "epoch": 0.7113221736815866, "grad_norm": 0.04754794016480446, "learning_rate": 8.137651692362347e-06, "loss": 0.0038, "step": 42090 }, { "epoch": 0.711491173960437, "grad_norm": 0.1344657838344574, "learning_rate": 8.13650326551108e-06, "loss": 0.0019, "step": 42100 }, { "epoch": 0.7116601742392875, "grad_norm": 0.0192538034170866, "learning_rate": 8.13535456576883e-06, "loss": 0.0024, "step": 42110 }, { "epoch": 0.711829174518138, "grad_norm": 0.1750638335943222, "learning_rate": 8.134205593235543e-06, "loss": 0.0041, "step": 42120 }, { "epoch": 0.7119981747969885, "grad_norm": 0.030514687299728394, "learning_rate": 8.13305634801118e-06, "loss": 0.0022, "step": 42130 }, { "epoch": 0.7121671750758388, "grad_norm": 0.14213408529758453, "learning_rate": 8.131906830195739e-06, "loss": 0.0041, "step": 42140 }, { "epoch": 0.7123361753546893, "grad_norm": 0.0784611701965332, "learning_rate": 8.130757039889229e-06, "loss": 0.0024, "step": 42150 }, { "epoch": 0.7125051756335398, "grad_norm": 0.02702018804848194, "learning_rate": 8.129606977191686e-06, "loss": 0.003, "step": 42160 }, { "epoch": 0.7126741759123902, "grad_norm": 0.0408581905066967, "learning_rate": 8.128456642203174e-06, "loss": 0.0034, "step": 42170 }, { "epoch": 0.7128431761912407, "grad_norm": 0.05134117975831032, "learning_rate": 8.127306035023776e-06, "loss": 0.0015, "step": 42180 }, { "epoch": 0.7130121764700912, "grad_norm": 0.17410869896411896, "learning_rate": 8.126155155753601e-06, "loss": 0.0038, "step": 42190 }, { "epoch": 0.7131811767489417, "grad_norm": 0.01948716677725315, "learning_rate": 8.12500400449278e-06, "loss": 0.003, "step": 42200 }, { "epoch": 0.7133501770277921, "grad_norm": 0.22991220653057098, "learning_rate": 8.12385258134147e-06, "loss": 0.0037, "step": 42210 }, { "epoch": 0.7135191773066426, "grad_norm": 0.1062299907207489, "learning_rate": 8.12270088639985e-06, "loss": 0.0014, "step": 42220 }, { "epoch": 0.713688177585493, "grad_norm": 0.018809964880347252, "learning_rate": 8.121548919768124e-06, "loss": 0.0023, "step": 42230 }, { "epoch": 0.7138571778643434, "grad_norm": 0.06033628433942795, "learning_rate": 8.120396681546516e-06, "loss": 0.001, "step": 42240 }, { "epoch": 0.7140261781431939, "grad_norm": 0.03717151656746864, "learning_rate": 8.119244171835279e-06, "loss": 0.0021, "step": 42250 }, { "epoch": 0.7141951784220444, "grad_norm": 0.07586640119552612, "learning_rate": 8.118091390734686e-06, "loss": 0.0018, "step": 42260 }, { "epoch": 0.7143641787008949, "grad_norm": 0.039691995829343796, "learning_rate": 8.116938338345035e-06, "loss": 0.0017, "step": 42270 }, { "epoch": 0.7145331789797453, "grad_norm": 0.01930839754641056, "learning_rate": 8.115785014766646e-06, "loss": 0.0022, "step": 42280 }, { "epoch": 0.7147021792585958, "grad_norm": 0.13970836997032166, "learning_rate": 8.114631420099865e-06, "loss": 0.0019, "step": 42290 }, { "epoch": 0.7148711795374463, "grad_norm": 0.021033965051174164, "learning_rate": 8.113477554445058e-06, "loss": 0.0012, "step": 42300 }, { "epoch": 0.7150401798162968, "grad_norm": 0.14079996943473816, "learning_rate": 8.11232341790262e-06, "loss": 0.0021, "step": 42310 }, { "epoch": 0.7152091800951471, "grad_norm": 0.06294325739145279, "learning_rate": 8.111169010572967e-06, "loss": 0.0032, "step": 42320 }, { "epoch": 0.7153781803739976, "grad_norm": 0.08401281386613846, "learning_rate": 8.110014332556533e-06, "loss": 0.0015, "step": 42330 }, { "epoch": 0.7155471806528481, "grad_norm": 0.04065125808119774, "learning_rate": 8.108859383953785e-06, "loss": 0.0013, "step": 42340 }, { "epoch": 0.7157161809316985, "grad_norm": 0.08111557364463806, "learning_rate": 8.107704164865207e-06, "loss": 0.0027, "step": 42350 }, { "epoch": 0.715885181210549, "grad_norm": 0.2308748960494995, "learning_rate": 8.10654867539131e-06, "loss": 0.0041, "step": 42360 }, { "epoch": 0.7160541814893995, "grad_norm": 0.04631703719496727, "learning_rate": 8.105392915632626e-06, "loss": 0.0017, "step": 42370 }, { "epoch": 0.71622318176825, "grad_norm": 0.036768991500139236, "learning_rate": 8.104236885689713e-06, "loss": 0.0023, "step": 42380 }, { "epoch": 0.7163921820471004, "grad_norm": 0.04694832116365433, "learning_rate": 8.103080585663151e-06, "loss": 0.0015, "step": 42390 }, { "epoch": 0.7165611823259508, "grad_norm": 0.0976409986615181, "learning_rate": 8.101924015653543e-06, "loss": 0.0017, "step": 42400 }, { "epoch": 0.7167301826048013, "grad_norm": 0.29701071977615356, "learning_rate": 8.100767175761517e-06, "loss": 0.0045, "step": 42410 }, { "epoch": 0.7168991828836517, "grad_norm": 0.08202585577964783, "learning_rate": 8.099610066087721e-06, "loss": 0.0017, "step": 42420 }, { "epoch": 0.7170681831625022, "grad_norm": 0.06088138371706009, "learning_rate": 8.098452686732834e-06, "loss": 0.0023, "step": 42430 }, { "epoch": 0.7172371834413527, "grad_norm": 0.08535436540842056, "learning_rate": 8.09729503779755e-06, "loss": 0.0027, "step": 42440 }, { "epoch": 0.7174061837202031, "grad_norm": 0.1157093346118927, "learning_rate": 8.09613711938259e-06, "loss": 0.0016, "step": 42450 }, { "epoch": 0.7175751839990536, "grad_norm": 0.07748487591743469, "learning_rate": 8.0949789315887e-06, "loss": 0.0023, "step": 42460 }, { "epoch": 0.7177441842779041, "grad_norm": 0.06741594523191452, "learning_rate": 8.093820474516648e-06, "loss": 0.0015, "step": 42470 }, { "epoch": 0.7179131845567546, "grad_norm": 0.3889932632446289, "learning_rate": 8.092661748267223e-06, "loss": 0.006, "step": 42480 }, { "epoch": 0.7180821848356049, "grad_norm": 0.17957502603530884, "learning_rate": 8.091502752941245e-06, "loss": 0.0031, "step": 42490 }, { "epoch": 0.7182511851144554, "grad_norm": 0.043044958263635635, "learning_rate": 8.090343488639547e-06, "loss": 0.0013, "step": 42500 }, { "epoch": 0.7184201853933059, "grad_norm": 0.04258865863084793, "learning_rate": 8.08918395546299e-06, "loss": 0.0012, "step": 42510 }, { "epoch": 0.7185891856721563, "grad_norm": 0.06326818466186523, "learning_rate": 8.088024153512465e-06, "loss": 0.0019, "step": 42520 }, { "epoch": 0.7187581859510068, "grad_norm": 0.2828580141067505, "learning_rate": 8.086864082888876e-06, "loss": 0.0035, "step": 42530 }, { "epoch": 0.7189271862298573, "grad_norm": 0.1426171511411667, "learning_rate": 8.085703743693155e-06, "loss": 0.0019, "step": 42540 }, { "epoch": 0.7190961865087078, "grad_norm": 0.039468914270401, "learning_rate": 8.084543136026257e-06, "loss": 0.0018, "step": 42550 }, { "epoch": 0.7192651867875582, "grad_norm": 0.046568624675273895, "learning_rate": 8.083382259989164e-06, "loss": 0.003, "step": 42560 }, { "epoch": 0.7194341870664087, "grad_norm": 0.16173051297664642, "learning_rate": 8.082221115682872e-06, "loss": 0.0015, "step": 42570 }, { "epoch": 0.7196031873452591, "grad_norm": 0.07199832797050476, "learning_rate": 8.08105970320841e-06, "loss": 0.0023, "step": 42580 }, { "epoch": 0.7197721876241095, "grad_norm": 0.2582061290740967, "learning_rate": 8.079898022666827e-06, "loss": 0.0021, "step": 42590 }, { "epoch": 0.71994118790296, "grad_norm": 0.0317123606801033, "learning_rate": 8.078736074159193e-06, "loss": 0.0016, "step": 42600 }, { "epoch": 0.7201101881818105, "grad_norm": 0.03768402338027954, "learning_rate": 8.077573857786603e-06, "loss": 0.0012, "step": 42610 }, { "epoch": 0.720279188460661, "grad_norm": 0.15136878192424774, "learning_rate": 8.076411373650177e-06, "loss": 0.0067, "step": 42620 }, { "epoch": 0.7204481887395114, "grad_norm": 0.05533058941364288, "learning_rate": 8.075248621851056e-06, "loss": 0.0011, "step": 42630 }, { "epoch": 0.7206171890183619, "grad_norm": 0.10668498277664185, "learning_rate": 8.074085602490404e-06, "loss": 0.001, "step": 42640 }, { "epoch": 0.7207861892972124, "grad_norm": 0.0986466333270073, "learning_rate": 8.072922315669408e-06, "loss": 0.0019, "step": 42650 }, { "epoch": 0.7209551895760627, "grad_norm": 0.5844259262084961, "learning_rate": 8.071758761489287e-06, "loss": 0.003, "step": 42660 }, { "epoch": 0.7211241898549132, "grad_norm": 0.08260132372379303, "learning_rate": 8.070594940051269e-06, "loss": 0.0018, "step": 42670 }, { "epoch": 0.7212931901337637, "grad_norm": 0.045917097479104996, "learning_rate": 8.069430851456612e-06, "loss": 0.0025, "step": 42680 }, { "epoch": 0.7214621904126142, "grad_norm": 0.0059732492081820965, "learning_rate": 8.068266495806601e-06, "loss": 0.0025, "step": 42690 }, { "epoch": 0.7216311906914646, "grad_norm": 0.09748756140470505, "learning_rate": 8.067101873202539e-06, "loss": 0.0027, "step": 42700 }, { "epoch": 0.7218001909703151, "grad_norm": 0.0393015593290329, "learning_rate": 8.065936983745753e-06, "loss": 0.0017, "step": 42710 }, { "epoch": 0.7219691912491656, "grad_norm": 0.04428603872656822, "learning_rate": 8.064771827537595e-06, "loss": 0.0012, "step": 42720 }, { "epoch": 0.7221381915280161, "grad_norm": 0.09002875536680222, "learning_rate": 8.063606404679437e-06, "loss": 0.0021, "step": 42730 }, { "epoch": 0.7223071918068665, "grad_norm": 0.1135852113366127, "learning_rate": 8.06244071527268e-06, "loss": 0.0019, "step": 42740 }, { "epoch": 0.7224761920857169, "grad_norm": 0.02696043998003006, "learning_rate": 8.06127475941874e-06, "loss": 0.0029, "step": 42750 }, { "epoch": 0.7226451923645674, "grad_norm": 0.03349917009472847, "learning_rate": 8.060108537219067e-06, "loss": 0.0022, "step": 42760 }, { "epoch": 0.7228141926434178, "grad_norm": 0.03234555572271347, "learning_rate": 8.058942048775125e-06, "loss": 0.001, "step": 42770 }, { "epoch": 0.7229831929222683, "grad_norm": 0.20898035168647766, "learning_rate": 8.057775294188401e-06, "loss": 0.0013, "step": 42780 }, { "epoch": 0.7231521932011188, "grad_norm": 0.2018376737833023, "learning_rate": 8.056608273560414e-06, "loss": 0.0035, "step": 42790 }, { "epoch": 0.7233211934799693, "grad_norm": 0.13457611203193665, "learning_rate": 8.055440986992696e-06, "loss": 0.0032, "step": 42800 }, { "epoch": 0.7234901937588197, "grad_norm": 0.11162465065717697, "learning_rate": 8.054273434586808e-06, "loss": 0.0016, "step": 42810 }, { "epoch": 0.7236591940376702, "grad_norm": 0.030697602778673172, "learning_rate": 8.053105616444334e-06, "loss": 0.0013, "step": 42820 }, { "epoch": 0.7238281943165206, "grad_norm": 0.12240386754274368, "learning_rate": 8.051937532666878e-06, "loss": 0.0015, "step": 42830 }, { "epoch": 0.723997194595371, "grad_norm": 0.0318816602230072, "learning_rate": 8.050769183356071e-06, "loss": 0.0019, "step": 42840 }, { "epoch": 0.7241661948742215, "grad_norm": 0.02436286211013794, "learning_rate": 8.049600568613563e-06, "loss": 0.0025, "step": 42850 }, { "epoch": 0.724335195153072, "grad_norm": 0.11259996145963669, "learning_rate": 8.048431688541028e-06, "loss": 0.0019, "step": 42860 }, { "epoch": 0.7245041954319225, "grad_norm": 0.05099119618535042, "learning_rate": 8.047262543240169e-06, "loss": 0.0016, "step": 42870 }, { "epoch": 0.7246731957107729, "grad_norm": 0.1111067682504654, "learning_rate": 8.046093132812703e-06, "loss": 0.002, "step": 42880 }, { "epoch": 0.7248421959896234, "grad_norm": 0.037433598190546036, "learning_rate": 8.044923457360376e-06, "loss": 0.0014, "step": 42890 }, { "epoch": 0.7250111962684739, "grad_norm": 0.10939794778823853, "learning_rate": 8.043753516984954e-06, "loss": 0.0018, "step": 42900 }, { "epoch": 0.7251801965473244, "grad_norm": 0.09101684391498566, "learning_rate": 8.04258331178823e-06, "loss": 0.0014, "step": 42910 }, { "epoch": 0.7253491968261747, "grad_norm": 0.006667679641395807, "learning_rate": 8.041412841872016e-06, "loss": 0.0011, "step": 42920 }, { "epoch": 0.7255181971050252, "grad_norm": 0.10800191760063171, "learning_rate": 8.040242107338147e-06, "loss": 0.002, "step": 42930 }, { "epoch": 0.7256871973838757, "grad_norm": 0.0515403188765049, "learning_rate": 8.039071108288488e-06, "loss": 0.002, "step": 42940 }, { "epoch": 0.7258561976627261, "grad_norm": 0.24728050827980042, "learning_rate": 8.037899844824916e-06, "loss": 0.001, "step": 42950 }, { "epoch": 0.7260251979415766, "grad_norm": 0.05296405404806137, "learning_rate": 8.036728317049339e-06, "loss": 0.0011, "step": 42960 }, { "epoch": 0.7261941982204271, "grad_norm": 0.0076844641007483006, "learning_rate": 8.035556525063684e-06, "loss": 0.0026, "step": 42970 }, { "epoch": 0.7263631984992776, "grad_norm": 0.07447580993175507, "learning_rate": 8.034384468969905e-06, "loss": 0.002, "step": 42980 }, { "epoch": 0.726532198778128, "grad_norm": 0.0800366997718811, "learning_rate": 8.033212148869973e-06, "loss": 0.0039, "step": 42990 }, { "epoch": 0.7267011990569785, "grad_norm": 0.0012301752576604486, "learning_rate": 8.03203956486589e-06, "loss": 0.0014, "step": 43000 }, { "epoch": 0.7268701993358289, "grad_norm": 0.026477621868252754, "learning_rate": 8.030866717059673e-06, "loss": 0.0015, "step": 43010 }, { "epoch": 0.7270391996146793, "grad_norm": 0.05160536244511604, "learning_rate": 8.029693605553367e-06, "loss": 0.0021, "step": 43020 }, { "epoch": 0.7272081998935298, "grad_norm": 0.055757176131010056, "learning_rate": 8.02852023044904e-06, "loss": 0.0017, "step": 43030 }, { "epoch": 0.7273772001723803, "grad_norm": 0.05494154617190361, "learning_rate": 8.027346591848777e-06, "loss": 0.0026, "step": 43040 }, { "epoch": 0.7275462004512308, "grad_norm": 4.639435768127441, "learning_rate": 8.026172689854694e-06, "loss": 0.0164, "step": 43050 }, { "epoch": 0.7277152007300812, "grad_norm": 0.10933412611484528, "learning_rate": 8.024998524568925e-06, "loss": 0.0013, "step": 43060 }, { "epoch": 0.7278842010089317, "grad_norm": 0.15748070180416107, "learning_rate": 8.023824096093628e-06, "loss": 0.0018, "step": 43070 }, { "epoch": 0.7280532012877822, "grad_norm": 0.08826566487550735, "learning_rate": 8.022649404530981e-06, "loss": 0.0015, "step": 43080 }, { "epoch": 0.7282222015666325, "grad_norm": 0.06301755458116531, "learning_rate": 8.021474449983195e-06, "loss": 0.0023, "step": 43090 }, { "epoch": 0.728391201845483, "grad_norm": 0.05063905566930771, "learning_rate": 8.020299232552491e-06, "loss": 0.0019, "step": 43100 }, { "epoch": 0.7285602021243335, "grad_norm": 0.024068308994174004, "learning_rate": 8.019123752341119e-06, "loss": 0.003, "step": 43110 }, { "epoch": 0.728729202403184, "grad_norm": 0.07127765566110611, "learning_rate": 8.017948009451352e-06, "loss": 0.0016, "step": 43120 }, { "epoch": 0.7288982026820344, "grad_norm": 0.05042535439133644, "learning_rate": 8.016772003985488e-06, "loss": 0.002, "step": 43130 }, { "epoch": 0.7290672029608849, "grad_norm": 0.11436517536640167, "learning_rate": 8.015595736045842e-06, "loss": 0.0013, "step": 43140 }, { "epoch": 0.7292362032397354, "grad_norm": 0.3207210302352905, "learning_rate": 8.014419205734756e-06, "loss": 0.0021, "step": 43150 }, { "epoch": 0.7294052035185858, "grad_norm": 0.01475997269153595, "learning_rate": 8.013242413154596e-06, "loss": 0.0011, "step": 43160 }, { "epoch": 0.7295742037974363, "grad_norm": 0.09151560068130493, "learning_rate": 8.012065358407743e-06, "loss": 0.0021, "step": 43170 }, { "epoch": 0.7297432040762867, "grad_norm": 0.25620636343955994, "learning_rate": 8.010888041596611e-06, "loss": 0.0017, "step": 43180 }, { "epoch": 0.7299122043551372, "grad_norm": 0.1682850569486618, "learning_rate": 8.009710462823632e-06, "loss": 0.0015, "step": 43190 }, { "epoch": 0.7300812046339876, "grad_norm": 0.04165157303214073, "learning_rate": 8.00853262219126e-06, "loss": 0.0027, "step": 43200 }, { "epoch": 0.7302502049128381, "grad_norm": 0.012091743759810925, "learning_rate": 8.007354519801975e-06, "loss": 0.0014, "step": 43210 }, { "epoch": 0.7304192051916886, "grad_norm": 0.03053271770477295, "learning_rate": 8.006176155758274e-06, "loss": 0.0013, "step": 43220 }, { "epoch": 0.730588205470539, "grad_norm": 0.006675877142697573, "learning_rate": 8.004997530162683e-06, "loss": 0.0022, "step": 43230 }, { "epoch": 0.7307572057493895, "grad_norm": 0.08256799727678299, "learning_rate": 8.003818643117746e-06, "loss": 0.0023, "step": 43240 }, { "epoch": 0.73092620602824, "grad_norm": 0.03869280219078064, "learning_rate": 8.002639494726034e-06, "loss": 0.0007, "step": 43250 }, { "epoch": 0.7310952063070905, "grad_norm": 0.064326211810112, "learning_rate": 8.001460085090138e-06, "loss": 0.0013, "step": 43260 }, { "epoch": 0.7312642065859408, "grad_norm": 0.128001868724823, "learning_rate": 8.000280414312672e-06, "loss": 0.002, "step": 43270 }, { "epoch": 0.7314332068647913, "grad_norm": 0.04683956503868103, "learning_rate": 7.999100482496273e-06, "loss": 0.002, "step": 43280 }, { "epoch": 0.7316022071436418, "grad_norm": 0.02895090915262699, "learning_rate": 7.997920289743601e-06, "loss": 0.0023, "step": 43290 }, { "epoch": 0.7317712074224922, "grad_norm": 0.16094285249710083, "learning_rate": 7.996739836157338e-06, "loss": 0.0021, "step": 43300 }, { "epoch": 0.7319402077013427, "grad_norm": 0.11246848106384277, "learning_rate": 7.995559121840192e-06, "loss": 0.0018, "step": 43310 }, { "epoch": 0.7321092079801932, "grad_norm": 0.03383997455239296, "learning_rate": 7.994378146894887e-06, "loss": 0.0018, "step": 43320 }, { "epoch": 0.7322782082590437, "grad_norm": 0.09549947082996368, "learning_rate": 7.993196911424174e-06, "loss": 0.004, "step": 43330 }, { "epoch": 0.7324472085378941, "grad_norm": 0.019019361585378647, "learning_rate": 7.99201541553083e-06, "loss": 0.0014, "step": 43340 }, { "epoch": 0.7326162088167445, "grad_norm": 0.21534746885299683, "learning_rate": 7.990833659317648e-06, "loss": 0.0062, "step": 43350 }, { "epoch": 0.732785209095595, "grad_norm": 0.03729574382305145, "learning_rate": 7.989651642887445e-06, "loss": 0.002, "step": 43360 }, { "epoch": 0.7329542093744454, "grad_norm": 0.15042176842689514, "learning_rate": 7.988469366343063e-06, "loss": 0.0018, "step": 43370 }, { "epoch": 0.7331232096532959, "grad_norm": 0.06487035751342773, "learning_rate": 7.987286829787369e-06, "loss": 0.0011, "step": 43380 }, { "epoch": 0.7332922099321464, "grad_norm": 0.07418685406446457, "learning_rate": 7.986104033323246e-06, "loss": 0.0017, "step": 43390 }, { "epoch": 0.7334612102109969, "grad_norm": 0.10712029784917831, "learning_rate": 7.984920977053606e-06, "loss": 0.0028, "step": 43400 }, { "epoch": 0.7336302104898473, "grad_norm": 0.04626644402742386, "learning_rate": 7.983737661081376e-06, "loss": 0.0016, "step": 43410 }, { "epoch": 0.7337992107686978, "grad_norm": 0.04742048308253288, "learning_rate": 7.982554085509512e-06, "loss": 0.0016, "step": 43420 }, { "epoch": 0.7339682110475483, "grad_norm": 0.0983252003788948, "learning_rate": 7.981370250440996e-06, "loss": 0.0025, "step": 43430 }, { "epoch": 0.7341372113263986, "grad_norm": 0.138091579079628, "learning_rate": 7.98018615597882e-06, "loss": 0.0016, "step": 43440 }, { "epoch": 0.7343062116052491, "grad_norm": 0.06672213226556778, "learning_rate": 7.97900180222601e-06, "loss": 0.002, "step": 43450 }, { "epoch": 0.7344752118840996, "grad_norm": 0.06542443484067917, "learning_rate": 7.977817189285609e-06, "loss": 0.0018, "step": 43460 }, { "epoch": 0.7346442121629501, "grad_norm": 0.04221392050385475, "learning_rate": 7.976632317260686e-06, "loss": 0.0019, "step": 43470 }, { "epoch": 0.7348132124418005, "grad_norm": 0.06752992421388626, "learning_rate": 7.975447186254327e-06, "loss": 0.0027, "step": 43480 }, { "epoch": 0.734982212720651, "grad_norm": 0.0305518489331007, "learning_rate": 7.97426179636965e-06, "loss": 0.0014, "step": 43490 }, { "epoch": 0.7351512129995015, "grad_norm": 0.0345865860581398, "learning_rate": 7.973076147709782e-06, "loss": 0.0018, "step": 43500 }, { "epoch": 0.735320213278352, "grad_norm": 0.11416380852460861, "learning_rate": 7.971890240377886e-06, "loss": 0.0019, "step": 43510 }, { "epoch": 0.7354892135572023, "grad_norm": 0.2210749387741089, "learning_rate": 7.97070407447714e-06, "loss": 0.0036, "step": 43520 }, { "epoch": 0.7356582138360528, "grad_norm": 0.04454043507575989, "learning_rate": 7.969517650110747e-06, "loss": 0.0021, "step": 43530 }, { "epoch": 0.7358272141149033, "grad_norm": 0.1723904013633728, "learning_rate": 7.968330967381931e-06, "loss": 0.0032, "step": 43540 }, { "epoch": 0.7359962143937537, "grad_norm": 0.036471135914325714, "learning_rate": 7.967144026393939e-06, "loss": 0.0014, "step": 43550 }, { "epoch": 0.7361652146726042, "grad_norm": 0.026985328644514084, "learning_rate": 7.965956827250038e-06, "loss": 0.0011, "step": 43560 }, { "epoch": 0.7363342149514547, "grad_norm": 0.14341233670711517, "learning_rate": 7.964769370053525e-06, "loss": 0.0028, "step": 43570 }, { "epoch": 0.7365032152303052, "grad_norm": 0.04602760449051857, "learning_rate": 7.96358165490771e-06, "loss": 0.0025, "step": 43580 }, { "epoch": 0.7366722155091556, "grad_norm": 0.09611208736896515, "learning_rate": 7.962393681915934e-06, "loss": 0.0012, "step": 43590 }, { "epoch": 0.7368412157880061, "grad_norm": 0.04702535644173622, "learning_rate": 7.961205451181555e-06, "loss": 0.0011, "step": 43600 }, { "epoch": 0.7370102160668565, "grad_norm": 0.08078736811876297, "learning_rate": 7.960016962807952e-06, "loss": 0.0015, "step": 43610 }, { "epoch": 0.7371792163457069, "grad_norm": 0.029118545353412628, "learning_rate": 7.958828216898535e-06, "loss": 0.0014, "step": 43620 }, { "epoch": 0.7373482166245574, "grad_norm": 0.03096819296479225, "learning_rate": 7.957639213556725e-06, "loss": 0.0018, "step": 43630 }, { "epoch": 0.7375172169034079, "grad_norm": 0.10239429771900177, "learning_rate": 7.956449952885973e-06, "loss": 0.0026, "step": 43640 }, { "epoch": 0.7376862171822584, "grad_norm": 0.16455641388893127, "learning_rate": 7.955260434989752e-06, "loss": 0.0022, "step": 43650 }, { "epoch": 0.7378552174611088, "grad_norm": 0.08374723047018051, "learning_rate": 7.954070659971554e-06, "loss": 0.0012, "step": 43660 }, { "epoch": 0.7380242177399593, "grad_norm": 0.03553815931081772, "learning_rate": 7.952880627934896e-06, "loss": 0.0044, "step": 43670 }, { "epoch": 0.7381932180188098, "grad_norm": 0.020006325095891953, "learning_rate": 7.951690338983317e-06, "loss": 0.0016, "step": 43680 }, { "epoch": 0.7383622182976602, "grad_norm": 0.19382953643798828, "learning_rate": 7.950499793220377e-06, "loss": 0.005, "step": 43690 }, { "epoch": 0.7385312185765106, "grad_norm": 0.0368359349668026, "learning_rate": 7.949308990749658e-06, "loss": 0.0026, "step": 43700 }, { "epoch": 0.7387002188553611, "grad_norm": 0.059048112481832504, "learning_rate": 7.948117931674769e-06, "loss": 0.0013, "step": 43710 }, { "epoch": 0.7388692191342116, "grad_norm": 0.025906303897500038, "learning_rate": 7.946926616099334e-06, "loss": 0.0017, "step": 43720 }, { "epoch": 0.739038219413062, "grad_norm": 0.09001726657152176, "learning_rate": 7.945735044127006e-06, "loss": 0.0021, "step": 43730 }, { "epoch": 0.7392072196919125, "grad_norm": 0.06536002457141876, "learning_rate": 7.944543215861458e-06, "loss": 0.0016, "step": 43740 }, { "epoch": 0.739376219970763, "grad_norm": 0.021984895691275597, "learning_rate": 7.943351131406381e-06, "loss": 0.0013, "step": 43750 }, { "epoch": 0.7395452202496134, "grad_norm": 0.07657831907272339, "learning_rate": 7.942158790865496e-06, "loss": 0.0015, "step": 43760 }, { "epoch": 0.7397142205284639, "grad_norm": 0.32660388946533203, "learning_rate": 7.94096619434254e-06, "loss": 0.0037, "step": 43770 }, { "epoch": 0.7398832208073143, "grad_norm": 0.024858536198735237, "learning_rate": 7.93977334194128e-06, "loss": 0.0013, "step": 43780 }, { "epoch": 0.7400522210861648, "grad_norm": 0.08623042702674866, "learning_rate": 7.938580233765492e-06, "loss": 0.0017, "step": 43790 }, { "epoch": 0.7402212213650152, "grad_norm": 0.05644140765070915, "learning_rate": 7.937386869918986e-06, "loss": 0.002, "step": 43800 }, { "epoch": 0.7403902216438657, "grad_norm": 0.03610311076045036, "learning_rate": 7.93619325050559e-06, "loss": 0.0021, "step": 43810 }, { "epoch": 0.7405592219227162, "grad_norm": 0.040945619344711304, "learning_rate": 7.934999375629158e-06, "loss": 0.0009, "step": 43820 }, { "epoch": 0.7407282222015666, "grad_norm": 0.16894613206386566, "learning_rate": 7.933805245393558e-06, "loss": 0.0033, "step": 43830 }, { "epoch": 0.7408972224804171, "grad_norm": 0.12098273634910583, "learning_rate": 7.932610859902688e-06, "loss": 0.0018, "step": 43840 }, { "epoch": 0.7410662227592676, "grad_norm": 0.027642671018838882, "learning_rate": 7.931416219260462e-06, "loss": 0.0014, "step": 43850 }, { "epoch": 0.7412352230381181, "grad_norm": 0.03056027740240097, "learning_rate": 7.930221323570824e-06, "loss": 0.0017, "step": 43860 }, { "epoch": 0.7414042233169684, "grad_norm": 0.05739610642194748, "learning_rate": 7.929026172937732e-06, "loss": 0.001, "step": 43870 }, { "epoch": 0.7415732235958189, "grad_norm": 0.09340202063322067, "learning_rate": 7.927830767465174e-06, "loss": 0.0025, "step": 43880 }, { "epoch": 0.7417422238746694, "grad_norm": 0.06758419424295425, "learning_rate": 7.926635107257152e-06, "loss": 0.0012, "step": 43890 }, { "epoch": 0.7419112241535198, "grad_norm": 0.06023062393069267, "learning_rate": 7.925439192417695e-06, "loss": 0.0021, "step": 43900 }, { "epoch": 0.7420802244323703, "grad_norm": 0.04593726247549057, "learning_rate": 7.924243023050855e-06, "loss": 0.0014, "step": 43910 }, { "epoch": 0.7422492247112208, "grad_norm": 0.09606166929006577, "learning_rate": 7.923046599260704e-06, "loss": 0.0034, "step": 43920 }, { "epoch": 0.7424182249900713, "grad_norm": 0.11645695567131042, "learning_rate": 7.921849921151337e-06, "loss": 0.0014, "step": 43930 }, { "epoch": 0.7425872252689217, "grad_norm": 0.056003108620643616, "learning_rate": 7.920652988826868e-06, "loss": 0.0019, "step": 43940 }, { "epoch": 0.7427562255477722, "grad_norm": 0.02698294259607792, "learning_rate": 7.919455802391439e-06, "loss": 0.002, "step": 43950 }, { "epoch": 0.7429252258266226, "grad_norm": 0.042158737778663635, "learning_rate": 7.918258361949211e-06, "loss": 0.0027, "step": 43960 }, { "epoch": 0.743094226105473, "grad_norm": 0.002662374172359705, "learning_rate": 7.917060667604365e-06, "loss": 0.0015, "step": 43970 }, { "epoch": 0.7432632263843235, "grad_norm": 0.04010889679193497, "learning_rate": 7.91586271946111e-06, "loss": 0.0015, "step": 43980 }, { "epoch": 0.743432226663174, "grad_norm": 0.019668444991111755, "learning_rate": 7.914664517623668e-06, "loss": 0.0023, "step": 43990 }, { "epoch": 0.7436012269420245, "grad_norm": 0.07019349187612534, "learning_rate": 7.913466062196291e-06, "loss": 0.0026, "step": 44000 }, { "epoch": 0.7437702272208749, "grad_norm": 0.05831541866064072, "learning_rate": 7.912267353283251e-06, "loss": 0.0016, "step": 44010 }, { "epoch": 0.7439392274997254, "grad_norm": 0.015604469925165176, "learning_rate": 7.91106839098884e-06, "loss": 0.0017, "step": 44020 }, { "epoch": 0.7441082277785759, "grad_norm": 0.15227645635604858, "learning_rate": 7.909869175417378e-06, "loss": 0.0025, "step": 44030 }, { "epoch": 0.7442772280574262, "grad_norm": 0.12750785052776337, "learning_rate": 7.908669706673197e-06, "loss": 0.0039, "step": 44040 }, { "epoch": 0.7444462283362767, "grad_norm": 0.07931360602378845, "learning_rate": 7.907469984860658e-06, "loss": 0.0019, "step": 44050 }, { "epoch": 0.7446152286151272, "grad_norm": 0.07363370805978775, "learning_rate": 7.906270010084145e-06, "loss": 0.0033, "step": 44060 }, { "epoch": 0.7447842288939777, "grad_norm": 0.01361178420484066, "learning_rate": 7.905069782448059e-06, "loss": 0.0017, "step": 44070 }, { "epoch": 0.7449532291728281, "grad_norm": 0.1368517130613327, "learning_rate": 7.903869302056825e-06, "loss": 0.0021, "step": 44080 }, { "epoch": 0.7451222294516786, "grad_norm": 0.1019308865070343, "learning_rate": 7.902668569014895e-06, "loss": 0.0013, "step": 44090 }, { "epoch": 0.7452912297305291, "grad_norm": 0.10239925980567932, "learning_rate": 7.901467583426735e-06, "loss": 0.0024, "step": 44100 }, { "epoch": 0.7454602300093796, "grad_norm": 0.0578392818570137, "learning_rate": 7.900266345396837e-06, "loss": 0.002, "step": 44110 }, { "epoch": 0.74562923028823, "grad_norm": 0.016515590250492096, "learning_rate": 7.899064855029713e-06, "loss": 0.0008, "step": 44120 }, { "epoch": 0.7457982305670804, "grad_norm": 0.04481057822704315, "learning_rate": 7.897863112429902e-06, "loss": 0.0023, "step": 44130 }, { "epoch": 0.7459672308459309, "grad_norm": 0.01487799547612667, "learning_rate": 7.89666111770196e-06, "loss": 0.0008, "step": 44140 }, { "epoch": 0.7461362311247813, "grad_norm": 0.043098077178001404, "learning_rate": 7.895458870950465e-06, "loss": 0.0021, "step": 44150 }, { "epoch": 0.7463052314036318, "grad_norm": 0.07052203267812729, "learning_rate": 7.89425637228002e-06, "loss": 0.002, "step": 44160 }, { "epoch": 0.7464742316824823, "grad_norm": 0.08470898866653442, "learning_rate": 7.893053621795249e-06, "loss": 0.0019, "step": 44170 }, { "epoch": 0.7466432319613328, "grad_norm": 0.09704260528087616, "learning_rate": 7.891850619600794e-06, "loss": 0.0032, "step": 44180 }, { "epoch": 0.7468122322401832, "grad_norm": 0.07925360649824142, "learning_rate": 7.890647365801325e-06, "loss": 0.0015, "step": 44190 }, { "epoch": 0.7469812325190337, "grad_norm": 0.01802569441497326, "learning_rate": 7.889443860501529e-06, "loss": 0.002, "step": 44200 }, { "epoch": 0.7471502327978841, "grad_norm": 0.032090552151203156, "learning_rate": 7.888240103806117e-06, "loss": 0.0013, "step": 44210 }, { "epoch": 0.7473192330767345, "grad_norm": 0.2693747282028198, "learning_rate": 7.887036095819822e-06, "loss": 0.002, "step": 44220 }, { "epoch": 0.747488233355585, "grad_norm": 0.31655386090278625, "learning_rate": 7.8858318366474e-06, "loss": 0.0022, "step": 44230 }, { "epoch": 0.7476572336344355, "grad_norm": 0.05500679463148117, "learning_rate": 7.884627326393624e-06, "loss": 0.0023, "step": 44240 }, { "epoch": 0.747826233913286, "grad_norm": 0.03424282744526863, "learning_rate": 7.883422565163296e-06, "loss": 0.0014, "step": 44250 }, { "epoch": 0.7479952341921364, "grad_norm": 0.08011564612388611, "learning_rate": 7.882217553061234e-06, "loss": 0.0016, "step": 44260 }, { "epoch": 0.7481642344709869, "grad_norm": 0.08712951093912125, "learning_rate": 7.881012290192279e-06, "loss": 0.0026, "step": 44270 }, { "epoch": 0.7483332347498374, "grad_norm": 0.09755299985408783, "learning_rate": 7.879806776661298e-06, "loss": 0.0023, "step": 44280 }, { "epoch": 0.7485022350286878, "grad_norm": 0.1405857801437378, "learning_rate": 7.878601012573173e-06, "loss": 0.003, "step": 44290 }, { "epoch": 0.7486712353075382, "grad_norm": 0.007532436866313219, "learning_rate": 7.877394998032812e-06, "loss": 0.002, "step": 44300 }, { "epoch": 0.7488402355863887, "grad_norm": 0.08370806276798248, "learning_rate": 7.876188733145147e-06, "loss": 0.0021, "step": 44310 }, { "epoch": 0.7490092358652392, "grad_norm": 0.001437056460417807, "learning_rate": 7.874982218015126e-06, "loss": 0.0012, "step": 44320 }, { "epoch": 0.7491782361440896, "grad_norm": 0.04407551512122154, "learning_rate": 7.87377545274772e-06, "loss": 0.0021, "step": 44330 }, { "epoch": 0.7493472364229401, "grad_norm": 0.010266945697367191, "learning_rate": 7.87256843744793e-06, "loss": 0.0031, "step": 44340 }, { "epoch": 0.7495162367017906, "grad_norm": 0.17960795760154724, "learning_rate": 7.871361172220765e-06, "loss": 0.0016, "step": 44350 }, { "epoch": 0.749685236980641, "grad_norm": 0.038601137697696686, "learning_rate": 7.870153657171267e-06, "loss": 0.0031, "step": 44360 }, { "epoch": 0.7498542372594915, "grad_norm": 0.00812953058630228, "learning_rate": 7.868945892404496e-06, "loss": 0.0017, "step": 44370 }, { "epoch": 0.750023237538342, "grad_norm": 0.036118436604738235, "learning_rate": 7.86773787802553e-06, "loss": 0.0011, "step": 44380 }, { "epoch": 0.7501922378171924, "grad_norm": 0.03733135759830475, "learning_rate": 7.866529614139476e-06, "loss": 0.0012, "step": 44390 }, { "epoch": 0.7503612380960428, "grad_norm": 0.058701254427433014, "learning_rate": 7.865321100851456e-06, "loss": 0.0027, "step": 44400 }, { "epoch": 0.7505302383748933, "grad_norm": 0.05390680581331253, "learning_rate": 7.864112338266617e-06, "loss": 0.0093, "step": 44410 }, { "epoch": 0.7506992386537438, "grad_norm": 0.1075315922498703, "learning_rate": 7.862903326490126e-06, "loss": 0.0019, "step": 44420 }, { "epoch": 0.7508682389325942, "grad_norm": 0.12688098847866058, "learning_rate": 7.861694065627178e-06, "loss": 0.0012, "step": 44430 }, { "epoch": 0.7510372392114447, "grad_norm": 0.030170833691954613, "learning_rate": 7.860484555782977e-06, "loss": 0.0011, "step": 44440 }, { "epoch": 0.7512062394902952, "grad_norm": 0.05321137607097626, "learning_rate": 7.859274797062764e-06, "loss": 0.002, "step": 44450 }, { "epoch": 0.7513752397691457, "grad_norm": 0.052268922328948975, "learning_rate": 7.858064789571787e-06, "loss": 0.0017, "step": 44460 }, { "epoch": 0.751544240047996, "grad_norm": 0.004613368771970272, "learning_rate": 7.856854533415328e-06, "loss": 0.0018, "step": 44470 }, { "epoch": 0.7517132403268465, "grad_norm": 0.029189545661211014, "learning_rate": 7.855644028698683e-06, "loss": 0.002, "step": 44480 }, { "epoch": 0.751882240605697, "grad_norm": 0.09404749423265457, "learning_rate": 7.85443327552717e-06, "loss": 0.0028, "step": 44490 }, { "epoch": 0.7520512408845474, "grad_norm": 0.05537321791052818, "learning_rate": 7.853222274006134e-06, "loss": 0.0017, "step": 44500 }, { "epoch": 0.7522202411633979, "grad_norm": 0.11430644989013672, "learning_rate": 7.852011024240933e-06, "loss": 0.0033, "step": 44510 }, { "epoch": 0.7523892414422484, "grad_norm": 0.059698112308979034, "learning_rate": 7.850799526336956e-06, "loss": 0.0015, "step": 44520 }, { "epoch": 0.7525582417210989, "grad_norm": 0.03818683326244354, "learning_rate": 7.849587780399608e-06, "loss": 0.001, "step": 44530 }, { "epoch": 0.7527272419999493, "grad_norm": 0.12590816617012024, "learning_rate": 7.848375786534316e-06, "loss": 0.0022, "step": 44540 }, { "epoch": 0.7528962422787998, "grad_norm": 0.07889236509799957, "learning_rate": 7.847163544846532e-06, "loss": 0.0021, "step": 44550 }, { "epoch": 0.7530652425576502, "grad_norm": 0.013532519340515137, "learning_rate": 7.845951055441723e-06, "loss": 0.0017, "step": 44560 }, { "epoch": 0.7532342428365006, "grad_norm": 0.08118347078561783, "learning_rate": 7.844738318425384e-06, "loss": 0.0022, "step": 44570 }, { "epoch": 0.7534032431153511, "grad_norm": 0.04712570086121559, "learning_rate": 7.843525333903028e-06, "loss": 0.0017, "step": 44580 }, { "epoch": 0.7535722433942016, "grad_norm": 0.056935735046863556, "learning_rate": 7.842312101980189e-06, "loss": 0.0019, "step": 44590 }, { "epoch": 0.7537412436730521, "grad_norm": 0.35753145813941956, "learning_rate": 7.84109862276243e-06, "loss": 0.0033, "step": 44600 }, { "epoch": 0.7539102439519025, "grad_norm": 0.06886877864599228, "learning_rate": 7.83988489635532e-06, "loss": 0.0021, "step": 44610 }, { "epoch": 0.754079244230753, "grad_norm": 0.05907822027802467, "learning_rate": 7.838670922864468e-06, "loss": 0.0022, "step": 44620 }, { "epoch": 0.7542482445096035, "grad_norm": 0.04054168239235878, "learning_rate": 7.83745670239549e-06, "loss": 0.0041, "step": 44630 }, { "epoch": 0.754417244788454, "grad_norm": 0.027725081890821457, "learning_rate": 7.836242235054033e-06, "loss": 0.0016, "step": 44640 }, { "epoch": 0.7545862450673043, "grad_norm": 0.23544612526893616, "learning_rate": 7.83502752094576e-06, "loss": 0.0014, "step": 44650 }, { "epoch": 0.7547552453461548, "grad_norm": 0.41741564869880676, "learning_rate": 7.833812560176355e-06, "loss": 0.0026, "step": 44660 }, { "epoch": 0.7549242456250053, "grad_norm": 0.08145532011985779, "learning_rate": 7.832597352851527e-06, "loss": 0.0021, "step": 44670 }, { "epoch": 0.7550932459038557, "grad_norm": 0.08561309427022934, "learning_rate": 7.831381899077007e-06, "loss": 0.003, "step": 44680 }, { "epoch": 0.7552622461827062, "grad_norm": 0.09209153056144714, "learning_rate": 7.830166198958543e-06, "loss": 0.0019, "step": 44690 }, { "epoch": 0.7554312464615567, "grad_norm": 0.07907470315694809, "learning_rate": 7.828950252601905e-06, "loss": 0.0027, "step": 44700 }, { "epoch": 0.7556002467404072, "grad_norm": 0.06275120377540588, "learning_rate": 7.827734060112889e-06, "loss": 0.0015, "step": 44710 }, { "epoch": 0.7557692470192576, "grad_norm": 0.07097408175468445, "learning_rate": 7.826517621597309e-06, "loss": 0.0029, "step": 44720 }, { "epoch": 0.755938247298108, "grad_norm": 0.044329412281513214, "learning_rate": 7.825300937161003e-06, "loss": 0.0014, "step": 44730 }, { "epoch": 0.7561072475769585, "grad_norm": 0.026017211377620697, "learning_rate": 7.824084006909827e-06, "loss": 0.0012, "step": 44740 }, { "epoch": 0.7562762478558089, "grad_norm": 0.08053651452064514, "learning_rate": 7.822866830949656e-06, "loss": 0.0026, "step": 44750 }, { "epoch": 0.7564452481346594, "grad_norm": 0.026170331984758377, "learning_rate": 7.821649409386396e-06, "loss": 0.0027, "step": 44760 }, { "epoch": 0.7566142484135099, "grad_norm": 0.034089915454387665, "learning_rate": 7.820431742325967e-06, "loss": 0.0103, "step": 44770 }, { "epoch": 0.7567832486923604, "grad_norm": 0.10103444755077362, "learning_rate": 7.819213829874308e-06, "loss": 0.0023, "step": 44780 }, { "epoch": 0.7569522489712108, "grad_norm": 0.042457886040210724, "learning_rate": 7.817995672137388e-06, "loss": 0.0017, "step": 44790 }, { "epoch": 0.7571212492500613, "grad_norm": 0.07881304621696472, "learning_rate": 7.816777269221192e-06, "loss": 0.0014, "step": 44800 }, { "epoch": 0.7572902495289118, "grad_norm": 0.04411272704601288, "learning_rate": 7.815558621231724e-06, "loss": 0.0042, "step": 44810 }, { "epoch": 0.7574592498077621, "grad_norm": 0.17249134182929993, "learning_rate": 7.814339728275017e-06, "loss": 0.0026, "step": 44820 }, { "epoch": 0.7576282500866126, "grad_norm": 0.2123340666294098, "learning_rate": 7.813120590457116e-06, "loss": 0.0019, "step": 44830 }, { "epoch": 0.7577972503654631, "grad_norm": 0.09830141812562943, "learning_rate": 7.811901207884094e-06, "loss": 0.0017, "step": 44840 }, { "epoch": 0.7579662506443136, "grad_norm": 0.14184986054897308, "learning_rate": 7.810681580662044e-06, "loss": 0.0033, "step": 44850 }, { "epoch": 0.758135250923164, "grad_norm": 0.03080633282661438, "learning_rate": 7.809461708897076e-06, "loss": 0.0011, "step": 44860 }, { "epoch": 0.7583042512020145, "grad_norm": 0.0676361694931984, "learning_rate": 7.80824159269533e-06, "loss": 0.0017, "step": 44870 }, { "epoch": 0.758473251480865, "grad_norm": 0.010642868466675282, "learning_rate": 7.807021232162956e-06, "loss": 0.0008, "step": 44880 }, { "epoch": 0.7586422517597154, "grad_norm": 0.07253513485193253, "learning_rate": 7.805800627406138e-06, "loss": 0.001, "step": 44890 }, { "epoch": 0.7588112520385658, "grad_norm": 0.07711581885814667, "learning_rate": 7.80457977853107e-06, "loss": 0.0021, "step": 44900 }, { "epoch": 0.7589802523174163, "grad_norm": 0.08204604685306549, "learning_rate": 7.80335868564397e-06, "loss": 0.0038, "step": 44910 }, { "epoch": 0.7591492525962668, "grad_norm": 0.02592233009636402, "learning_rate": 7.802137348851084e-06, "loss": 0.0026, "step": 44920 }, { "epoch": 0.7593182528751172, "grad_norm": 0.0777740627527237, "learning_rate": 7.800915768258673e-06, "loss": 0.0028, "step": 44930 }, { "epoch": 0.7594872531539677, "grad_norm": 0.01917082443833351, "learning_rate": 7.799693943973019e-06, "loss": 0.0019, "step": 44940 }, { "epoch": 0.7596562534328182, "grad_norm": 0.08356926590204239, "learning_rate": 7.798471876100423e-06, "loss": 0.0043, "step": 44950 }, { "epoch": 0.7598252537116686, "grad_norm": 0.07758168131113052, "learning_rate": 7.79724956474722e-06, "loss": 0.0024, "step": 44960 }, { "epoch": 0.7599942539905191, "grad_norm": 0.014164702966809273, "learning_rate": 7.796027010019751e-06, "loss": 0.0007, "step": 44970 }, { "epoch": 0.7601632542693696, "grad_norm": 0.13659994304180145, "learning_rate": 7.794804212024383e-06, "loss": 0.0012, "step": 44980 }, { "epoch": 0.76033225454822, "grad_norm": 0.23086796700954437, "learning_rate": 7.793581170867509e-06, "loss": 0.003, "step": 44990 }, { "epoch": 0.7605012548270704, "grad_norm": 0.014240220189094543, "learning_rate": 7.792357886655537e-06, "loss": 0.0017, "step": 45000 }, { "epoch": 0.7606702551059209, "grad_norm": 0.01912778429687023, "learning_rate": 7.791134359494902e-06, "loss": 0.0015, "step": 45010 }, { "epoch": 0.7608392553847714, "grad_norm": 0.05160927772521973, "learning_rate": 7.789910589492052e-06, "loss": 0.0029, "step": 45020 }, { "epoch": 0.7610082556636218, "grad_norm": 0.0658368393778801, "learning_rate": 7.788686576753462e-06, "loss": 0.0014, "step": 45030 }, { "epoch": 0.7611772559424723, "grad_norm": 0.1321316808462143, "learning_rate": 7.78746232138563e-06, "loss": 0.0022, "step": 45040 }, { "epoch": 0.7613462562213228, "grad_norm": 0.03437534719705582, "learning_rate": 7.78623782349507e-06, "loss": 0.0014, "step": 45050 }, { "epoch": 0.7615152565001733, "grad_norm": 0.06349821388721466, "learning_rate": 7.78501308318832e-06, "loss": 0.0023, "step": 45060 }, { "epoch": 0.7616842567790237, "grad_norm": 0.07751069217920303, "learning_rate": 7.783788100571939e-06, "loss": 0.0021, "step": 45070 }, { "epoch": 0.7618532570578741, "grad_norm": 0.02845473401248455, "learning_rate": 7.782562875752504e-06, "loss": 0.0018, "step": 45080 }, { "epoch": 0.7620222573367246, "grad_norm": 0.07686923444271088, "learning_rate": 7.781337408836618e-06, "loss": 0.0017, "step": 45090 }, { "epoch": 0.762191257615575, "grad_norm": 0.04277089238166809, "learning_rate": 7.7801116999309e-06, "loss": 0.0024, "step": 45100 }, { "epoch": 0.7623602578944255, "grad_norm": 0.04295666515827179, "learning_rate": 7.778885749141997e-06, "loss": 0.0014, "step": 45110 }, { "epoch": 0.762529258173276, "grad_norm": 0.04123243689537048, "learning_rate": 7.777659556576567e-06, "loss": 0.0014, "step": 45120 }, { "epoch": 0.7626982584521265, "grad_norm": 0.14625336229801178, "learning_rate": 7.7764331223413e-06, "loss": 0.0022, "step": 45130 }, { "epoch": 0.7628672587309769, "grad_norm": 0.09272245317697525, "learning_rate": 7.775206446542898e-06, "loss": 0.0015, "step": 45140 }, { "epoch": 0.7630362590098274, "grad_norm": 0.03592293709516525, "learning_rate": 7.77397952928809e-06, "loss": 0.0008, "step": 45150 }, { "epoch": 0.7632052592886778, "grad_norm": 0.0790335014462471, "learning_rate": 7.77275237068362e-06, "loss": 0.0014, "step": 45160 }, { "epoch": 0.7633742595675282, "grad_norm": 0.037326548248529434, "learning_rate": 7.771524970836261e-06, "loss": 0.0025, "step": 45170 }, { "epoch": 0.7635432598463787, "grad_norm": 0.08837980777025223, "learning_rate": 7.770297329852801e-06, "loss": 0.0018, "step": 45180 }, { "epoch": 0.7637122601252292, "grad_norm": 0.10220792889595032, "learning_rate": 7.769069447840051e-06, "loss": 0.0025, "step": 45190 }, { "epoch": 0.7638812604040797, "grad_norm": 0.04530085623264313, "learning_rate": 7.767841324904843e-06, "loss": 0.0024, "step": 45200 }, { "epoch": 0.7640502606829301, "grad_norm": 0.1346471905708313, "learning_rate": 7.766612961154029e-06, "loss": 0.003, "step": 45210 }, { "epoch": 0.7642192609617806, "grad_norm": 0.0872393548488617, "learning_rate": 7.765384356694483e-06, "loss": 0.002, "step": 45220 }, { "epoch": 0.7643882612406311, "grad_norm": 0.019787881523370743, "learning_rate": 7.7641555116331e-06, "loss": 0.0019, "step": 45230 }, { "epoch": 0.7645572615194816, "grad_norm": 0.12332823127508163, "learning_rate": 7.762926426076793e-06, "loss": 0.0015, "step": 45240 }, { "epoch": 0.7647262617983319, "grad_norm": 0.02287132292985916, "learning_rate": 7.761697100132502e-06, "loss": 0.002, "step": 45250 }, { "epoch": 0.7648952620771824, "grad_norm": 0.038460344076156616, "learning_rate": 7.760467533907182e-06, "loss": 0.0015, "step": 45260 }, { "epoch": 0.7650642623560329, "grad_norm": 0.07872225344181061, "learning_rate": 7.759237727507811e-06, "loss": 0.0019, "step": 45270 }, { "epoch": 0.7652332626348833, "grad_norm": 0.06950631737709045, "learning_rate": 7.758007681041391e-06, "loss": 0.0018, "step": 45280 }, { "epoch": 0.7654022629137338, "grad_norm": 0.14089111983776093, "learning_rate": 7.756777394614937e-06, "loss": 0.0015, "step": 45290 }, { "epoch": 0.7655712631925843, "grad_norm": 0.02855812944471836, "learning_rate": 7.755546868335494e-06, "loss": 0.0011, "step": 45300 }, { "epoch": 0.7657402634714348, "grad_norm": 0.05885821208357811, "learning_rate": 7.754316102310121e-06, "loss": 0.0012, "step": 45310 }, { "epoch": 0.7659092637502852, "grad_norm": 0.04385798051953316, "learning_rate": 7.753085096645906e-06, "loss": 0.0013, "step": 45320 }, { "epoch": 0.7660782640291357, "grad_norm": 0.04075145721435547, "learning_rate": 7.751853851449947e-06, "loss": 0.0013, "step": 45330 }, { "epoch": 0.7662472643079861, "grad_norm": 0.10517627000808716, "learning_rate": 7.750622366829368e-06, "loss": 0.0022, "step": 45340 }, { "epoch": 0.7664162645868365, "grad_norm": 0.050586577504873276, "learning_rate": 7.749390642891317e-06, "loss": 0.0019, "step": 45350 }, { "epoch": 0.766585264865687, "grad_norm": 0.09203847497701645, "learning_rate": 7.748158679742961e-06, "loss": 0.004, "step": 45360 }, { "epoch": 0.7667542651445375, "grad_norm": 0.09800178557634354, "learning_rate": 7.746926477491481e-06, "loss": 0.0024, "step": 45370 }, { "epoch": 0.766923265423388, "grad_norm": 0.042162712663412094, "learning_rate": 7.74569403624409e-06, "loss": 0.0017, "step": 45380 }, { "epoch": 0.7670922657022384, "grad_norm": 0.06394949555397034, "learning_rate": 7.744461356108016e-06, "loss": 0.0018, "step": 45390 }, { "epoch": 0.7672612659810889, "grad_norm": 0.21785315871238708, "learning_rate": 7.743228437190507e-06, "loss": 0.0023, "step": 45400 }, { "epoch": 0.7674302662599394, "grad_norm": 0.1498011201620102, "learning_rate": 7.741995279598831e-06, "loss": 0.0017, "step": 45410 }, { "epoch": 0.7675992665387897, "grad_norm": 0.03804841265082359, "learning_rate": 7.74076188344028e-06, "loss": 0.0031, "step": 45420 }, { "epoch": 0.7677682668176402, "grad_norm": 0.03907634690403938, "learning_rate": 7.739528248822168e-06, "loss": 0.0018, "step": 45430 }, { "epoch": 0.7679372670964907, "grad_norm": 0.03149436414241791, "learning_rate": 7.738294375851823e-06, "loss": 0.0016, "step": 45440 }, { "epoch": 0.7681062673753412, "grad_norm": 0.05602965131402016, "learning_rate": 7.737060264636601e-06, "loss": 0.0011, "step": 45450 }, { "epoch": 0.7682752676541916, "grad_norm": 0.045581117272377014, "learning_rate": 7.735825915283874e-06, "loss": 0.0024, "step": 45460 }, { "epoch": 0.7684442679330421, "grad_norm": 0.17535948753356934, "learning_rate": 7.734591327901037e-06, "loss": 0.0015, "step": 45470 }, { "epoch": 0.7686132682118926, "grad_norm": 0.06803618371486664, "learning_rate": 7.733356502595506e-06, "loss": 0.002, "step": 45480 }, { "epoch": 0.768782268490743, "grad_norm": 0.13037042319774628, "learning_rate": 7.732121439474716e-06, "loss": 0.0021, "step": 45490 }, { "epoch": 0.7689512687695935, "grad_norm": 0.048493217676877975, "learning_rate": 7.730886138646121e-06, "loss": 0.0013, "step": 45500 }, { "epoch": 0.7691202690484439, "grad_norm": 0.07940898835659027, "learning_rate": 7.729650600217204e-06, "loss": 0.0019, "step": 45510 }, { "epoch": 0.7692892693272944, "grad_norm": 0.16000744700431824, "learning_rate": 7.728414824295456e-06, "loss": 0.0046, "step": 45520 }, { "epoch": 0.7694582696061448, "grad_norm": 0.10460644215345383, "learning_rate": 7.7271788109884e-06, "loss": 0.0064, "step": 45530 }, { "epoch": 0.7696272698849953, "grad_norm": 0.16053324937820435, "learning_rate": 7.725942560403574e-06, "loss": 0.0022, "step": 45540 }, { "epoch": 0.7697962701638458, "grad_norm": 0.024782394990324974, "learning_rate": 7.724706072648537e-06, "loss": 0.0029, "step": 45550 }, { "epoch": 0.7699652704426962, "grad_norm": 0.03775293380022049, "learning_rate": 7.723469347830871e-06, "loss": 0.0029, "step": 45560 }, { "epoch": 0.7701342707215467, "grad_norm": 0.05658729374408722, "learning_rate": 7.722232386058178e-06, "loss": 0.0029, "step": 45570 }, { "epoch": 0.7703032710003972, "grad_norm": 0.3801966905593872, "learning_rate": 7.720995187438077e-06, "loss": 0.0057, "step": 45580 }, { "epoch": 0.7704722712792476, "grad_norm": 0.055622778832912445, "learning_rate": 7.71975775207821e-06, "loss": 0.004, "step": 45590 }, { "epoch": 0.770641271558098, "grad_norm": 0.06273094564676285, "learning_rate": 7.71852008008624e-06, "loss": 0.0015, "step": 45600 }, { "epoch": 0.7708102718369485, "grad_norm": 0.048402734100818634, "learning_rate": 7.717282171569853e-06, "loss": 0.0027, "step": 45610 }, { "epoch": 0.770979272115799, "grad_norm": 0.16744104027748108, "learning_rate": 7.716044026636753e-06, "loss": 0.0022, "step": 45620 }, { "epoch": 0.7711482723946494, "grad_norm": 0.11811065673828125, "learning_rate": 7.71480564539466e-06, "loss": 0.0011, "step": 45630 }, { "epoch": 0.7713172726734999, "grad_norm": 0.04153404012322426, "learning_rate": 7.713567027951325e-06, "loss": 0.0019, "step": 45640 }, { "epoch": 0.7714862729523504, "grad_norm": 0.1193832978606224, "learning_rate": 7.71232817441451e-06, "loss": 0.0013, "step": 45650 }, { "epoch": 0.7716552732312009, "grad_norm": 0.06552287191152573, "learning_rate": 7.711089084892001e-06, "loss": 0.0013, "step": 45660 }, { "epoch": 0.7718242735100513, "grad_norm": 0.0960426852107048, "learning_rate": 7.709849759491607e-06, "loss": 0.0034, "step": 45670 }, { "epoch": 0.7719932737889017, "grad_norm": 0.0046477243304252625, "learning_rate": 7.708610198321155e-06, "loss": 0.0012, "step": 45680 }, { "epoch": 0.7721622740677522, "grad_norm": 0.08189505338668823, "learning_rate": 7.707370401488494e-06, "loss": 0.0017, "step": 45690 }, { "epoch": 0.7723312743466026, "grad_norm": 0.14401870965957642, "learning_rate": 7.706130369101487e-06, "loss": 0.0036, "step": 45700 }, { "epoch": 0.7725002746254531, "grad_norm": 0.09131434559822083, "learning_rate": 7.704890101268028e-06, "loss": 0.0031, "step": 45710 }, { "epoch": 0.7726692749043036, "grad_norm": 0.06472761929035187, "learning_rate": 7.703649598096025e-06, "loss": 0.0014, "step": 45720 }, { "epoch": 0.7728382751831541, "grad_norm": 0.0034629059955477715, "learning_rate": 7.702408859693406e-06, "loss": 0.0018, "step": 45730 }, { "epoch": 0.7730072754620045, "grad_norm": 0.03553701192140579, "learning_rate": 7.701167886168124e-06, "loss": 0.0013, "step": 45740 }, { "epoch": 0.773176275740855, "grad_norm": 0.06160648912191391, "learning_rate": 7.699926677628148e-06, "loss": 0.0024, "step": 45750 }, { "epoch": 0.7733452760197055, "grad_norm": 0.037900350987911224, "learning_rate": 7.698685234181471e-06, "loss": 0.0022, "step": 45760 }, { "epoch": 0.7735142762985558, "grad_norm": 0.10797583311796188, "learning_rate": 7.697443555936102e-06, "loss": 0.0018, "step": 45770 }, { "epoch": 0.7736832765774063, "grad_norm": 0.3845529854297638, "learning_rate": 7.696201643000075e-06, "loss": 0.0023, "step": 45780 }, { "epoch": 0.7738522768562568, "grad_norm": 0.043602894991636276, "learning_rate": 7.69495949548144e-06, "loss": 0.0013, "step": 45790 }, { "epoch": 0.7740212771351073, "grad_norm": 0.05094465613365173, "learning_rate": 7.693717113488273e-06, "loss": 0.0016, "step": 45800 }, { "epoch": 0.7741902774139577, "grad_norm": 0.03464687988162041, "learning_rate": 7.692474497128665e-06, "loss": 0.002, "step": 45810 }, { "epoch": 0.7743592776928082, "grad_norm": 0.032336119562387466, "learning_rate": 7.691231646510731e-06, "loss": 0.0009, "step": 45820 }, { "epoch": 0.7745282779716587, "grad_norm": 0.12466021627187729, "learning_rate": 7.689988561742603e-06, "loss": 0.0013, "step": 45830 }, { "epoch": 0.7746972782505092, "grad_norm": 0.010928106494247913, "learning_rate": 7.688745242932439e-06, "loss": 0.002, "step": 45840 }, { "epoch": 0.7748662785293595, "grad_norm": 0.049774374812841415, "learning_rate": 7.687501690188409e-06, "loss": 0.0016, "step": 45850 }, { "epoch": 0.77503527880821, "grad_norm": 0.14102478325366974, "learning_rate": 7.686257903618713e-06, "loss": 0.0036, "step": 45860 }, { "epoch": 0.7752042790870605, "grad_norm": 0.06147325783967972, "learning_rate": 7.685013883331562e-06, "loss": 0.002, "step": 45870 }, { "epoch": 0.7753732793659109, "grad_norm": 0.0031127941329032183, "learning_rate": 7.683769629435195e-06, "loss": 0.0016, "step": 45880 }, { "epoch": 0.7755422796447614, "grad_norm": 0.1135169044137001, "learning_rate": 7.682525142037869e-06, "loss": 0.0025, "step": 45890 }, { "epoch": 0.7757112799236119, "grad_norm": 0.07466382533311844, "learning_rate": 7.681280421247856e-06, "loss": 0.0015, "step": 45900 }, { "epoch": 0.7758802802024624, "grad_norm": 0.3025516867637634, "learning_rate": 7.680035467173456e-06, "loss": 0.0017, "step": 45910 }, { "epoch": 0.7760492804813128, "grad_norm": 0.07543689012527466, "learning_rate": 7.678790279922987e-06, "loss": 0.003, "step": 45920 }, { "epoch": 0.7762182807601633, "grad_norm": 0.060232553631067276, "learning_rate": 7.677544859604782e-06, "loss": 0.0027, "step": 45930 }, { "epoch": 0.7763872810390137, "grad_norm": 0.09006506949663162, "learning_rate": 7.676299206327202e-06, "loss": 0.0014, "step": 45940 }, { "epoch": 0.7765562813178641, "grad_norm": 0.21766416728496552, "learning_rate": 7.675053320198624e-06, "loss": 0.0011, "step": 45950 }, { "epoch": 0.7767252815967146, "grad_norm": 0.07666429877281189, "learning_rate": 7.673807201327448e-06, "loss": 0.0026, "step": 45960 }, { "epoch": 0.7768942818755651, "grad_norm": 0.07516634464263916, "learning_rate": 7.672560849822088e-06, "loss": 0.0011, "step": 45970 }, { "epoch": 0.7770632821544156, "grad_norm": 0.0709579735994339, "learning_rate": 7.671314265790987e-06, "loss": 0.0008, "step": 45980 }, { "epoch": 0.777232282433266, "grad_norm": 0.06900666654109955, "learning_rate": 7.670067449342602e-06, "loss": 0.0011, "step": 45990 }, { "epoch": 0.7774012827121165, "grad_norm": 0.04497666656970978, "learning_rate": 7.668820400585411e-06, "loss": 0.0018, "step": 46000 }, { "epoch": 0.777570282990967, "grad_norm": 0.03760146722197533, "learning_rate": 7.667573119627915e-06, "loss": 0.0047, "step": 46010 }, { "epoch": 0.7777392832698174, "grad_norm": 0.038645707070827484, "learning_rate": 7.666325606578633e-06, "loss": 0.0023, "step": 46020 }, { "epoch": 0.7779082835486678, "grad_norm": 0.014442669227719307, "learning_rate": 7.665077861546104e-06, "loss": 0.0021, "step": 46030 }, { "epoch": 0.7780772838275183, "grad_norm": 0.05108046531677246, "learning_rate": 7.66382988463889e-06, "loss": 0.0013, "step": 46040 }, { "epoch": 0.7782462841063688, "grad_norm": 0.07995917648077011, "learning_rate": 7.662581675965568e-06, "loss": 0.0015, "step": 46050 }, { "epoch": 0.7784152843852192, "grad_norm": 0.09550514817237854, "learning_rate": 7.66133323563474e-06, "loss": 0.0023, "step": 46060 }, { "epoch": 0.7785842846640697, "grad_norm": 0.08962764590978622, "learning_rate": 7.660084563755025e-06, "loss": 0.0022, "step": 46070 }, { "epoch": 0.7787532849429202, "grad_norm": 0.008625727146863937, "learning_rate": 7.658835660435066e-06, "loss": 0.0012, "step": 46080 }, { "epoch": 0.7789222852217706, "grad_norm": 0.023746177554130554, "learning_rate": 7.657586525783523e-06, "loss": 0.0009, "step": 46090 }, { "epoch": 0.7790912855006211, "grad_norm": 0.14935356378555298, "learning_rate": 7.656337159909074e-06, "loss": 0.0025, "step": 46100 }, { "epoch": 0.7792602857794715, "grad_norm": 0.10728771239519119, "learning_rate": 7.655087562920423e-06, "loss": 0.0022, "step": 46110 }, { "epoch": 0.779429286058322, "grad_norm": 0.06259474903345108, "learning_rate": 7.65383773492629e-06, "loss": 0.0021, "step": 46120 }, { "epoch": 0.7795982863371724, "grad_norm": 0.0671854317188263, "learning_rate": 7.652587676035414e-06, "loss": 0.0019, "step": 46130 }, { "epoch": 0.7797672866160229, "grad_norm": 0.21874253451824188, "learning_rate": 7.65133738635656e-06, "loss": 0.0025, "step": 46140 }, { "epoch": 0.7799362868948734, "grad_norm": 0.0705648735165596, "learning_rate": 7.650086865998508e-06, "loss": 0.0022, "step": 46150 }, { "epoch": 0.7801052871737238, "grad_norm": 0.10782632231712341, "learning_rate": 7.648836115070057e-06, "loss": 0.0015, "step": 46160 }, { "epoch": 0.7802742874525743, "grad_norm": 0.002489664824679494, "learning_rate": 7.64758513368003e-06, "loss": 0.0009, "step": 46170 }, { "epoch": 0.7804432877314248, "grad_norm": 0.08178409188985825, "learning_rate": 7.646333921937269e-06, "loss": 0.0018, "step": 46180 }, { "epoch": 0.7806122880102753, "grad_norm": 0.11821407824754715, "learning_rate": 7.645082479950635e-06, "loss": 0.0033, "step": 46190 }, { "epoch": 0.7807812882891256, "grad_norm": 0.019284335896372795, "learning_rate": 7.64383080782901e-06, "loss": 0.0016, "step": 46200 }, { "epoch": 0.7809502885679761, "grad_norm": 0.32529857754707336, "learning_rate": 7.642578905681295e-06, "loss": 0.0022, "step": 46210 }, { "epoch": 0.7811192888468266, "grad_norm": 0.0022389700170606375, "learning_rate": 7.641326773616411e-06, "loss": 0.0017, "step": 46220 }, { "epoch": 0.781288289125677, "grad_norm": 0.08486919850111008, "learning_rate": 7.6400744117433e-06, "loss": 0.0019, "step": 46230 }, { "epoch": 0.7814572894045275, "grad_norm": 0.16006088256835938, "learning_rate": 7.638821820170925e-06, "loss": 0.0026, "step": 46240 }, { "epoch": 0.781626289683378, "grad_norm": 0.060759540647268295, "learning_rate": 7.637568999008265e-06, "loss": 0.0017, "step": 46250 }, { "epoch": 0.7817952899622285, "grad_norm": 0.07992040365934372, "learning_rate": 7.636315948364323e-06, "loss": 0.0009, "step": 46260 }, { "epoch": 0.7819642902410789, "grad_norm": 0.03660142421722412, "learning_rate": 7.635062668348122e-06, "loss": 0.001, "step": 46270 }, { "epoch": 0.7821332905199293, "grad_norm": 0.03165186569094658, "learning_rate": 7.633809159068699e-06, "loss": 0.0013, "step": 46280 }, { "epoch": 0.7823022907987798, "grad_norm": 0.07846418023109436, "learning_rate": 7.63255542063512e-06, "loss": 0.0019, "step": 46290 }, { "epoch": 0.7824712910776302, "grad_norm": 0.23081886768341064, "learning_rate": 7.631301453156464e-06, "loss": 0.0024, "step": 46300 }, { "epoch": 0.7826402913564807, "grad_norm": 0.09403436630964279, "learning_rate": 7.63004725674183e-06, "loss": 0.0013, "step": 46310 }, { "epoch": 0.7828092916353312, "grad_norm": 0.06292977929115295, "learning_rate": 7.628792831500345e-06, "loss": 0.0013, "step": 46320 }, { "epoch": 0.7829782919141817, "grad_norm": 0.0667533427476883, "learning_rate": 7.627538177541145e-06, "loss": 0.0016, "step": 46330 }, { "epoch": 0.7831472921930321, "grad_norm": 0.04467438533902168, "learning_rate": 7.626283294973394e-06, "loss": 0.0008, "step": 46340 }, { "epoch": 0.7833162924718826, "grad_norm": 0.045477494597435, "learning_rate": 7.625028183906272e-06, "loss": 0.0017, "step": 46350 }, { "epoch": 0.7834852927507331, "grad_norm": 0.08229431509971619, "learning_rate": 7.62377284444898e-06, "loss": 0.002, "step": 46360 }, { "epoch": 0.7836542930295834, "grad_norm": 0.16802437603473663, "learning_rate": 7.622517276710737e-06, "loss": 0.0022, "step": 46370 }, { "epoch": 0.7838232933084339, "grad_norm": 0.05041522905230522, "learning_rate": 7.6212614808007835e-06, "loss": 0.0025, "step": 46380 }, { "epoch": 0.7839922935872844, "grad_norm": 0.03219315782189369, "learning_rate": 7.620005456828383e-06, "loss": 0.0038, "step": 46390 }, { "epoch": 0.7841612938661349, "grad_norm": 0.05192848667502403, "learning_rate": 7.618749204902812e-06, "loss": 0.0015, "step": 46400 }, { "epoch": 0.7843302941449853, "grad_norm": 0.04018478840589523, "learning_rate": 7.617492725133372e-06, "loss": 0.0015, "step": 46410 }, { "epoch": 0.7844992944238358, "grad_norm": 0.016311295330524445, "learning_rate": 7.616236017629383e-06, "loss": 0.0022, "step": 46420 }, { "epoch": 0.7846682947026863, "grad_norm": 0.04850300773978233, "learning_rate": 7.614979082500185e-06, "loss": 0.0021, "step": 46430 }, { "epoch": 0.7848372949815368, "grad_norm": 0.013393869623541832, "learning_rate": 7.613721919855137e-06, "loss": 0.0023, "step": 46440 }, { "epoch": 0.7850062952603872, "grad_norm": 0.10269208252429962, "learning_rate": 7.612464529803618e-06, "loss": 0.0013, "step": 46450 }, { "epoch": 0.7851752955392376, "grad_norm": 0.3491341173648834, "learning_rate": 7.611206912455027e-06, "loss": 0.0014, "step": 46460 }, { "epoch": 0.7853442958180881, "grad_norm": 0.03559425100684166, "learning_rate": 7.609949067918785e-06, "loss": 0.0015, "step": 46470 }, { "epoch": 0.7855132960969385, "grad_norm": 0.07604513317346573, "learning_rate": 7.608690996304327e-06, "loss": 0.0024, "step": 46480 }, { "epoch": 0.785682296375789, "grad_norm": 0.1284206509590149, "learning_rate": 7.607432697721112e-06, "loss": 0.003, "step": 46490 }, { "epoch": 0.7858512966546395, "grad_norm": 0.06238522008061409, "learning_rate": 7.60617417227862e-06, "loss": 0.001, "step": 46500 }, { "epoch": 0.78602029693349, "grad_norm": 0.045297060161828995, "learning_rate": 7.604915420086348e-06, "loss": 0.0012, "step": 46510 }, { "epoch": 0.7861892972123404, "grad_norm": 0.020996225997805595, "learning_rate": 7.603656441253811e-06, "loss": 0.0012, "step": 46520 }, { "epoch": 0.7863582974911909, "grad_norm": 0.01850290037691593, "learning_rate": 7.602397235890551e-06, "loss": 0.0014, "step": 46530 }, { "epoch": 0.7865272977700413, "grad_norm": 0.06593858450651169, "learning_rate": 7.6011378041061225e-06, "loss": 0.0017, "step": 46540 }, { "epoch": 0.7866962980488917, "grad_norm": 0.08187979459762573, "learning_rate": 7.5998781460101026e-06, "loss": 0.0026, "step": 46550 }, { "epoch": 0.7868652983277422, "grad_norm": 0.12238030880689621, "learning_rate": 7.598618261712087e-06, "loss": 0.0015, "step": 46560 }, { "epoch": 0.7870342986065927, "grad_norm": 0.05987643823027611, "learning_rate": 7.59735815132169e-06, "loss": 0.0028, "step": 46570 }, { "epoch": 0.7872032988854432, "grad_norm": 0.09325841069221497, "learning_rate": 7.596097814948551e-06, "loss": 0.0019, "step": 46580 }, { "epoch": 0.7873722991642936, "grad_norm": 0.05478665232658386, "learning_rate": 7.594837252702324e-06, "loss": 0.0014, "step": 46590 }, { "epoch": 0.7875412994431441, "grad_norm": 0.07214100658893585, "learning_rate": 7.593576464692684e-06, "loss": 0.0015, "step": 46600 }, { "epoch": 0.7877102997219946, "grad_norm": 0.15464851260185242, "learning_rate": 7.592315451029324e-06, "loss": 0.0026, "step": 46610 }, { "epoch": 0.787879300000845, "grad_norm": 0.0955459251999855, "learning_rate": 7.59105421182196e-06, "loss": 0.0024, "step": 46620 }, { "epoch": 0.7880483002796954, "grad_norm": 0.0869637057185173, "learning_rate": 7.589792747180327e-06, "loss": 0.0017, "step": 46630 }, { "epoch": 0.7882173005585459, "grad_norm": 0.04810710251331329, "learning_rate": 7.588531057214175e-06, "loss": 0.0014, "step": 46640 }, { "epoch": 0.7883863008373964, "grad_norm": 0.02471080981194973, "learning_rate": 7.587269142033281e-06, "loss": 0.0025, "step": 46650 }, { "epoch": 0.7885553011162468, "grad_norm": 0.02822282165288925, "learning_rate": 7.586007001747436e-06, "loss": 0.0021, "step": 46660 }, { "epoch": 0.7887243013950973, "grad_norm": 0.060560278594493866, "learning_rate": 7.584744636466453e-06, "loss": 0.0019, "step": 46670 }, { "epoch": 0.7888933016739478, "grad_norm": 0.04017733037471771, "learning_rate": 7.583482046300161e-06, "loss": 0.0013, "step": 46680 }, { "epoch": 0.7890623019527983, "grad_norm": 0.0022847475484013557, "learning_rate": 7.582219231358415e-06, "loss": 0.0014, "step": 46690 }, { "epoch": 0.7892313022316487, "grad_norm": 0.006776855327188969, "learning_rate": 7.580956191751084e-06, "loss": 0.0014, "step": 46700 }, { "epoch": 0.7894003025104992, "grad_norm": 0.06477966159582138, "learning_rate": 7.57969292758806e-06, "loss": 0.0013, "step": 46710 }, { "epoch": 0.7895693027893496, "grad_norm": 0.14177727699279785, "learning_rate": 7.5784294389792535e-06, "loss": 0.001, "step": 46720 }, { "epoch": 0.7897383030682, "grad_norm": 0.06468930095434189, "learning_rate": 7.5771657260345924e-06, "loss": 0.0015, "step": 46730 }, { "epoch": 0.7899073033470505, "grad_norm": 0.08887003362178802, "learning_rate": 7.575901788864025e-06, "loss": 0.0019, "step": 46740 }, { "epoch": 0.790076303625901, "grad_norm": 0.07502151280641556, "learning_rate": 7.574637627577524e-06, "loss": 0.0016, "step": 46750 }, { "epoch": 0.7902453039047515, "grad_norm": 0.08747242391109467, "learning_rate": 7.573373242285073e-06, "loss": 0.0006, "step": 46760 }, { "epoch": 0.7904143041836019, "grad_norm": 0.016262156888842583, "learning_rate": 7.572108633096682e-06, "loss": 0.0033, "step": 46770 }, { "epoch": 0.7905833044624524, "grad_norm": 0.02630412019789219, "learning_rate": 7.570843800122377e-06, "loss": 0.0014, "step": 46780 }, { "epoch": 0.7907523047413029, "grad_norm": 0.04608182981610298, "learning_rate": 7.569578743472206e-06, "loss": 0.0019, "step": 46790 }, { "epoch": 0.7909213050201532, "grad_norm": 0.16504621505737305, "learning_rate": 7.568313463256234e-06, "loss": 0.0017, "step": 46800 }, { "epoch": 0.7910903052990037, "grad_norm": 0.050347063690423965, "learning_rate": 7.567047959584548e-06, "loss": 0.0014, "step": 46810 }, { "epoch": 0.7912593055778542, "grad_norm": 0.09296340495347977, "learning_rate": 7.565782232567252e-06, "loss": 0.0012, "step": 46820 }, { "epoch": 0.7914283058567047, "grad_norm": 0.059468045830726624, "learning_rate": 7.564516282314469e-06, "loss": 0.0016, "step": 46830 }, { "epoch": 0.7915973061355551, "grad_norm": 0.06091529503464699, "learning_rate": 7.563250108936344e-06, "loss": 0.0016, "step": 46840 }, { "epoch": 0.7917663064144056, "grad_norm": 0.023602819070219994, "learning_rate": 7.561983712543042e-06, "loss": 0.0012, "step": 46850 }, { "epoch": 0.7919353066932561, "grad_norm": 0.07649361342191696, "learning_rate": 7.560717093244743e-06, "loss": 0.0014, "step": 46860 }, { "epoch": 0.7921043069721065, "grad_norm": 0.37941059470176697, "learning_rate": 7.55945025115165e-06, "loss": 0.0029, "step": 46870 }, { "epoch": 0.792273307250957, "grad_norm": 0.046618007123470306, "learning_rate": 7.558183186373984e-06, "loss": 0.0019, "step": 46880 }, { "epoch": 0.7924423075298074, "grad_norm": 0.17688718438148499, "learning_rate": 7.556915899021986e-06, "loss": 0.0049, "step": 46890 }, { "epoch": 0.7926113078086579, "grad_norm": 0.02750130370259285, "learning_rate": 7.5556483892059165e-06, "loss": 0.0022, "step": 46900 }, { "epoch": 0.7927803080875083, "grad_norm": 0.14209090173244476, "learning_rate": 7.5543806570360545e-06, "loss": 0.0025, "step": 46910 }, { "epoch": 0.7929493083663588, "grad_norm": 0.06824956834316254, "learning_rate": 7.5531127026227e-06, "loss": 0.0023, "step": 46920 }, { "epoch": 0.7931183086452093, "grad_norm": 0.14599743485450745, "learning_rate": 7.551844526076169e-06, "loss": 0.0023, "step": 46930 }, { "epoch": 0.7932873089240597, "grad_norm": 0.06734983623027802, "learning_rate": 7.5505761275068015e-06, "loss": 0.0014, "step": 46940 }, { "epoch": 0.7934563092029102, "grad_norm": 0.028400149196386337, "learning_rate": 7.549307507024952e-06, "loss": 0.0029, "step": 46950 }, { "epoch": 0.7936253094817607, "grad_norm": 0.15007208287715912, "learning_rate": 7.548038664740999e-06, "loss": 0.0053, "step": 46960 }, { "epoch": 0.793794309760611, "grad_norm": 0.04033247381448746, "learning_rate": 7.546769600765336e-06, "loss": 0.0016, "step": 46970 }, { "epoch": 0.7939633100394615, "grad_norm": 0.030600406229496002, "learning_rate": 7.545500315208377e-06, "loss": 0.0015, "step": 46980 }, { "epoch": 0.794132310318312, "grad_norm": 0.11937287449836731, "learning_rate": 7.544230808180559e-06, "loss": 0.002, "step": 46990 }, { "epoch": 0.7943013105971625, "grad_norm": 0.014048217795789242, "learning_rate": 7.542961079792333e-06, "loss": 0.002, "step": 47000 }, { "epoch": 0.7944703108760129, "grad_norm": 0.05937180668115616, "learning_rate": 7.541691130154172e-06, "loss": 0.0016, "step": 47010 }, { "epoch": 0.7946393111548634, "grad_norm": 0.04418695718050003, "learning_rate": 7.540420959376569e-06, "loss": 0.0006, "step": 47020 }, { "epoch": 0.7948083114337139, "grad_norm": 0.021853217855095863, "learning_rate": 7.5391505675700325e-06, "loss": 0.0025, "step": 47030 }, { "epoch": 0.7949773117125644, "grad_norm": 0.08879373222589493, "learning_rate": 7.537879954845095e-06, "loss": 0.0018, "step": 47040 }, { "epoch": 0.7951463119914148, "grad_norm": 0.0371592752635479, "learning_rate": 7.536609121312305e-06, "loss": 0.001, "step": 47050 }, { "epoch": 0.7953153122702652, "grad_norm": 0.02704356424510479, "learning_rate": 7.5353380670822314e-06, "loss": 0.0009, "step": 47060 }, { "epoch": 0.7954843125491157, "grad_norm": 0.16344471275806427, "learning_rate": 7.534066792265461e-06, "loss": 0.0016, "step": 47070 }, { "epoch": 0.7956533128279661, "grad_norm": 0.0721210241317749, "learning_rate": 7.532795296972602e-06, "loss": 0.0023, "step": 47080 }, { "epoch": 0.7958223131068166, "grad_norm": 0.12913252413272858, "learning_rate": 7.53152358131428e-06, "loss": 0.0012, "step": 47090 }, { "epoch": 0.7959913133856671, "grad_norm": 0.036034420132637024, "learning_rate": 7.530251645401143e-06, "loss": 0.0021, "step": 47100 }, { "epoch": 0.7961603136645176, "grad_norm": 0.22235152125358582, "learning_rate": 7.528979489343853e-06, "loss": 0.0028, "step": 47110 }, { "epoch": 0.796329313943368, "grad_norm": 0.06673062592744827, "learning_rate": 7.527707113253093e-06, "loss": 0.0019, "step": 47120 }, { "epoch": 0.7964983142222185, "grad_norm": 0.13184094429016113, "learning_rate": 7.526434517239568e-06, "loss": 0.0031, "step": 47130 }, { "epoch": 0.796667314501069, "grad_norm": 0.05706809088587761, "learning_rate": 7.525161701413999e-06, "loss": 0.0017, "step": 47140 }, { "epoch": 0.7968363147799193, "grad_norm": 0.16983768343925476, "learning_rate": 7.523888665887127e-06, "loss": 0.0017, "step": 47150 }, { "epoch": 0.7970053150587698, "grad_norm": 0.03151121735572815, "learning_rate": 7.522615410769714e-06, "loss": 0.0015, "step": 47160 }, { "epoch": 0.7971743153376203, "grad_norm": 0.11783250421285629, "learning_rate": 7.521341936172536e-06, "loss": 0.0034, "step": 47170 }, { "epoch": 0.7973433156164708, "grad_norm": 0.016078723594546318, "learning_rate": 7.520068242206393e-06, "loss": 0.0012, "step": 47180 }, { "epoch": 0.7975123158953212, "grad_norm": 0.0879022553563118, "learning_rate": 7.518794328982104e-06, "loss": 0.0017, "step": 47190 }, { "epoch": 0.7976813161741717, "grad_norm": 0.05336923897266388, "learning_rate": 7.5175201966105045e-06, "loss": 0.0009, "step": 47200 }, { "epoch": 0.7978503164530222, "grad_norm": 0.06102690473198891, "learning_rate": 7.516245845202451e-06, "loss": 0.0013, "step": 47210 }, { "epoch": 0.7980193167318727, "grad_norm": 0.0765201672911644, "learning_rate": 7.514971274868817e-06, "loss": 0.0016, "step": 47220 }, { "epoch": 0.798188317010723, "grad_norm": 0.023189343512058258, "learning_rate": 7.513696485720496e-06, "loss": 0.0015, "step": 47230 }, { "epoch": 0.7983573172895735, "grad_norm": 0.081252820789814, "learning_rate": 7.512421477868402e-06, "loss": 0.003, "step": 47240 }, { "epoch": 0.798526317568424, "grad_norm": 0.027922332286834717, "learning_rate": 7.511146251423467e-06, "loss": 0.0015, "step": 47250 }, { "epoch": 0.7986953178472744, "grad_norm": 0.05592355132102966, "learning_rate": 7.509870806496642e-06, "loss": 0.0024, "step": 47260 }, { "epoch": 0.7988643181261249, "grad_norm": 0.03578708693385124, "learning_rate": 7.508595143198894e-06, "loss": 0.0012, "step": 47270 }, { "epoch": 0.7990333184049754, "grad_norm": 0.04387123882770538, "learning_rate": 7.507319261641215e-06, "loss": 0.0038, "step": 47280 }, { "epoch": 0.7992023186838259, "grad_norm": 0.0392196886241436, "learning_rate": 7.506043161934613e-06, "loss": 0.001, "step": 47290 }, { "epoch": 0.7993713189626763, "grad_norm": 0.09163472056388855, "learning_rate": 7.5047668441901124e-06, "loss": 0.001, "step": 47300 }, { "epoch": 0.7995403192415268, "grad_norm": 0.059991274029016495, "learning_rate": 7.503490308518761e-06, "loss": 0.0011, "step": 47310 }, { "epoch": 0.7997093195203772, "grad_norm": 0.045976802706718445, "learning_rate": 7.502213555031623e-06, "loss": 0.0046, "step": 47320 }, { "epoch": 0.7998783197992276, "grad_norm": 0.03616219386458397, "learning_rate": 7.500936583839782e-06, "loss": 0.0027, "step": 47330 }, { "epoch": 0.8000473200780781, "grad_norm": 0.03759624436497688, "learning_rate": 7.499659395054342e-06, "loss": 0.0025, "step": 47340 }, { "epoch": 0.8002163203569286, "grad_norm": 0.07205487787723541, "learning_rate": 7.498381988786423e-06, "loss": 0.0014, "step": 47350 }, { "epoch": 0.800385320635779, "grad_norm": 0.02757079340517521, "learning_rate": 7.4971043651471654e-06, "loss": 0.0017, "step": 47360 }, { "epoch": 0.8005543209146295, "grad_norm": 0.06361888349056244, "learning_rate": 7.49582652424773e-06, "loss": 0.0022, "step": 47370 }, { "epoch": 0.80072332119348, "grad_norm": 0.054886069148778915, "learning_rate": 7.494548466199294e-06, "loss": 0.0013, "step": 47380 }, { "epoch": 0.8008923214723305, "grad_norm": 0.08569411933422089, "learning_rate": 7.493270191113054e-06, "loss": 0.0017, "step": 47390 }, { "epoch": 0.8010613217511809, "grad_norm": 0.02172980271279812, "learning_rate": 7.4919916991002295e-06, "loss": 0.0011, "step": 47400 }, { "epoch": 0.8012303220300313, "grad_norm": 0.048847027122974396, "learning_rate": 7.490712990272052e-06, "loss": 0.0014, "step": 47410 }, { "epoch": 0.8013993223088818, "grad_norm": 0.21808849275112152, "learning_rate": 7.489434064739777e-06, "loss": 0.0034, "step": 47420 }, { "epoch": 0.8015683225877323, "grad_norm": 0.025194497779011726, "learning_rate": 7.488154922614677e-06, "loss": 0.0017, "step": 47430 }, { "epoch": 0.8017373228665827, "grad_norm": 0.008024209178984165, "learning_rate": 7.486875564008043e-06, "loss": 0.0015, "step": 47440 }, { "epoch": 0.8019063231454332, "grad_norm": 0.009351897984743118, "learning_rate": 7.485595989031186e-06, "loss": 0.0009, "step": 47450 }, { "epoch": 0.8020753234242837, "grad_norm": 0.06636117398738861, "learning_rate": 7.484316197795434e-06, "loss": 0.0008, "step": 47460 }, { "epoch": 0.8022443237031341, "grad_norm": 0.029118984937667847, "learning_rate": 7.483036190412136e-06, "loss": 0.0014, "step": 47470 }, { "epoch": 0.8024133239819846, "grad_norm": 0.14868061244487762, "learning_rate": 7.48175596699266e-06, "loss": 0.0023, "step": 47480 }, { "epoch": 0.802582324260835, "grad_norm": 0.011304926127195358, "learning_rate": 7.48047552764839e-06, "loss": 0.0026, "step": 47490 }, { "epoch": 0.8027513245396855, "grad_norm": 0.10953537374734879, "learning_rate": 7.479194872490731e-06, "loss": 0.0039, "step": 47500 }, { "epoch": 0.8029203248185359, "grad_norm": 0.0011343724327161908, "learning_rate": 7.477914001631106e-06, "loss": 0.0026, "step": 47510 }, { "epoch": 0.8030893250973864, "grad_norm": 0.05069960653781891, "learning_rate": 7.4766329151809565e-06, "loss": 0.0011, "step": 47520 }, { "epoch": 0.8032583253762369, "grad_norm": 0.14260950684547424, "learning_rate": 7.475351613251744e-06, "loss": 0.002, "step": 47530 }, { "epoch": 0.8034273256550873, "grad_norm": 0.014285523444414139, "learning_rate": 7.474070095954948e-06, "loss": 0.0022, "step": 47540 }, { "epoch": 0.8035963259339378, "grad_norm": 0.04909459874033928, "learning_rate": 7.472788363402068e-06, "loss": 0.0015, "step": 47550 }, { "epoch": 0.8037653262127883, "grad_norm": 0.12003076076507568, "learning_rate": 7.471506415704617e-06, "loss": 0.0026, "step": 47560 }, { "epoch": 0.8039343264916388, "grad_norm": 0.07390342652797699, "learning_rate": 7.470224252974134e-06, "loss": 0.0014, "step": 47570 }, { "epoch": 0.8041033267704891, "grad_norm": 0.08915874361991882, "learning_rate": 7.468941875322173e-06, "loss": 0.0019, "step": 47580 }, { "epoch": 0.8042723270493396, "grad_norm": 0.05357299745082855, "learning_rate": 7.467659282860306e-06, "loss": 0.0016, "step": 47590 }, { "epoch": 0.8044413273281901, "grad_norm": 0.03945795074105263, "learning_rate": 7.466376475700126e-06, "loss": 0.0018, "step": 47600 }, { "epoch": 0.8046103276070405, "grad_norm": 0.04925532266497612, "learning_rate": 7.465093453953241e-06, "loss": 0.0013, "step": 47610 }, { "epoch": 0.804779327885891, "grad_norm": 0.04004979878664017, "learning_rate": 7.463810217731283e-06, "loss": 0.0017, "step": 47620 }, { "epoch": 0.8049483281647415, "grad_norm": 0.06461845338344574, "learning_rate": 7.462526767145899e-06, "loss": 0.0019, "step": 47630 }, { "epoch": 0.805117328443592, "grad_norm": 0.061418574303388596, "learning_rate": 7.461243102308755e-06, "loss": 0.001, "step": 47640 }, { "epoch": 0.8052863287224424, "grad_norm": 0.04609359800815582, "learning_rate": 7.459959223331537e-06, "loss": 0.0008, "step": 47650 }, { "epoch": 0.8054553290012928, "grad_norm": 0.0352080874145031, "learning_rate": 7.4586751303259455e-06, "loss": 0.0018, "step": 47660 }, { "epoch": 0.8056243292801433, "grad_norm": 0.07530752569437027, "learning_rate": 7.457390823403706e-06, "loss": 0.0011, "step": 47670 }, { "epoch": 0.8057933295589937, "grad_norm": 0.010745921172201633, "learning_rate": 7.456106302676559e-06, "loss": 0.0025, "step": 47680 }, { "epoch": 0.8059623298378442, "grad_norm": 0.052886590361595154, "learning_rate": 7.454821568256263e-06, "loss": 0.0024, "step": 47690 }, { "epoch": 0.8061313301166947, "grad_norm": 0.0723862424492836, "learning_rate": 7.453536620254598e-06, "loss": 0.0017, "step": 47700 }, { "epoch": 0.8063003303955452, "grad_norm": 0.006251264829188585, "learning_rate": 7.452251458783359e-06, "loss": 0.0017, "step": 47710 }, { "epoch": 0.8064693306743956, "grad_norm": 0.08329357951879501, "learning_rate": 7.450966083954361e-06, "loss": 0.0014, "step": 47720 }, { "epoch": 0.8066383309532461, "grad_norm": 0.15640972554683685, "learning_rate": 7.449680495879439e-06, "loss": 0.0018, "step": 47730 }, { "epoch": 0.8068073312320966, "grad_norm": 0.11291138082742691, "learning_rate": 7.4483946946704445e-06, "loss": 0.0015, "step": 47740 }, { "epoch": 0.8069763315109469, "grad_norm": 0.0313311368227005, "learning_rate": 7.447108680439248e-06, "loss": 0.0025, "step": 47750 }, { "epoch": 0.8071453317897974, "grad_norm": 0.06498327106237411, "learning_rate": 7.44582245329774e-06, "loss": 0.0014, "step": 47760 }, { "epoch": 0.8073143320686479, "grad_norm": 0.012859559617936611, "learning_rate": 7.44453601335783e-06, "loss": 0.0025, "step": 47770 }, { "epoch": 0.8074833323474984, "grad_norm": 0.07249462604522705, "learning_rate": 7.4432493607314405e-06, "loss": 0.0014, "step": 47780 }, { "epoch": 0.8076523326263488, "grad_norm": 0.09710532426834106, "learning_rate": 7.4419624955305205e-06, "loss": 0.0016, "step": 47790 }, { "epoch": 0.8078213329051993, "grad_norm": 0.03200296685099602, "learning_rate": 7.440675417867031e-06, "loss": 0.001, "step": 47800 }, { "epoch": 0.8079903331840498, "grad_norm": 0.15108339488506317, "learning_rate": 7.4393881278529555e-06, "loss": 0.0027, "step": 47810 }, { "epoch": 0.8081593334629003, "grad_norm": 0.2669786214828491, "learning_rate": 7.438100625600293e-06, "loss": 0.0021, "step": 47820 }, { "epoch": 0.8083283337417507, "grad_norm": 0.1186618059873581, "learning_rate": 7.436812911221064e-06, "loss": 0.0022, "step": 47830 }, { "epoch": 0.8084973340206011, "grad_norm": 0.08381953835487366, "learning_rate": 7.435524984827304e-06, "loss": 0.0015, "step": 47840 }, { "epoch": 0.8086663342994516, "grad_norm": 0.18541555106639862, "learning_rate": 7.434236846531071e-06, "loss": 0.0018, "step": 47850 }, { "epoch": 0.808835334578302, "grad_norm": 0.09643784165382385, "learning_rate": 7.432948496444437e-06, "loss": 0.0019, "step": 47860 }, { "epoch": 0.8090043348571525, "grad_norm": 0.03466213867068291, "learning_rate": 7.431659934679496e-06, "loss": 0.0021, "step": 47870 }, { "epoch": 0.809173335136003, "grad_norm": 0.059613388031721115, "learning_rate": 7.43037116134836e-06, "loss": 0.0025, "step": 47880 }, { "epoch": 0.8093423354148535, "grad_norm": 0.006972366478294134, "learning_rate": 7.429082176563157e-06, "loss": 0.0012, "step": 47890 }, { "epoch": 0.8095113356937039, "grad_norm": 0.02702466771006584, "learning_rate": 7.427792980436036e-06, "loss": 0.0024, "step": 47900 }, { "epoch": 0.8096803359725544, "grad_norm": 0.11114290356636047, "learning_rate": 7.426503573079162e-06, "loss": 0.0014, "step": 47910 }, { "epoch": 0.8098493362514048, "grad_norm": 0.05626865476369858, "learning_rate": 7.425213954604722e-06, "loss": 0.0016, "step": 47920 }, { "epoch": 0.8100183365302552, "grad_norm": 0.09901049733161926, "learning_rate": 7.4239241251249165e-06, "loss": 0.0019, "step": 47930 }, { "epoch": 0.8101873368091057, "grad_norm": 0.04712087661027908, "learning_rate": 7.422634084751967e-06, "loss": 0.0021, "step": 47940 }, { "epoch": 0.8103563370879562, "grad_norm": 0.0625760555267334, "learning_rate": 7.421343833598115e-06, "loss": 0.0026, "step": 47950 }, { "epoch": 0.8105253373668067, "grad_norm": 0.02616805210709572, "learning_rate": 7.420053371775618e-06, "loss": 0.002, "step": 47960 }, { "epoch": 0.8106943376456571, "grad_norm": 0.007708234712481499, "learning_rate": 7.418762699396752e-06, "loss": 0.0017, "step": 47970 }, { "epoch": 0.8108633379245076, "grad_norm": 0.07194136083126068, "learning_rate": 7.417471816573812e-06, "loss": 0.0014, "step": 47980 }, { "epoch": 0.8110323382033581, "grad_norm": 0.03533967584371567, "learning_rate": 7.416180723419112e-06, "loss": 0.0008, "step": 47990 }, { "epoch": 0.8112013384822085, "grad_norm": 0.06678535044193268, "learning_rate": 7.414889420044982e-06, "loss": 0.0013, "step": 48000 }, { "epoch": 0.8113703387610589, "grad_norm": 0.05515426769852638, "learning_rate": 7.413597906563771e-06, "loss": 0.0015, "step": 48010 }, { "epoch": 0.8115393390399094, "grad_norm": 0.025319591164588928, "learning_rate": 7.412306183087849e-06, "loss": 0.0014, "step": 48020 }, { "epoch": 0.8117083393187599, "grad_norm": 0.06620465219020844, "learning_rate": 7.4110142497296e-06, "loss": 0.0023, "step": 48030 }, { "epoch": 0.8118773395976103, "grad_norm": 0.0314614363014698, "learning_rate": 7.4097221066014306e-06, "loss": 0.0014, "step": 48040 }, { "epoch": 0.8120463398764608, "grad_norm": 0.10447190701961517, "learning_rate": 7.408429753815762e-06, "loss": 0.0021, "step": 48050 }, { "epoch": 0.8122153401553113, "grad_norm": 0.12224993854761124, "learning_rate": 7.407137191485036e-06, "loss": 0.0026, "step": 48060 }, { "epoch": 0.8123843404341617, "grad_norm": 0.05101926624774933, "learning_rate": 7.405844419721712e-06, "loss": 0.0014, "step": 48070 }, { "epoch": 0.8125533407130122, "grad_norm": 0.1120670735836029, "learning_rate": 7.404551438638265e-06, "loss": 0.0013, "step": 48080 }, { "epoch": 0.8127223409918627, "grad_norm": 0.03729407116770744, "learning_rate": 7.403258248347195e-06, "loss": 0.001, "step": 48090 }, { "epoch": 0.812891341270713, "grad_norm": 0.05864240974187851, "learning_rate": 7.401964848961012e-06, "loss": 0.0022, "step": 48100 }, { "epoch": 0.8130603415495635, "grad_norm": 0.008091941475868225, "learning_rate": 7.40067124059225e-06, "loss": 0.0017, "step": 48110 }, { "epoch": 0.813229341828414, "grad_norm": 0.10884171724319458, "learning_rate": 7.399377423353457e-06, "loss": 0.0016, "step": 48120 }, { "epoch": 0.8133983421072645, "grad_norm": 0.03182748705148697, "learning_rate": 7.398083397357205e-06, "loss": 0.0024, "step": 48130 }, { "epoch": 0.8135673423861149, "grad_norm": 0.10057803243398666, "learning_rate": 7.396789162716076e-06, "loss": 0.0018, "step": 48140 }, { "epoch": 0.8137363426649654, "grad_norm": 0.01269405148923397, "learning_rate": 7.395494719542679e-06, "loss": 0.0029, "step": 48150 }, { "epoch": 0.8139053429438159, "grad_norm": 0.07138130068778992, "learning_rate": 7.394200067949635e-06, "loss": 0.0021, "step": 48160 }, { "epoch": 0.8140743432226664, "grad_norm": 0.05810423940420151, "learning_rate": 7.392905208049585e-06, "loss": 0.0056, "step": 48170 }, { "epoch": 0.8142433435015167, "grad_norm": 0.05451294779777527, "learning_rate": 7.391610139955187e-06, "loss": 0.0024, "step": 48180 }, { "epoch": 0.8144123437803672, "grad_norm": 0.050688549876213074, "learning_rate": 7.39031486377912e-06, "loss": 0.0017, "step": 48190 }, { "epoch": 0.8145813440592177, "grad_norm": 0.274347186088562, "learning_rate": 7.389019379634078e-06, "loss": 0.0017, "step": 48200 }, { "epoch": 0.8147503443380681, "grad_norm": 0.11318135261535645, "learning_rate": 7.387723687632775e-06, "loss": 0.0025, "step": 48210 }, { "epoch": 0.8149193446169186, "grad_norm": 0.219595804810524, "learning_rate": 7.386427787887943e-06, "loss": 0.0018, "step": 48220 }, { "epoch": 0.8150883448957691, "grad_norm": 0.04114341735839844, "learning_rate": 7.38513168051233e-06, "loss": 0.002, "step": 48230 }, { "epoch": 0.8152573451746196, "grad_norm": 0.058271802961826324, "learning_rate": 7.383835365618706e-06, "loss": 0.0017, "step": 48240 }, { "epoch": 0.81542634545347, "grad_norm": 0.16201192140579224, "learning_rate": 7.382538843319853e-06, "loss": 0.0026, "step": 48250 }, { "epoch": 0.8155953457323205, "grad_norm": 0.06540218740701675, "learning_rate": 7.381242113728579e-06, "loss": 0.0025, "step": 48260 }, { "epoch": 0.8157643460111709, "grad_norm": 0.003137104446068406, "learning_rate": 7.3799451769577036e-06, "loss": 0.0015, "step": 48270 }, { "epoch": 0.8159333462900213, "grad_norm": 0.1325099766254425, "learning_rate": 7.378648033120066e-06, "loss": 0.0033, "step": 48280 }, { "epoch": 0.8161023465688718, "grad_norm": 0.04073556885123253, "learning_rate": 7.377350682328525e-06, "loss": 0.0028, "step": 48290 }, { "epoch": 0.8162713468477223, "grad_norm": 0.0781053751707077, "learning_rate": 7.3760531246959555e-06, "loss": 0.0024, "step": 48300 }, { "epoch": 0.8164403471265728, "grad_norm": 0.04935900494456291, "learning_rate": 7.374755360335253e-06, "loss": 0.0028, "step": 48310 }, { "epoch": 0.8166093474054232, "grad_norm": 0.044646281749010086, "learning_rate": 7.373457389359327e-06, "loss": 0.002, "step": 48320 }, { "epoch": 0.8167783476842737, "grad_norm": 0.008936772122979164, "learning_rate": 7.372159211881109e-06, "loss": 0.0018, "step": 48330 }, { "epoch": 0.8169473479631242, "grad_norm": 0.1457815021276474, "learning_rate": 7.370860828013546e-06, "loss": 0.0024, "step": 48340 }, { "epoch": 0.8171163482419745, "grad_norm": 0.09478127211332321, "learning_rate": 7.3695622378696045e-06, "loss": 0.0027, "step": 48350 }, { "epoch": 0.817285348520825, "grad_norm": 0.11928408592939377, "learning_rate": 7.368263441562266e-06, "loss": 0.0021, "step": 48360 }, { "epoch": 0.8174543487996755, "grad_norm": 0.20206162333488464, "learning_rate": 7.366964439204535e-06, "loss": 0.0015, "step": 48370 }, { "epoch": 0.817623349078526, "grad_norm": 0.014312900602817535, "learning_rate": 7.365665230909429e-06, "loss": 0.0012, "step": 48380 }, { "epoch": 0.8177923493573764, "grad_norm": 0.023647421970963478, "learning_rate": 7.364365816789987e-06, "loss": 0.0018, "step": 48390 }, { "epoch": 0.8179613496362269, "grad_norm": 0.08986024558544159, "learning_rate": 7.363066196959262e-06, "loss": 0.0024, "step": 48400 }, { "epoch": 0.8181303499150774, "grad_norm": 0.11756408214569092, "learning_rate": 7.361766371530329e-06, "loss": 0.0015, "step": 48410 }, { "epoch": 0.8182993501939279, "grad_norm": 0.07477547973394394, "learning_rate": 7.360466340616279e-06, "loss": 0.0024, "step": 48420 }, { "epoch": 0.8184683504727783, "grad_norm": 0.06278149038553238, "learning_rate": 7.35916610433022e-06, "loss": 0.0029, "step": 48430 }, { "epoch": 0.8186373507516287, "grad_norm": 0.07817571610212326, "learning_rate": 7.35786566278528e-06, "loss": 0.0016, "step": 48440 }, { "epoch": 0.8188063510304792, "grad_norm": 0.10295785218477249, "learning_rate": 7.3565650160946036e-06, "loss": 0.0012, "step": 48450 }, { "epoch": 0.8189753513093296, "grad_norm": 0.07114120572805405, "learning_rate": 7.355264164371352e-06, "loss": 0.0023, "step": 48460 }, { "epoch": 0.8191443515881801, "grad_norm": 0.039024025201797485, "learning_rate": 7.353963107728708e-06, "loss": 0.0009, "step": 48470 }, { "epoch": 0.8193133518670306, "grad_norm": 0.10922796279191971, "learning_rate": 7.352661846279867e-06, "loss": 0.0026, "step": 48480 }, { "epoch": 0.819482352145881, "grad_norm": 0.003510313807055354, "learning_rate": 7.351360380138046e-06, "loss": 0.0014, "step": 48490 }, { "epoch": 0.8196513524247315, "grad_norm": 0.06770562380552292, "learning_rate": 7.350058709416481e-06, "loss": 0.002, "step": 48500 }, { "epoch": 0.819820352703582, "grad_norm": 0.14002270996570587, "learning_rate": 7.348756834228421e-06, "loss": 0.0013, "step": 48510 }, { "epoch": 0.8199893529824325, "grad_norm": 0.30503541231155396, "learning_rate": 7.347454754687136e-06, "loss": 0.0035, "step": 48520 }, { "epoch": 0.8201583532612828, "grad_norm": 0.043744396418333054, "learning_rate": 7.346152470905915e-06, "loss": 0.001, "step": 48530 }, { "epoch": 0.8203273535401333, "grad_norm": 0.04082144796848297, "learning_rate": 7.344849982998061e-06, "loss": 0.0012, "step": 48540 }, { "epoch": 0.8204963538189838, "grad_norm": 0.036161936819553375, "learning_rate": 7.343547291076898e-06, "loss": 0.0021, "step": 48550 }, { "epoch": 0.8206653540978343, "grad_norm": 0.008398571982979774, "learning_rate": 7.342244395255765e-06, "loss": 0.0023, "step": 48560 }, { "epoch": 0.8208343543766847, "grad_norm": 0.0453002005815506, "learning_rate": 7.3409412956480216e-06, "loss": 0.0016, "step": 48570 }, { "epoch": 0.8210033546555352, "grad_norm": 0.3084333837032318, "learning_rate": 7.339637992367044e-06, "loss": 0.0027, "step": 48580 }, { "epoch": 0.8211723549343857, "grad_norm": 0.14241471886634827, "learning_rate": 7.338334485526224e-06, "loss": 0.0024, "step": 48590 }, { "epoch": 0.8213413552132361, "grad_norm": 0.07546312361955643, "learning_rate": 7.337030775238976e-06, "loss": 0.0017, "step": 48600 }, { "epoch": 0.8215103554920865, "grad_norm": 0.04120171442627907, "learning_rate": 7.3357268616187235e-06, "loss": 0.0023, "step": 48610 }, { "epoch": 0.821679355770937, "grad_norm": 0.05609271302819252, "learning_rate": 7.3344227447789204e-06, "loss": 0.0036, "step": 48620 }, { "epoch": 0.8218483560497875, "grad_norm": 0.18657422065734863, "learning_rate": 7.333118424833028e-06, "loss": 0.0015, "step": 48630 }, { "epoch": 0.8220173563286379, "grad_norm": 0.018935708329081535, "learning_rate": 7.331813901894526e-06, "loss": 0.0015, "step": 48640 }, { "epoch": 0.8221863566074884, "grad_norm": 0.016682909801602364, "learning_rate": 7.33050917607692e-06, "loss": 0.0015, "step": 48650 }, { "epoch": 0.8223553568863389, "grad_norm": 0.07267876714468002, "learning_rate": 7.329204247493722e-06, "loss": 0.0016, "step": 48660 }, { "epoch": 0.8225243571651893, "grad_norm": 0.10636164247989655, "learning_rate": 7.32789911625847e-06, "loss": 0.0012, "step": 48670 }, { "epoch": 0.8226933574440398, "grad_norm": 0.08027669787406921, "learning_rate": 7.326593782484716e-06, "loss": 0.0013, "step": 48680 }, { "epoch": 0.8228623577228903, "grad_norm": 0.06711503863334656, "learning_rate": 7.325288246286031e-06, "loss": 0.0015, "step": 48690 }, { "epoch": 0.8230313580017407, "grad_norm": 0.062487997114658356, "learning_rate": 7.3239825077760015e-06, "loss": 0.0018, "step": 48700 }, { "epoch": 0.8232003582805911, "grad_norm": 0.05867687985301018, "learning_rate": 7.322676567068234e-06, "loss": 0.0019, "step": 48710 }, { "epoch": 0.8233693585594416, "grad_norm": 0.13448190689086914, "learning_rate": 7.321370424276351e-06, "loss": 0.0022, "step": 48720 }, { "epoch": 0.8235383588382921, "grad_norm": 0.02572086825966835, "learning_rate": 7.320064079513993e-06, "loss": 0.0022, "step": 48730 }, { "epoch": 0.8237073591171425, "grad_norm": 0.11511944234371185, "learning_rate": 7.31875753289482e-06, "loss": 0.003, "step": 48740 }, { "epoch": 0.823876359395993, "grad_norm": 0.05535471439361572, "learning_rate": 7.3174507845325085e-06, "loss": 0.0008, "step": 48750 }, { "epoch": 0.8240453596748435, "grad_norm": 0.19832754135131836, "learning_rate": 7.316143834540749e-06, "loss": 0.002, "step": 48760 }, { "epoch": 0.824214359953694, "grad_norm": 0.09600799530744553, "learning_rate": 7.314836683033254e-06, "loss": 0.0025, "step": 48770 }, { "epoch": 0.8243833602325444, "grad_norm": 0.026862656697630882, "learning_rate": 7.313529330123752e-06, "loss": 0.002, "step": 48780 }, { "epoch": 0.8245523605113948, "grad_norm": 0.12975527346134186, "learning_rate": 7.312221775925989e-06, "loss": 0.002, "step": 48790 }, { "epoch": 0.8247213607902453, "grad_norm": 0.07842448353767395, "learning_rate": 7.310914020553728e-06, "loss": 0.0011, "step": 48800 }, { "epoch": 0.8248903610690957, "grad_norm": 0.016468260437250137, "learning_rate": 7.309606064120751e-06, "loss": 0.0005, "step": 48810 }, { "epoch": 0.8250593613479462, "grad_norm": 0.001380032510496676, "learning_rate": 7.308297906740856e-06, "loss": 0.0016, "step": 48820 }, { "epoch": 0.8252283616267967, "grad_norm": 0.09836116433143616, "learning_rate": 7.306989548527859e-06, "loss": 0.0016, "step": 48830 }, { "epoch": 0.8253973619056472, "grad_norm": 0.03906187042593956, "learning_rate": 7.305680989595595e-06, "loss": 0.0016, "step": 48840 }, { "epoch": 0.8255663621844976, "grad_norm": 0.024572791531682014, "learning_rate": 7.304372230057913e-06, "loss": 0.0019, "step": 48850 }, { "epoch": 0.8257353624633481, "grad_norm": 0.06552372127771378, "learning_rate": 7.303063270028681e-06, "loss": 0.0009, "step": 48860 }, { "epoch": 0.8259043627421985, "grad_norm": 0.1286754161119461, "learning_rate": 7.301754109621786e-06, "loss": 0.0024, "step": 48870 }, { "epoch": 0.826073363021049, "grad_norm": 0.009336556307971478, "learning_rate": 7.300444748951132e-06, "loss": 0.0014, "step": 48880 }, { "epoch": 0.8262423632998994, "grad_norm": 0.12424356490373611, "learning_rate": 7.299135188130639e-06, "loss": 0.0019, "step": 48890 }, { "epoch": 0.8264113635787499, "grad_norm": 0.028109529986977577, "learning_rate": 7.2978254272742445e-06, "loss": 0.0013, "step": 48900 }, { "epoch": 0.8265803638576004, "grad_norm": 0.10972272604703903, "learning_rate": 7.296515466495903e-06, "loss": 0.002, "step": 48910 }, { "epoch": 0.8267493641364508, "grad_norm": 0.1797754168510437, "learning_rate": 7.2952053059095915e-06, "loss": 0.0014, "step": 48920 }, { "epoch": 0.8269183644153013, "grad_norm": 0.59943687915802, "learning_rate": 7.293894945629298e-06, "loss": 0.003, "step": 48930 }, { "epoch": 0.8270873646941518, "grad_norm": 0.0860147625207901, "learning_rate": 7.2925843857690295e-06, "loss": 0.0023, "step": 48940 }, { "epoch": 0.8272563649730023, "grad_norm": 0.07308562099933624, "learning_rate": 7.291273626442812e-06, "loss": 0.0014, "step": 48950 }, { "epoch": 0.8274253652518526, "grad_norm": 0.05884837359189987, "learning_rate": 7.289962667764688e-06, "loss": 0.002, "step": 48960 }, { "epoch": 0.8275943655307031, "grad_norm": 0.024381542578339577, "learning_rate": 7.288651509848715e-06, "loss": 0.0019, "step": 48970 }, { "epoch": 0.8277633658095536, "grad_norm": 0.06704352796077728, "learning_rate": 7.287340152808973e-06, "loss": 0.0013, "step": 48980 }, { "epoch": 0.827932366088404, "grad_norm": 0.0669189840555191, "learning_rate": 7.286028596759555e-06, "loss": 0.0013, "step": 48990 }, { "epoch": 0.8281013663672545, "grad_norm": 0.024829840287566185, "learning_rate": 7.2847168418145716e-06, "loss": 0.0017, "step": 49000 }, { "epoch": 0.828270366646105, "grad_norm": 0.07830045372247696, "learning_rate": 7.2834048880881545e-06, "loss": 0.0027, "step": 49010 }, { "epoch": 0.8284393669249555, "grad_norm": 0.08251339942216873, "learning_rate": 7.282092735694449e-06, "loss": 0.0032, "step": 49020 }, { "epoch": 0.8286083672038059, "grad_norm": 0.018376484513282776, "learning_rate": 7.280780384747619e-06, "loss": 0.0013, "step": 49030 }, { "epoch": 0.8287773674826564, "grad_norm": 0.17820164561271667, "learning_rate": 7.279467835361844e-06, "loss": 0.0015, "step": 49040 }, { "epoch": 0.8289463677615068, "grad_norm": 0.13392888009548187, "learning_rate": 7.278155087651324e-06, "loss": 0.0024, "step": 49050 }, { "epoch": 0.8291153680403572, "grad_norm": 0.09335264563560486, "learning_rate": 7.276842141730273e-06, "loss": 0.001, "step": 49060 }, { "epoch": 0.8292843683192077, "grad_norm": 0.06904011964797974, "learning_rate": 7.275528997712924e-06, "loss": 0.0008, "step": 49070 }, { "epoch": 0.8294533685980582, "grad_norm": 0.04905041307210922, "learning_rate": 7.2742156557135265e-06, "loss": 0.0013, "step": 49080 }, { "epoch": 0.8296223688769087, "grad_norm": 0.1327400803565979, "learning_rate": 7.272902115846347e-06, "loss": 0.003, "step": 49090 }, { "epoch": 0.8297913691557591, "grad_norm": 0.08582969009876251, "learning_rate": 7.271588378225673e-06, "loss": 0.0015, "step": 49100 }, { "epoch": 0.8299603694346096, "grad_norm": 0.023333383724093437, "learning_rate": 7.270274442965804e-06, "loss": 0.0011, "step": 49110 }, { "epoch": 0.8301293697134601, "grad_norm": 0.07024768739938736, "learning_rate": 7.268960310181057e-06, "loss": 0.0013, "step": 49120 }, { "epoch": 0.8302983699923104, "grad_norm": 0.09488508850336075, "learning_rate": 7.2676459799857716e-06, "loss": 0.0011, "step": 49130 }, { "epoch": 0.8304673702711609, "grad_norm": 0.057504843920469284, "learning_rate": 7.2663314524942975e-06, "loss": 0.0012, "step": 49140 }, { "epoch": 0.8306363705500114, "grad_norm": 0.06765588372945786, "learning_rate": 7.265016727821008e-06, "loss": 0.0021, "step": 49150 }, { "epoch": 0.8308053708288619, "grad_norm": 0.060480304062366486, "learning_rate": 7.263701806080288e-06, "loss": 0.0028, "step": 49160 }, { "epoch": 0.8309743711077123, "grad_norm": 0.15077215433120728, "learning_rate": 7.2623866873865424e-06, "loss": 0.0013, "step": 49170 }, { "epoch": 0.8311433713865628, "grad_norm": 0.05140575021505356, "learning_rate": 7.261071371854195e-06, "loss": 0.0014, "step": 49180 }, { "epoch": 0.8313123716654133, "grad_norm": 0.06735235452651978, "learning_rate": 7.259755859597681e-06, "loss": 0.0011, "step": 49190 }, { "epoch": 0.8314813719442637, "grad_norm": 0.017180589959025383, "learning_rate": 7.2584401507314595e-06, "loss": 0.0014, "step": 49200 }, { "epoch": 0.8316503722231142, "grad_norm": 0.11614830791950226, "learning_rate": 7.257124245370003e-06, "loss": 0.0014, "step": 49210 }, { "epoch": 0.8318193725019646, "grad_norm": 0.01634841412305832, "learning_rate": 7.2558081436278e-06, "loss": 0.0026, "step": 49220 }, { "epoch": 0.831988372780815, "grad_norm": 0.04172159731388092, "learning_rate": 7.25449184561936e-06, "loss": 0.0015, "step": 49230 }, { "epoch": 0.8321573730596655, "grad_norm": 0.041454069316387177, "learning_rate": 7.2531753514592065e-06, "loss": 0.0018, "step": 49240 }, { "epoch": 0.832326373338516, "grad_norm": 0.016928566619753838, "learning_rate": 7.25185866126188e-06, "loss": 0.002, "step": 49250 }, { "epoch": 0.8324953736173665, "grad_norm": 0.11260402947664261, "learning_rate": 7.25054177514194e-06, "loss": 0.0013, "step": 49260 }, { "epoch": 0.832664373896217, "grad_norm": 0.013846118934452534, "learning_rate": 7.249224693213961e-06, "loss": 0.0014, "step": 49270 }, { "epoch": 0.8328333741750674, "grad_norm": 0.07115150988101959, "learning_rate": 7.247907415592534e-06, "loss": 0.0012, "step": 49280 }, { "epoch": 0.8330023744539179, "grad_norm": 0.04459391161799431, "learning_rate": 7.246589942392272e-06, "loss": 0.0033, "step": 49290 }, { "epoch": 0.8331713747327683, "grad_norm": 0.05890648439526558, "learning_rate": 7.2452722737278e-06, "loss": 0.0011, "step": 49300 }, { "epoch": 0.8333403750116187, "grad_norm": 0.04924681782722473, "learning_rate": 7.243954409713763e-06, "loss": 0.0011, "step": 49310 }, { "epoch": 0.8335093752904692, "grad_norm": 0.044344622641801834, "learning_rate": 7.242636350464819e-06, "loss": 0.0014, "step": 49320 }, { "epoch": 0.8336783755693197, "grad_norm": 0.06724030524492264, "learning_rate": 7.241318096095646e-06, "loss": 0.0017, "step": 49330 }, { "epoch": 0.8338473758481701, "grad_norm": 0.007544257678091526, "learning_rate": 7.23999964672094e-06, "loss": 0.0015, "step": 49340 }, { "epoch": 0.8340163761270206, "grad_norm": 0.02665545605123043, "learning_rate": 7.2386810024554125e-06, "loss": 0.0033, "step": 49350 }, { "epoch": 0.8341853764058711, "grad_norm": 0.0006780088879168034, "learning_rate": 7.2373621634137904e-06, "loss": 0.0011, "step": 49360 }, { "epoch": 0.8343543766847216, "grad_norm": 0.021561365574598312, "learning_rate": 7.236043129710818e-06, "loss": 0.0024, "step": 49370 }, { "epoch": 0.834523376963572, "grad_norm": 0.08685091137886047, "learning_rate": 7.234723901461262e-06, "loss": 0.0014, "step": 49380 }, { "epoch": 0.8346923772424224, "grad_norm": 0.08030354976654053, "learning_rate": 7.233404478779896e-06, "loss": 0.0023, "step": 49390 }, { "epoch": 0.8348613775212729, "grad_norm": 0.5395932197570801, "learning_rate": 7.232084861781522e-06, "loss": 0.0024, "step": 49400 }, { "epoch": 0.8350303778001233, "grad_norm": 0.011918571777641773, "learning_rate": 7.23076505058095e-06, "loss": 0.0026, "step": 49410 }, { "epoch": 0.8351993780789738, "grad_norm": 0.027819110080599785, "learning_rate": 7.2294450452930095e-06, "loss": 0.0022, "step": 49420 }, { "epoch": 0.8353683783578243, "grad_norm": 0.02106357552111149, "learning_rate": 7.228124846032549e-06, "loss": 0.001, "step": 49430 }, { "epoch": 0.8355373786366748, "grad_norm": 0.058657340705394745, "learning_rate": 7.226804452914431e-06, "loss": 0.0019, "step": 49440 }, { "epoch": 0.8357063789155252, "grad_norm": 0.08190031349658966, "learning_rate": 7.225483866053536e-06, "loss": 0.0013, "step": 49450 }, { "epoch": 0.8358753791943757, "grad_norm": 0.0971725806593895, "learning_rate": 7.224163085564763e-06, "loss": 0.0014, "step": 49460 }, { "epoch": 0.8360443794732262, "grad_norm": 0.020731769502162933, "learning_rate": 7.222842111563024e-06, "loss": 0.0011, "step": 49470 }, { "epoch": 0.8362133797520765, "grad_norm": 0.06420711427927017, "learning_rate": 7.221520944163252e-06, "loss": 0.0016, "step": 49480 }, { "epoch": 0.836382380030927, "grad_norm": 0.03257574141025543, "learning_rate": 7.220199583480394e-06, "loss": 0.0022, "step": 49490 }, { "epoch": 0.8365513803097775, "grad_norm": 0.013980922289192677, "learning_rate": 7.218878029629416e-06, "loss": 0.0012, "step": 49500 }, { "epoch": 0.836720380588628, "grad_norm": 0.04765333607792854, "learning_rate": 7.217556282725298e-06, "loss": 0.002, "step": 49510 }, { "epoch": 0.8368893808674784, "grad_norm": 0.02529972605407238, "learning_rate": 7.216234342883039e-06, "loss": 0.0014, "step": 49520 }, { "epoch": 0.8370583811463289, "grad_norm": 0.06483682245016098, "learning_rate": 7.214912210217655e-06, "loss": 0.0018, "step": 49530 }, { "epoch": 0.8372273814251794, "grad_norm": 0.07930611073970795, "learning_rate": 7.213589884844177e-06, "loss": 0.002, "step": 49540 }, { "epoch": 0.8373963817040299, "grad_norm": 0.029101185500621796, "learning_rate": 7.212267366877654e-06, "loss": 0.0008, "step": 49550 }, { "epoch": 0.8375653819828802, "grad_norm": 0.10520884394645691, "learning_rate": 7.210944656433151e-06, "loss": 0.0024, "step": 49560 }, { "epoch": 0.8377343822617307, "grad_norm": 0.035545896738767624, "learning_rate": 7.209621753625751e-06, "loss": 0.0023, "step": 49570 }, { "epoch": 0.8379033825405812, "grad_norm": 0.14516295492649078, "learning_rate": 7.208298658570551e-06, "loss": 0.0023, "step": 49580 }, { "epoch": 0.8380723828194316, "grad_norm": 0.04255577176809311, "learning_rate": 7.2069753713826695e-06, "loss": 0.0031, "step": 49590 }, { "epoch": 0.8382413830982821, "grad_norm": 0.04397115111351013, "learning_rate": 7.205651892177239e-06, "loss": 0.0014, "step": 49600 }, { "epoch": 0.8384103833771326, "grad_norm": 0.07351876050233841, "learning_rate": 7.2043282210694054e-06, "loss": 0.0016, "step": 49610 }, { "epoch": 0.8385793836559831, "grad_norm": 0.005482340231537819, "learning_rate": 7.2030043581743366e-06, "loss": 0.0009, "step": 49620 }, { "epoch": 0.8387483839348335, "grad_norm": 0.06633653491735458, "learning_rate": 7.2016803036072144e-06, "loss": 0.0018, "step": 49630 }, { "epoch": 0.838917384213684, "grad_norm": 0.03800305351614952, "learning_rate": 7.20035605748324e-06, "loss": 0.0021, "step": 49640 }, { "epoch": 0.8390863844925344, "grad_norm": 0.04843270033597946, "learning_rate": 7.199031619917627e-06, "loss": 0.0017, "step": 49650 }, { "epoch": 0.8392553847713848, "grad_norm": 0.01804737001657486, "learning_rate": 7.197706991025608e-06, "loss": 0.0013, "step": 49660 }, { "epoch": 0.8394243850502353, "grad_norm": 0.12135551124811172, "learning_rate": 7.196382170922432e-06, "loss": 0.0027, "step": 49670 }, { "epoch": 0.8395933853290858, "grad_norm": 0.041261736303567886, "learning_rate": 7.195057159723366e-06, "loss": 0.0023, "step": 49680 }, { "epoch": 0.8397623856079363, "grad_norm": 0.03335694223642349, "learning_rate": 7.193731957543693e-06, "loss": 0.002, "step": 49690 }, { "epoch": 0.8399313858867867, "grad_norm": 0.05219758301973343, "learning_rate": 7.19240656449871e-06, "loss": 0.0016, "step": 49700 }, { "epoch": 0.8401003861656372, "grad_norm": 0.10179810225963593, "learning_rate": 7.191080980703733e-06, "loss": 0.0011, "step": 49710 }, { "epoch": 0.8402693864444877, "grad_norm": 0.03386679291725159, "learning_rate": 7.189755206274095e-06, "loss": 0.001, "step": 49720 }, { "epoch": 0.8404383867233381, "grad_norm": 0.043032724410295486, "learning_rate": 7.188429241325145e-06, "loss": 0.0016, "step": 49730 }, { "epoch": 0.8406073870021885, "grad_norm": 0.06790139526128769, "learning_rate": 7.187103085972247e-06, "loss": 0.0016, "step": 49740 }, { "epoch": 0.840776387281039, "grad_norm": 0.09641426056623459, "learning_rate": 7.185776740330784e-06, "loss": 0.0017, "step": 49750 }, { "epoch": 0.8409453875598895, "grad_norm": 0.07377991080284119, "learning_rate": 7.1844502045161545e-06, "loss": 0.0017, "step": 49760 }, { "epoch": 0.8411143878387399, "grad_norm": 0.0946054458618164, "learning_rate": 7.183123478643772e-06, "loss": 0.0023, "step": 49770 }, { "epoch": 0.8412833881175904, "grad_norm": 0.016826393082737923, "learning_rate": 7.181796562829071e-06, "loss": 0.0018, "step": 49780 }, { "epoch": 0.8414523883964409, "grad_norm": 0.07942421734333038, "learning_rate": 7.180469457187498e-06, "loss": 0.0021, "step": 49790 }, { "epoch": 0.8416213886752913, "grad_norm": 0.0243277158588171, "learning_rate": 7.179142161834517e-06, "loss": 0.0015, "step": 49800 }, { "epoch": 0.8417903889541418, "grad_norm": 0.041775017976760864, "learning_rate": 7.17781467688561e-06, "loss": 0.002, "step": 49810 }, { "epoch": 0.8419593892329922, "grad_norm": 0.10232304781675339, "learning_rate": 7.176487002456274e-06, "loss": 0.0019, "step": 49820 }, { "epoch": 0.8421283895118427, "grad_norm": 0.06999991834163666, "learning_rate": 7.175159138662024e-06, "loss": 0.0022, "step": 49830 }, { "epoch": 0.8422973897906931, "grad_norm": 0.007234103046357632, "learning_rate": 7.17383108561839e-06, "loss": 0.0007, "step": 49840 }, { "epoch": 0.8424663900695436, "grad_norm": 0.009289576672017574, "learning_rate": 7.1725028434409185e-06, "loss": 0.0019, "step": 49850 }, { "epoch": 0.8426353903483941, "grad_norm": 0.020704420283436775, "learning_rate": 7.171174412245173e-06, "loss": 0.0018, "step": 49860 }, { "epoch": 0.8428043906272445, "grad_norm": 0.027486352249979973, "learning_rate": 7.1698457921467345e-06, "loss": 0.0013, "step": 49870 }, { "epoch": 0.842973390906095, "grad_norm": 0.058970868587493896, "learning_rate": 7.1685169832612e-06, "loss": 0.0019, "step": 49880 }, { "epoch": 0.8431423911849455, "grad_norm": 0.02297344245016575, "learning_rate": 7.1671879857041805e-06, "loss": 0.0012, "step": 49890 }, { "epoch": 0.843311391463796, "grad_norm": 0.06462989002466202, "learning_rate": 7.165858799591306e-06, "loss": 0.0015, "step": 49900 }, { "epoch": 0.8434803917426463, "grad_norm": 0.06035640090703964, "learning_rate": 7.1645294250382225e-06, "loss": 0.0019, "step": 49910 }, { "epoch": 0.8436493920214968, "grad_norm": 0.06446599960327148, "learning_rate": 7.163199862160591e-06, "loss": 0.0019, "step": 49920 }, { "epoch": 0.8438183923003473, "grad_norm": 0.03763274848461151, "learning_rate": 7.1618701110740905e-06, "loss": 0.0017, "step": 49930 }, { "epoch": 0.8439873925791977, "grad_norm": 0.05461042746901512, "learning_rate": 7.160540171894416e-06, "loss": 0.0016, "step": 49940 }, { "epoch": 0.8441563928580482, "grad_norm": 0.04370475932955742, "learning_rate": 7.159210044737279e-06, "loss": 0.0023, "step": 49950 }, { "epoch": 0.8443253931368987, "grad_norm": 0.015897465869784355, "learning_rate": 7.157879729718407e-06, "loss": 0.0018, "step": 49960 }, { "epoch": 0.8444943934157492, "grad_norm": 0.07231079041957855, "learning_rate": 7.156549226953542e-06, "loss": 0.0088, "step": 49970 }, { "epoch": 0.8446633936945996, "grad_norm": 0.14473244547843933, "learning_rate": 7.155218536558446e-06, "loss": 0.0027, "step": 49980 }, { "epoch": 0.84483239397345, "grad_norm": 0.08568382263183594, "learning_rate": 7.153887658648895e-06, "loss": 0.0022, "step": 49990 }, { "epoch": 0.8450013942523005, "grad_norm": 0.022534925490617752, "learning_rate": 7.152556593340683e-06, "loss": 0.0019, "step": 50000 }, { "epoch": 0.845170394531151, "grad_norm": 0.05036391317844391, "learning_rate": 7.151225340749617e-06, "loss": 0.0013, "step": 50010 }, { "epoch": 0.8453393948100014, "grad_norm": 0.04636518657207489, "learning_rate": 7.149893900991523e-06, "loss": 0.0015, "step": 50020 }, { "epoch": 0.8455083950888519, "grad_norm": 0.026759391650557518, "learning_rate": 7.148562274182243e-06, "loss": 0.0014, "step": 50030 }, { "epoch": 0.8456773953677024, "grad_norm": 0.16903258860111237, "learning_rate": 7.147230460437636e-06, "loss": 0.0025, "step": 50040 }, { "epoch": 0.8458463956465528, "grad_norm": 0.0035917949862778187, "learning_rate": 7.145898459873575e-06, "loss": 0.0013, "step": 50050 }, { "epoch": 0.8460153959254033, "grad_norm": 0.030653197318315506, "learning_rate": 7.14456627260595e-06, "loss": 0.0026, "step": 50060 }, { "epoch": 0.8461843962042538, "grad_norm": 0.09031669050455093, "learning_rate": 7.14323389875067e-06, "loss": 0.0025, "step": 50070 }, { "epoch": 0.8463533964831041, "grad_norm": 0.0398169681429863, "learning_rate": 7.141901338423657e-06, "loss": 0.0008, "step": 50080 }, { "epoch": 0.8465223967619546, "grad_norm": 0.023374401032924652, "learning_rate": 7.140568591740849e-06, "loss": 0.0017, "step": 50090 }, { "epoch": 0.8466913970408051, "grad_norm": 0.062085092067718506, "learning_rate": 7.139235658818202e-06, "loss": 0.0096, "step": 50100 }, { "epoch": 0.8468603973196556, "grad_norm": 0.04841598868370056, "learning_rate": 7.137902539771688e-06, "loss": 0.0015, "step": 50110 }, { "epoch": 0.847029397598506, "grad_norm": 0.06624577194452286, "learning_rate": 7.136569234717296e-06, "loss": 0.0013, "step": 50120 }, { "epoch": 0.8471983978773565, "grad_norm": 0.12280794978141785, "learning_rate": 7.135235743771027e-06, "loss": 0.001, "step": 50130 }, { "epoch": 0.847367398156207, "grad_norm": 0.1125815361738205, "learning_rate": 7.133902067048902e-06, "loss": 0.0014, "step": 50140 }, { "epoch": 0.8475363984350575, "grad_norm": 0.0400351881980896, "learning_rate": 7.13256820466696e-06, "loss": 0.0042, "step": 50150 }, { "epoch": 0.8477053987139079, "grad_norm": 0.17242294549942017, "learning_rate": 7.131234156741249e-06, "loss": 0.0019, "step": 50160 }, { "epoch": 0.8478743989927583, "grad_norm": 0.008832902647554874, "learning_rate": 7.129899923387843e-06, "loss": 0.0016, "step": 50170 }, { "epoch": 0.8480433992716088, "grad_norm": 0.05237210914492607, "learning_rate": 7.128565504722824e-06, "loss": 0.001, "step": 50180 }, { "epoch": 0.8482123995504592, "grad_norm": 0.11998438835144043, "learning_rate": 7.127230900862292e-06, "loss": 0.0024, "step": 50190 }, { "epoch": 0.8483813998293097, "grad_norm": 0.026236379519104958, "learning_rate": 7.125896111922366e-06, "loss": 0.0011, "step": 50200 }, { "epoch": 0.8485504001081602, "grad_norm": 0.052572187036275864, "learning_rate": 7.1245611380191775e-06, "loss": 0.0012, "step": 50210 }, { "epoch": 0.8487194003870107, "grad_norm": 0.04595402255654335, "learning_rate": 7.1232259792688755e-06, "loss": 0.0016, "step": 50220 }, { "epoch": 0.8488884006658611, "grad_norm": 0.07488039135932922, "learning_rate": 7.1218906357876275e-06, "loss": 0.002, "step": 50230 }, { "epoch": 0.8490574009447116, "grad_norm": 0.05835746228694916, "learning_rate": 7.1205551076916125e-06, "loss": 0.0013, "step": 50240 }, { "epoch": 0.849226401223562, "grad_norm": 0.07503136247396469, "learning_rate": 7.119219395097028e-06, "loss": 0.0018, "step": 50250 }, { "epoch": 0.8493954015024124, "grad_norm": 0.10128705948591232, "learning_rate": 7.117883498120088e-06, "loss": 0.0016, "step": 50260 }, { "epoch": 0.8495644017812629, "grad_norm": 0.06169435754418373, "learning_rate": 7.116547416877024e-06, "loss": 0.0014, "step": 50270 }, { "epoch": 0.8497334020601134, "grad_norm": 0.03982456028461456, "learning_rate": 7.115211151484081e-06, "loss": 0.0013, "step": 50280 }, { "epoch": 0.8499024023389639, "grad_norm": 0.14415329694747925, "learning_rate": 7.1138747020575175e-06, "loss": 0.0017, "step": 50290 }, { "epoch": 0.8500714026178143, "grad_norm": 0.04282059520483017, "learning_rate": 7.112538068713612e-06, "loss": 0.0014, "step": 50300 }, { "epoch": 0.8502404028966648, "grad_norm": 0.020503170788288116, "learning_rate": 7.11120125156866e-06, "loss": 0.0008, "step": 50310 }, { "epoch": 0.8504094031755153, "grad_norm": 0.17789845168590546, "learning_rate": 7.109864250738971e-06, "loss": 0.0018, "step": 50320 }, { "epoch": 0.8505784034543658, "grad_norm": 0.04640667513012886, "learning_rate": 7.108527066340869e-06, "loss": 0.001, "step": 50330 }, { "epoch": 0.8507474037332161, "grad_norm": 0.16030360758304596, "learning_rate": 7.1071896984906955e-06, "loss": 0.0031, "step": 50340 }, { "epoch": 0.8509164040120666, "grad_norm": 0.051341064274311066, "learning_rate": 7.105852147304809e-06, "loss": 0.0016, "step": 50350 }, { "epoch": 0.8510854042909171, "grad_norm": 0.08269865065813065, "learning_rate": 7.104514412899583e-06, "loss": 0.0015, "step": 50360 }, { "epoch": 0.8512544045697675, "grad_norm": 0.13866493105888367, "learning_rate": 7.103176495391406e-06, "loss": 0.0036, "step": 50370 }, { "epoch": 0.851423404848618, "grad_norm": 0.08190497010946274, "learning_rate": 7.101838394896685e-06, "loss": 0.0021, "step": 50380 }, { "epoch": 0.8515924051274685, "grad_norm": 0.14248859882354736, "learning_rate": 7.1005001115318386e-06, "loss": 0.0049, "step": 50390 }, { "epoch": 0.851761405406319, "grad_norm": 0.004143700003623962, "learning_rate": 7.099161645413305e-06, "loss": 0.0024, "step": 50400 }, { "epoch": 0.8519304056851694, "grad_norm": 0.04483238607645035, "learning_rate": 7.097822996657538e-06, "loss": 0.0037, "step": 50410 }, { "epoch": 0.8520994059640199, "grad_norm": 0.09904813021421432, "learning_rate": 7.096484165381007e-06, "loss": 0.0027, "step": 50420 }, { "epoch": 0.8522684062428703, "grad_norm": 0.10735070705413818, "learning_rate": 7.095145151700196e-06, "loss": 0.0018, "step": 50430 }, { "epoch": 0.8524374065217207, "grad_norm": 0.3052399456501007, "learning_rate": 7.0938059557316055e-06, "loss": 0.001, "step": 50440 }, { "epoch": 0.8526064068005712, "grad_norm": 0.06089219078421593, "learning_rate": 7.09246657759175e-06, "loss": 0.0016, "step": 50450 }, { "epoch": 0.8527754070794217, "grad_norm": 0.05923866108059883, "learning_rate": 7.091127017397166e-06, "loss": 0.0021, "step": 50460 }, { "epoch": 0.8529444073582721, "grad_norm": 0.0695035457611084, "learning_rate": 7.0897872752644e-06, "loss": 0.0013, "step": 50470 }, { "epoch": 0.8531134076371226, "grad_norm": 0.06044061854481697, "learning_rate": 7.088447351310015e-06, "loss": 0.001, "step": 50480 }, { "epoch": 0.8532824079159731, "grad_norm": 0.03624828904867172, "learning_rate": 7.087107245650592e-06, "loss": 0.0017, "step": 50490 }, { "epoch": 0.8534514081948236, "grad_norm": 0.035229530185461044, "learning_rate": 7.085766958402727e-06, "loss": 0.0012, "step": 50500 }, { "epoch": 0.8536204084736739, "grad_norm": 0.0526130348443985, "learning_rate": 7.08442648968303e-06, "loss": 0.002, "step": 50510 }, { "epoch": 0.8537894087525244, "grad_norm": 0.05172059312462807, "learning_rate": 7.083085839608128e-06, "loss": 0.0014, "step": 50520 }, { "epoch": 0.8539584090313749, "grad_norm": 0.04551641643047333, "learning_rate": 7.081745008294667e-06, "loss": 0.0013, "step": 50530 }, { "epoch": 0.8541274093102253, "grad_norm": 0.07173636555671692, "learning_rate": 7.080403995859302e-06, "loss": 0.0016, "step": 50540 }, { "epoch": 0.8542964095890758, "grad_norm": 0.028299428522586823, "learning_rate": 7.079062802418711e-06, "loss": 0.0021, "step": 50550 }, { "epoch": 0.8544654098679263, "grad_norm": 0.2714872360229492, "learning_rate": 7.077721428089583e-06, "loss": 0.0013, "step": 50560 }, { "epoch": 0.8546344101467768, "grad_norm": 0.11973876506090164, "learning_rate": 7.076379872988624e-06, "loss": 0.0017, "step": 50570 }, { "epoch": 0.8548034104256272, "grad_norm": 0.19866228103637695, "learning_rate": 7.075038137232556e-06, "loss": 0.0025, "step": 50580 }, { "epoch": 0.8549724107044777, "grad_norm": 0.044867537915706635, "learning_rate": 7.073696220938115e-06, "loss": 0.0026, "step": 50590 }, { "epoch": 0.8551414109833281, "grad_norm": 0.028597289696335793, "learning_rate": 7.072354124222058e-06, "loss": 0.001, "step": 50600 }, { "epoch": 0.8553104112621785, "grad_norm": 0.0332944430410862, "learning_rate": 7.071011847201149e-06, "loss": 0.002, "step": 50610 }, { "epoch": 0.855479411541029, "grad_norm": 0.08548944443464279, "learning_rate": 7.0696693899921755e-06, "loss": 0.0009, "step": 50620 }, { "epoch": 0.8556484118198795, "grad_norm": 0.1740231066942215, "learning_rate": 7.068326752711937e-06, "loss": 0.0022, "step": 50630 }, { "epoch": 0.85581741209873, "grad_norm": 0.15070150792598724, "learning_rate": 7.066983935477251e-06, "loss": 0.002, "step": 50640 }, { "epoch": 0.8559864123775804, "grad_norm": 0.1309231072664261, "learning_rate": 7.065640938404945e-06, "loss": 0.0023, "step": 50650 }, { "epoch": 0.8561554126564309, "grad_norm": 0.00871087796986103, "learning_rate": 7.064297761611872e-06, "loss": 0.0026, "step": 50660 }, { "epoch": 0.8563244129352814, "grad_norm": 0.09649201482534409, "learning_rate": 7.062954405214891e-06, "loss": 0.0016, "step": 50670 }, { "epoch": 0.8564934132141317, "grad_norm": 0.09451442211866379, "learning_rate": 7.061610869330881e-06, "loss": 0.0026, "step": 50680 }, { "epoch": 0.8566624134929822, "grad_norm": 0.09087509661912918, "learning_rate": 7.060267154076739e-06, "loss": 0.0012, "step": 50690 }, { "epoch": 0.8568314137718327, "grad_norm": 0.18903668224811554, "learning_rate": 7.0589232595693705e-06, "loss": 0.0013, "step": 50700 }, { "epoch": 0.8570004140506832, "grad_norm": 0.10971001535654068, "learning_rate": 7.057579185925702e-06, "loss": 0.0016, "step": 50710 }, { "epoch": 0.8571694143295336, "grad_norm": 0.051436763256788254, "learning_rate": 7.0562349332626775e-06, "loss": 0.0023, "step": 50720 }, { "epoch": 0.8573384146083841, "grad_norm": 0.23147019743919373, "learning_rate": 7.054890501697249e-06, "loss": 0.0013, "step": 50730 }, { "epoch": 0.8575074148872346, "grad_norm": 0.07230903208255768, "learning_rate": 7.053545891346391e-06, "loss": 0.0018, "step": 50740 }, { "epoch": 0.8576764151660851, "grad_norm": 0.06627970933914185, "learning_rate": 7.052201102327091e-06, "loss": 0.0035, "step": 50750 }, { "epoch": 0.8578454154449355, "grad_norm": 0.10356321185827255, "learning_rate": 7.050856134756352e-06, "loss": 0.002, "step": 50760 }, { "epoch": 0.8580144157237859, "grad_norm": 0.1255701780319214, "learning_rate": 7.049510988751193e-06, "loss": 0.0017, "step": 50770 }, { "epoch": 0.8581834160026364, "grad_norm": 0.07557336986064911, "learning_rate": 7.0481656644286475e-06, "loss": 0.0015, "step": 50780 }, { "epoch": 0.8583524162814868, "grad_norm": 0.0351254902780056, "learning_rate": 7.046820161905766e-06, "loss": 0.0022, "step": 50790 }, { "epoch": 0.8585214165603373, "grad_norm": 0.1283976435661316, "learning_rate": 7.045474481299613e-06, "loss": 0.0011, "step": 50800 }, { "epoch": 0.8586904168391878, "grad_norm": 0.018550874665379524, "learning_rate": 7.04412862272727e-06, "loss": 0.0014, "step": 50810 }, { "epoch": 0.8588594171180383, "grad_norm": 0.14388088881969452, "learning_rate": 7.042782586305832e-06, "loss": 0.0022, "step": 50820 }, { "epoch": 0.8590284173968887, "grad_norm": 0.07648961246013641, "learning_rate": 7.041436372152411e-06, "loss": 0.0018, "step": 50830 }, { "epoch": 0.8591974176757392, "grad_norm": 0.02715301141142845, "learning_rate": 7.0400899803841364e-06, "loss": 0.0027, "step": 50840 }, { "epoch": 0.8593664179545897, "grad_norm": 0.03417889028787613, "learning_rate": 7.038743411118148e-06, "loss": 0.0018, "step": 50850 }, { "epoch": 0.85953541823344, "grad_norm": 0.20179900527000427, "learning_rate": 7.037396664471605e-06, "loss": 0.0013, "step": 50860 }, { "epoch": 0.8597044185122905, "grad_norm": 0.06350982934236526, "learning_rate": 7.036049740561682e-06, "loss": 0.0019, "step": 50870 }, { "epoch": 0.859873418791141, "grad_norm": 0.003680954221636057, "learning_rate": 7.034702639505565e-06, "loss": 0.0013, "step": 50880 }, { "epoch": 0.8600424190699915, "grad_norm": 0.12450505048036575, "learning_rate": 7.033355361420461e-06, "loss": 0.0014, "step": 50890 }, { "epoch": 0.8602114193488419, "grad_norm": 0.04708400368690491, "learning_rate": 7.032007906423588e-06, "loss": 0.0011, "step": 50900 }, { "epoch": 0.8603804196276924, "grad_norm": 0.08234564960002899, "learning_rate": 7.0306602746321805e-06, "loss": 0.001, "step": 50910 }, { "epoch": 0.8605494199065429, "grad_norm": 0.02741340734064579, "learning_rate": 7.0293124661634925e-06, "loss": 0.0011, "step": 50920 }, { "epoch": 0.8607184201853934, "grad_norm": 0.08544056117534637, "learning_rate": 7.0279644811347855e-06, "loss": 0.002, "step": 50930 }, { "epoch": 0.8608874204642437, "grad_norm": 0.0741511806845665, "learning_rate": 7.026616319663346e-06, "loss": 0.001, "step": 50940 }, { "epoch": 0.8610564207430942, "grad_norm": 0.0694316178560257, "learning_rate": 7.025267981866466e-06, "loss": 0.0018, "step": 50950 }, { "epoch": 0.8612254210219447, "grad_norm": 0.17879147827625275, "learning_rate": 7.023919467861459e-06, "loss": 0.0018, "step": 50960 }, { "epoch": 0.8613944213007951, "grad_norm": 0.09562186896800995, "learning_rate": 7.022570777765651e-06, "loss": 0.0011, "step": 50970 }, { "epoch": 0.8615634215796456, "grad_norm": 0.055727630853652954, "learning_rate": 7.0212219116963875e-06, "loss": 0.0017, "step": 50980 }, { "epoch": 0.8617324218584961, "grad_norm": 0.12096261233091354, "learning_rate": 7.019872869771025e-06, "loss": 0.0028, "step": 50990 }, { "epoch": 0.8619014221373466, "grad_norm": 0.12893104553222656, "learning_rate": 7.018523652106934e-06, "loss": 0.0023, "step": 51000 }, { "epoch": 0.862070422416197, "grad_norm": 0.13929513096809387, "learning_rate": 7.0171742588215075e-06, "loss": 0.0015, "step": 51010 }, { "epoch": 0.8622394226950475, "grad_norm": 0.0476958341896534, "learning_rate": 7.015824690032146e-06, "loss": 0.0019, "step": 51020 }, { "epoch": 0.8624084229738979, "grad_norm": 0.11641071736812592, "learning_rate": 7.0144749458562686e-06, "loss": 0.0027, "step": 51030 }, { "epoch": 0.8625774232527483, "grad_norm": 0.0025708950124680996, "learning_rate": 7.013125026411313e-06, "loss": 0.0018, "step": 51040 }, { "epoch": 0.8627464235315988, "grad_norm": 0.09292472898960114, "learning_rate": 7.0117749318147256e-06, "loss": 0.0011, "step": 51050 }, { "epoch": 0.8629154238104493, "grad_norm": 0.062427133321762085, "learning_rate": 7.010424662183971e-06, "loss": 0.0009, "step": 51060 }, { "epoch": 0.8630844240892998, "grad_norm": 0.21843813359737396, "learning_rate": 7.00907421763653e-06, "loss": 0.0024, "step": 51070 }, { "epoch": 0.8632534243681502, "grad_norm": 0.013522603549063206, "learning_rate": 7.007723598289898e-06, "loss": 0.0012, "step": 51080 }, { "epoch": 0.8634224246470007, "grad_norm": 0.0820799246430397, "learning_rate": 7.006372804261586e-06, "loss": 0.0016, "step": 51090 }, { "epoch": 0.8635914249258512, "grad_norm": 0.026028936728835106, "learning_rate": 7.005021835669119e-06, "loss": 0.0024, "step": 51100 }, { "epoch": 0.8637604252047016, "grad_norm": 0.05370361730456352, "learning_rate": 7.003670692630035e-06, "loss": 0.0031, "step": 51110 }, { "epoch": 0.863929425483552, "grad_norm": 0.025905350223183632, "learning_rate": 7.002319375261895e-06, "loss": 0.0013, "step": 51120 }, { "epoch": 0.8640984257624025, "grad_norm": 0.02977590821683407, "learning_rate": 7.000967883682269e-06, "loss": 0.0026, "step": 51130 }, { "epoch": 0.864267426041253, "grad_norm": 0.09860771149396896, "learning_rate": 6.999616218008741e-06, "loss": 0.0029, "step": 51140 }, { "epoch": 0.8644364263201034, "grad_norm": 0.006294582970440388, "learning_rate": 6.998264378358915e-06, "loss": 0.001, "step": 51150 }, { "epoch": 0.8646054265989539, "grad_norm": 0.06445837020874023, "learning_rate": 6.9969123648504035e-06, "loss": 0.0018, "step": 51160 }, { "epoch": 0.8647744268778044, "grad_norm": 0.17422986030578613, "learning_rate": 6.995560177600842e-06, "loss": 0.0022, "step": 51170 }, { "epoch": 0.8649434271566548, "grad_norm": 0.050527047365903854, "learning_rate": 6.994207816727877e-06, "loss": 0.0011, "step": 51180 }, { "epoch": 0.8651124274355053, "grad_norm": 0.020956074818968773, "learning_rate": 6.992855282349169e-06, "loss": 0.0016, "step": 51190 }, { "epoch": 0.8652814277143557, "grad_norm": 0.0970766693353653, "learning_rate": 6.991502574582397e-06, "loss": 0.0016, "step": 51200 }, { "epoch": 0.8654504279932062, "grad_norm": 0.04854821041226387, "learning_rate": 6.99014969354525e-06, "loss": 0.0022, "step": 51210 }, { "epoch": 0.8656194282720566, "grad_norm": 0.08087870478630066, "learning_rate": 6.98879663935544e-06, "loss": 0.0014, "step": 51220 }, { "epoch": 0.8657884285509071, "grad_norm": 0.012758402153849602, "learning_rate": 6.987443412130684e-06, "loss": 0.0019, "step": 51230 }, { "epoch": 0.8659574288297576, "grad_norm": 0.03489887714385986, "learning_rate": 6.986090011988723e-06, "loss": 0.0008, "step": 51240 }, { "epoch": 0.866126429108608, "grad_norm": 0.03194853290915489, "learning_rate": 6.9847364390473084e-06, "loss": 0.0013, "step": 51250 }, { "epoch": 0.8662954293874585, "grad_norm": 0.024313364177942276, "learning_rate": 6.9833826934242065e-06, "loss": 0.0021, "step": 51260 }, { "epoch": 0.866464429666309, "grad_norm": 0.05284968391060829, "learning_rate": 6.9820287752372015e-06, "loss": 0.0011, "step": 51270 }, { "epoch": 0.8666334299451595, "grad_norm": 0.007368212100118399, "learning_rate": 6.9806746846040895e-06, "loss": 0.0016, "step": 51280 }, { "epoch": 0.8668024302240098, "grad_norm": 0.059824682772159576, "learning_rate": 6.9793204216426825e-06, "loss": 0.0039, "step": 51290 }, { "epoch": 0.8669714305028603, "grad_norm": 0.06650307029485703, "learning_rate": 6.97796598647081e-06, "loss": 0.0011, "step": 51300 }, { "epoch": 0.8671404307817108, "grad_norm": 0.3455488681793213, "learning_rate": 6.976611379206312e-06, "loss": 0.0016, "step": 51310 }, { "epoch": 0.8673094310605612, "grad_norm": 0.051520440727472305, "learning_rate": 6.975256599967047e-06, "loss": 0.0016, "step": 51320 }, { "epoch": 0.8674784313394117, "grad_norm": 0.07747151702642441, "learning_rate": 6.973901648870889e-06, "loss": 0.0071, "step": 51330 }, { "epoch": 0.8676474316182622, "grad_norm": 0.0553559847176075, "learning_rate": 6.972546526035723e-06, "loss": 0.0013, "step": 51340 }, { "epoch": 0.8678164318971127, "grad_norm": 0.25140562653541565, "learning_rate": 6.971191231579451e-06, "loss": 0.0024, "step": 51350 }, { "epoch": 0.8679854321759631, "grad_norm": 0.14226362109184265, "learning_rate": 6.969835765619993e-06, "loss": 0.0017, "step": 51360 }, { "epoch": 0.8681544324548135, "grad_norm": 0.024550093337893486, "learning_rate": 6.968480128275279e-06, "loss": 0.0009, "step": 51370 }, { "epoch": 0.868323432733664, "grad_norm": 0.15737617015838623, "learning_rate": 6.967124319663255e-06, "loss": 0.0013, "step": 51380 }, { "epoch": 0.8684924330125144, "grad_norm": 0.052487265318632126, "learning_rate": 6.965768339901885e-06, "loss": 0.0025, "step": 51390 }, { "epoch": 0.8686614332913649, "grad_norm": 0.051464054733514786, "learning_rate": 6.964412189109145e-06, "loss": 0.0017, "step": 51400 }, { "epoch": 0.8688304335702154, "grad_norm": 0.06501564383506775, "learning_rate": 6.963055867403027e-06, "loss": 0.0008, "step": 51410 }, { "epoch": 0.8689994338490659, "grad_norm": 0.008760693483054638, "learning_rate": 6.961699374901536e-06, "loss": 0.0016, "step": 51420 }, { "epoch": 0.8691684341279163, "grad_norm": 0.009418006055057049, "learning_rate": 6.960342711722697e-06, "loss": 0.0006, "step": 51430 }, { "epoch": 0.8693374344067668, "grad_norm": 0.0037960095796734095, "learning_rate": 6.958985877984543e-06, "loss": 0.0008, "step": 51440 }, { "epoch": 0.8695064346856173, "grad_norm": 0.03941208869218826, "learning_rate": 6.957628873805125e-06, "loss": 0.0013, "step": 51450 }, { "epoch": 0.8696754349644676, "grad_norm": 0.06688125431537628, "learning_rate": 6.9562716993025125e-06, "loss": 0.0026, "step": 51460 }, { "epoch": 0.8698444352433181, "grad_norm": 0.02947884425520897, "learning_rate": 6.954914354594782e-06, "loss": 0.0012, "step": 51470 }, { "epoch": 0.8700134355221686, "grad_norm": 0.06458774209022522, "learning_rate": 6.953556839800031e-06, "loss": 0.001, "step": 51480 }, { "epoch": 0.8701824358010191, "grad_norm": 0.02007310651242733, "learning_rate": 6.952199155036371e-06, "loss": 0.0012, "step": 51490 }, { "epoch": 0.8703514360798695, "grad_norm": 0.13680298626422882, "learning_rate": 6.950841300421923e-06, "loss": 0.0025, "step": 51500 }, { "epoch": 0.87052043635872, "grad_norm": 0.05821897089481354, "learning_rate": 6.949483276074832e-06, "loss": 0.0013, "step": 51510 }, { "epoch": 0.8706894366375705, "grad_norm": 0.028216827660799026, "learning_rate": 6.948125082113251e-06, "loss": 0.0032, "step": 51520 }, { "epoch": 0.870858436916421, "grad_norm": 0.23589423298835754, "learning_rate": 6.946766718655348e-06, "loss": 0.0028, "step": 51530 }, { "epoch": 0.8710274371952714, "grad_norm": 0.009308498352766037, "learning_rate": 6.945408185819309e-06, "loss": 0.0007, "step": 51540 }, { "epoch": 0.8711964374741218, "grad_norm": 0.11392482370138168, "learning_rate": 6.944049483723332e-06, "loss": 0.0026, "step": 51550 }, { "epoch": 0.8713654377529723, "grad_norm": 0.013795859180390835, "learning_rate": 6.94269061248563e-06, "loss": 0.0018, "step": 51560 }, { "epoch": 0.8715344380318227, "grad_norm": 0.06885958462953568, "learning_rate": 6.941331572224432e-06, "loss": 0.0024, "step": 51570 }, { "epoch": 0.8717034383106732, "grad_norm": 0.06273774057626724, "learning_rate": 6.939972363057982e-06, "loss": 0.0013, "step": 51580 }, { "epoch": 0.8718724385895237, "grad_norm": 0.042911358177661896, "learning_rate": 6.938612985104536e-06, "loss": 0.0007, "step": 51590 }, { "epoch": 0.8720414388683742, "grad_norm": 0.19388608634471893, "learning_rate": 6.937253438482369e-06, "loss": 0.0016, "step": 51600 }, { "epoch": 0.8722104391472246, "grad_norm": 0.07669465988874435, "learning_rate": 6.935893723309766e-06, "loss": 0.0014, "step": 51610 }, { "epoch": 0.8723794394260751, "grad_norm": 0.009165574796497822, "learning_rate": 6.934533839705029e-06, "loss": 0.0034, "step": 51620 }, { "epoch": 0.8725484397049255, "grad_norm": 0.02931053563952446, "learning_rate": 6.933173787786476e-06, "loss": 0.0022, "step": 51630 }, { "epoch": 0.8727174399837759, "grad_norm": 0.05150367692112923, "learning_rate": 6.931813567672435e-06, "loss": 0.0021, "step": 51640 }, { "epoch": 0.8728864402626264, "grad_norm": 0.03767949715256691, "learning_rate": 6.930453179481256e-06, "loss": 0.0026, "step": 51650 }, { "epoch": 0.8730554405414769, "grad_norm": 0.08445336669683456, "learning_rate": 6.929092623331296e-06, "loss": 0.0022, "step": 51660 }, { "epoch": 0.8732244408203274, "grad_norm": 0.03677793964743614, "learning_rate": 6.92773189934093e-06, "loss": 0.0011, "step": 51670 }, { "epoch": 0.8733934410991778, "grad_norm": 0.05863290652632713, "learning_rate": 6.926371007628551e-06, "loss": 0.0007, "step": 51680 }, { "epoch": 0.8735624413780283, "grad_norm": 0.01422297116369009, "learning_rate": 6.925009948312558e-06, "loss": 0.0013, "step": 51690 }, { "epoch": 0.8737314416568788, "grad_norm": 0.05631661415100098, "learning_rate": 6.923648721511374e-06, "loss": 0.0017, "step": 51700 }, { "epoch": 0.8739004419357292, "grad_norm": 0.03909344598650932, "learning_rate": 6.922287327343432e-06, "loss": 0.0014, "step": 51710 }, { "epoch": 0.8740694422145796, "grad_norm": 0.14640794694423676, "learning_rate": 6.920925765927178e-06, "loss": 0.0013, "step": 51720 }, { "epoch": 0.8742384424934301, "grad_norm": 0.09230657666921616, "learning_rate": 6.9195640373810756e-06, "loss": 0.0017, "step": 51730 }, { "epoch": 0.8744074427722806, "grad_norm": 0.11621160060167313, "learning_rate": 6.9182021418236025e-06, "loss": 0.002, "step": 51740 }, { "epoch": 0.874576443051131, "grad_norm": 0.011284736916422844, "learning_rate": 6.916840079373247e-06, "loss": 0.0012, "step": 51750 }, { "epoch": 0.8747454433299815, "grad_norm": 0.11019916832447052, "learning_rate": 6.915477850148519e-06, "loss": 0.0019, "step": 51760 }, { "epoch": 0.874914443608832, "grad_norm": 0.06869740039110184, "learning_rate": 6.914115454267936e-06, "loss": 0.0011, "step": 51770 }, { "epoch": 0.8750834438876824, "grad_norm": 0.09072849154472351, "learning_rate": 6.9127528918500344e-06, "loss": 0.0018, "step": 51780 }, { "epoch": 0.8752524441665329, "grad_norm": 0.013464599847793579, "learning_rate": 6.911390163013364e-06, "loss": 0.0016, "step": 51790 }, { "epoch": 0.8754214444453834, "grad_norm": 0.04589114338159561, "learning_rate": 6.910027267876489e-06, "loss": 0.0013, "step": 51800 }, { "epoch": 0.8755904447242338, "grad_norm": 0.05709948390722275, "learning_rate": 6.908664206557987e-06, "loss": 0.002, "step": 51810 }, { "epoch": 0.8757594450030842, "grad_norm": 0.05664777010679245, "learning_rate": 6.907300979176452e-06, "loss": 0.0019, "step": 51820 }, { "epoch": 0.8759284452819347, "grad_norm": 0.030458100140094757, "learning_rate": 6.905937585850491e-06, "loss": 0.0013, "step": 51830 }, { "epoch": 0.8760974455607852, "grad_norm": 0.02953479439020157, "learning_rate": 6.904574026698724e-06, "loss": 0.0019, "step": 51840 }, { "epoch": 0.8762664458396356, "grad_norm": 0.0162524227052927, "learning_rate": 6.90321030183979e-06, "loss": 0.001, "step": 51850 }, { "epoch": 0.8764354461184861, "grad_norm": 0.025975948199629784, "learning_rate": 6.901846411392338e-06, "loss": 0.0014, "step": 51860 }, { "epoch": 0.8766044463973366, "grad_norm": 0.018599003553390503, "learning_rate": 6.900482355475033e-06, "loss": 0.0023, "step": 51870 }, { "epoch": 0.8767734466761871, "grad_norm": 0.022095203399658203, "learning_rate": 6.8991181342065546e-06, "loss": 0.0019, "step": 51880 }, { "epoch": 0.8769424469550374, "grad_norm": 0.05831044912338257, "learning_rate": 6.897753747705599e-06, "loss": 0.0015, "step": 51890 }, { "epoch": 0.8771114472338879, "grad_norm": 0.2666162848472595, "learning_rate": 6.896389196090871e-06, "loss": 0.002, "step": 51900 }, { "epoch": 0.8772804475127384, "grad_norm": 0.08272144943475723, "learning_rate": 6.8950244794810965e-06, "loss": 0.0026, "step": 51910 }, { "epoch": 0.8774494477915888, "grad_norm": 0.03429991379380226, "learning_rate": 6.893659597995009e-06, "loss": 0.0056, "step": 51920 }, { "epoch": 0.8776184480704393, "grad_norm": 0.05217054858803749, "learning_rate": 6.892294551751362e-06, "loss": 0.0017, "step": 51930 }, { "epoch": 0.8777874483492898, "grad_norm": 0.26239824295043945, "learning_rate": 6.890929340868921e-06, "loss": 0.0016, "step": 51940 }, { "epoch": 0.8779564486281403, "grad_norm": 0.08103934675455093, "learning_rate": 6.889563965466465e-06, "loss": 0.0034, "step": 51950 }, { "epoch": 0.8781254489069907, "grad_norm": 0.0029180962592363358, "learning_rate": 6.888198425662789e-06, "loss": 0.0007, "step": 51960 }, { "epoch": 0.8782944491858412, "grad_norm": 0.1408311277627945, "learning_rate": 6.886832721576702e-06, "loss": 0.0023, "step": 51970 }, { "epoch": 0.8784634494646916, "grad_norm": 0.12084182351827621, "learning_rate": 6.8854668533270245e-06, "loss": 0.0016, "step": 51980 }, { "epoch": 0.878632449743542, "grad_norm": 0.06310968846082687, "learning_rate": 6.8841008210325966e-06, "loss": 0.0009, "step": 51990 }, { "epoch": 0.8788014500223925, "grad_norm": 0.06121402978897095, "learning_rate": 6.882734624812269e-06, "loss": 0.0013, "step": 52000 }, { "epoch": 0.878970450301243, "grad_norm": 0.077993243932724, "learning_rate": 6.881368264784906e-06, "loss": 0.0022, "step": 52010 }, { "epoch": 0.8791394505800935, "grad_norm": 0.06399333477020264, "learning_rate": 6.880001741069391e-06, "loss": 0.0015, "step": 52020 }, { "epoch": 0.8793084508589439, "grad_norm": 0.11310141533613205, "learning_rate": 6.878635053784614e-06, "loss": 0.0016, "step": 52030 }, { "epoch": 0.8794774511377944, "grad_norm": 0.051656268537044525, "learning_rate": 6.877268203049484e-06, "loss": 0.002, "step": 52040 }, { "epoch": 0.8796464514166449, "grad_norm": 0.12784205377101898, "learning_rate": 6.875901188982926e-06, "loss": 0.0016, "step": 52050 }, { "epoch": 0.8798154516954952, "grad_norm": 0.03359860181808472, "learning_rate": 6.874534011703876e-06, "loss": 0.0013, "step": 52060 }, { "epoch": 0.8799844519743457, "grad_norm": 0.06326126307249069, "learning_rate": 6.8731666713312835e-06, "loss": 0.0015, "step": 52070 }, { "epoch": 0.8801534522531962, "grad_norm": 0.0809256061911583, "learning_rate": 6.871799167984116e-06, "loss": 0.0009, "step": 52080 }, { "epoch": 0.8803224525320467, "grad_norm": 0.06105821952223778, "learning_rate": 6.870431501781352e-06, "loss": 0.0016, "step": 52090 }, { "epoch": 0.8804914528108971, "grad_norm": 0.09603806585073471, "learning_rate": 6.869063672841983e-06, "loss": 0.0016, "step": 52100 }, { "epoch": 0.8806604530897476, "grad_norm": 0.026588289067149162, "learning_rate": 6.86769568128502e-06, "loss": 0.0012, "step": 52110 }, { "epoch": 0.8808294533685981, "grad_norm": 0.1563897281885147, "learning_rate": 6.8663275272294835e-06, "loss": 0.0015, "step": 52120 }, { "epoch": 0.8809984536474486, "grad_norm": 0.02206975221633911, "learning_rate": 6.86495921079441e-06, "loss": 0.0022, "step": 52130 }, { "epoch": 0.881167453926299, "grad_norm": 0.00509232422336936, "learning_rate": 6.863590732098848e-06, "loss": 0.0015, "step": 52140 }, { "epoch": 0.8813364542051494, "grad_norm": 0.1817808747291565, "learning_rate": 6.862222091261864e-06, "loss": 0.0017, "step": 52150 }, { "epoch": 0.8815054544839999, "grad_norm": 0.09490422159433365, "learning_rate": 6.860853288402534e-06, "loss": 0.0015, "step": 52160 }, { "epoch": 0.8816744547628503, "grad_norm": 0.021016210317611694, "learning_rate": 6.859484323639953e-06, "loss": 0.0021, "step": 52170 }, { "epoch": 0.8818434550417008, "grad_norm": 0.022308198735117912, "learning_rate": 6.8581151970932266e-06, "loss": 0.0023, "step": 52180 }, { "epoch": 0.8820124553205513, "grad_norm": 0.09990277886390686, "learning_rate": 6.856745908881475e-06, "loss": 0.0016, "step": 52190 }, { "epoch": 0.8821814555994018, "grad_norm": 0.039097677916288376, "learning_rate": 6.855376459123833e-06, "loss": 0.0018, "step": 52200 }, { "epoch": 0.8823504558782522, "grad_norm": 0.1134280264377594, "learning_rate": 6.854006847939449e-06, "loss": 0.0011, "step": 52210 }, { "epoch": 0.8825194561571027, "grad_norm": 0.06151890754699707, "learning_rate": 6.852637075447488e-06, "loss": 0.0012, "step": 52220 }, { "epoch": 0.8826884564359532, "grad_norm": 0.07523173838853836, "learning_rate": 6.851267141767125e-06, "loss": 0.0009, "step": 52230 }, { "epoch": 0.8828574567148035, "grad_norm": 0.091850645840168, "learning_rate": 6.849897047017551e-06, "loss": 0.0014, "step": 52240 }, { "epoch": 0.883026456993654, "grad_norm": 0.039377931505441666, "learning_rate": 6.8485267913179694e-06, "loss": 0.0012, "step": 52250 }, { "epoch": 0.8831954572725045, "grad_norm": 0.04614482447504997, "learning_rate": 6.847156374787602e-06, "loss": 0.0015, "step": 52260 }, { "epoch": 0.883364457551355, "grad_norm": 0.025399111211299896, "learning_rate": 6.845785797545679e-06, "loss": 0.0015, "step": 52270 }, { "epoch": 0.8835334578302054, "grad_norm": 0.048147816210985184, "learning_rate": 6.84441505971145e-06, "loss": 0.001, "step": 52280 }, { "epoch": 0.8837024581090559, "grad_norm": 0.054078638553619385, "learning_rate": 6.8430441614041744e-06, "loss": 0.002, "step": 52290 }, { "epoch": 0.8838714583879064, "grad_norm": 0.020648863166570663, "learning_rate": 6.841673102743126e-06, "loss": 0.002, "step": 52300 }, { "epoch": 0.8840404586667568, "grad_norm": 0.07791152596473694, "learning_rate": 6.840301883847595e-06, "loss": 0.0019, "step": 52310 }, { "epoch": 0.8842094589456072, "grad_norm": 0.037497781217098236, "learning_rate": 6.838930504836885e-06, "loss": 0.0015, "step": 52320 }, { "epoch": 0.8843784592244577, "grad_norm": 0.0458468534052372, "learning_rate": 6.837558965830309e-06, "loss": 0.0012, "step": 52330 }, { "epoch": 0.8845474595033082, "grad_norm": 0.034784458577632904, "learning_rate": 6.8361872669472006e-06, "loss": 0.0007, "step": 52340 }, { "epoch": 0.8847164597821586, "grad_norm": 0.05131611227989197, "learning_rate": 6.834815408306903e-06, "loss": 0.0021, "step": 52350 }, { "epoch": 0.8848854600610091, "grad_norm": 0.16373878717422485, "learning_rate": 6.833443390028775e-06, "loss": 0.0028, "step": 52360 }, { "epoch": 0.8850544603398596, "grad_norm": 0.08664316684007645, "learning_rate": 6.832071212232191e-06, "loss": 0.0022, "step": 52370 }, { "epoch": 0.88522346061871, "grad_norm": 0.06289789080619812, "learning_rate": 6.830698875036533e-06, "loss": 0.0019, "step": 52380 }, { "epoch": 0.8853924608975605, "grad_norm": 0.05547764152288437, "learning_rate": 6.829326378561203e-06, "loss": 0.0022, "step": 52390 }, { "epoch": 0.885561461176411, "grad_norm": 0.07169349491596222, "learning_rate": 6.827953722925616e-06, "loss": 0.0007, "step": 52400 }, { "epoch": 0.8857304614552614, "grad_norm": 0.013705150224268436, "learning_rate": 6.826580908249198e-06, "loss": 0.0017, "step": 52410 }, { "epoch": 0.8858994617341118, "grad_norm": 0.003144758054986596, "learning_rate": 6.82520793465139e-06, "loss": 0.002, "step": 52420 }, { "epoch": 0.8860684620129623, "grad_norm": 0.10687538981437683, "learning_rate": 6.823834802251649e-06, "loss": 0.0018, "step": 52430 }, { "epoch": 0.8862374622918128, "grad_norm": 0.05764946714043617, "learning_rate": 6.822461511169442e-06, "loss": 0.0017, "step": 52440 }, { "epoch": 0.8864064625706632, "grad_norm": 0.0790930986404419, "learning_rate": 6.821088061524256e-06, "loss": 0.0014, "step": 52450 }, { "epoch": 0.8865754628495137, "grad_norm": 0.03424690291285515, "learning_rate": 6.819714453435583e-06, "loss": 0.0011, "step": 52460 }, { "epoch": 0.8867444631283642, "grad_norm": 0.03840995952486992, "learning_rate": 6.818340687022937e-06, "loss": 0.0015, "step": 52470 }, { "epoch": 0.8869134634072147, "grad_norm": 0.017015980556607246, "learning_rate": 6.816966762405841e-06, "loss": 0.0089, "step": 52480 }, { "epoch": 0.8870824636860651, "grad_norm": 0.11019181460142136, "learning_rate": 6.815592679703834e-06, "loss": 0.0024, "step": 52490 }, { "epoch": 0.8872514639649155, "grad_norm": 0.13899827003479004, "learning_rate": 6.814218439036466e-06, "loss": 0.0039, "step": 52500 }, { "epoch": 0.887420464243766, "grad_norm": 0.07236754894256592, "learning_rate": 6.812844040523305e-06, "loss": 0.0014, "step": 52510 }, { "epoch": 0.8875894645226164, "grad_norm": 0.039463385939598083, "learning_rate": 6.811469484283928e-06, "loss": 0.0014, "step": 52520 }, { "epoch": 0.8877584648014669, "grad_norm": 0.05553396791219711, "learning_rate": 6.810094770437929e-06, "loss": 0.0019, "step": 52530 }, { "epoch": 0.8879274650803174, "grad_norm": 0.2061227262020111, "learning_rate": 6.808719899104916e-06, "loss": 0.0025, "step": 52540 }, { "epoch": 0.8880964653591679, "grad_norm": 0.057045165449380875, "learning_rate": 6.807344870404506e-06, "loss": 0.0019, "step": 52550 }, { "epoch": 0.8882654656380183, "grad_norm": 0.03864588961005211, "learning_rate": 6.8059696844563384e-06, "loss": 0.0038, "step": 52560 }, { "epoch": 0.8884344659168688, "grad_norm": 0.046701423823833466, "learning_rate": 6.804594341380057e-06, "loss": 0.0013, "step": 52570 }, { "epoch": 0.8886034661957192, "grad_norm": 0.05355743691325188, "learning_rate": 6.8032188412953235e-06, "loss": 0.0016, "step": 52580 }, { "epoch": 0.8887724664745696, "grad_norm": 0.04035189375281334, "learning_rate": 6.8018431843218155e-06, "loss": 0.0023, "step": 52590 }, { "epoch": 0.8889414667534201, "grad_norm": 0.07668393105268478, "learning_rate": 6.80046737057922e-06, "loss": 0.0015, "step": 52600 }, { "epoch": 0.8891104670322706, "grad_norm": 0.04548002406954765, "learning_rate": 6.799091400187239e-06, "loss": 0.0022, "step": 52610 }, { "epoch": 0.8892794673111211, "grad_norm": 0.07837411761283875, "learning_rate": 6.7977152732655905e-06, "loss": 0.0007, "step": 52620 }, { "epoch": 0.8894484675899715, "grad_norm": 0.059095341712236404, "learning_rate": 6.7963389899340016e-06, "loss": 0.0014, "step": 52630 }, { "epoch": 0.889617467868822, "grad_norm": 0.09972900152206421, "learning_rate": 6.794962550312217e-06, "loss": 0.0017, "step": 52640 }, { "epoch": 0.8897864681476725, "grad_norm": 0.026061296463012695, "learning_rate": 6.793585954519995e-06, "loss": 0.0009, "step": 52650 }, { "epoch": 0.889955468426523, "grad_norm": 0.01142977736890316, "learning_rate": 6.792209202677105e-06, "loss": 0.0017, "step": 52660 }, { "epoch": 0.8901244687053733, "grad_norm": 0.04268473759293556, "learning_rate": 6.79083229490333e-06, "loss": 0.0012, "step": 52670 }, { "epoch": 0.8902934689842238, "grad_norm": 0.08778329193592072, "learning_rate": 6.789455231318469e-06, "loss": 0.0011, "step": 52680 }, { "epoch": 0.8904624692630743, "grad_norm": 0.08509475737810135, "learning_rate": 6.788078012042333e-06, "loss": 0.0012, "step": 52690 }, { "epoch": 0.8906314695419247, "grad_norm": 0.07777417451143265, "learning_rate": 6.786700637194745e-06, "loss": 0.0022, "step": 52700 }, { "epoch": 0.8908004698207752, "grad_norm": 0.13288511335849762, "learning_rate": 6.7853231068955474e-06, "loss": 0.0022, "step": 52710 }, { "epoch": 0.8909694700996257, "grad_norm": 0.008743119426071644, "learning_rate": 6.7839454212645876e-06, "loss": 0.0011, "step": 52720 }, { "epoch": 0.8911384703784762, "grad_norm": 0.11511184275150299, "learning_rate": 6.782567580421732e-06, "loss": 0.002, "step": 52730 }, { "epoch": 0.8913074706573266, "grad_norm": 0.03016517497599125, "learning_rate": 6.7811895844868626e-06, "loss": 0.0012, "step": 52740 }, { "epoch": 0.891476470936177, "grad_norm": 0.02707226388156414, "learning_rate": 6.779811433579867e-06, "loss": 0.0013, "step": 52750 }, { "epoch": 0.8916454712150275, "grad_norm": 0.013867163099348545, "learning_rate": 6.7784331278206536e-06, "loss": 0.0044, "step": 52760 }, { "epoch": 0.8918144714938779, "grad_norm": 0.15949314832687378, "learning_rate": 6.7770546673291425e-06, "loss": 0.0014, "step": 52770 }, { "epoch": 0.8919834717727284, "grad_norm": 0.02797427959740162, "learning_rate": 6.775676052225265e-06, "loss": 0.0014, "step": 52780 }, { "epoch": 0.8921524720515789, "grad_norm": 0.03147095441818237, "learning_rate": 6.7742972826289675e-06, "loss": 0.0021, "step": 52790 }, { "epoch": 0.8923214723304294, "grad_norm": 0.045642051845788956, "learning_rate": 6.7729183586602095e-06, "loss": 0.0024, "step": 52800 }, { "epoch": 0.8924904726092798, "grad_norm": 0.0411669984459877, "learning_rate": 6.771539280438966e-06, "loss": 0.0011, "step": 52810 }, { "epoch": 0.8926594728881303, "grad_norm": 0.02917470782995224, "learning_rate": 6.77016004808522e-06, "loss": 0.0015, "step": 52820 }, { "epoch": 0.8928284731669808, "grad_norm": 0.04881921410560608, "learning_rate": 6.768780661718973e-06, "loss": 0.0014, "step": 52830 }, { "epoch": 0.8929974734458311, "grad_norm": 0.03261767700314522, "learning_rate": 6.767401121460239e-06, "loss": 0.002, "step": 52840 }, { "epoch": 0.8931664737246816, "grad_norm": 0.039354369044303894, "learning_rate": 6.766021427429043e-06, "loss": 0.0052, "step": 52850 }, { "epoch": 0.8933354740035321, "grad_norm": 0.039264146238565445, "learning_rate": 6.764641579745429e-06, "loss": 0.0009, "step": 52860 }, { "epoch": 0.8935044742823826, "grad_norm": 0.04962320253252983, "learning_rate": 6.763261578529445e-06, "loss": 0.0012, "step": 52870 }, { "epoch": 0.893673474561233, "grad_norm": 0.05316542834043503, "learning_rate": 6.761881423901162e-06, "loss": 0.0025, "step": 52880 }, { "epoch": 0.8938424748400835, "grad_norm": 0.019389688968658447, "learning_rate": 6.760501115980659e-06, "loss": 0.0011, "step": 52890 }, { "epoch": 0.894011475118934, "grad_norm": 0.02297196164727211, "learning_rate": 6.7591206548880285e-06, "loss": 0.0019, "step": 52900 }, { "epoch": 0.8941804753977844, "grad_norm": 0.07117078453302383, "learning_rate": 6.757740040743378e-06, "loss": 0.0012, "step": 52910 }, { "epoch": 0.8943494756766349, "grad_norm": 0.06571634858846664, "learning_rate": 6.756359273666827e-06, "loss": 0.0009, "step": 52920 }, { "epoch": 0.8945184759554853, "grad_norm": 0.07537243515253067, "learning_rate": 6.754978353778508e-06, "loss": 0.0066, "step": 52930 }, { "epoch": 0.8946874762343358, "grad_norm": 0.02859083004295826, "learning_rate": 6.753597281198571e-06, "loss": 0.0018, "step": 52940 }, { "epoch": 0.8948564765131862, "grad_norm": 0.07475516945123672, "learning_rate": 6.752216056047174e-06, "loss": 0.0015, "step": 52950 }, { "epoch": 0.8950254767920367, "grad_norm": 0.0927102118730545, "learning_rate": 6.75083467844449e-06, "loss": 0.0012, "step": 52960 }, { "epoch": 0.8951944770708872, "grad_norm": 0.16480670869350433, "learning_rate": 6.749453148510706e-06, "loss": 0.0009, "step": 52970 }, { "epoch": 0.8953634773497376, "grad_norm": 0.04149400070309639, "learning_rate": 6.748071466366023e-06, "loss": 0.0012, "step": 52980 }, { "epoch": 0.8955324776285881, "grad_norm": 0.07089632749557495, "learning_rate": 6.746689632130652e-06, "loss": 0.0023, "step": 52990 }, { "epoch": 0.8957014779074386, "grad_norm": 0.05136013776063919, "learning_rate": 6.74530764592482e-06, "loss": 0.0011, "step": 53000 }, { "epoch": 0.895870478186289, "grad_norm": 0.011685706675052643, "learning_rate": 6.743925507868767e-06, "loss": 0.0022, "step": 53010 }, { "epoch": 0.8960394784651394, "grad_norm": 0.03491692245006561, "learning_rate": 6.742543218082744e-06, "loss": 0.001, "step": 53020 }, { "epoch": 0.8962084787439899, "grad_norm": 0.0011314982548356056, "learning_rate": 6.741160776687019e-06, "loss": 0.0019, "step": 53030 }, { "epoch": 0.8963774790228404, "grad_norm": 0.05087868496775627, "learning_rate": 6.73977818380187e-06, "loss": 0.0011, "step": 53040 }, { "epoch": 0.8965464793016908, "grad_norm": 0.08818584680557251, "learning_rate": 6.738395439547591e-06, "loss": 0.0019, "step": 53050 }, { "epoch": 0.8967154795805413, "grad_norm": 0.14296086132526398, "learning_rate": 6.737012544044486e-06, "loss": 0.0013, "step": 53060 }, { "epoch": 0.8968844798593918, "grad_norm": 0.0492781437933445, "learning_rate": 6.735629497412872e-06, "loss": 0.001, "step": 53070 }, { "epoch": 0.8970534801382423, "grad_norm": 0.025535326451063156, "learning_rate": 6.734246299773084e-06, "loss": 0.0021, "step": 53080 }, { "epoch": 0.8972224804170927, "grad_norm": 0.06880789995193481, "learning_rate": 6.7328629512454646e-06, "loss": 0.0013, "step": 53090 }, { "epoch": 0.8973914806959431, "grad_norm": 0.3037954866886139, "learning_rate": 6.731479451950373e-06, "loss": 0.0016, "step": 53100 }, { "epoch": 0.8975604809747936, "grad_norm": 0.07403312623500824, "learning_rate": 6.730095802008179e-06, "loss": 0.0015, "step": 53110 }, { "epoch": 0.897729481253644, "grad_norm": 0.08519771695137024, "learning_rate": 6.728712001539266e-06, "loss": 0.0017, "step": 53120 }, { "epoch": 0.8978984815324945, "grad_norm": 0.024969011545181274, "learning_rate": 6.7273280506640356e-06, "loss": 0.0013, "step": 53130 }, { "epoch": 0.898067481811345, "grad_norm": 0.06154831871390343, "learning_rate": 6.725943949502896e-06, "loss": 0.0017, "step": 53140 }, { "epoch": 0.8982364820901955, "grad_norm": 0.045953039079904556, "learning_rate": 6.724559698176269e-06, "loss": 0.0013, "step": 53150 }, { "epoch": 0.8984054823690459, "grad_norm": 0.03636516258120537, "learning_rate": 6.723175296804594e-06, "loss": 0.0012, "step": 53160 }, { "epoch": 0.8985744826478964, "grad_norm": 0.06074857339262962, "learning_rate": 6.7217907455083176e-06, "loss": 0.0016, "step": 53170 }, { "epoch": 0.8987434829267469, "grad_norm": 0.028408514335751534, "learning_rate": 6.720406044407905e-06, "loss": 0.0023, "step": 53180 }, { "epoch": 0.8989124832055972, "grad_norm": 0.04262072965502739, "learning_rate": 6.719021193623832e-06, "loss": 0.0014, "step": 53190 }, { "epoch": 0.8990814834844477, "grad_norm": 0.004805940669029951, "learning_rate": 6.717636193276584e-06, "loss": 0.0009, "step": 53200 }, { "epoch": 0.8992504837632982, "grad_norm": 0.13305015861988068, "learning_rate": 6.716251043486665e-06, "loss": 0.0021, "step": 53210 }, { "epoch": 0.8994194840421487, "grad_norm": 0.056643761694431305, "learning_rate": 6.714865744374591e-06, "loss": 0.0018, "step": 53220 }, { "epoch": 0.8995884843209991, "grad_norm": 0.021547023206949234, "learning_rate": 6.713480296060888e-06, "loss": 0.0026, "step": 53230 }, { "epoch": 0.8997574845998496, "grad_norm": 0.20311525464057922, "learning_rate": 6.712094698666099e-06, "loss": 0.001, "step": 53240 }, { "epoch": 0.8999264848787001, "grad_norm": 0.051384858787059784, "learning_rate": 6.710708952310774e-06, "loss": 0.0015, "step": 53250 }, { "epoch": 0.9000954851575506, "grad_norm": 0.004437590949237347, "learning_rate": 6.709323057115482e-06, "loss": 0.0021, "step": 53260 }, { "epoch": 0.9002644854364009, "grad_norm": 0.04546438902616501, "learning_rate": 6.707937013200803e-06, "loss": 0.0013, "step": 53270 }, { "epoch": 0.9004334857152514, "grad_norm": 0.05520818009972572, "learning_rate": 6.706550820687328e-06, "loss": 0.0013, "step": 53280 }, { "epoch": 0.9006024859941019, "grad_norm": 0.10690100491046906, "learning_rate": 6.7051644796956624e-06, "loss": 0.0014, "step": 53290 }, { "epoch": 0.9007714862729523, "grad_norm": 0.029496680945158005, "learning_rate": 6.703777990346427e-06, "loss": 0.0014, "step": 53300 }, { "epoch": 0.9009404865518028, "grad_norm": 0.10258401930332184, "learning_rate": 6.7023913527602506e-06, "loss": 0.0017, "step": 53310 }, { "epoch": 0.9011094868306533, "grad_norm": 0.06807773560285568, "learning_rate": 6.701004567057777e-06, "loss": 0.0023, "step": 53320 }, { "epoch": 0.9012784871095038, "grad_norm": 0.10241279006004333, "learning_rate": 6.699617633359666e-06, "loss": 0.0013, "step": 53330 }, { "epoch": 0.9014474873883542, "grad_norm": 0.07315660268068314, "learning_rate": 6.698230551786586e-06, "loss": 0.0026, "step": 53340 }, { "epoch": 0.9016164876672047, "grad_norm": 0.01719767041504383, "learning_rate": 6.69684332245922e-06, "loss": 0.0012, "step": 53350 }, { "epoch": 0.9017854879460551, "grad_norm": 0.010834837332367897, "learning_rate": 6.695455945498264e-06, "loss": 0.002, "step": 53360 }, { "epoch": 0.9019544882249055, "grad_norm": 0.06381508708000183, "learning_rate": 6.694068421024425e-06, "loss": 0.0039, "step": 53370 }, { "epoch": 0.902123488503756, "grad_norm": 0.03186887130141258, "learning_rate": 6.6926807491584255e-06, "loss": 0.002, "step": 53380 }, { "epoch": 0.9022924887826065, "grad_norm": 0.04062811657786369, "learning_rate": 6.691292930021001e-06, "loss": 0.0009, "step": 53390 }, { "epoch": 0.902461489061457, "grad_norm": 0.018574338406324387, "learning_rate": 6.689904963732895e-06, "loss": 0.0015, "step": 53400 }, { "epoch": 0.9026304893403074, "grad_norm": 0.03585190325975418, "learning_rate": 6.68851685041487e-06, "loss": 0.0016, "step": 53410 }, { "epoch": 0.9027994896191579, "grad_norm": 0.015502367168664932, "learning_rate": 6.687128590187698e-06, "loss": 0.0018, "step": 53420 }, { "epoch": 0.9029684898980084, "grad_norm": 0.05451056733727455, "learning_rate": 6.6857401831721645e-06, "loss": 0.0013, "step": 53430 }, { "epoch": 0.9031374901768587, "grad_norm": 0.05533871054649353, "learning_rate": 6.684351629489067e-06, "loss": 0.002, "step": 53440 }, { "epoch": 0.9033064904557092, "grad_norm": 0.04295478016138077, "learning_rate": 6.682962929259218e-06, "loss": 0.0021, "step": 53450 }, { "epoch": 0.9034754907345597, "grad_norm": 0.003592574968934059, "learning_rate": 6.681574082603439e-06, "loss": 0.0018, "step": 53460 }, { "epoch": 0.9036444910134102, "grad_norm": 0.07409350574016571, "learning_rate": 6.680185089642568e-06, "loss": 0.0018, "step": 53470 }, { "epoch": 0.9038134912922606, "grad_norm": 0.09028099477291107, "learning_rate": 6.678795950497453e-06, "loss": 0.0018, "step": 53480 }, { "epoch": 0.9039824915711111, "grad_norm": 0.016657106578350067, "learning_rate": 6.6774066652889565e-06, "loss": 0.004, "step": 53490 }, { "epoch": 0.9041514918499616, "grad_norm": 0.12282165139913559, "learning_rate": 6.6760172341379535e-06, "loss": 0.0018, "step": 53500 }, { "epoch": 0.904320492128812, "grad_norm": 0.05469250679016113, "learning_rate": 6.67462765716533e-06, "loss": 0.0013, "step": 53510 }, { "epoch": 0.9044894924076625, "grad_norm": 0.11904332041740417, "learning_rate": 6.673237934491988e-06, "loss": 0.0013, "step": 53520 }, { "epoch": 0.9046584926865129, "grad_norm": 0.08364378660917282, "learning_rate": 6.671848066238836e-06, "loss": 0.0008, "step": 53530 }, { "epoch": 0.9048274929653634, "grad_norm": 0.05299696698784828, "learning_rate": 6.6704580525268035e-06, "loss": 0.0011, "step": 53540 }, { "epoch": 0.9049964932442138, "grad_norm": 0.08057032525539398, "learning_rate": 6.669067893476827e-06, "loss": 0.0027, "step": 53550 }, { "epoch": 0.9051654935230643, "grad_norm": 0.10579432547092438, "learning_rate": 6.667677589209858e-06, "loss": 0.0016, "step": 53560 }, { "epoch": 0.9053344938019148, "grad_norm": 0.05485056713223457, "learning_rate": 6.666287139846857e-06, "loss": 0.0017, "step": 53570 }, { "epoch": 0.9055034940807652, "grad_norm": 0.04897540062665939, "learning_rate": 6.6648965455088025e-06, "loss": 0.002, "step": 53580 }, { "epoch": 0.9056724943596157, "grad_norm": 0.07316815853118896, "learning_rate": 6.663505806316681e-06, "loss": 0.0007, "step": 53590 }, { "epoch": 0.9058414946384662, "grad_norm": 0.02678026631474495, "learning_rate": 6.662114922391494e-06, "loss": 0.0012, "step": 53600 }, { "epoch": 0.9060104949173167, "grad_norm": 0.08421721309423447, "learning_rate": 6.660723893854256e-06, "loss": 0.0014, "step": 53610 }, { "epoch": 0.906179495196167, "grad_norm": 0.061888616532087326, "learning_rate": 6.6593327208259935e-06, "loss": 0.0029, "step": 53620 }, { "epoch": 0.9063484954750175, "grad_norm": 0.07698092609643936, "learning_rate": 6.657941403427745e-06, "loss": 0.0015, "step": 53630 }, { "epoch": 0.906517495753868, "grad_norm": 0.019941125065088272, "learning_rate": 6.6565499417805615e-06, "loss": 0.0027, "step": 53640 }, { "epoch": 0.9066864960327184, "grad_norm": 0.04463280364871025, "learning_rate": 6.655158336005505e-06, "loss": 0.002, "step": 53650 }, { "epoch": 0.9068554963115689, "grad_norm": 0.03760061785578728, "learning_rate": 6.653766586223656e-06, "loss": 0.0012, "step": 53660 }, { "epoch": 0.9070244965904194, "grad_norm": 0.05356159806251526, "learning_rate": 6.652374692556101e-06, "loss": 0.0014, "step": 53670 }, { "epoch": 0.9071934968692699, "grad_norm": 0.0786006897687912, "learning_rate": 6.650982655123941e-06, "loss": 0.0018, "step": 53680 }, { "epoch": 0.9073624971481203, "grad_norm": 0.041036710143089294, "learning_rate": 6.64959047404829e-06, "loss": 0.0026, "step": 53690 }, { "epoch": 0.9075314974269707, "grad_norm": 0.09620804339647293, "learning_rate": 6.648198149450277e-06, "loss": 0.0018, "step": 53700 }, { "epoch": 0.9077004977058212, "grad_norm": 0.01466481015086174, "learning_rate": 6.6468056814510385e-06, "loss": 0.002, "step": 53710 }, { "epoch": 0.9078694979846716, "grad_norm": 0.14561620354652405, "learning_rate": 6.645413070171726e-06, "loss": 0.0014, "step": 53720 }, { "epoch": 0.9080384982635221, "grad_norm": 0.01577634923160076, "learning_rate": 6.644020315733505e-06, "loss": 0.0018, "step": 53730 }, { "epoch": 0.9082074985423726, "grad_norm": 0.0007173779886215925, "learning_rate": 6.642627418257551e-06, "loss": 0.0015, "step": 53740 }, { "epoch": 0.9083764988212231, "grad_norm": 0.04145677387714386, "learning_rate": 6.641234377865053e-06, "loss": 0.0024, "step": 53750 }, { "epoch": 0.9085454991000735, "grad_norm": 0.003429944859817624, "learning_rate": 6.639841194677213e-06, "loss": 0.0016, "step": 53760 }, { "epoch": 0.908714499378924, "grad_norm": 0.0353064239025116, "learning_rate": 6.638447868815243e-06, "loss": 0.0016, "step": 53770 }, { "epoch": 0.9088834996577745, "grad_norm": 0.00795475672930479, "learning_rate": 6.63705440040037e-06, "loss": 0.0015, "step": 53780 }, { "epoch": 0.9090524999366248, "grad_norm": 0.0757342278957367, "learning_rate": 6.635660789553833e-06, "loss": 0.0022, "step": 53790 }, { "epoch": 0.9092215002154753, "grad_norm": 0.018039211630821228, "learning_rate": 6.634267036396881e-06, "loss": 0.0068, "step": 53800 }, { "epoch": 0.9093905004943258, "grad_norm": 0.09006370604038239, "learning_rate": 6.63287314105078e-06, "loss": 0.0014, "step": 53810 }, { "epoch": 0.9095595007731763, "grad_norm": 0.04026485234498978, "learning_rate": 6.631479103636803e-06, "loss": 0.0014, "step": 53820 }, { "epoch": 0.9097285010520267, "grad_norm": 0.05421081930398941, "learning_rate": 6.630084924276241e-06, "loss": 0.0024, "step": 53830 }, { "epoch": 0.9098975013308772, "grad_norm": 0.0850011482834816, "learning_rate": 6.628690603090391e-06, "loss": 0.002, "step": 53840 }, { "epoch": 0.9100665016097277, "grad_norm": 0.006448943633586168, "learning_rate": 6.627296140200569e-06, "loss": 0.0011, "step": 53850 }, { "epoch": 0.9102355018885782, "grad_norm": 0.006143988575786352, "learning_rate": 6.6259015357280965e-06, "loss": 0.0024, "step": 53860 }, { "epoch": 0.9104045021674286, "grad_norm": 0.029627706855535507, "learning_rate": 6.624506789794313e-06, "loss": 0.0014, "step": 53870 }, { "epoch": 0.910573502446279, "grad_norm": 0.05208719149231911, "learning_rate": 6.623111902520569e-06, "loss": 0.0012, "step": 53880 }, { "epoch": 0.9107425027251295, "grad_norm": 0.04385032504796982, "learning_rate": 6.6217168740282245e-06, "loss": 0.0011, "step": 53890 }, { "epoch": 0.9109115030039799, "grad_norm": 0.12211822718381882, "learning_rate": 6.6203217044386546e-06, "loss": 0.0016, "step": 53900 }, { "epoch": 0.9110805032828304, "grad_norm": 0.11783907562494278, "learning_rate": 6.618926393873246e-06, "loss": 0.0013, "step": 53910 }, { "epoch": 0.9112495035616809, "grad_norm": 0.03287163004279137, "learning_rate": 6.6175309424533985e-06, "loss": 0.0016, "step": 53920 }, { "epoch": 0.9114185038405314, "grad_norm": 0.022966263815760612, "learning_rate": 6.616135350300521e-06, "loss": 0.002, "step": 53930 }, { "epoch": 0.9115875041193818, "grad_norm": 0.4584880769252777, "learning_rate": 6.614739617536037e-06, "loss": 0.0014, "step": 53940 }, { "epoch": 0.9117565043982323, "grad_norm": 0.05042840167880058, "learning_rate": 6.613343744281383e-06, "loss": 0.0031, "step": 53950 }, { "epoch": 0.9119255046770827, "grad_norm": 0.05508103221654892, "learning_rate": 6.611947730658006e-06, "loss": 0.0011, "step": 53960 }, { "epoch": 0.9120945049559331, "grad_norm": 0.16197894513607025, "learning_rate": 6.610551576787367e-06, "loss": 0.0019, "step": 53970 }, { "epoch": 0.9122635052347836, "grad_norm": 0.059720925986766815, "learning_rate": 6.609155282790937e-06, "loss": 0.0016, "step": 53980 }, { "epoch": 0.9124325055136341, "grad_norm": 0.02155834622681141, "learning_rate": 6.607758848790201e-06, "loss": 0.0013, "step": 53990 }, { "epoch": 0.9126015057924846, "grad_norm": 0.03507447615265846, "learning_rate": 6.606362274906655e-06, "loss": 0.0031, "step": 54000 }, { "epoch": 0.912770506071335, "grad_norm": 0.053267836570739746, "learning_rate": 6.604965561261809e-06, "loss": 0.0017, "step": 54010 }, { "epoch": 0.9129395063501855, "grad_norm": 0.1057472974061966, "learning_rate": 6.603568707977183e-06, "loss": 0.0014, "step": 54020 }, { "epoch": 0.913108506629036, "grad_norm": 0.003757775528356433, "learning_rate": 6.602171715174309e-06, "loss": 0.0019, "step": 54030 }, { "epoch": 0.9132775069078864, "grad_norm": 0.0017160142306238413, "learning_rate": 6.600774582974734e-06, "loss": 0.0011, "step": 54040 }, { "epoch": 0.9134465071867368, "grad_norm": 0.07644976675510406, "learning_rate": 6.599377311500014e-06, "loss": 0.0023, "step": 54050 }, { "epoch": 0.9136155074655873, "grad_norm": 0.1244252622127533, "learning_rate": 6.5979799008717186e-06, "loss": 0.0017, "step": 54060 }, { "epoch": 0.9137845077444378, "grad_norm": 0.03451064974069595, "learning_rate": 6.596582351211429e-06, "loss": 0.0048, "step": 54070 }, { "epoch": 0.9139535080232882, "grad_norm": 0.04706354811787605, "learning_rate": 6.595184662640741e-06, "loss": 0.0023, "step": 54080 }, { "epoch": 0.9141225083021387, "grad_norm": 0.04288541525602341, "learning_rate": 6.5937868352812565e-06, "loss": 0.0023, "step": 54090 }, { "epoch": 0.9142915085809892, "grad_norm": 0.03782866150140762, "learning_rate": 6.592388869254596e-06, "loss": 0.0008, "step": 54100 }, { "epoch": 0.9144605088598396, "grad_norm": 0.025055140256881714, "learning_rate": 6.5909907646823876e-06, "loss": 0.0017, "step": 54110 }, { "epoch": 0.9146295091386901, "grad_norm": 0.03553859144449234, "learning_rate": 6.589592521686277e-06, "loss": 0.0027, "step": 54120 }, { "epoch": 0.9147985094175405, "grad_norm": 0.06720779091119766, "learning_rate": 6.5881941403879125e-06, "loss": 0.0017, "step": 54130 }, { "epoch": 0.914967509696391, "grad_norm": 0.045898016542196274, "learning_rate": 6.586795620908964e-06, "loss": 0.0019, "step": 54140 }, { "epoch": 0.9151365099752414, "grad_norm": 0.11866870522499084, "learning_rate": 6.585396963371108e-06, "loss": 0.0017, "step": 54150 }, { "epoch": 0.9153055102540919, "grad_norm": 0.057932619005441666, "learning_rate": 6.583998167896035e-06, "loss": 0.0015, "step": 54160 }, { "epoch": 0.9154745105329424, "grad_norm": 0.03443612530827522, "learning_rate": 6.5825992346054454e-06, "loss": 0.0007, "step": 54170 }, { "epoch": 0.9156435108117928, "grad_norm": 0.11433347314596176, "learning_rate": 6.581200163621055e-06, "loss": 0.0019, "step": 54180 }, { "epoch": 0.9158125110906433, "grad_norm": 0.04202871024608612, "learning_rate": 6.57980095506459e-06, "loss": 0.0013, "step": 54190 }, { "epoch": 0.9159815113694938, "grad_norm": 0.030800852924585342, "learning_rate": 6.578401609057789e-06, "loss": 0.002, "step": 54200 }, { "epoch": 0.9161505116483443, "grad_norm": 0.02836521342396736, "learning_rate": 6.577002125722398e-06, "loss": 0.0015, "step": 54210 }, { "epoch": 0.9163195119271946, "grad_norm": 0.11328999698162079, "learning_rate": 6.575602505180183e-06, "loss": 0.0021, "step": 54220 }, { "epoch": 0.9164885122060451, "grad_norm": 0.015508916229009628, "learning_rate": 6.574202747552914e-06, "loss": 0.0011, "step": 54230 }, { "epoch": 0.9166575124848956, "grad_norm": 0.040631003677845, "learning_rate": 6.572802852962381e-06, "loss": 0.0007, "step": 54240 }, { "epoch": 0.916826512763746, "grad_norm": 0.0665983110666275, "learning_rate": 6.571402821530378e-06, "loss": 0.0011, "step": 54250 }, { "epoch": 0.9169955130425965, "grad_norm": 0.053036030381917953, "learning_rate": 6.570002653378717e-06, "loss": 0.0016, "step": 54260 }, { "epoch": 0.917164513321447, "grad_norm": 0.09500153362751007, "learning_rate": 6.568602348629217e-06, "loss": 0.0012, "step": 54270 }, { "epoch": 0.9173335136002975, "grad_norm": 0.02812996692955494, "learning_rate": 6.567201907403713e-06, "loss": 0.001, "step": 54280 }, { "epoch": 0.9175025138791479, "grad_norm": 0.02416916936635971, "learning_rate": 6.565801329824051e-06, "loss": 0.0019, "step": 54290 }, { "epoch": 0.9176715141579984, "grad_norm": 0.09245016425848007, "learning_rate": 6.564400616012085e-06, "loss": 0.0009, "step": 54300 }, { "epoch": 0.9178405144368488, "grad_norm": 0.09982512891292572, "learning_rate": 6.562999766089687e-06, "loss": 0.0014, "step": 54310 }, { "epoch": 0.9180095147156992, "grad_norm": 0.06721735000610352, "learning_rate": 6.561598780178736e-06, "loss": 0.0016, "step": 54320 }, { "epoch": 0.9181785149945497, "grad_norm": 0.08400999009609222, "learning_rate": 6.560197658401126e-06, "loss": 0.0011, "step": 54330 }, { "epoch": 0.9183475152734002, "grad_norm": 0.14300887286663055, "learning_rate": 6.55879640087876e-06, "loss": 0.0019, "step": 54340 }, { "epoch": 0.9185165155522507, "grad_norm": 0.028444314375519753, "learning_rate": 6.557395007733554e-06, "loss": 0.0015, "step": 54350 }, { "epoch": 0.9186855158311011, "grad_norm": 0.040723320096731186, "learning_rate": 6.555993479087436e-06, "loss": 0.0017, "step": 54360 }, { "epoch": 0.9188545161099516, "grad_norm": 0.04981546476483345, "learning_rate": 6.554591815062346e-06, "loss": 0.0016, "step": 54370 }, { "epoch": 0.9190235163888021, "grad_norm": 0.06033708155155182, "learning_rate": 6.553190015780238e-06, "loss": 0.001, "step": 54380 }, { "epoch": 0.9191925166676524, "grad_norm": 0.036511003971099854, "learning_rate": 6.551788081363072e-06, "loss": 0.0016, "step": 54390 }, { "epoch": 0.9193615169465029, "grad_norm": 0.050383757799863815, "learning_rate": 6.550386011932824e-06, "loss": 0.0016, "step": 54400 }, { "epoch": 0.9195305172253534, "grad_norm": 0.06114116311073303, "learning_rate": 6.548983807611482e-06, "loss": 0.0016, "step": 54410 }, { "epoch": 0.9196995175042039, "grad_norm": 0.0021533877588808537, "learning_rate": 6.547581468521044e-06, "loss": 0.0009, "step": 54420 }, { "epoch": 0.9198685177830543, "grad_norm": 0.03272373229265213, "learning_rate": 6.546178994783519e-06, "loss": 0.0013, "step": 54430 }, { "epoch": 0.9200375180619048, "grad_norm": 0.00193277548532933, "learning_rate": 6.544776386520931e-06, "loss": 0.0012, "step": 54440 }, { "epoch": 0.9202065183407553, "grad_norm": 0.03134723752737045, "learning_rate": 6.543373643855312e-06, "loss": 0.0013, "step": 54450 }, { "epoch": 0.9203755186196058, "grad_norm": 0.03538252040743828, "learning_rate": 6.541970766908707e-06, "loss": 0.0011, "step": 54460 }, { "epoch": 0.9205445188984562, "grad_norm": 0.06776958703994751, "learning_rate": 6.540567755803177e-06, "loss": 0.0013, "step": 54470 }, { "epoch": 0.9207135191773066, "grad_norm": 0.10667550563812256, "learning_rate": 6.539164610660785e-06, "loss": 0.0012, "step": 54480 }, { "epoch": 0.9208825194561571, "grad_norm": 0.06932281702756882, "learning_rate": 6.537761331603617e-06, "loss": 0.0013, "step": 54490 }, { "epoch": 0.9210515197350075, "grad_norm": 0.058006856590509415, "learning_rate": 6.536357918753762e-06, "loss": 0.0018, "step": 54500 }, { "epoch": 0.921220520013858, "grad_norm": 0.19771824777126312, "learning_rate": 6.534954372233324e-06, "loss": 0.0015, "step": 54510 }, { "epoch": 0.9213895202927085, "grad_norm": 0.00955281127244234, "learning_rate": 6.533550692164419e-06, "loss": 0.0014, "step": 54520 }, { "epoch": 0.921558520571559, "grad_norm": 0.03951649367809296, "learning_rate": 6.532146878669172e-06, "loss": 0.0014, "step": 54530 }, { "epoch": 0.9217275208504094, "grad_norm": 0.11181564629077911, "learning_rate": 6.530742931869725e-06, "loss": 0.0014, "step": 54540 }, { "epoch": 0.9218965211292599, "grad_norm": 0.03836284950375557, "learning_rate": 6.529338851888225e-06, "loss": 0.0018, "step": 54550 }, { "epoch": 0.9220655214081104, "grad_norm": 0.10049938410520554, "learning_rate": 6.527934638846836e-06, "loss": 0.0019, "step": 54560 }, { "epoch": 0.9222345216869607, "grad_norm": 0.04472474008798599, "learning_rate": 6.526530292867729e-06, "loss": 0.0018, "step": 54570 }, { "epoch": 0.9224035219658112, "grad_norm": 0.10268321633338928, "learning_rate": 6.5251258140730924e-06, "loss": 0.0036, "step": 54580 }, { "epoch": 0.9225725222446617, "grad_norm": 0.030455652624368668, "learning_rate": 6.523721202585118e-06, "loss": 0.0017, "step": 54590 }, { "epoch": 0.9227415225235122, "grad_norm": 0.07699055969715118, "learning_rate": 6.522316458526019e-06, "loss": 0.0019, "step": 54600 }, { "epoch": 0.9229105228023626, "grad_norm": 0.08997844159603119, "learning_rate": 6.520911582018012e-06, "loss": 0.0014, "step": 54610 }, { "epoch": 0.9230795230812131, "grad_norm": 0.1037558987736702, "learning_rate": 6.519506573183328e-06, "loss": 0.0018, "step": 54620 }, { "epoch": 0.9232485233600636, "grad_norm": 0.031466227024793625, "learning_rate": 6.518101432144208e-06, "loss": 0.0023, "step": 54630 }, { "epoch": 0.923417523638914, "grad_norm": 0.15260553359985352, "learning_rate": 6.5166961590229105e-06, "loss": 0.003, "step": 54640 }, { "epoch": 0.9235865239177644, "grad_norm": 0.07779121398925781, "learning_rate": 6.515290753941697e-06, "loss": 0.0028, "step": 54650 }, { "epoch": 0.9237555241966149, "grad_norm": 0.04281062260270119, "learning_rate": 6.513885217022846e-06, "loss": 0.001, "step": 54660 }, { "epoch": 0.9239245244754654, "grad_norm": 0.04976315423846245, "learning_rate": 6.512479548388647e-06, "loss": 0.001, "step": 54670 }, { "epoch": 0.9240935247543158, "grad_norm": 0.05488727241754532, "learning_rate": 6.5110737481614e-06, "loss": 0.0014, "step": 54680 }, { "epoch": 0.9242625250331663, "grad_norm": 0.054966460913419724, "learning_rate": 6.509667816463414e-06, "loss": 0.001, "step": 54690 }, { "epoch": 0.9244315253120168, "grad_norm": 0.08770200610160828, "learning_rate": 6.508261753417014e-06, "loss": 0.0011, "step": 54700 }, { "epoch": 0.9246005255908673, "grad_norm": 0.027067530900239944, "learning_rate": 6.506855559144535e-06, "loss": 0.0017, "step": 54710 }, { "epoch": 0.9247695258697177, "grad_norm": 0.050520192831754684, "learning_rate": 6.5054492337683205e-06, "loss": 0.0015, "step": 54720 }, { "epoch": 0.9249385261485682, "grad_norm": 0.09086226671934128, "learning_rate": 6.504042777410728e-06, "loss": 0.0016, "step": 54730 }, { "epoch": 0.9251075264274186, "grad_norm": 0.026549015194177628, "learning_rate": 6.502636190194127e-06, "loss": 0.004, "step": 54740 }, { "epoch": 0.925276526706269, "grad_norm": 0.009608623571693897, "learning_rate": 6.501229472240896e-06, "loss": 0.0024, "step": 54750 }, { "epoch": 0.9254455269851195, "grad_norm": 0.03781052678823471, "learning_rate": 6.499822623673429e-06, "loss": 0.0011, "step": 54760 }, { "epoch": 0.92561452726397, "grad_norm": 0.03538934886455536, "learning_rate": 6.498415644614126e-06, "loss": 0.0017, "step": 54770 }, { "epoch": 0.9257835275428205, "grad_norm": 0.058362703770399094, "learning_rate": 6.497008535185402e-06, "loss": 0.0014, "step": 54780 }, { "epoch": 0.9259525278216709, "grad_norm": 0.01351145002990961, "learning_rate": 6.495601295509683e-06, "loss": 0.0011, "step": 54790 }, { "epoch": 0.9261215281005214, "grad_norm": 0.05451357737183571, "learning_rate": 6.494193925709405e-06, "loss": 0.0022, "step": 54800 }, { "epoch": 0.9262905283793719, "grad_norm": 0.1450265794992447, "learning_rate": 6.492786425907015e-06, "loss": 0.0015, "step": 54810 }, { "epoch": 0.9264595286582222, "grad_norm": 0.12919071316719055, "learning_rate": 6.4913787962249745e-06, "loss": 0.0017, "step": 54820 }, { "epoch": 0.9266285289370727, "grad_norm": 0.006917062681168318, "learning_rate": 6.489971036785752e-06, "loss": 0.001, "step": 54830 }, { "epoch": 0.9267975292159232, "grad_norm": 0.008178263902664185, "learning_rate": 6.488563147711829e-06, "loss": 0.0019, "step": 54840 }, { "epoch": 0.9269665294947737, "grad_norm": 0.04077032953500748, "learning_rate": 6.487155129125701e-06, "loss": 0.0013, "step": 54850 }, { "epoch": 0.9271355297736241, "grad_norm": 0.06073884665966034, "learning_rate": 6.485746981149872e-06, "loss": 0.0018, "step": 54860 }, { "epoch": 0.9273045300524746, "grad_norm": 0.028377428650856018, "learning_rate": 6.4843387039068566e-06, "loss": 0.0016, "step": 54870 }, { "epoch": 0.9274735303313251, "grad_norm": 0.3094031810760498, "learning_rate": 6.482930297519181e-06, "loss": 0.0017, "step": 54880 }, { "epoch": 0.9276425306101755, "grad_norm": 0.04300961270928383, "learning_rate": 6.481521762109386e-06, "loss": 0.0015, "step": 54890 }, { "epoch": 0.927811530889026, "grad_norm": 0.13757209479808807, "learning_rate": 6.48011309780002e-06, "loss": 0.0017, "step": 54900 }, { "epoch": 0.9279805311678764, "grad_norm": 0.020510252565145493, "learning_rate": 6.478704304713641e-06, "loss": 0.0032, "step": 54910 }, { "epoch": 0.9281495314467268, "grad_norm": 0.209051713347435, "learning_rate": 6.477295382972826e-06, "loss": 0.0015, "step": 54920 }, { "epoch": 0.9283185317255773, "grad_norm": 0.35918501019477844, "learning_rate": 6.475886332700152e-06, "loss": 0.0019, "step": 54930 }, { "epoch": 0.9284875320044278, "grad_norm": 0.03130156919360161, "learning_rate": 6.4744771540182175e-06, "loss": 0.002, "step": 54940 }, { "epoch": 0.9286565322832783, "grad_norm": 0.17833785712718964, "learning_rate": 6.473067847049627e-06, "loss": 0.0014, "step": 54950 }, { "epoch": 0.9288255325621287, "grad_norm": 0.02514052763581276, "learning_rate": 6.4716584119169956e-06, "loss": 0.0017, "step": 54960 }, { "epoch": 0.9289945328409792, "grad_norm": 0.030019115656614304, "learning_rate": 6.4702488487429526e-06, "loss": 0.0016, "step": 54970 }, { "epoch": 0.9291635331198297, "grad_norm": 0.03886539489030838, "learning_rate": 6.468839157650138e-06, "loss": 0.0013, "step": 54980 }, { "epoch": 0.9293325333986802, "grad_norm": 0.006110138725489378, "learning_rate": 6.467429338761197e-06, "loss": 0.002, "step": 54990 }, { "epoch": 0.9295015336775305, "grad_norm": 0.06936800479888916, "learning_rate": 6.466019392198795e-06, "loss": 0.0022, "step": 55000 }, { "epoch": 0.929670533956381, "grad_norm": 0.04609772190451622, "learning_rate": 6.464609318085602e-06, "loss": 0.0013, "step": 55010 }, { "epoch": 0.9298395342352315, "grad_norm": 0.04684607684612274, "learning_rate": 6.463199116544303e-06, "loss": 0.0012, "step": 55020 }, { "epoch": 0.9300085345140819, "grad_norm": 0.03586406260728836, "learning_rate": 6.4617887876975916e-06, "loss": 0.0011, "step": 55030 }, { "epoch": 0.9301775347929324, "grad_norm": 0.13385647535324097, "learning_rate": 6.460378331668174e-06, "loss": 0.0023, "step": 55040 }, { "epoch": 0.9303465350717829, "grad_norm": 0.047625139355659485, "learning_rate": 6.458967748578764e-06, "loss": 0.0017, "step": 55050 }, { "epoch": 0.9305155353506334, "grad_norm": 0.1226748377084732, "learning_rate": 6.457557038552091e-06, "loss": 0.001, "step": 55060 }, { "epoch": 0.9306845356294838, "grad_norm": 0.2520878314971924, "learning_rate": 6.456146201710895e-06, "loss": 0.0028, "step": 55070 }, { "epoch": 0.9308535359083342, "grad_norm": 0.043165381997823715, "learning_rate": 6.454735238177924e-06, "loss": 0.0015, "step": 55080 }, { "epoch": 0.9310225361871847, "grad_norm": 0.06458351761102676, "learning_rate": 6.453324148075939e-06, "loss": 0.002, "step": 55090 }, { "epoch": 0.9311915364660351, "grad_norm": 0.16306425631046295, "learning_rate": 6.4519129315277104e-06, "loss": 0.0016, "step": 55100 }, { "epoch": 0.9313605367448856, "grad_norm": 0.05315855145454407, "learning_rate": 6.450501588656024e-06, "loss": 0.0008, "step": 55110 }, { "epoch": 0.9315295370237361, "grad_norm": 0.09375635534524918, "learning_rate": 6.449090119583671e-06, "loss": 0.0013, "step": 55120 }, { "epoch": 0.9316985373025866, "grad_norm": 0.05113506317138672, "learning_rate": 6.447678524433456e-06, "loss": 0.0018, "step": 55130 }, { "epoch": 0.931867537581437, "grad_norm": 0.01696619763970375, "learning_rate": 6.4462668033281935e-06, "loss": 0.001, "step": 55140 }, { "epoch": 0.9320365378602875, "grad_norm": 0.000842265144456178, "learning_rate": 6.444854956390715e-06, "loss": 0.0013, "step": 55150 }, { "epoch": 0.932205538139138, "grad_norm": 0.022497303783893585, "learning_rate": 6.443442983743853e-06, "loss": 0.0011, "step": 55160 }, { "epoch": 0.9323745384179883, "grad_norm": 0.03009202890098095, "learning_rate": 6.442030885510459e-06, "loss": 0.0024, "step": 55170 }, { "epoch": 0.9325435386968388, "grad_norm": 0.027150332927703857, "learning_rate": 6.440618661813389e-06, "loss": 0.0014, "step": 55180 }, { "epoch": 0.9327125389756893, "grad_norm": 0.03145081549882889, "learning_rate": 6.439206312775518e-06, "loss": 0.001, "step": 55190 }, { "epoch": 0.9328815392545398, "grad_norm": 0.06422532349824905, "learning_rate": 6.437793838519724e-06, "loss": 0.0018, "step": 55200 }, { "epoch": 0.9330505395333902, "grad_norm": 0.08543737232685089, "learning_rate": 6.4363812391688985e-06, "loss": 0.0029, "step": 55210 }, { "epoch": 0.9332195398122407, "grad_norm": 0.12925246357917786, "learning_rate": 6.434968514845947e-06, "loss": 0.0018, "step": 55220 }, { "epoch": 0.9333885400910912, "grad_norm": 0.040887799113988876, "learning_rate": 6.433555665673781e-06, "loss": 0.0013, "step": 55230 }, { "epoch": 0.9335575403699417, "grad_norm": 0.1471778154373169, "learning_rate": 6.432142691775327e-06, "loss": 0.002, "step": 55240 }, { "epoch": 0.9337265406487921, "grad_norm": 0.07183373719453812, "learning_rate": 6.430729593273518e-06, "loss": 0.0016, "step": 55250 }, { "epoch": 0.9338955409276425, "grad_norm": 0.04649088531732559, "learning_rate": 6.429316370291305e-06, "loss": 0.0013, "step": 55260 }, { "epoch": 0.934064541206493, "grad_norm": 0.2931188642978668, "learning_rate": 6.427903022951642e-06, "loss": 0.0032, "step": 55270 }, { "epoch": 0.9342335414853434, "grad_norm": 0.031886644661426544, "learning_rate": 6.426489551377497e-06, "loss": 0.0014, "step": 55280 }, { "epoch": 0.9344025417641939, "grad_norm": 0.21854813396930695, "learning_rate": 6.42507595569185e-06, "loss": 0.0027, "step": 55290 }, { "epoch": 0.9345715420430444, "grad_norm": 0.1134476289153099, "learning_rate": 6.423662236017692e-06, "loss": 0.0012, "step": 55300 }, { "epoch": 0.9347405423218949, "grad_norm": 0.07070956379175186, "learning_rate": 6.422248392478019e-06, "loss": 0.0026, "step": 55310 }, { "epoch": 0.9349095426007453, "grad_norm": 0.01285579614341259, "learning_rate": 6.420834425195845e-06, "loss": 0.0012, "step": 55320 }, { "epoch": 0.9350785428795958, "grad_norm": 0.8135231733322144, "learning_rate": 6.419420334294193e-06, "loss": 0.004, "step": 55330 }, { "epoch": 0.9352475431584462, "grad_norm": 0.2671804428100586, "learning_rate": 6.418006119896094e-06, "loss": 0.0021, "step": 55340 }, { "epoch": 0.9354165434372966, "grad_norm": 0.03381161391735077, "learning_rate": 6.416591782124592e-06, "loss": 0.0015, "step": 55350 }, { "epoch": 0.9355855437161471, "grad_norm": 0.020434875041246414, "learning_rate": 6.415177321102744e-06, "loss": 0.0031, "step": 55360 }, { "epoch": 0.9357545439949976, "grad_norm": 0.003813839051872492, "learning_rate": 6.413762736953609e-06, "loss": 0.0014, "step": 55370 }, { "epoch": 0.935923544273848, "grad_norm": 0.1512255221605301, "learning_rate": 6.412348029800268e-06, "loss": 0.0021, "step": 55380 }, { "epoch": 0.9360925445526985, "grad_norm": 0.01463087648153305, "learning_rate": 6.410933199765806e-06, "loss": 0.001, "step": 55390 }, { "epoch": 0.936261544831549, "grad_norm": 0.046215448528528214, "learning_rate": 6.409518246973318e-06, "loss": 0.0016, "step": 55400 }, { "epoch": 0.9364305451103995, "grad_norm": 0.05667540803551674, "learning_rate": 6.408103171545913e-06, "loss": 0.0025, "step": 55410 }, { "epoch": 0.9365995453892499, "grad_norm": 0.2856285870075226, "learning_rate": 6.406687973606709e-06, "loss": 0.004, "step": 55420 }, { "epoch": 0.9367685456681003, "grad_norm": 0.11543390899896622, "learning_rate": 6.405272653278837e-06, "loss": 0.0011, "step": 55430 }, { "epoch": 0.9369375459469508, "grad_norm": 0.04262326657772064, "learning_rate": 6.403857210685435e-06, "loss": 0.0025, "step": 55440 }, { "epoch": 0.9371065462258013, "grad_norm": 0.018638089299201965, "learning_rate": 6.402441645949655e-06, "loss": 0.0023, "step": 55450 }, { "epoch": 0.9372755465046517, "grad_norm": 0.025145070627331734, "learning_rate": 6.401025959194656e-06, "loss": 0.0031, "step": 55460 }, { "epoch": 0.9374445467835022, "grad_norm": 0.075159952044487, "learning_rate": 6.39961015054361e-06, "loss": 0.0016, "step": 55470 }, { "epoch": 0.9376135470623527, "grad_norm": 0.18348009884357452, "learning_rate": 6.398194220119701e-06, "loss": 0.0011, "step": 55480 }, { "epoch": 0.9377825473412031, "grad_norm": 0.06098527833819389, "learning_rate": 6.396778168046119e-06, "loss": 0.0011, "step": 55490 }, { "epoch": 0.9379515476200536, "grad_norm": 0.013626277446746826, "learning_rate": 6.39536199444607e-06, "loss": 0.0017, "step": 55500 }, { "epoch": 0.938120547898904, "grad_norm": 0.05409810692071915, "learning_rate": 6.393945699442765e-06, "loss": 0.0011, "step": 55510 }, { "epoch": 0.9382895481777545, "grad_norm": 0.06189171224832535, "learning_rate": 6.392529283159432e-06, "loss": 0.0017, "step": 55520 }, { "epoch": 0.9384585484566049, "grad_norm": 0.11078029125928879, "learning_rate": 6.391112745719303e-06, "loss": 0.0026, "step": 55530 }, { "epoch": 0.9386275487354554, "grad_norm": 0.005103504750877619, "learning_rate": 6.389696087245626e-06, "loss": 0.0014, "step": 55540 }, { "epoch": 0.9387965490143059, "grad_norm": 0.005580130498856306, "learning_rate": 6.388279307861656e-06, "loss": 0.001, "step": 55550 }, { "epoch": 0.9389655492931563, "grad_norm": 0.07432939857244492, "learning_rate": 6.386862407690661e-06, "loss": 0.0012, "step": 55560 }, { "epoch": 0.9391345495720068, "grad_norm": 0.0997578427195549, "learning_rate": 6.385445386855915e-06, "loss": 0.0028, "step": 55570 }, { "epoch": 0.9393035498508573, "grad_norm": 0.046545471996068954, "learning_rate": 6.384028245480709e-06, "loss": 0.0054, "step": 55580 }, { "epoch": 0.9394725501297078, "grad_norm": 0.06160053238272667, "learning_rate": 6.38261098368834e-06, "loss": 0.0008, "step": 55590 }, { "epoch": 0.9396415504085581, "grad_norm": 0.03182736784219742, "learning_rate": 6.381193601602116e-06, "loss": 0.0011, "step": 55600 }, { "epoch": 0.9398105506874086, "grad_norm": 0.05927756801247597, "learning_rate": 6.379776099345356e-06, "loss": 0.0022, "step": 55610 }, { "epoch": 0.9399795509662591, "grad_norm": 0.02774706669151783, "learning_rate": 6.378358477041391e-06, "loss": 0.001, "step": 55620 }, { "epoch": 0.9401485512451095, "grad_norm": 0.059262096881866455, "learning_rate": 6.3769407348135595e-06, "loss": 0.0023, "step": 55630 }, { "epoch": 0.94031755152396, "grad_norm": 0.04048911854624748, "learning_rate": 6.375522872785213e-06, "loss": 0.0017, "step": 55640 }, { "epoch": 0.9404865518028105, "grad_norm": 0.03914349153637886, "learning_rate": 6.374104891079713e-06, "loss": 0.0027, "step": 55650 }, { "epoch": 0.940655552081661, "grad_norm": 2.4938673973083496, "learning_rate": 6.37268678982043e-06, "loss": 0.002, "step": 55660 }, { "epoch": 0.9408245523605114, "grad_norm": 0.09228339791297913, "learning_rate": 6.371268569130744e-06, "loss": 0.0013, "step": 55670 }, { "epoch": 0.9409935526393619, "grad_norm": 0.08421079069375992, "learning_rate": 6.369850229134049e-06, "loss": 0.0018, "step": 55680 }, { "epoch": 0.9411625529182123, "grad_norm": 0.013725800439715385, "learning_rate": 6.368431769953747e-06, "loss": 0.0016, "step": 55690 }, { "epoch": 0.9413315531970627, "grad_norm": 0.04755226522684097, "learning_rate": 6.36701319171325e-06, "loss": 0.0014, "step": 55700 }, { "epoch": 0.9415005534759132, "grad_norm": 0.03607148677110672, "learning_rate": 6.365594494535982e-06, "loss": 0.0021, "step": 55710 }, { "epoch": 0.9416695537547637, "grad_norm": 0.2749858498573303, "learning_rate": 6.3641756785453775e-06, "loss": 0.0012, "step": 55720 }, { "epoch": 0.9418385540336142, "grad_norm": 0.09154827147722244, "learning_rate": 6.36275674386488e-06, "loss": 0.0012, "step": 55730 }, { "epoch": 0.9420075543124646, "grad_norm": 0.03720062971115112, "learning_rate": 6.361337690617942e-06, "loss": 0.0013, "step": 55740 }, { "epoch": 0.9421765545913151, "grad_norm": 0.040803078562021255, "learning_rate": 6.35991851892803e-06, "loss": 0.0015, "step": 55750 }, { "epoch": 0.9423455548701656, "grad_norm": 0.15711310505867004, "learning_rate": 6.358499228918617e-06, "loss": 0.0015, "step": 55760 }, { "epoch": 0.9425145551490159, "grad_norm": 0.011676276102662086, "learning_rate": 6.357079820713188e-06, "loss": 0.0014, "step": 55770 }, { "epoch": 0.9426835554278664, "grad_norm": 0.144158735871315, "learning_rate": 6.355660294435242e-06, "loss": 0.0017, "step": 55780 }, { "epoch": 0.9428525557067169, "grad_norm": 0.020729806274175644, "learning_rate": 6.35424065020828e-06, "loss": 0.0009, "step": 55790 }, { "epoch": 0.9430215559855674, "grad_norm": 0.04956257343292236, "learning_rate": 6.352820888155821e-06, "loss": 0.0013, "step": 55800 }, { "epoch": 0.9431905562644178, "grad_norm": 0.13145987689495087, "learning_rate": 6.3514010084013896e-06, "loss": 0.0024, "step": 55810 }, { "epoch": 0.9433595565432683, "grad_norm": 0.09588636457920074, "learning_rate": 6.3499810110685224e-06, "loss": 0.0012, "step": 55820 }, { "epoch": 0.9435285568221188, "grad_norm": 0.09883897006511688, "learning_rate": 6.348560896280767e-06, "loss": 0.001, "step": 55830 }, { "epoch": 0.9436975571009693, "grad_norm": 0.027389192953705788, "learning_rate": 6.34714066416168e-06, "loss": 0.001, "step": 55840 }, { "epoch": 0.9438665573798197, "grad_norm": 0.058483511209487915, "learning_rate": 6.345720314834828e-06, "loss": 0.0008, "step": 55850 }, { "epoch": 0.9440355576586701, "grad_norm": 0.025423582643270493, "learning_rate": 6.344299848423788e-06, "loss": 0.0013, "step": 55860 }, { "epoch": 0.9442045579375206, "grad_norm": 0.1596396118402481, "learning_rate": 6.342879265052149e-06, "loss": 0.0023, "step": 55870 }, { "epoch": 0.944373558216371, "grad_norm": 0.10541984438896179, "learning_rate": 6.341458564843507e-06, "loss": 0.0009, "step": 55880 }, { "epoch": 0.9445425584952215, "grad_norm": 0.060642823576927185, "learning_rate": 6.34003774792147e-06, "loss": 0.0012, "step": 55890 }, { "epoch": 0.944711558774072, "grad_norm": 0.08629707247018814, "learning_rate": 6.3386168144096564e-06, "loss": 0.0019, "step": 55900 }, { "epoch": 0.9448805590529225, "grad_norm": 0.039498019963502884, "learning_rate": 6.337195764431694e-06, "loss": 0.0011, "step": 55910 }, { "epoch": 0.9450495593317729, "grad_norm": 0.06169212982058525, "learning_rate": 6.335774598111222e-06, "loss": 0.0013, "step": 55920 }, { "epoch": 0.9452185596106234, "grad_norm": 0.14251552522182465, "learning_rate": 6.334353315571887e-06, "loss": 0.0023, "step": 55930 }, { "epoch": 0.9453875598894739, "grad_norm": 0.04333026334643364, "learning_rate": 6.33293191693735e-06, "loss": 0.0014, "step": 55940 }, { "epoch": 0.9455565601683242, "grad_norm": 0.032190270721912384, "learning_rate": 6.331510402331276e-06, "loss": 0.001, "step": 55950 }, { "epoch": 0.9457255604471747, "grad_norm": 0.08754512667655945, "learning_rate": 6.330088771877347e-06, "loss": 0.0015, "step": 55960 }, { "epoch": 0.9458945607260252, "grad_norm": 0.0429929755628109, "learning_rate": 6.32866702569925e-06, "loss": 0.0013, "step": 55970 }, { "epoch": 0.9460635610048757, "grad_norm": 0.11732529103755951, "learning_rate": 6.327245163920685e-06, "loss": 0.0012, "step": 55980 }, { "epoch": 0.9462325612837261, "grad_norm": 0.03252869099378586, "learning_rate": 6.325823186665358e-06, "loss": 0.0013, "step": 55990 }, { "epoch": 0.9464015615625766, "grad_norm": 0.025627510622143745, "learning_rate": 6.324401094056991e-06, "loss": 0.0013, "step": 56000 }, { "epoch": 0.9465705618414271, "grad_norm": 0.10628566890954971, "learning_rate": 6.322978886219313e-06, "loss": 0.0016, "step": 56010 }, { "epoch": 0.9467395621202775, "grad_norm": 0.07275257259607315, "learning_rate": 6.32155656327606e-06, "loss": 0.0009, "step": 56020 }, { "epoch": 0.9469085623991279, "grad_norm": 0.047055941075086594, "learning_rate": 6.320134125350984e-06, "loss": 0.001, "step": 56030 }, { "epoch": 0.9470775626779784, "grad_norm": 0.04651493579149246, "learning_rate": 6.3187115725678435e-06, "loss": 0.001, "step": 56040 }, { "epoch": 0.9472465629568289, "grad_norm": 0.10704516619443893, "learning_rate": 6.3172889050504065e-06, "loss": 0.0018, "step": 56050 }, { "epoch": 0.9474155632356793, "grad_norm": 0.04490116983652115, "learning_rate": 6.3158661229224524e-06, "loss": 0.0021, "step": 56060 }, { "epoch": 0.9475845635145298, "grad_norm": 0.07242177426815033, "learning_rate": 6.31444322630777e-06, "loss": 0.0017, "step": 56070 }, { "epoch": 0.9477535637933803, "grad_norm": 0.03556281700730324, "learning_rate": 6.313020215330159e-06, "loss": 0.0014, "step": 56080 }, { "epoch": 0.9479225640722307, "grad_norm": 0.047583408653736115, "learning_rate": 6.311597090113426e-06, "loss": 0.0012, "step": 56090 }, { "epoch": 0.9480915643510812, "grad_norm": 0.02261732518672943, "learning_rate": 6.310173850781391e-06, "loss": 0.0016, "step": 56100 }, { "epoch": 0.9482605646299317, "grad_norm": 0.0035633507650345564, "learning_rate": 6.308750497457885e-06, "loss": 0.0018, "step": 56110 }, { "epoch": 0.948429564908782, "grad_norm": 0.028753597289323807, "learning_rate": 6.307327030266743e-06, "loss": 0.0013, "step": 56120 }, { "epoch": 0.9485985651876325, "grad_norm": 0.014429560862481594, "learning_rate": 6.305903449331817e-06, "loss": 0.0017, "step": 56130 }, { "epoch": 0.948767565466483, "grad_norm": 0.05367155745625496, "learning_rate": 6.304479754776962e-06, "loss": 0.001, "step": 56140 }, { "epoch": 0.9489365657453335, "grad_norm": 0.0019822013564407825, "learning_rate": 6.303055946726049e-06, "loss": 0.0014, "step": 56150 }, { "epoch": 0.9491055660241839, "grad_norm": 0.14954140782356262, "learning_rate": 6.301632025302955e-06, "loss": 0.0026, "step": 56160 }, { "epoch": 0.9492745663030344, "grad_norm": 0.06319954991340637, "learning_rate": 6.300207990631568e-06, "loss": 0.0014, "step": 56170 }, { "epoch": 0.9494435665818849, "grad_norm": 0.016481533646583557, "learning_rate": 6.298783842835787e-06, "loss": 0.0016, "step": 56180 }, { "epoch": 0.9496125668607354, "grad_norm": 0.06972762942314148, "learning_rate": 6.297359582039518e-06, "loss": 0.0015, "step": 56190 }, { "epoch": 0.9497815671395857, "grad_norm": 0.03541379049420357, "learning_rate": 6.295935208366679e-06, "loss": 0.0019, "step": 56200 }, { "epoch": 0.9499505674184362, "grad_norm": 0.08550960570573807, "learning_rate": 6.294510721941198e-06, "loss": 0.001, "step": 56210 }, { "epoch": 0.9501195676972867, "grad_norm": 0.08662448078393936, "learning_rate": 6.293086122887013e-06, "loss": 0.0031, "step": 56220 }, { "epoch": 0.9502885679761371, "grad_norm": 0.041542794555425644, "learning_rate": 6.29166141132807e-06, "loss": 0.0018, "step": 56230 }, { "epoch": 0.9504575682549876, "grad_norm": 0.06773274391889572, "learning_rate": 6.2902365873883275e-06, "loss": 0.0017, "step": 56240 }, { "epoch": 0.9506265685338381, "grad_norm": 0.08712329715490341, "learning_rate": 6.288811651191749e-06, "loss": 0.0013, "step": 56250 }, { "epoch": 0.9507955688126886, "grad_norm": 0.10475753247737885, "learning_rate": 6.287386602862312e-06, "loss": 0.0014, "step": 56260 }, { "epoch": 0.950964569091539, "grad_norm": 0.007063519209623337, "learning_rate": 6.285961442524003e-06, "loss": 0.0011, "step": 56270 }, { "epoch": 0.9511335693703895, "grad_norm": 0.06975963711738586, "learning_rate": 6.284536170300818e-06, "loss": 0.0013, "step": 56280 }, { "epoch": 0.9513025696492399, "grad_norm": 0.02428462915122509, "learning_rate": 6.283110786316763e-06, "loss": 0.0013, "step": 56290 }, { "epoch": 0.9514715699280903, "grad_norm": 0.020005803555250168, "learning_rate": 6.281685290695851e-06, "loss": 0.0018, "step": 56300 }, { "epoch": 0.9516405702069408, "grad_norm": 0.0080246077850461, "learning_rate": 6.280259683562111e-06, "loss": 0.0015, "step": 56310 }, { "epoch": 0.9518095704857913, "grad_norm": 0.19228099286556244, "learning_rate": 6.2788339650395744e-06, "loss": 0.0015, "step": 56320 }, { "epoch": 0.9519785707646418, "grad_norm": 0.06089368835091591, "learning_rate": 6.277408135252288e-06, "loss": 0.0014, "step": 56330 }, { "epoch": 0.9521475710434922, "grad_norm": 0.10971179604530334, "learning_rate": 6.275982194324304e-06, "loss": 0.001, "step": 56340 }, { "epoch": 0.9523165713223427, "grad_norm": 0.11247263848781586, "learning_rate": 6.274556142379686e-06, "loss": 0.0015, "step": 56350 }, { "epoch": 0.9524855716011932, "grad_norm": 0.01730550080537796, "learning_rate": 6.273129979542509e-06, "loss": 0.0008, "step": 56360 }, { "epoch": 0.9526545718800437, "grad_norm": 0.016524197533726692, "learning_rate": 6.2717037059368555e-06, "loss": 0.001, "step": 56370 }, { "epoch": 0.952823572158894, "grad_norm": 0.10556582361459732, "learning_rate": 6.2702773216868185e-06, "loss": 0.0022, "step": 56380 }, { "epoch": 0.9529925724377445, "grad_norm": 0.019711600616574287, "learning_rate": 6.268850826916497e-06, "loss": 0.0021, "step": 56390 }, { "epoch": 0.953161572716595, "grad_norm": 0.009352779015898705, "learning_rate": 6.2674242217500096e-06, "loss": 0.0007, "step": 56400 }, { "epoch": 0.9533305729954454, "grad_norm": 0.07730622589588165, "learning_rate": 6.265997506311472e-06, "loss": 0.0014, "step": 56410 }, { "epoch": 0.9534995732742959, "grad_norm": 0.027490902692079544, "learning_rate": 6.2645706807250175e-06, "loss": 0.0012, "step": 56420 }, { "epoch": 0.9536685735531464, "grad_norm": 0.09496179968118668, "learning_rate": 6.263143745114788e-06, "loss": 0.003, "step": 56430 }, { "epoch": 0.9538375738319969, "grad_norm": 0.1781720072031021, "learning_rate": 6.261716699604932e-06, "loss": 0.0042, "step": 56440 }, { "epoch": 0.9540065741108473, "grad_norm": 0.039281900972127914, "learning_rate": 6.26028954431961e-06, "loss": 0.0012, "step": 56450 }, { "epoch": 0.9541755743896977, "grad_norm": 0.030199745669960976, "learning_rate": 6.2588622793829914e-06, "loss": 0.0016, "step": 56460 }, { "epoch": 0.9543445746685482, "grad_norm": 0.07956171780824661, "learning_rate": 6.257434904919255e-06, "loss": 0.0011, "step": 56470 }, { "epoch": 0.9545135749473986, "grad_norm": 0.05297559127211571, "learning_rate": 6.256007421052588e-06, "loss": 0.0011, "step": 56480 }, { "epoch": 0.9546825752262491, "grad_norm": 0.024796657264232635, "learning_rate": 6.254579827907191e-06, "loss": 0.0009, "step": 56490 }, { "epoch": 0.9548515755050996, "grad_norm": 0.08131872117519379, "learning_rate": 6.253152125607271e-06, "loss": 0.0026, "step": 56500 }, { "epoch": 0.95502057578395, "grad_norm": 0.058359820395708084, "learning_rate": 6.251724314277045e-06, "loss": 0.0013, "step": 56510 }, { "epoch": 0.9551895760628005, "grad_norm": 0.028362829238176346, "learning_rate": 6.2502963940407376e-06, "loss": 0.0016, "step": 56520 }, { "epoch": 0.955358576341651, "grad_norm": 0.010488352738320827, "learning_rate": 6.248868365022586e-06, "loss": 0.0014, "step": 56530 }, { "epoch": 0.9555275766205015, "grad_norm": 0.07383910566568375, "learning_rate": 6.247440227346836e-06, "loss": 0.0018, "step": 56540 }, { "epoch": 0.9556965768993518, "grad_norm": 0.0049832952208817005, "learning_rate": 6.246011981137743e-06, "loss": 0.0021, "step": 56550 }, { "epoch": 0.9558655771782023, "grad_norm": 0.14020919799804688, "learning_rate": 6.244583626519569e-06, "loss": 0.0026, "step": 56560 }, { "epoch": 0.9560345774570528, "grad_norm": 0.17698492109775543, "learning_rate": 6.243155163616591e-06, "loss": 0.0035, "step": 56570 }, { "epoch": 0.9562035777359033, "grad_norm": 0.09493689239025116, "learning_rate": 6.241726592553089e-06, "loss": 0.0017, "step": 56580 }, { "epoch": 0.9563725780147537, "grad_norm": 0.052152473479509354, "learning_rate": 6.240297913453358e-06, "loss": 0.0014, "step": 56590 }, { "epoch": 0.9565415782936042, "grad_norm": 0.13550731539726257, "learning_rate": 6.2388691264417e-06, "loss": 0.0009, "step": 56600 }, { "epoch": 0.9567105785724547, "grad_norm": 0.15419816970825195, "learning_rate": 6.237440231642424e-06, "loss": 0.0012, "step": 56610 }, { "epoch": 0.9568795788513051, "grad_norm": 0.02464517019689083, "learning_rate": 6.236011229179854e-06, "loss": 0.0016, "step": 56620 }, { "epoch": 0.9570485791301556, "grad_norm": 0.010589303448796272, "learning_rate": 6.234582119178317e-06, "loss": 0.0011, "step": 56630 }, { "epoch": 0.957217579409006, "grad_norm": 0.05937943980097771, "learning_rate": 6.233152901762155e-06, "loss": 0.0024, "step": 56640 }, { "epoch": 0.9573865796878565, "grad_norm": 0.12208323180675507, "learning_rate": 6.231723577055715e-06, "loss": 0.0011, "step": 56650 }, { "epoch": 0.9575555799667069, "grad_norm": 0.03957764059305191, "learning_rate": 6.230294145183357e-06, "loss": 0.0019, "step": 56660 }, { "epoch": 0.9577245802455574, "grad_norm": 0.07161909341812134, "learning_rate": 6.228864606269446e-06, "loss": 0.0032, "step": 56670 }, { "epoch": 0.9578935805244079, "grad_norm": 0.08066277951002121, "learning_rate": 6.227434960438361e-06, "loss": 0.0021, "step": 56680 }, { "epoch": 0.9580625808032583, "grad_norm": 0.01578165404498577, "learning_rate": 6.2260052078144875e-06, "loss": 0.0014, "step": 56690 }, { "epoch": 0.9582315810821088, "grad_norm": 0.05495981127023697, "learning_rate": 6.224575348522221e-06, "loss": 0.0014, "step": 56700 }, { "epoch": 0.9584005813609593, "grad_norm": 0.21792486310005188, "learning_rate": 6.223145382685965e-06, "loss": 0.0007, "step": 56710 }, { "epoch": 0.9585695816398097, "grad_norm": 0.017716677859425545, "learning_rate": 6.221715310430135e-06, "loss": 0.0009, "step": 56720 }, { "epoch": 0.9587385819186601, "grad_norm": 0.042220745235681534, "learning_rate": 6.220285131879153e-06, "loss": 0.0006, "step": 56730 }, { "epoch": 0.9589075821975106, "grad_norm": 0.06167328357696533, "learning_rate": 6.218854847157454e-06, "loss": 0.0011, "step": 56740 }, { "epoch": 0.9590765824763611, "grad_norm": 0.04163149371743202, "learning_rate": 6.217424456389477e-06, "loss": 0.0012, "step": 56750 }, { "epoch": 0.9592455827552115, "grad_norm": 0.027633186429739, "learning_rate": 6.215993959699672e-06, "loss": 0.0007, "step": 56760 }, { "epoch": 0.959414583034062, "grad_norm": 0.04331083595752716, "learning_rate": 6.214563357212502e-06, "loss": 0.0011, "step": 56770 }, { "epoch": 0.9595835833129125, "grad_norm": 0.06631213426589966, "learning_rate": 6.213132649052435e-06, "loss": 0.0013, "step": 56780 }, { "epoch": 0.959752583591763, "grad_norm": 0.31079480051994324, "learning_rate": 6.21170183534395e-06, "loss": 0.0012, "step": 56790 }, { "epoch": 0.9599215838706134, "grad_norm": 0.2310328185558319, "learning_rate": 6.210270916211535e-06, "loss": 0.0038, "step": 56800 }, { "epoch": 0.9600905841494638, "grad_norm": 0.26346540451049805, "learning_rate": 6.208839891779685e-06, "loss": 0.0045, "step": 56810 }, { "epoch": 0.9602595844283143, "grad_norm": 0.06398440897464752, "learning_rate": 6.207408762172909e-06, "loss": 0.0063, "step": 56820 }, { "epoch": 0.9604285847071647, "grad_norm": 0.07587594538927078, "learning_rate": 6.205977527515721e-06, "loss": 0.0026, "step": 56830 }, { "epoch": 0.9605975849860152, "grad_norm": 0.003844884689897299, "learning_rate": 6.204546187932644e-06, "loss": 0.0014, "step": 56840 }, { "epoch": 0.9607665852648657, "grad_norm": 0.0372488796710968, "learning_rate": 6.203114743548213e-06, "loss": 0.0009, "step": 56850 }, { "epoch": 0.9609355855437162, "grad_norm": 0.00627544242888689, "learning_rate": 6.20168319448697e-06, "loss": 0.001, "step": 56860 }, { "epoch": 0.9611045858225666, "grad_norm": 0.05668949335813522, "learning_rate": 6.200251540873465e-06, "loss": 0.0014, "step": 56870 }, { "epoch": 0.9612735861014171, "grad_norm": 0.008376243524253368, "learning_rate": 6.198819782832263e-06, "loss": 0.0024, "step": 56880 }, { "epoch": 0.9614425863802675, "grad_norm": 0.013443087227642536, "learning_rate": 6.19738792048793e-06, "loss": 0.0013, "step": 56890 }, { "epoch": 0.961611586659118, "grad_norm": 0.034180834889411926, "learning_rate": 6.195955953965049e-06, "loss": 0.0018, "step": 56900 }, { "epoch": 0.9617805869379684, "grad_norm": 0.05446441471576691, "learning_rate": 6.194523883388203e-06, "loss": 0.0012, "step": 56910 }, { "epoch": 0.9619495872168189, "grad_norm": 0.11061554402112961, "learning_rate": 6.193091708881992e-06, "loss": 0.0006, "step": 56920 }, { "epoch": 0.9621185874956694, "grad_norm": 0.0899341031908989, "learning_rate": 6.191659430571022e-06, "loss": 0.0008, "step": 56930 }, { "epoch": 0.9622875877745198, "grad_norm": 0.011873092502355576, "learning_rate": 6.190227048579908e-06, "loss": 0.0006, "step": 56940 }, { "epoch": 0.9624565880533703, "grad_norm": 0.028710903599858284, "learning_rate": 6.188794563033274e-06, "loss": 0.001, "step": 56950 }, { "epoch": 0.9626255883322208, "grad_norm": 0.008559263311326504, "learning_rate": 6.1873619740557515e-06, "loss": 0.0014, "step": 56960 }, { "epoch": 0.9627945886110713, "grad_norm": 0.07611192762851715, "learning_rate": 6.185929281771985e-06, "loss": 0.001, "step": 56970 }, { "epoch": 0.9629635888899216, "grad_norm": 0.07905812561511993, "learning_rate": 6.184496486306626e-06, "loss": 0.0015, "step": 56980 }, { "epoch": 0.9631325891687721, "grad_norm": 0.03562645614147186, "learning_rate": 6.1830635877843325e-06, "loss": 0.0008, "step": 56990 }, { "epoch": 0.9633015894476226, "grad_norm": 0.03875849395990372, "learning_rate": 6.181630586329776e-06, "loss": 0.0015, "step": 57000 }, { "epoch": 0.963470589726473, "grad_norm": 0.009532700292766094, "learning_rate": 6.1801974820676336e-06, "loss": 0.0018, "step": 57010 }, { "epoch": 0.9636395900053235, "grad_norm": 0.11676210910081863, "learning_rate": 6.178764275122592e-06, "loss": 0.0017, "step": 57020 }, { "epoch": 0.963808590284174, "grad_norm": 0.07378076761960983, "learning_rate": 6.177330965619346e-06, "loss": 0.0022, "step": 57030 }, { "epoch": 0.9639775905630245, "grad_norm": 0.049584876745939255, "learning_rate": 6.175897553682601e-06, "loss": 0.0033, "step": 57040 }, { "epoch": 0.9641465908418749, "grad_norm": 0.07187934964895248, "learning_rate": 6.174464039437074e-06, "loss": 0.0009, "step": 57050 }, { "epoch": 0.9643155911207254, "grad_norm": 0.01915927045047283, "learning_rate": 6.1730304230074835e-06, "loss": 0.0016, "step": 57060 }, { "epoch": 0.9644845913995758, "grad_norm": 0.07050523906946182, "learning_rate": 6.171596704518562e-06, "loss": 0.0016, "step": 57070 }, { "epoch": 0.9646535916784262, "grad_norm": 0.02501530572772026, "learning_rate": 6.170162884095053e-06, "loss": 0.0018, "step": 57080 }, { "epoch": 0.9648225919572767, "grad_norm": 0.1584603190422058, "learning_rate": 6.168728961861702e-06, "loss": 0.0013, "step": 57090 }, { "epoch": 0.9649915922361272, "grad_norm": 0.011680858209729195, "learning_rate": 6.1672949379432686e-06, "loss": 0.0017, "step": 57100 }, { "epoch": 0.9651605925149777, "grad_norm": 0.13482710719108582, "learning_rate": 6.165860812464522e-06, "loss": 0.0028, "step": 57110 }, { "epoch": 0.9653295927938281, "grad_norm": 0.013348933309316635, "learning_rate": 6.164426585550234e-06, "loss": 0.0016, "step": 57120 }, { "epoch": 0.9654985930726786, "grad_norm": 0.06926348060369492, "learning_rate": 6.162992257325191e-06, "loss": 0.0018, "step": 57130 }, { "epoch": 0.9656675933515291, "grad_norm": 0.017068268731236458, "learning_rate": 6.1615578279141874e-06, "loss": 0.0016, "step": 57140 }, { "epoch": 0.9658365936303794, "grad_norm": 0.04602353647351265, "learning_rate": 6.160123297442025e-06, "loss": 0.0016, "step": 57150 }, { "epoch": 0.9660055939092299, "grad_norm": 0.0930069163441658, "learning_rate": 6.158688666033515e-06, "loss": 0.0019, "step": 57160 }, { "epoch": 0.9661745941880804, "grad_norm": 0.11917515099048615, "learning_rate": 6.157253933813476e-06, "loss": 0.0021, "step": 57170 }, { "epoch": 0.9663435944669309, "grad_norm": 0.007319287862628698, "learning_rate": 6.155819100906739e-06, "loss": 0.0007, "step": 57180 }, { "epoch": 0.9665125947457813, "grad_norm": 0.08814607560634613, "learning_rate": 6.15438416743814e-06, "loss": 0.001, "step": 57190 }, { "epoch": 0.9666815950246318, "grad_norm": 0.09839361160993576, "learning_rate": 6.152949133532526e-06, "loss": 0.0015, "step": 57200 }, { "epoch": 0.9668505953034823, "grad_norm": 0.03802715986967087, "learning_rate": 6.151513999314749e-06, "loss": 0.0011, "step": 57210 }, { "epoch": 0.9670195955823327, "grad_norm": 0.129659041762352, "learning_rate": 6.150078764909676e-06, "loss": 0.0011, "step": 57220 }, { "epoch": 0.9671885958611832, "grad_norm": 0.04857420548796654, "learning_rate": 6.148643430442179e-06, "loss": 0.0015, "step": 57230 }, { "epoch": 0.9673575961400336, "grad_norm": 0.028127888217568398, "learning_rate": 6.1472079960371364e-06, "loss": 0.0011, "step": 57240 }, { "epoch": 0.967526596418884, "grad_norm": 0.08872781693935394, "learning_rate": 6.145772461819441e-06, "loss": 0.0028, "step": 57250 }, { "epoch": 0.9676955966977345, "grad_norm": 0.02356112003326416, "learning_rate": 6.1443368279139905e-06, "loss": 0.0014, "step": 57260 }, { "epoch": 0.967864596976585, "grad_norm": 0.05805081129074097, "learning_rate": 6.142901094445691e-06, "loss": 0.0009, "step": 57270 }, { "epoch": 0.9680335972554355, "grad_norm": 0.06666218489408493, "learning_rate": 6.141465261539459e-06, "loss": 0.0022, "step": 57280 }, { "epoch": 0.968202597534286, "grad_norm": 0.09788259863853455, "learning_rate": 6.140029329320217e-06, "loss": 0.0014, "step": 57290 }, { "epoch": 0.9683715978131364, "grad_norm": 0.05695125833153725, "learning_rate": 6.138593297912901e-06, "loss": 0.0008, "step": 57300 }, { "epoch": 0.9685405980919869, "grad_norm": 0.03995615988969803, "learning_rate": 6.137157167442452e-06, "loss": 0.0014, "step": 57310 }, { "epoch": 0.9687095983708374, "grad_norm": 0.09380437433719635, "learning_rate": 6.13572093803382e-06, "loss": 0.001, "step": 57320 }, { "epoch": 0.9688785986496877, "grad_norm": 0.1397034078836441, "learning_rate": 6.1342846098119635e-06, "loss": 0.0013, "step": 57330 }, { "epoch": 0.9690475989285382, "grad_norm": 0.10551384091377258, "learning_rate": 6.132848182901851e-06, "loss": 0.0013, "step": 57340 }, { "epoch": 0.9692165992073887, "grad_norm": 0.07721326500177383, "learning_rate": 6.131411657428454e-06, "loss": 0.0026, "step": 57350 }, { "epoch": 0.9693855994862391, "grad_norm": 0.0641603097319603, "learning_rate": 6.129975033516765e-06, "loss": 0.0014, "step": 57360 }, { "epoch": 0.9695545997650896, "grad_norm": 0.0963938981294632, "learning_rate": 6.128538311291772e-06, "loss": 0.0015, "step": 57370 }, { "epoch": 0.9697236000439401, "grad_norm": 0.009676202200353146, "learning_rate": 6.127101490878478e-06, "loss": 0.0019, "step": 57380 }, { "epoch": 0.9698926003227906, "grad_norm": 0.07981002330780029, "learning_rate": 6.125664572401894e-06, "loss": 0.0014, "step": 57390 }, { "epoch": 0.970061600601641, "grad_norm": 0.2350415140390396, "learning_rate": 6.124227555987037e-06, "loss": 0.0027, "step": 57400 }, { "epoch": 0.9702306008804914, "grad_norm": 0.040543004870414734, "learning_rate": 6.122790441758937e-06, "loss": 0.0009, "step": 57410 }, { "epoch": 0.9703996011593419, "grad_norm": 0.03330293297767639, "learning_rate": 6.121353229842627e-06, "loss": 0.0012, "step": 57420 }, { "epoch": 0.9705686014381923, "grad_norm": 0.07130574434995651, "learning_rate": 6.119915920363154e-06, "loss": 0.0015, "step": 57430 }, { "epoch": 0.9707376017170428, "grad_norm": 0.03864027187228203, "learning_rate": 6.118478513445568e-06, "loss": 0.0016, "step": 57440 }, { "epoch": 0.9709066019958933, "grad_norm": 0.09162461757659912, "learning_rate": 6.117041009214935e-06, "loss": 0.0025, "step": 57450 }, { "epoch": 0.9710756022747438, "grad_norm": 0.0017001412343233824, "learning_rate": 6.11560340779632e-06, "loss": 0.0015, "step": 57460 }, { "epoch": 0.9712446025535942, "grad_norm": 0.19290043413639069, "learning_rate": 6.114165709314805e-06, "loss": 0.0014, "step": 57470 }, { "epoch": 0.9714136028324447, "grad_norm": 0.02218267321586609, "learning_rate": 6.112727913895473e-06, "loss": 0.0015, "step": 57480 }, { "epoch": 0.9715826031112952, "grad_norm": 0.27840378880500793, "learning_rate": 6.111290021663423e-06, "loss": 0.0024, "step": 57490 }, { "epoch": 0.9717516033901455, "grad_norm": 0.11960061639547348, "learning_rate": 6.109852032743756e-06, "loss": 0.0017, "step": 57500 }, { "epoch": 0.971920603668996, "grad_norm": 0.22218191623687744, "learning_rate": 6.108413947261585e-06, "loss": 0.0023, "step": 57510 }, { "epoch": 0.9720896039478465, "grad_norm": 0.062080252915620804, "learning_rate": 6.10697576534203e-06, "loss": 0.002, "step": 57520 }, { "epoch": 0.972258604226697, "grad_norm": 0.03579581528902054, "learning_rate": 6.105537487110219e-06, "loss": 0.0019, "step": 57530 }, { "epoch": 0.9724276045055474, "grad_norm": 0.05509597808122635, "learning_rate": 6.1040991126912906e-06, "loss": 0.0017, "step": 57540 }, { "epoch": 0.9725966047843979, "grad_norm": 0.08803009986877441, "learning_rate": 6.10266064221039e-06, "loss": 0.002, "step": 57550 }, { "epoch": 0.9727656050632484, "grad_norm": 0.05634976923465729, "learning_rate": 6.101222075792671e-06, "loss": 0.0011, "step": 57560 }, { "epoch": 0.9729346053420989, "grad_norm": 0.04451654478907585, "learning_rate": 6.0997834135632976e-06, "loss": 0.0017, "step": 57570 }, { "epoch": 0.9731036056209492, "grad_norm": 0.2588036358356476, "learning_rate": 6.098344655647437e-06, "loss": 0.0029, "step": 57580 }, { "epoch": 0.9732726058997997, "grad_norm": 0.035599395632743835, "learning_rate": 6.09690580217027e-06, "loss": 0.001, "step": 57590 }, { "epoch": 0.9734416061786502, "grad_norm": 0.015586217865347862, "learning_rate": 6.095466853256984e-06, "loss": 0.001, "step": 57600 }, { "epoch": 0.9736106064575006, "grad_norm": 0.05883041396737099, "learning_rate": 6.094027809032774e-06, "loss": 0.0016, "step": 57610 }, { "epoch": 0.9737796067363511, "grad_norm": 0.10953272879123688, "learning_rate": 6.092588669622843e-06, "loss": 0.0023, "step": 57620 }, { "epoch": 0.9739486070152016, "grad_norm": 0.4857546389102936, "learning_rate": 6.091149435152406e-06, "loss": 0.0084, "step": 57630 }, { "epoch": 0.9741176072940521, "grad_norm": 0.018154164776206017, "learning_rate": 6.089710105746679e-06, "loss": 0.0015, "step": 57640 }, { "epoch": 0.9742866075729025, "grad_norm": 0.09440393000841141, "learning_rate": 6.0882706815308945e-06, "loss": 0.0011, "step": 57650 }, { "epoch": 0.974455607851753, "grad_norm": 0.032431937754154205, "learning_rate": 6.086831162630287e-06, "loss": 0.001, "step": 57660 }, { "epoch": 0.9746246081306034, "grad_norm": 0.08695471286773682, "learning_rate": 6.085391549170103e-06, "loss": 0.0028, "step": 57670 }, { "epoch": 0.9747936084094538, "grad_norm": 0.0596468411386013, "learning_rate": 6.083951841275596e-06, "loss": 0.0015, "step": 57680 }, { "epoch": 0.9749626086883043, "grad_norm": 0.017689548432826996, "learning_rate": 6.082512039072027e-06, "loss": 0.0013, "step": 57690 }, { "epoch": 0.9751316089671548, "grad_norm": 0.054335687309503555, "learning_rate": 6.081072142684665e-06, "loss": 0.0012, "step": 57700 }, { "epoch": 0.9753006092460053, "grad_norm": 0.10406859964132309, "learning_rate": 6.079632152238789e-06, "loss": 0.0022, "step": 57710 }, { "epoch": 0.9754696095248557, "grad_norm": 0.12383397668600082, "learning_rate": 6.078192067859684e-06, "loss": 0.0008, "step": 57720 }, { "epoch": 0.9756386098037062, "grad_norm": 0.04708423092961311, "learning_rate": 6.076751889672645e-06, "loss": 0.0034, "step": 57730 }, { "epoch": 0.9758076100825567, "grad_norm": 0.06755197048187256, "learning_rate": 6.0753116178029756e-06, "loss": 0.002, "step": 57740 }, { "epoch": 0.9759766103614071, "grad_norm": 0.08884324133396149, "learning_rate": 6.073871252375986e-06, "loss": 0.0007, "step": 57750 }, { "epoch": 0.9761456106402575, "grad_norm": 0.001903891796246171, "learning_rate": 6.072430793516993e-06, "loss": 0.0016, "step": 57760 }, { "epoch": 0.976314610919108, "grad_norm": 0.09241598099470139, "learning_rate": 6.070990241351327e-06, "loss": 0.0014, "step": 57770 }, { "epoch": 0.9764836111979585, "grad_norm": 0.09973130375146866, "learning_rate": 6.069549596004319e-06, "loss": 0.0018, "step": 57780 }, { "epoch": 0.9766526114768089, "grad_norm": 0.02484247088432312, "learning_rate": 6.0681088576013144e-06, "loss": 0.002, "step": 57790 }, { "epoch": 0.9768216117556594, "grad_norm": 0.031424183398485184, "learning_rate": 6.066668026267664e-06, "loss": 0.0014, "step": 57800 }, { "epoch": 0.9769906120345099, "grad_norm": 0.0393252931535244, "learning_rate": 6.065227102128728e-06, "loss": 0.0008, "step": 57810 }, { "epoch": 0.9771596123133603, "grad_norm": 0.0469253845512867, "learning_rate": 6.0637860853098714e-06, "loss": 0.0009, "step": 57820 }, { "epoch": 0.9773286125922108, "grad_norm": 0.10950836539268494, "learning_rate": 6.062344975936472e-06, "loss": 0.0042, "step": 57830 }, { "epoch": 0.9774976128710612, "grad_norm": 0.042595747858285904, "learning_rate": 6.060903774133913e-06, "loss": 0.0011, "step": 57840 }, { "epoch": 0.9776666131499117, "grad_norm": 0.038873203098773956, "learning_rate": 6.059462480027585e-06, "loss": 0.0017, "step": 57850 }, { "epoch": 0.9778356134287621, "grad_norm": 0.02171887271106243, "learning_rate": 6.058021093742888e-06, "loss": 0.0016, "step": 57860 }, { "epoch": 0.9780046137076126, "grad_norm": 0.05608280748128891, "learning_rate": 6.05657961540523e-06, "loss": 0.0014, "step": 57870 }, { "epoch": 0.9781736139864631, "grad_norm": 0.03701461851596832, "learning_rate": 6.055138045140027e-06, "loss": 0.0008, "step": 57880 }, { "epoch": 0.9783426142653135, "grad_norm": 0.012678167782723904, "learning_rate": 6.0536963830727e-06, "loss": 0.0048, "step": 57890 }, { "epoch": 0.978511614544164, "grad_norm": 0.006651062052696943, "learning_rate": 6.0522546293286845e-06, "loss": 0.0016, "step": 57900 }, { "epoch": 0.9786806148230145, "grad_norm": 0.08401114493608475, "learning_rate": 6.050812784033417e-06, "loss": 0.001, "step": 57910 }, { "epoch": 0.978849615101865, "grad_norm": 0.02398318611085415, "learning_rate": 6.049370847312345e-06, "loss": 0.0015, "step": 57920 }, { "epoch": 0.9790186153807153, "grad_norm": 0.05877359211444855, "learning_rate": 6.047928819290925e-06, "loss": 0.0019, "step": 57930 }, { "epoch": 0.9791876156595658, "grad_norm": 0.037036243826150894, "learning_rate": 6.046486700094621e-06, "loss": 0.0019, "step": 57940 }, { "epoch": 0.9793566159384163, "grad_norm": 0.1200292557477951, "learning_rate": 6.045044489848904e-06, "loss": 0.0013, "step": 57950 }, { "epoch": 0.9795256162172667, "grad_norm": 0.004075548145920038, "learning_rate": 6.043602188679253e-06, "loss": 0.0011, "step": 57960 }, { "epoch": 0.9796946164961172, "grad_norm": 0.00037315511144697666, "learning_rate": 6.042159796711156e-06, "loss": 0.0014, "step": 57970 }, { "epoch": 0.9798636167749677, "grad_norm": 0.05303901433944702, "learning_rate": 6.040717314070106e-06, "loss": 0.0027, "step": 57980 }, { "epoch": 0.9800326170538182, "grad_norm": 0.19602589309215546, "learning_rate": 6.039274740881607e-06, "loss": 0.0019, "step": 57990 }, { "epoch": 0.9802016173326686, "grad_norm": 0.028596512973308563, "learning_rate": 6.037832077271172e-06, "loss": 0.0013, "step": 58000 }, { "epoch": 0.9803706176115191, "grad_norm": 0.20253531634807587, "learning_rate": 6.0363893233643155e-06, "loss": 0.0016, "step": 58010 }, { "epoch": 0.9805396178903695, "grad_norm": 0.061884328722953796, "learning_rate": 6.0349464792865675e-06, "loss": 0.0009, "step": 58020 }, { "epoch": 0.98070861816922, "grad_norm": 0.05780744552612305, "learning_rate": 6.033503545163462e-06, "loss": 0.002, "step": 58030 }, { "epoch": 0.9808776184480704, "grad_norm": 0.028961317613720894, "learning_rate": 6.03206052112054e-06, "loss": 0.0019, "step": 58040 }, { "epoch": 0.9810466187269209, "grad_norm": 0.07479706406593323, "learning_rate": 6.0306174072833525e-06, "loss": 0.0013, "step": 58050 }, { "epoch": 0.9812156190057714, "grad_norm": 0.004724375903606415, "learning_rate": 6.0291742037774555e-06, "loss": 0.0022, "step": 58060 }, { "epoch": 0.9813846192846218, "grad_norm": 0.022922640666365623, "learning_rate": 6.0277309107284174e-06, "loss": 0.0011, "step": 58070 }, { "epoch": 0.9815536195634723, "grad_norm": 0.03456398844718933, "learning_rate": 6.026287528261812e-06, "loss": 0.0016, "step": 58080 }, { "epoch": 0.9817226198423228, "grad_norm": 0.07966052740812302, "learning_rate": 6.024844056503218e-06, "loss": 0.0016, "step": 58090 }, { "epoch": 0.9818916201211731, "grad_norm": 0.06790140271186829, "learning_rate": 6.023400495578226e-06, "loss": 0.0014, "step": 58100 }, { "epoch": 0.9820606204000236, "grad_norm": 0.06890040636062622, "learning_rate": 6.021956845612432e-06, "loss": 0.0008, "step": 58110 }, { "epoch": 0.9822296206788741, "grad_norm": 0.008626767434179783, "learning_rate": 6.02051310673144e-06, "loss": 0.0021, "step": 58120 }, { "epoch": 0.9823986209577246, "grad_norm": 0.07097417861223221, "learning_rate": 6.019069279060864e-06, "loss": 0.0015, "step": 58130 }, { "epoch": 0.982567621236575, "grad_norm": 0.03134439140558243, "learning_rate": 6.017625362726324e-06, "loss": 0.0018, "step": 58140 }, { "epoch": 0.9827366215154255, "grad_norm": 0.0705198273062706, "learning_rate": 6.016181357853447e-06, "loss": 0.0021, "step": 58150 }, { "epoch": 0.982905621794276, "grad_norm": 0.046758249402046204, "learning_rate": 6.014737264567867e-06, "loss": 0.002, "step": 58160 }, { "epoch": 0.9830746220731265, "grad_norm": 0.05306238308548927, "learning_rate": 6.01329308299523e-06, "loss": 0.0014, "step": 58170 }, { "epoch": 0.9832436223519769, "grad_norm": 0.009773504920303822, "learning_rate": 6.011848813261184e-06, "loss": 0.0021, "step": 58180 }, { "epoch": 0.9834126226308273, "grad_norm": 0.07215312123298645, "learning_rate": 6.01040445549139e-06, "loss": 0.0019, "step": 58190 }, { "epoch": 0.9835816229096778, "grad_norm": 0.107475645840168, "learning_rate": 6.008960009811512e-06, "loss": 0.0019, "step": 58200 }, { "epoch": 0.9837506231885282, "grad_norm": 0.009603841230273247, "learning_rate": 6.007515476347223e-06, "loss": 0.0018, "step": 58210 }, { "epoch": 0.9839196234673787, "grad_norm": 0.03349253535270691, "learning_rate": 6.006070855224209e-06, "loss": 0.0011, "step": 58220 }, { "epoch": 0.9840886237462292, "grad_norm": 0.17738290131092072, "learning_rate": 6.004626146568154e-06, "loss": 0.0022, "step": 58230 }, { "epoch": 0.9842576240250797, "grad_norm": 0.039686206728219986, "learning_rate": 6.003181350504758e-06, "loss": 0.0014, "step": 58240 }, { "epoch": 0.9844266243039301, "grad_norm": 0.03419140726327896, "learning_rate": 6.001736467159723e-06, "loss": 0.0016, "step": 58250 }, { "epoch": 0.9845956245827806, "grad_norm": 0.07928406447172165, "learning_rate": 6.000291496658763e-06, "loss": 0.0025, "step": 58260 }, { "epoch": 0.9847646248616311, "grad_norm": 0.22952072322368622, "learning_rate": 5.998846439127596e-06, "loss": 0.0023, "step": 58270 }, { "epoch": 0.9849336251404814, "grad_norm": 0.07492335885763168, "learning_rate": 5.997401294691949e-06, "loss": 0.0021, "step": 58280 }, { "epoch": 0.9851026254193319, "grad_norm": 0.09141848236322403, "learning_rate": 5.995956063477555e-06, "loss": 0.0062, "step": 58290 }, { "epoch": 0.9852716256981824, "grad_norm": 0.022302517667412758, "learning_rate": 5.9945107456101605e-06, "loss": 0.0012, "step": 58300 }, { "epoch": 0.9854406259770329, "grad_norm": 0.06388254463672638, "learning_rate": 5.99306534121551e-06, "loss": 0.0014, "step": 58310 }, { "epoch": 0.9856096262558833, "grad_norm": 0.02491978369653225, "learning_rate": 5.991619850419365e-06, "loss": 0.002, "step": 58320 }, { "epoch": 0.9857786265347338, "grad_norm": 0.12101317942142487, "learning_rate": 5.99017427334749e-06, "loss": 0.0016, "step": 58330 }, { "epoch": 0.9859476268135843, "grad_norm": 0.03233025595545769, "learning_rate": 5.988728610125653e-06, "loss": 0.001, "step": 58340 }, { "epoch": 0.9861166270924347, "grad_norm": 0.09366673231124878, "learning_rate": 5.987282860879638e-06, "loss": 0.0012, "step": 58350 }, { "epoch": 0.9862856273712851, "grad_norm": 0.02704479545354843, "learning_rate": 5.985837025735232e-06, "loss": 0.0018, "step": 58360 }, { "epoch": 0.9864546276501356, "grad_norm": 0.05152517184615135, "learning_rate": 5.984391104818226e-06, "loss": 0.0009, "step": 58370 }, { "epoch": 0.9866236279289861, "grad_norm": 0.0815383568406105, "learning_rate": 5.982945098254425e-06, "loss": 0.0025, "step": 58380 }, { "epoch": 0.9867926282078365, "grad_norm": 0.030310895293951035, "learning_rate": 5.981499006169637e-06, "loss": 0.0008, "step": 58390 }, { "epoch": 0.986961628486687, "grad_norm": 0.054158393293619156, "learning_rate": 5.980052828689681e-06, "loss": 0.0013, "step": 58400 }, { "epoch": 0.9871306287655375, "grad_norm": 0.10451913625001907, "learning_rate": 5.97860656594038e-06, "loss": 0.0034, "step": 58410 }, { "epoch": 0.987299629044388, "grad_norm": 0.062465324997901917, "learning_rate": 5.977160218047567e-06, "loss": 0.0016, "step": 58420 }, { "epoch": 0.9874686293232384, "grad_norm": 0.01912902668118477, "learning_rate": 5.97571378513708e-06, "loss": 0.0011, "step": 58430 }, { "epoch": 0.9876376296020889, "grad_norm": 0.05353707820177078, "learning_rate": 5.974267267334768e-06, "loss": 0.0022, "step": 58440 }, { "epoch": 0.9878066298809393, "grad_norm": 0.16070276498794556, "learning_rate": 5.972820664766481e-06, "loss": 0.0017, "step": 58450 }, { "epoch": 0.9879756301597897, "grad_norm": 0.021296484395861626, "learning_rate": 5.971373977558084e-06, "loss": 0.0016, "step": 58460 }, { "epoch": 0.9881446304386402, "grad_norm": 0.1948113888502121, "learning_rate": 5.969927205835444e-06, "loss": 0.0023, "step": 58470 }, { "epoch": 0.9883136307174907, "grad_norm": 0.16366679966449738, "learning_rate": 5.968480349724438e-06, "loss": 0.0012, "step": 58480 }, { "epoch": 0.9884826309963411, "grad_norm": 0.022841233760118484, "learning_rate": 5.967033409350949e-06, "loss": 0.0013, "step": 58490 }, { "epoch": 0.9886516312751916, "grad_norm": 0.06846968084573746, "learning_rate": 5.965586384840868e-06, "loss": 0.0014, "step": 58500 }, { "epoch": 0.9888206315540421, "grad_norm": 0.10312211513519287, "learning_rate": 5.964139276320092e-06, "loss": 0.0031, "step": 58510 }, { "epoch": 0.9889896318328926, "grad_norm": 0.14728088676929474, "learning_rate": 5.9626920839145295e-06, "loss": 0.0025, "step": 58520 }, { "epoch": 0.9891586321117429, "grad_norm": 0.2092452347278595, "learning_rate": 5.96124480775009e-06, "loss": 0.0091, "step": 58530 }, { "epoch": 0.9893276323905934, "grad_norm": 0.0748872309923172, "learning_rate": 5.959797447952697e-06, "loss": 0.001, "step": 58540 }, { "epoch": 0.9894966326694439, "grad_norm": 0.015515661798417568, "learning_rate": 5.958350004648273e-06, "loss": 0.002, "step": 58550 }, { "epoch": 0.9896656329482943, "grad_norm": 0.042304836213588715, "learning_rate": 5.956902477962757e-06, "loss": 0.001, "step": 58560 }, { "epoch": 0.9898346332271448, "grad_norm": 0.04571213945746422, "learning_rate": 5.955454868022088e-06, "loss": 0.0015, "step": 58570 }, { "epoch": 0.9900036335059953, "grad_norm": 0.1851183921098709, "learning_rate": 5.954007174952217e-06, "loss": 0.0014, "step": 58580 }, { "epoch": 0.9901726337848458, "grad_norm": 0.04956304654479027, "learning_rate": 5.952559398879099e-06, "loss": 0.0014, "step": 58590 }, { "epoch": 0.9903416340636962, "grad_norm": 0.05614320561289787, "learning_rate": 5.951111539928698e-06, "loss": 0.0019, "step": 58600 }, { "epoch": 0.9905106343425467, "grad_norm": 0.019860200583934784, "learning_rate": 5.949663598226985e-06, "loss": 0.001, "step": 58610 }, { "epoch": 0.9906796346213971, "grad_norm": 0.044026993215084076, "learning_rate": 5.948215573899938e-06, "loss": 0.001, "step": 58620 }, { "epoch": 0.9908486349002475, "grad_norm": 0.01446566917002201, "learning_rate": 5.946767467073542e-06, "loss": 0.002, "step": 58630 }, { "epoch": 0.991017635179098, "grad_norm": 0.04005853831768036, "learning_rate": 5.945319277873789e-06, "loss": 0.0016, "step": 58640 }, { "epoch": 0.9911866354579485, "grad_norm": 0.08893624693155289, "learning_rate": 5.943871006426678e-06, "loss": 0.0013, "step": 58650 }, { "epoch": 0.991355635736799, "grad_norm": 0.04670179262757301, "learning_rate": 5.942422652858218e-06, "loss": 0.0011, "step": 58660 }, { "epoch": 0.9915246360156494, "grad_norm": 0.035787198692560196, "learning_rate": 5.940974217294419e-06, "loss": 0.002, "step": 58670 }, { "epoch": 0.9916936362944999, "grad_norm": 0.07763170450925827, "learning_rate": 5.939525699861305e-06, "loss": 0.0012, "step": 58680 }, { "epoch": 0.9918626365733504, "grad_norm": 0.05746553838253021, "learning_rate": 5.938077100684901e-06, "loss": 0.0018, "step": 58690 }, { "epoch": 0.9920316368522009, "grad_norm": 0.010973265394568443, "learning_rate": 5.936628419891247e-06, "loss": 0.0019, "step": 58700 }, { "epoch": 0.9922006371310512, "grad_norm": 0.015557970851659775, "learning_rate": 5.935179657606381e-06, "loss": 0.0012, "step": 58710 }, { "epoch": 0.9923696374099017, "grad_norm": 0.2120949923992157, "learning_rate": 5.933730813956354e-06, "loss": 0.0018, "step": 58720 }, { "epoch": 0.9925386376887522, "grad_norm": 0.03492291271686554, "learning_rate": 5.932281889067223e-06, "loss": 0.0013, "step": 58730 }, { "epoch": 0.9927076379676026, "grad_norm": 0.16607755422592163, "learning_rate": 5.93083288306505e-06, "loss": 0.0044, "step": 58740 }, { "epoch": 0.9928766382464531, "grad_norm": 0.073784738779068, "learning_rate": 5.929383796075906e-06, "loss": 0.0008, "step": 58750 }, { "epoch": 0.9930456385253036, "grad_norm": 0.010416547767817974, "learning_rate": 5.927934628225868e-06, "loss": 0.0012, "step": 58760 }, { "epoch": 0.9932146388041541, "grad_norm": 0.023475607857108116, "learning_rate": 5.926485379641024e-06, "loss": 0.0015, "step": 58770 }, { "epoch": 0.9933836390830045, "grad_norm": 0.23363591730594635, "learning_rate": 5.9250360504474605e-06, "loss": 0.0022, "step": 58780 }, { "epoch": 0.9935526393618549, "grad_norm": 0.3121536374092102, "learning_rate": 5.9235866407712796e-06, "loss": 0.0104, "step": 58790 }, { "epoch": 0.9937216396407054, "grad_norm": 0.09558659046888351, "learning_rate": 5.922137150738585e-06, "loss": 0.0016, "step": 58800 }, { "epoch": 0.9938906399195558, "grad_norm": 0.08474873006343842, "learning_rate": 5.920687580475492e-06, "loss": 0.0017, "step": 58810 }, { "epoch": 0.9940596401984063, "grad_norm": 0.08459119498729706, "learning_rate": 5.919237930108117e-06, "loss": 0.0013, "step": 58820 }, { "epoch": 0.9942286404772568, "grad_norm": 0.06942421197891235, "learning_rate": 5.917788199762589e-06, "loss": 0.0013, "step": 58830 }, { "epoch": 0.9943976407561073, "grad_norm": 0.056626077741384506, "learning_rate": 5.916338389565042e-06, "loss": 0.0019, "step": 58840 }, { "epoch": 0.9945666410349577, "grad_norm": 0.07657656818628311, "learning_rate": 5.9148884996416145e-06, "loss": 0.0009, "step": 58850 }, { "epoch": 0.9947356413138082, "grad_norm": 0.11495871841907501, "learning_rate": 5.913438530118455e-06, "loss": 0.0017, "step": 58860 }, { "epoch": 0.9949046415926587, "grad_norm": 0.08522608876228333, "learning_rate": 5.911988481121717e-06, "loss": 0.0008, "step": 58870 }, { "epoch": 0.995073641871509, "grad_norm": 0.09870191663503647, "learning_rate": 5.9105383527775625e-06, "loss": 0.0022, "step": 58880 }, { "epoch": 0.9952426421503595, "grad_norm": 0.035707537084817886, "learning_rate": 5.909088145212161e-06, "loss": 0.0026, "step": 58890 }, { "epoch": 0.99541164242921, "grad_norm": 0.02863745577633381, "learning_rate": 5.907637858551684e-06, "loss": 0.0013, "step": 58900 }, { "epoch": 0.9955806427080605, "grad_norm": 0.014273157343268394, "learning_rate": 5.906187492922318e-06, "loss": 0.0009, "step": 58910 }, { "epoch": 0.9957496429869109, "grad_norm": 0.04612638056278229, "learning_rate": 5.904737048450249e-06, "loss": 0.0011, "step": 58920 }, { "epoch": 0.9959186432657614, "grad_norm": 0.05339609086513519, "learning_rate": 5.903286525261673e-06, "loss": 0.0011, "step": 58930 }, { "epoch": 0.9960876435446119, "grad_norm": 0.05890113487839699, "learning_rate": 5.901835923482793e-06, "loss": 0.0014, "step": 58940 }, { "epoch": 0.9962566438234624, "grad_norm": 0.03513943403959274, "learning_rate": 5.9003852432398164e-06, "loss": 0.0019, "step": 58950 }, { "epoch": 0.9964256441023128, "grad_norm": 0.009775166399776936, "learning_rate": 5.898934484658963e-06, "loss": 0.0017, "step": 58960 }, { "epoch": 0.9965946443811632, "grad_norm": 0.03837180137634277, "learning_rate": 5.897483647866453e-06, "loss": 0.0018, "step": 58970 }, { "epoch": 0.9967636446600137, "grad_norm": 0.06849581003189087, "learning_rate": 5.8960327329885185e-06, "loss": 0.0018, "step": 58980 }, { "epoch": 0.9969326449388641, "grad_norm": 0.06228544935584068, "learning_rate": 5.894581740151394e-06, "loss": 0.002, "step": 58990 }, { "epoch": 0.9971016452177146, "grad_norm": 0.07643171399831772, "learning_rate": 5.893130669481324e-06, "loss": 0.0012, "step": 59000 }, { "epoch": 0.9972706454965651, "grad_norm": 0.015796547755599022, "learning_rate": 5.891679521104558e-06, "loss": 0.0012, "step": 59010 }, { "epoch": 0.9974396457754156, "grad_norm": 0.03740460425615311, "learning_rate": 5.890228295147353e-06, "loss": 0.0015, "step": 59020 }, { "epoch": 0.997608646054266, "grad_norm": 0.03803800791501999, "learning_rate": 5.8887769917359746e-06, "loss": 0.0013, "step": 59030 }, { "epoch": 0.9977776463331165, "grad_norm": 0.04857852682471275, "learning_rate": 5.887325610996692e-06, "loss": 0.0009, "step": 59040 }, { "epoch": 0.9979466466119669, "grad_norm": 0.009476653300225735, "learning_rate": 5.885874153055779e-06, "loss": 0.0016, "step": 59050 }, { "epoch": 0.9981156468908173, "grad_norm": 0.10072492808103561, "learning_rate": 5.884422618039525e-06, "loss": 0.0021, "step": 59060 }, { "epoch": 0.9982846471696678, "grad_norm": 0.05873139202594757, "learning_rate": 5.882971006074217e-06, "loss": 0.0009, "step": 59070 }, { "epoch": 0.9984536474485183, "grad_norm": 0.1507243812084198, "learning_rate": 5.881519317286153e-06, "loss": 0.0018, "step": 59080 }, { "epoch": 0.9986226477273688, "grad_norm": 0.15657368302345276, "learning_rate": 5.880067551801638e-06, "loss": 0.0015, "step": 59090 }, { "epoch": 0.9987916480062192, "grad_norm": 0.05158833786845207, "learning_rate": 5.878615709746983e-06, "loss": 0.0015, "step": 59100 }, { "epoch": 0.9989606482850697, "grad_norm": 0.1752629280090332, "learning_rate": 5.877163791248503e-06, "loss": 0.0021, "step": 59110 }, { "epoch": 0.9991296485639202, "grad_norm": 0.02186552993953228, "learning_rate": 5.875711796432524e-06, "loss": 0.0008, "step": 59120 }, { "epoch": 0.9992986488427706, "grad_norm": 0.05802800878882408, "learning_rate": 5.874259725425375e-06, "loss": 0.0023, "step": 59130 }, { "epoch": 0.999467649121621, "grad_norm": 0.06508190184831619, "learning_rate": 5.872807578353396e-06, "loss": 0.0011, "step": 59140 }, { "epoch": 0.9996366494004715, "grad_norm": 0.03184705972671509, "learning_rate": 5.871355355342927e-06, "loss": 0.0017, "step": 59150 }, { "epoch": 0.999805649679322, "grad_norm": 0.026573611423373222, "learning_rate": 5.869903056520321e-06, "loss": 0.0013, "step": 59160 }, { "epoch": 0.9999746499581724, "grad_norm": 0.14917759597301483, "learning_rate": 5.868450682011932e-06, "loss": 0.0007, "step": 59170 }, { "epoch": 1.000143650237023, "grad_norm": 0.03130548819899559, "learning_rate": 5.8669982319441274e-06, "loss": 0.0005, "step": 59180 }, { "epoch": 1.0003126505158733, "grad_norm": 0.01425376906991005, "learning_rate": 5.865545706443277e-06, "loss": 0.0014, "step": 59190 }, { "epoch": 1.0004816507947238, "grad_norm": 0.04437197372317314, "learning_rate": 5.864093105635756e-06, "loss": 0.0007, "step": 59200 }, { "epoch": 1.0006506510735742, "grad_norm": 0.17034755647182465, "learning_rate": 5.862640429647948e-06, "loss": 0.0015, "step": 59210 }, { "epoch": 1.0008196513524248, "grad_norm": 0.30543792247772217, "learning_rate": 5.861187678606243e-06, "loss": 0.0018, "step": 59220 }, { "epoch": 1.0009886516312752, "grad_norm": 0.028305659070611, "learning_rate": 5.859734852637038e-06, "loss": 0.001, "step": 59230 }, { "epoch": 1.0011576519101257, "grad_norm": 0.08342143148183823, "learning_rate": 5.858281951866735e-06, "loss": 0.0013, "step": 59240 }, { "epoch": 1.001326652188976, "grad_norm": 0.0451873242855072, "learning_rate": 5.856828976421743e-06, "loss": 0.001, "step": 59250 }, { "epoch": 1.0014956524678267, "grad_norm": 0.0032537258230149746, "learning_rate": 5.855375926428478e-06, "loss": 0.001, "step": 59260 }, { "epoch": 1.001664652746677, "grad_norm": 0.10370821505784988, "learning_rate": 5.853922802013364e-06, "loss": 0.0042, "step": 59270 }, { "epoch": 1.0018336530255274, "grad_norm": 0.04501200094819069, "learning_rate": 5.85246960330283e-06, "loss": 0.0011, "step": 59280 }, { "epoch": 1.002002653304378, "grad_norm": 0.057593826204538345, "learning_rate": 5.851016330423309e-06, "loss": 0.002, "step": 59290 }, { "epoch": 1.0021716535832284, "grad_norm": 0.048292193561792374, "learning_rate": 5.8495629835012436e-06, "loss": 0.0025, "step": 59300 }, { "epoch": 1.002340653862079, "grad_norm": 0.04649476706981659, "learning_rate": 5.848109562663083e-06, "loss": 0.0008, "step": 59310 }, { "epoch": 1.0025096541409293, "grad_norm": 0.024468235671520233, "learning_rate": 5.846656068035281e-06, "loss": 0.0018, "step": 59320 }, { "epoch": 1.0026786544197799, "grad_norm": 0.03331344947218895, "learning_rate": 5.8452024997443e-06, "loss": 0.0017, "step": 59330 }, { "epoch": 1.0028476546986302, "grad_norm": 0.12571880221366882, "learning_rate": 5.843748857916605e-06, "loss": 0.0008, "step": 59340 }, { "epoch": 1.0030166549774806, "grad_norm": 0.12865477800369263, "learning_rate": 5.842295142678671e-06, "loss": 0.0018, "step": 59350 }, { "epoch": 1.0031856552563312, "grad_norm": 0.03438074141740799, "learning_rate": 5.84084135415698e-06, "loss": 0.0009, "step": 59360 }, { "epoch": 1.0033546555351815, "grad_norm": 0.09085816144943237, "learning_rate": 5.839387492478016e-06, "loss": 0.0023, "step": 59370 }, { "epoch": 1.0035236558140321, "grad_norm": 0.00895149540156126, "learning_rate": 5.837933557768274e-06, "loss": 0.0013, "step": 59380 }, { "epoch": 1.0036926560928825, "grad_norm": 0.22068193554878235, "learning_rate": 5.836479550154253e-06, "loss": 0.0026, "step": 59390 }, { "epoch": 1.003861656371733, "grad_norm": 0.0855739638209343, "learning_rate": 5.8350254697624575e-06, "loss": 0.0032, "step": 59400 }, { "epoch": 1.0040306566505834, "grad_norm": 0.06858029216527939, "learning_rate": 5.8335713167194e-06, "loss": 0.0011, "step": 59410 }, { "epoch": 1.004199656929434, "grad_norm": 0.05948581174015999, "learning_rate": 5.8321170911516e-06, "loss": 0.0013, "step": 59420 }, { "epoch": 1.0043686572082844, "grad_norm": 0.058058999478816986, "learning_rate": 5.830662793185582e-06, "loss": 0.0015, "step": 59430 }, { "epoch": 1.0045376574871347, "grad_norm": 0.08761637657880783, "learning_rate": 5.829208422947875e-06, "loss": 0.0011, "step": 59440 }, { "epoch": 1.0047066577659853, "grad_norm": 0.02783399261534214, "learning_rate": 5.827753980565018e-06, "loss": 0.0009, "step": 59450 }, { "epoch": 1.0048756580448357, "grad_norm": 0.0013959665084257722, "learning_rate": 5.8262994661635544e-06, "loss": 0.0012, "step": 59460 }, { "epoch": 1.0050446583236863, "grad_norm": 0.06014212965965271, "learning_rate": 5.824844879870033e-06, "loss": 0.0009, "step": 59470 }, { "epoch": 1.0052136586025366, "grad_norm": 0.2674904465675354, "learning_rate": 5.823390221811012e-06, "loss": 0.0013, "step": 59480 }, { "epoch": 1.0053826588813872, "grad_norm": 0.010724053718149662, "learning_rate": 5.8219354921130515e-06, "loss": 0.0009, "step": 59490 }, { "epoch": 1.0055516591602376, "grad_norm": 0.14621977508068085, "learning_rate": 5.82048069090272e-06, "loss": 0.0016, "step": 59500 }, { "epoch": 1.0057206594390882, "grad_norm": 0.008767823688685894, "learning_rate": 5.819025818306594e-06, "loss": 0.0012, "step": 59510 }, { "epoch": 1.0058896597179385, "grad_norm": 0.015448343008756638, "learning_rate": 5.8175708744512534e-06, "loss": 0.0016, "step": 59520 }, { "epoch": 1.006058659996789, "grad_norm": 0.03502323105931282, "learning_rate": 5.816115859463286e-06, "loss": 0.0007, "step": 59530 }, { "epoch": 1.0062276602756395, "grad_norm": 0.0019112990703433752, "learning_rate": 5.814660773469283e-06, "loss": 0.0013, "step": 59540 }, { "epoch": 1.0063966605544898, "grad_norm": 0.008734236471354961, "learning_rate": 5.813205616595848e-06, "loss": 0.001, "step": 59550 }, { "epoch": 1.0065656608333404, "grad_norm": 0.03247779235243797, "learning_rate": 5.811750388969582e-06, "loss": 0.001, "step": 59560 }, { "epoch": 1.0067346611121908, "grad_norm": 0.030376799404621124, "learning_rate": 5.8102950907171e-06, "loss": 0.0015, "step": 59570 }, { "epoch": 1.0069036613910414, "grad_norm": 0.16987168788909912, "learning_rate": 5.808839721965019e-06, "loss": 0.0019, "step": 59580 }, { "epoch": 1.0070726616698917, "grad_norm": 0.023650676012039185, "learning_rate": 5.807384282839963e-06, "loss": 0.0013, "step": 59590 }, { "epoch": 1.0072416619487423, "grad_norm": 0.0017415835754945874, "learning_rate": 5.805928773468563e-06, "loss": 0.0016, "step": 59600 }, { "epoch": 1.0074106622275927, "grad_norm": 0.0149134062230587, "learning_rate": 5.804473193977455e-06, "loss": 0.0009, "step": 59610 }, { "epoch": 1.007579662506443, "grad_norm": 0.13160866498947144, "learning_rate": 5.803017544493281e-06, "loss": 0.0028, "step": 59620 }, { "epoch": 1.0077486627852936, "grad_norm": 0.03560006618499756, "learning_rate": 5.801561825142691e-06, "loss": 0.0014, "step": 59630 }, { "epoch": 1.007917663064144, "grad_norm": 0.046323128044605255, "learning_rate": 5.800106036052337e-06, "loss": 0.0012, "step": 59640 }, { "epoch": 1.0080866633429946, "grad_norm": 0.2378297746181488, "learning_rate": 5.798650177348883e-06, "loss": 0.0016, "step": 59650 }, { "epoch": 1.008255663621845, "grad_norm": 0.09480898827314377, "learning_rate": 5.797194249158993e-06, "loss": 0.001, "step": 59660 }, { "epoch": 1.0084246639006955, "grad_norm": 0.05673360824584961, "learning_rate": 5.7957382516093405e-06, "loss": 0.0011, "step": 59670 }, { "epoch": 1.0085936641795459, "grad_norm": 0.03825223073363304, "learning_rate": 5.794282184826605e-06, "loss": 0.0021, "step": 59680 }, { "epoch": 1.0087626644583965, "grad_norm": 0.013609139248728752, "learning_rate": 5.792826048937471e-06, "loss": 0.0009, "step": 59690 }, { "epoch": 1.0089316647372468, "grad_norm": 0.08244311064481735, "learning_rate": 5.79136984406863e-06, "loss": 0.0008, "step": 59700 }, { "epoch": 1.0091006650160972, "grad_norm": 0.06593206524848938, "learning_rate": 5.789913570346778e-06, "loss": 0.0012, "step": 59710 }, { "epoch": 1.0092696652949478, "grad_norm": 0.060266364365816116, "learning_rate": 5.788457227898619e-06, "loss": 0.0017, "step": 59720 }, { "epoch": 1.0094386655737981, "grad_norm": 0.054617322981357574, "learning_rate": 5.787000816850858e-06, "loss": 0.0011, "step": 59730 }, { "epoch": 1.0096076658526487, "grad_norm": 0.04017211124300957, "learning_rate": 5.785544337330214e-06, "loss": 0.0009, "step": 59740 }, { "epoch": 1.009776666131499, "grad_norm": 0.006230284925550222, "learning_rate": 5.784087789463408e-06, "loss": 0.0009, "step": 59750 }, { "epoch": 1.0099456664103497, "grad_norm": 0.050391849130392075, "learning_rate": 5.782631173377166e-06, "loss": 0.0006, "step": 59760 }, { "epoch": 1.0101146666892, "grad_norm": 0.020542792975902557, "learning_rate": 5.781174489198218e-06, "loss": 0.0037, "step": 59770 }, { "epoch": 1.0102836669680504, "grad_norm": 0.030491294339299202, "learning_rate": 5.779717737053306e-06, "loss": 0.001, "step": 59780 }, { "epoch": 1.010452667246901, "grad_norm": 0.027004361152648926, "learning_rate": 5.778260917069172e-06, "loss": 0.001, "step": 59790 }, { "epoch": 1.0106216675257513, "grad_norm": 0.06397947669029236, "learning_rate": 5.776804029372568e-06, "loss": 0.0014, "step": 59800 }, { "epoch": 1.010790667804602, "grad_norm": 0.014559914357960224, "learning_rate": 5.7753470740902495e-06, "loss": 0.0011, "step": 59810 }, { "epoch": 1.0109596680834523, "grad_norm": 0.09837585687637329, "learning_rate": 5.77389005134898e-06, "loss": 0.0012, "step": 59820 }, { "epoch": 1.0111286683623029, "grad_norm": 0.08793274313211441, "learning_rate": 5.772432961275523e-06, "loss": 0.0008, "step": 59830 }, { "epoch": 1.0112976686411532, "grad_norm": 0.10017421096563339, "learning_rate": 5.770975803996659e-06, "loss": 0.0011, "step": 59840 }, { "epoch": 1.0114666689200038, "grad_norm": 0.06319086253643036, "learning_rate": 5.769518579639163e-06, "loss": 0.001, "step": 59850 }, { "epoch": 1.0116356691988542, "grad_norm": 0.004494407679885626, "learning_rate": 5.768061288329823e-06, "loss": 0.0016, "step": 59860 }, { "epoch": 1.0118046694777045, "grad_norm": 0.04238826781511307, "learning_rate": 5.766603930195429e-06, "loss": 0.0013, "step": 59870 }, { "epoch": 1.0119736697565551, "grad_norm": 0.06290490180253983, "learning_rate": 5.76514650536278e-06, "loss": 0.0011, "step": 59880 }, { "epoch": 1.0121426700354055, "grad_norm": 0.06323585659265518, "learning_rate": 5.763689013958677e-06, "loss": 0.0009, "step": 59890 }, { "epoch": 1.012311670314256, "grad_norm": 0.06292340159416199, "learning_rate": 5.76223145610993e-06, "loss": 0.004, "step": 59900 }, { "epoch": 1.0124806705931064, "grad_norm": 0.05352671816945076, "learning_rate": 5.760773831943354e-06, "loss": 0.0013, "step": 59910 }, { "epoch": 1.012649670871957, "grad_norm": 0.049937695264816284, "learning_rate": 5.7593161415857665e-06, "loss": 0.0018, "step": 59920 }, { "epoch": 1.0128186711508074, "grad_norm": 0.06884843111038208, "learning_rate": 5.757858385163997e-06, "loss": 0.0016, "step": 59930 }, { "epoch": 1.012987671429658, "grad_norm": 0.06434638798236847, "learning_rate": 5.756400562804876e-06, "loss": 0.0008, "step": 59940 }, { "epoch": 1.0131566717085083, "grad_norm": 0.1740601807832718, "learning_rate": 5.754942674635241e-06, "loss": 0.0012, "step": 59950 }, { "epoch": 1.0133256719873587, "grad_norm": 0.02064857818186283, "learning_rate": 5.753484720781936e-06, "loss": 0.0007, "step": 59960 }, { "epoch": 1.0134946722662093, "grad_norm": 0.05028160661458969, "learning_rate": 5.752026701371809e-06, "loss": 0.0008, "step": 59970 }, { "epoch": 1.0136636725450596, "grad_norm": 0.05740112438797951, "learning_rate": 5.750568616531716e-06, "loss": 0.0012, "step": 59980 }, { "epoch": 1.0138326728239102, "grad_norm": 0.09930772334337234, "learning_rate": 5.749110466388516e-06, "loss": 0.0017, "step": 59990 }, { "epoch": 1.0140016731027606, "grad_norm": 0.027258582413196564, "learning_rate": 5.747652251069076e-06, "loss": 0.0019, "step": 60000 }, { "epoch": 1.0141706733816112, "grad_norm": 0.020125612616539, "learning_rate": 5.746193970700268e-06, "loss": 0.0011, "step": 60010 }, { "epoch": 1.0143396736604615, "grad_norm": 0.05175361782312393, "learning_rate": 5.744735625408969e-06, "loss": 0.0006, "step": 60020 }, { "epoch": 1.014508673939312, "grad_norm": 0.04280708357691765, "learning_rate": 5.743277215322062e-06, "loss": 0.0008, "step": 60030 }, { "epoch": 1.0146776742181625, "grad_norm": 0.00904067326337099, "learning_rate": 5.741818740566436e-06, "loss": 0.0009, "step": 60040 }, { "epoch": 1.0148466744970128, "grad_norm": 0.021013254299759865, "learning_rate": 5.740360201268986e-06, "loss": 0.0018, "step": 60050 }, { "epoch": 1.0150156747758634, "grad_norm": 0.10922824591398239, "learning_rate": 5.738901597556611e-06, "loss": 0.001, "step": 60060 }, { "epoch": 1.0151846750547138, "grad_norm": 0.09556003659963608, "learning_rate": 5.737442929556217e-06, "loss": 0.0008, "step": 60070 }, { "epoch": 1.0153536753335644, "grad_norm": 0.06434619426727295, "learning_rate": 5.735984197394715e-06, "loss": 0.001, "step": 60080 }, { "epoch": 1.0155226756124147, "grad_norm": 0.350649356842041, "learning_rate": 5.734525401199022e-06, "loss": 0.0015, "step": 60090 }, { "epoch": 1.0156916758912653, "grad_norm": 0.012149405665695667, "learning_rate": 5.733066541096059e-06, "loss": 0.0014, "step": 60100 }, { "epoch": 1.0158606761701157, "grad_norm": 0.04299250245094299, "learning_rate": 5.731607617212758e-06, "loss": 0.0009, "step": 60110 }, { "epoch": 1.0160296764489662, "grad_norm": 0.012249082326889038, "learning_rate": 5.730148629676048e-06, "loss": 0.001, "step": 60120 }, { "epoch": 1.0161986767278166, "grad_norm": 0.052636779844760895, "learning_rate": 5.728689578612868e-06, "loss": 0.0015, "step": 60130 }, { "epoch": 1.016367677006667, "grad_norm": 0.0249900221824646, "learning_rate": 5.727230464150167e-06, "loss": 0.0051, "step": 60140 }, { "epoch": 1.0165366772855176, "grad_norm": 0.10780275613069534, "learning_rate": 5.725771286414889e-06, "loss": 0.0013, "step": 60150 }, { "epoch": 1.016705677564368, "grad_norm": 0.02290751226246357, "learning_rate": 5.724312045533995e-06, "loss": 0.0013, "step": 60160 }, { "epoch": 1.0168746778432185, "grad_norm": 0.11151617765426636, "learning_rate": 5.722852741634444e-06, "loss": 0.001, "step": 60170 }, { "epoch": 1.0170436781220689, "grad_norm": 0.04342846944928169, "learning_rate": 5.721393374843201e-06, "loss": 0.0009, "step": 60180 }, { "epoch": 1.0172126784009194, "grad_norm": 0.042505376040935516, "learning_rate": 5.71993394528724e-06, "loss": 0.0012, "step": 60190 }, { "epoch": 1.0173816786797698, "grad_norm": 0.029010934755206108, "learning_rate": 5.718474453093537e-06, "loss": 0.0014, "step": 60200 }, { "epoch": 1.0175506789586204, "grad_norm": 0.04606638476252556, "learning_rate": 5.717014898389075e-06, "loss": 0.0011, "step": 60210 }, { "epoch": 1.0177196792374708, "grad_norm": 0.07484077662229538, "learning_rate": 5.715555281300842e-06, "loss": 0.0014, "step": 60220 }, { "epoch": 1.0178886795163211, "grad_norm": 0.023599427193403244, "learning_rate": 5.714095601955833e-06, "loss": 0.0011, "step": 60230 }, { "epoch": 1.0180576797951717, "grad_norm": 0.0500069223344326, "learning_rate": 5.712635860481048e-06, "loss": 0.0016, "step": 60240 }, { "epoch": 1.018226680074022, "grad_norm": 0.03526817262172699, "learning_rate": 5.711176057003491e-06, "loss": 0.0018, "step": 60250 }, { "epoch": 1.0183956803528726, "grad_norm": 0.030725175514817238, "learning_rate": 5.709716191650171e-06, "loss": 0.0004, "step": 60260 }, { "epoch": 1.018564680631723, "grad_norm": 0.0388350747525692, "learning_rate": 5.708256264548102e-06, "loss": 0.0022, "step": 60270 }, { "epoch": 1.0187336809105736, "grad_norm": 0.0803573951125145, "learning_rate": 5.706796275824308e-06, "loss": 0.0024, "step": 60280 }, { "epoch": 1.018902681189424, "grad_norm": 0.021474266424775124, "learning_rate": 5.705336225605813e-06, "loss": 0.0011, "step": 60290 }, { "epoch": 1.0190716814682743, "grad_norm": 0.5377752184867859, "learning_rate": 5.703876114019649e-06, "loss": 0.0018, "step": 60300 }, { "epoch": 1.019240681747125, "grad_norm": 0.006219279952347279, "learning_rate": 5.7024159411928516e-06, "loss": 0.0023, "step": 60310 }, { "epoch": 1.0194096820259753, "grad_norm": 0.07081045210361481, "learning_rate": 5.700955707252465e-06, "loss": 0.0016, "step": 60320 }, { "epoch": 1.0195786823048258, "grad_norm": 0.029531359672546387, "learning_rate": 5.699495412325535e-06, "loss": 0.0013, "step": 60330 }, { "epoch": 1.0197476825836762, "grad_norm": 0.01691374182701111, "learning_rate": 5.698035056539117e-06, "loss": 0.0009, "step": 60340 }, { "epoch": 1.0199166828625268, "grad_norm": 0.09221971035003662, "learning_rate": 5.6965746400202646e-06, "loss": 0.0009, "step": 60350 }, { "epoch": 1.0200856831413772, "grad_norm": 0.03064112365245819, "learning_rate": 5.695114162896044e-06, "loss": 0.0015, "step": 60360 }, { "epoch": 1.0202546834202277, "grad_norm": 0.1137605831027031, "learning_rate": 5.693653625293524e-06, "loss": 0.0012, "step": 60370 }, { "epoch": 1.020423683699078, "grad_norm": 0.04199506342411041, "learning_rate": 5.6921930273397785e-06, "loss": 0.0011, "step": 60380 }, { "epoch": 1.0205926839779285, "grad_norm": 0.10365763306617737, "learning_rate": 5.6907323691618845e-06, "loss": 0.0018, "step": 60390 }, { "epoch": 1.020761684256779, "grad_norm": 0.15501956641674042, "learning_rate": 5.689271650886928e-06, "loss": 0.0013, "step": 60400 }, { "epoch": 1.0209306845356294, "grad_norm": 0.051700349897146225, "learning_rate": 5.687810872641999e-06, "loss": 0.001, "step": 60410 }, { "epoch": 1.02109968481448, "grad_norm": 0.05651358515024185, "learning_rate": 5.68635003455419e-06, "loss": 0.0008, "step": 60420 }, { "epoch": 1.0212686850933304, "grad_norm": 0.04288886487483978, "learning_rate": 5.684889136750604e-06, "loss": 0.001, "step": 60430 }, { "epoch": 1.021437685372181, "grad_norm": 0.043509211391210556, "learning_rate": 5.683428179358344e-06, "loss": 0.0008, "step": 60440 }, { "epoch": 1.0216066856510313, "grad_norm": 0.029147949069738388, "learning_rate": 5.6819671625045225e-06, "loss": 0.0009, "step": 60450 }, { "epoch": 1.0217756859298819, "grad_norm": 0.003975256811827421, "learning_rate": 5.680506086316252e-06, "loss": 0.0067, "step": 60460 }, { "epoch": 1.0219446862087322, "grad_norm": 0.08576459437608719, "learning_rate": 5.679044950920656e-06, "loss": 0.0019, "step": 60470 }, { "epoch": 1.0221136864875826, "grad_norm": 0.11685976386070251, "learning_rate": 5.677583756444859e-06, "loss": 0.0011, "step": 60480 }, { "epoch": 1.0222826867664332, "grad_norm": 0.026770997792482376, "learning_rate": 5.676122503015992e-06, "loss": 0.0011, "step": 60490 }, { "epoch": 1.0224516870452836, "grad_norm": 0.07476416230201721, "learning_rate": 5.674661190761191e-06, "loss": 0.0018, "step": 60500 }, { "epoch": 1.0226206873241341, "grad_norm": 0.21177829802036285, "learning_rate": 5.673199819807598e-06, "loss": 0.001, "step": 60510 }, { "epoch": 1.0227896876029845, "grad_norm": 0.004483386874198914, "learning_rate": 5.6717383902823576e-06, "loss": 0.0011, "step": 60520 }, { "epoch": 1.022958687881835, "grad_norm": 0.01776953786611557, "learning_rate": 5.670276902312625e-06, "loss": 0.0009, "step": 60530 }, { "epoch": 1.0231276881606854, "grad_norm": 0.046271588653326035, "learning_rate": 5.6688153560255525e-06, "loss": 0.0007, "step": 60540 }, { "epoch": 1.023296688439536, "grad_norm": 0.021583637222647667, "learning_rate": 5.6673537515483045e-06, "loss": 0.0013, "step": 60550 }, { "epoch": 1.0234656887183864, "grad_norm": 0.040574200451374054, "learning_rate": 5.6658920890080475e-06, "loss": 0.001, "step": 60560 }, { "epoch": 1.0236346889972368, "grad_norm": 0.04284026473760605, "learning_rate": 5.66443036853195e-06, "loss": 0.0013, "step": 60570 }, { "epoch": 1.0238036892760873, "grad_norm": 0.1469365656375885, "learning_rate": 5.6629685902471935e-06, "loss": 0.0011, "step": 60580 }, { "epoch": 1.0239726895549377, "grad_norm": 0.026097025722265244, "learning_rate": 5.661506754280956e-06, "loss": 0.0016, "step": 60590 }, { "epoch": 1.0241416898337883, "grad_norm": 0.06931482255458832, "learning_rate": 5.660044860760425e-06, "loss": 0.0019, "step": 60600 }, { "epoch": 1.0243106901126386, "grad_norm": 0.029825277626514435, "learning_rate": 5.658582909812795e-06, "loss": 0.0009, "step": 60610 }, { "epoch": 1.0244796903914892, "grad_norm": 0.04135201498866081, "learning_rate": 5.65712090156526e-06, "loss": 0.0005, "step": 60620 }, { "epoch": 1.0246486906703396, "grad_norm": 0.0761294886469841, "learning_rate": 5.655658836145022e-06, "loss": 0.0011, "step": 60630 }, { "epoch": 1.0248176909491902, "grad_norm": 0.12756778299808502, "learning_rate": 5.654196713679291e-06, "loss": 0.0015, "step": 60640 }, { "epoch": 1.0249866912280405, "grad_norm": 0.1018855944275856, "learning_rate": 5.652734534295274e-06, "loss": 0.001, "step": 60650 }, { "epoch": 1.025155691506891, "grad_norm": 0.07063782960176468, "learning_rate": 5.651272298120192e-06, "loss": 0.0015, "step": 60660 }, { "epoch": 1.0253246917857415, "grad_norm": 0.03172563761472702, "learning_rate": 5.6498100052812635e-06, "loss": 0.0013, "step": 60670 }, { "epoch": 1.0254936920645918, "grad_norm": 0.06417392939329147, "learning_rate": 5.648347655905716e-06, "loss": 0.0037, "step": 60680 }, { "epoch": 1.0256626923434424, "grad_norm": 0.02979256398975849, "learning_rate": 5.6468852501207816e-06, "loss": 0.0003, "step": 60690 }, { "epoch": 1.0258316926222928, "grad_norm": 0.1255546510219574, "learning_rate": 5.6454227880536945e-06, "loss": 0.0012, "step": 60700 }, { "epoch": 1.0260006929011434, "grad_norm": 0.017972994595766068, "learning_rate": 5.6439602698316985e-06, "loss": 0.0012, "step": 60710 }, { "epoch": 1.0261696931799937, "grad_norm": 0.004161244723945856, "learning_rate": 5.64249769558204e-06, "loss": 0.0011, "step": 60720 }, { "epoch": 1.026338693458844, "grad_norm": 0.0404852032661438, "learning_rate": 5.641035065431969e-06, "loss": 0.0013, "step": 60730 }, { "epoch": 1.0265076937376947, "grad_norm": 0.11190905421972275, "learning_rate": 5.639572379508741e-06, "loss": 0.0012, "step": 60740 }, { "epoch": 1.026676694016545, "grad_norm": 0.06214538961648941, "learning_rate": 5.6381096379396174e-06, "loss": 0.0013, "step": 60750 }, { "epoch": 1.0268456942953956, "grad_norm": 0.09106887876987457, "learning_rate": 5.636646840851863e-06, "loss": 0.0008, "step": 60760 }, { "epoch": 1.027014694574246, "grad_norm": 0.027696475386619568, "learning_rate": 5.6351839883727485e-06, "loss": 0.0009, "step": 60770 }, { "epoch": 1.0271836948530966, "grad_norm": 0.0765504464507103, "learning_rate": 5.633721080629551e-06, "loss": 0.0034, "step": 60780 }, { "epoch": 1.027352695131947, "grad_norm": 0.06260436028242111, "learning_rate": 5.632258117749547e-06, "loss": 0.0008, "step": 60790 }, { "epoch": 1.0275216954107975, "grad_norm": 0.0022702463902533054, "learning_rate": 5.630795099860024e-06, "loss": 0.0008, "step": 60800 }, { "epoch": 1.0276906956896479, "grad_norm": 0.04073888063430786, "learning_rate": 5.6293320270882726e-06, "loss": 0.0016, "step": 60810 }, { "epoch": 1.0278596959684982, "grad_norm": 0.022465115413069725, "learning_rate": 5.6278688995615836e-06, "loss": 0.0026, "step": 60820 }, { "epoch": 1.0280286962473488, "grad_norm": 0.06191631779074669, "learning_rate": 5.62640571740726e-06, "loss": 0.0031, "step": 60830 }, { "epoch": 1.0281976965261992, "grad_norm": 0.07303152233362198, "learning_rate": 5.624942480752603e-06, "loss": 0.0016, "step": 60840 }, { "epoch": 1.0283666968050498, "grad_norm": 0.07268793880939484, "learning_rate": 5.623479189724923e-06, "loss": 0.0011, "step": 60850 }, { "epoch": 1.0285356970839001, "grad_norm": 0.015413266606628895, "learning_rate": 5.622015844451533e-06, "loss": 0.0011, "step": 60860 }, { "epoch": 1.0287046973627507, "grad_norm": 0.030530164018273354, "learning_rate": 5.620552445059748e-06, "loss": 0.0006, "step": 60870 }, { "epoch": 1.028873697641601, "grad_norm": 0.1251511573791504, "learning_rate": 5.619088991676895e-06, "loss": 0.0014, "step": 60880 }, { "epoch": 1.0290426979204517, "grad_norm": 0.011009990237653255, "learning_rate": 5.617625484430301e-06, "loss": 0.001, "step": 60890 }, { "epoch": 1.029211698199302, "grad_norm": 0.04360377788543701, "learning_rate": 5.616161923447297e-06, "loss": 0.0014, "step": 60900 }, { "epoch": 1.0293806984781524, "grad_norm": 0.027645142748951912, "learning_rate": 5.614698308855221e-06, "loss": 0.001, "step": 60910 }, { "epoch": 1.029549698757003, "grad_norm": 0.017806977033615112, "learning_rate": 5.6132346407814135e-06, "loss": 0.0007, "step": 60920 }, { "epoch": 1.0297186990358533, "grad_norm": 0.011086252517998219, "learning_rate": 5.61177091935322e-06, "loss": 0.001, "step": 60930 }, { "epoch": 1.029887699314704, "grad_norm": 0.026597237214446068, "learning_rate": 5.610307144697994e-06, "loss": 0.0007, "step": 60940 }, { "epoch": 1.0300566995935543, "grad_norm": 0.04197683557868004, "learning_rate": 5.608843316943089e-06, "loss": 0.0011, "step": 60950 }, { "epoch": 1.0302256998724049, "grad_norm": 0.11756736040115356, "learning_rate": 5.607379436215865e-06, "loss": 0.0009, "step": 60960 }, { "epoch": 1.0303947001512552, "grad_norm": 0.03770684078335762, "learning_rate": 5.605915502643687e-06, "loss": 0.001, "step": 60970 }, { "epoch": 1.0305637004301058, "grad_norm": 0.01879987120628357, "learning_rate": 5.6044515163539244e-06, "loss": 0.001, "step": 60980 }, { "epoch": 1.0307327007089562, "grad_norm": 0.050969868898391724, "learning_rate": 5.602987477473951e-06, "loss": 0.0008, "step": 60990 }, { "epoch": 1.0309017009878065, "grad_norm": 0.1651037186384201, "learning_rate": 5.6015233861311465e-06, "loss": 0.0009, "step": 61000 }, { "epoch": 1.0309017009878065, "eval_loss": 0.001259764190763235, "eval_runtime": 5.7929, "eval_samples_per_second": 34.525, "eval_steps_per_second": 8.631, "step": 61000 }, { "epoch": 1.0310707012666571, "grad_norm": 0.05381014943122864, "learning_rate": 5.600059242452893e-06, "loss": 0.0008, "step": 61010 }, { "epoch": 1.0312397015455075, "grad_norm": 0.17811964452266693, "learning_rate": 5.598595046566579e-06, "loss": 0.0014, "step": 61020 }, { "epoch": 1.031408701824358, "grad_norm": 0.16397219896316528, "learning_rate": 5.597130798599594e-06, "loss": 0.0015, "step": 61030 }, { "epoch": 1.0315777021032084, "grad_norm": 0.1764872968196869, "learning_rate": 5.595666498679337e-06, "loss": 0.0028, "step": 61040 }, { "epoch": 1.031746702382059, "grad_norm": 0.15535208582878113, "learning_rate": 5.594202146933209e-06, "loss": 0.0015, "step": 61050 }, { "epoch": 1.0319157026609094, "grad_norm": 0.046183567494153976, "learning_rate": 5.592737743488614e-06, "loss": 0.0008, "step": 61060 }, { "epoch": 1.03208470293976, "grad_norm": 0.00033106733462773263, "learning_rate": 5.591273288472964e-06, "loss": 0.001, "step": 61070 }, { "epoch": 1.0322537032186103, "grad_norm": 0.11447389423847198, "learning_rate": 5.589808782013673e-06, "loss": 0.0009, "step": 61080 }, { "epoch": 1.0324227034974607, "grad_norm": 0.043528001755476, "learning_rate": 5.58834422423816e-06, "loss": 0.0006, "step": 61090 }, { "epoch": 1.0325917037763113, "grad_norm": 0.04317929968237877, "learning_rate": 5.586879615273849e-06, "loss": 0.0009, "step": 61100 }, { "epoch": 1.0327607040551616, "grad_norm": 0.02791532315313816, "learning_rate": 5.58541495524817e-06, "loss": 0.0007, "step": 61110 }, { "epoch": 1.0329297043340122, "grad_norm": 0.06267747282981873, "learning_rate": 5.58395024428855e-06, "loss": 0.0033, "step": 61120 }, { "epoch": 1.0330987046128626, "grad_norm": 0.08951187878847122, "learning_rate": 5.582485482522432e-06, "loss": 0.0008, "step": 61130 }, { "epoch": 1.0332677048917132, "grad_norm": 0.05569801107048988, "learning_rate": 5.581020670077253e-06, "loss": 0.0017, "step": 61140 }, { "epoch": 1.0334367051705635, "grad_norm": 0.02896769903600216, "learning_rate": 5.579555807080462e-06, "loss": 0.0014, "step": 61150 }, { "epoch": 1.033605705449414, "grad_norm": 0.01657586172223091, "learning_rate": 5.578090893659508e-06, "loss": 0.0012, "step": 61160 }, { "epoch": 1.0337747057282645, "grad_norm": 0.017979320138692856, "learning_rate": 5.576625929941844e-06, "loss": 0.0011, "step": 61170 }, { "epoch": 1.0339437060071148, "grad_norm": 0.1074618473649025, "learning_rate": 5.5751609160549315e-06, "loss": 0.0012, "step": 61180 }, { "epoch": 1.0341127062859654, "grad_norm": 0.061468396335840225, "learning_rate": 5.573695852126232e-06, "loss": 0.0009, "step": 61190 }, { "epoch": 1.0342817065648158, "grad_norm": 0.29444196820259094, "learning_rate": 5.572230738283213e-06, "loss": 0.0009, "step": 61200 }, { "epoch": 1.0344507068436664, "grad_norm": 0.06462724506855011, "learning_rate": 5.570765574653349e-06, "loss": 0.0013, "step": 61210 }, { "epoch": 1.0346197071225167, "grad_norm": 0.12618573009967804, "learning_rate": 5.569300361364114e-06, "loss": 0.0012, "step": 61220 }, { "epoch": 1.0347887074013673, "grad_norm": 0.07626745849847794, "learning_rate": 5.567835098542988e-06, "loss": 0.0011, "step": 61230 }, { "epoch": 1.0349577076802177, "grad_norm": 0.016155019402503967, "learning_rate": 5.5663697863174595e-06, "loss": 0.0013, "step": 61240 }, { "epoch": 1.035126707959068, "grad_norm": 0.016351575031876564, "learning_rate": 5.564904424815014e-06, "loss": 0.0015, "step": 61250 }, { "epoch": 1.0352957082379186, "grad_norm": 0.025634871795773506, "learning_rate": 5.563439014163146e-06, "loss": 0.001, "step": 61260 }, { "epoch": 1.035464708516769, "grad_norm": 0.02750859037041664, "learning_rate": 5.561973554489354e-06, "loss": 0.002, "step": 61270 }, { "epoch": 1.0356337087956196, "grad_norm": 0.06129152700304985, "learning_rate": 5.5605080459211405e-06, "loss": 0.0011, "step": 61280 }, { "epoch": 1.03580270907447, "grad_norm": 0.05943496152758598, "learning_rate": 5.559042488586012e-06, "loss": 0.0008, "step": 61290 }, { "epoch": 1.0359717093533205, "grad_norm": 0.04022928327322006, "learning_rate": 5.557576882611477e-06, "loss": 0.0008, "step": 61300 }, { "epoch": 1.0361407096321709, "grad_norm": 0.04034409672021866, "learning_rate": 5.556111228125053e-06, "loss": 0.0016, "step": 61310 }, { "epoch": 1.0363097099110214, "grad_norm": 0.05896139144897461, "learning_rate": 5.5546455252542564e-06, "loss": 0.0012, "step": 61320 }, { "epoch": 1.0364787101898718, "grad_norm": 0.019437892362475395, "learning_rate": 5.553179774126612e-06, "loss": 0.0004, "step": 61330 }, { "epoch": 1.0366477104687222, "grad_norm": 0.07897966355085373, "learning_rate": 5.551713974869648e-06, "loss": 0.0005, "step": 61340 }, { "epoch": 1.0368167107475728, "grad_norm": 0.022268425673246384, "learning_rate": 5.550248127610894e-06, "loss": 0.001, "step": 61350 }, { "epoch": 1.0369857110264231, "grad_norm": 0.24211256206035614, "learning_rate": 5.5487822324778876e-06, "loss": 0.0016, "step": 61360 }, { "epoch": 1.0371547113052737, "grad_norm": 0.07900585234165192, "learning_rate": 5.547316289598168e-06, "loss": 0.0011, "step": 61370 }, { "epoch": 1.037323711584124, "grad_norm": 0.029902538284659386, "learning_rate": 5.545850299099278e-06, "loss": 0.0009, "step": 61380 }, { "epoch": 1.0374927118629746, "grad_norm": 0.05169609189033508, "learning_rate": 5.5443842611087686e-06, "loss": 0.0012, "step": 61390 }, { "epoch": 1.037661712141825, "grad_norm": 0.05702834203839302, "learning_rate": 5.542918175754191e-06, "loss": 0.0014, "step": 61400 }, { "epoch": 1.0378307124206756, "grad_norm": 0.0235629603266716, "learning_rate": 5.541452043163101e-06, "loss": 0.0008, "step": 61410 }, { "epoch": 1.037999712699526, "grad_norm": 0.011054320260882378, "learning_rate": 5.539985863463061e-06, "loss": 0.0007, "step": 61420 }, { "epoch": 1.0381687129783763, "grad_norm": 0.0698440670967102, "learning_rate": 5.5385196367816345e-06, "loss": 0.0008, "step": 61430 }, { "epoch": 1.038337713257227, "grad_norm": 0.011756319552659988, "learning_rate": 5.5370533632463886e-06, "loss": 0.0008, "step": 61440 }, { "epoch": 1.0385067135360773, "grad_norm": 0.08873260021209717, "learning_rate": 5.5355870429849005e-06, "loss": 0.001, "step": 61450 }, { "epoch": 1.0386757138149278, "grad_norm": 0.040042344480752945, "learning_rate": 5.534120676124743e-06, "loss": 0.0009, "step": 61460 }, { "epoch": 1.0388447140937782, "grad_norm": 0.09399939328432083, "learning_rate": 5.532654262793498e-06, "loss": 0.0016, "step": 61470 }, { "epoch": 1.0390137143726288, "grad_norm": 0.012367842718958855, "learning_rate": 5.531187803118753e-06, "loss": 0.0008, "step": 61480 }, { "epoch": 1.0391827146514792, "grad_norm": 0.023874038830399513, "learning_rate": 5.529721297228094e-06, "loss": 0.0013, "step": 61490 }, { "epoch": 1.0393517149303297, "grad_norm": 0.06434915214776993, "learning_rate": 5.528254745249117e-06, "loss": 0.0017, "step": 61500 }, { "epoch": 1.03952071520918, "grad_norm": 0.0253615640103817, "learning_rate": 5.526788147309417e-06, "loss": 0.0014, "step": 61510 }, { "epoch": 1.0396897154880305, "grad_norm": 0.0545208640396595, "learning_rate": 5.525321503536597e-06, "loss": 0.0016, "step": 61520 }, { "epoch": 1.039858715766881, "grad_norm": 0.19076873362064362, "learning_rate": 5.52385481405826e-06, "loss": 0.0007, "step": 61530 }, { "epoch": 1.0400277160457314, "grad_norm": 0.006800387986004353, "learning_rate": 5.522388079002015e-06, "loss": 0.0007, "step": 61540 }, { "epoch": 1.040196716324582, "grad_norm": 0.003130823839455843, "learning_rate": 5.520921298495479e-06, "loss": 0.0008, "step": 61550 }, { "epoch": 1.0403657166034324, "grad_norm": 0.035810377448797226, "learning_rate": 5.519454472666263e-06, "loss": 0.002, "step": 61560 }, { "epoch": 1.040534716882283, "grad_norm": 0.22395989298820496, "learning_rate": 5.517987601641992e-06, "loss": 0.001, "step": 61570 }, { "epoch": 1.0407037171611333, "grad_norm": 0.01395457424223423, "learning_rate": 5.516520685550291e-06, "loss": 0.002, "step": 61580 }, { "epoch": 1.0408727174399837, "grad_norm": 0.009725483134388924, "learning_rate": 5.515053724518787e-06, "loss": 0.0007, "step": 61590 }, { "epoch": 1.0410417177188342, "grad_norm": 0.008647358976304531, "learning_rate": 5.5135867186751136e-06, "loss": 0.0008, "step": 61600 }, { "epoch": 1.0412107179976846, "grad_norm": 0.09219738841056824, "learning_rate": 5.512119668146907e-06, "loss": 0.0016, "step": 61610 }, { "epoch": 1.0413797182765352, "grad_norm": 0.039802417159080505, "learning_rate": 5.510652573061809e-06, "loss": 0.0023, "step": 61620 }, { "epoch": 1.0415487185553856, "grad_norm": 0.03601793944835663, "learning_rate": 5.509185433547461e-06, "loss": 0.0012, "step": 61630 }, { "epoch": 1.0417177188342361, "grad_norm": 0.05075589194893837, "learning_rate": 5.507718249731514e-06, "loss": 0.0008, "step": 61640 }, { "epoch": 1.0418867191130865, "grad_norm": 0.014225496910512447, "learning_rate": 5.50625102174162e-06, "loss": 0.001, "step": 61650 }, { "epoch": 1.042055719391937, "grad_norm": 0.0011709729442372918, "learning_rate": 5.504783749705435e-06, "loss": 0.0008, "step": 61660 }, { "epoch": 1.0422247196707874, "grad_norm": 0.05055329203605652, "learning_rate": 5.503316433750615e-06, "loss": 0.0011, "step": 61670 }, { "epoch": 1.0423937199496378, "grad_norm": 0.10448620468378067, "learning_rate": 5.501849074004829e-06, "loss": 0.0009, "step": 61680 }, { "epoch": 1.0425627202284884, "grad_norm": 0.08875548839569092, "learning_rate": 5.5003816705957425e-06, "loss": 0.0013, "step": 61690 }, { "epoch": 1.0427317205073388, "grad_norm": 0.022355513647198677, "learning_rate": 5.498914223651025e-06, "loss": 0.001, "step": 61700 }, { "epoch": 1.0429007207861893, "grad_norm": 0.024993648752570152, "learning_rate": 5.497446733298354e-06, "loss": 0.0022, "step": 61710 }, { "epoch": 1.0430697210650397, "grad_norm": 0.03373303636908531, "learning_rate": 5.495979199665405e-06, "loss": 0.0015, "step": 61720 }, { "epoch": 1.0432387213438903, "grad_norm": 0.18401862680912018, "learning_rate": 5.4945116228798645e-06, "loss": 0.0017, "step": 61730 }, { "epoch": 1.0434077216227406, "grad_norm": 0.037265434861183167, "learning_rate": 5.493044003069416e-06, "loss": 0.0007, "step": 61740 }, { "epoch": 1.0435767219015912, "grad_norm": 0.06706123054027557, "learning_rate": 5.491576340361752e-06, "loss": 0.001, "step": 61750 }, { "epoch": 1.0437457221804416, "grad_norm": 0.04308038577437401, "learning_rate": 5.4901086348845615e-06, "loss": 0.0005, "step": 61760 }, { "epoch": 1.043914722459292, "grad_norm": 0.0024284175597131252, "learning_rate": 5.488640886765547e-06, "loss": 0.001, "step": 61770 }, { "epoch": 1.0440837227381425, "grad_norm": 0.035065487027168274, "learning_rate": 5.487173096132408e-06, "loss": 0.0009, "step": 61780 }, { "epoch": 1.044252723016993, "grad_norm": 0.14422401785850525, "learning_rate": 5.4857052631128485e-06, "loss": 0.0012, "step": 61790 }, { "epoch": 1.0444217232958435, "grad_norm": 0.09435462206602097, "learning_rate": 5.484237387834579e-06, "loss": 0.0015, "step": 61800 }, { "epoch": 1.0445907235746938, "grad_norm": 0.0944463238120079, "learning_rate": 5.4827694704253095e-06, "loss": 0.0014, "step": 61810 }, { "epoch": 1.0447597238535444, "grad_norm": 0.028856197372078896, "learning_rate": 5.481301511012758e-06, "loss": 0.0007, "step": 61820 }, { "epoch": 1.0449287241323948, "grad_norm": 0.10152793675661087, "learning_rate": 5.479833509724642e-06, "loss": 0.0023, "step": 61830 }, { "epoch": 1.0450977244112454, "grad_norm": 0.023587454110383987, "learning_rate": 5.478365466688687e-06, "loss": 0.0007, "step": 61840 }, { "epoch": 1.0452667246900957, "grad_norm": 0.0464615672826767, "learning_rate": 5.476897382032615e-06, "loss": 0.0008, "step": 61850 }, { "epoch": 1.045435724968946, "grad_norm": 0.015667250379920006, "learning_rate": 5.4754292558841635e-06, "loss": 0.0013, "step": 61860 }, { "epoch": 1.0456047252477967, "grad_norm": 0.04416114091873169, "learning_rate": 5.473961088371064e-06, "loss": 0.0011, "step": 61870 }, { "epoch": 1.045773725526647, "grad_norm": 0.05349062383174896, "learning_rate": 5.472492879621052e-06, "loss": 0.0044, "step": 61880 }, { "epoch": 1.0459427258054976, "grad_norm": 0.03349825739860535, "learning_rate": 5.471024629761869e-06, "loss": 0.0008, "step": 61890 }, { "epoch": 1.046111726084348, "grad_norm": 0.02115381322801113, "learning_rate": 5.469556338921263e-06, "loss": 0.0005, "step": 61900 }, { "epoch": 1.0462807263631986, "grad_norm": 0.06258860230445862, "learning_rate": 5.468088007226979e-06, "loss": 0.0005, "step": 61910 }, { "epoch": 1.046449726642049, "grad_norm": 0.03726901859045029, "learning_rate": 5.466619634806771e-06, "loss": 0.001, "step": 61920 }, { "epoch": 1.0466187269208995, "grad_norm": 0.01326848566532135, "learning_rate": 5.465151221788395e-06, "loss": 0.0014, "step": 61930 }, { "epoch": 1.0467877271997499, "grad_norm": 0.04024356231093407, "learning_rate": 5.463682768299608e-06, "loss": 0.001, "step": 61940 }, { "epoch": 1.0469567274786002, "grad_norm": 0.10139014571905136, "learning_rate": 5.462214274468173e-06, "loss": 0.0016, "step": 61950 }, { "epoch": 1.0471257277574508, "grad_norm": 0.18097710609436035, "learning_rate": 5.4607457404218575e-06, "loss": 0.0031, "step": 61960 }, { "epoch": 1.0472947280363012, "grad_norm": 0.05461236461997032, "learning_rate": 5.45927716628843e-06, "loss": 0.0008, "step": 61970 }, { "epoch": 1.0474637283151518, "grad_norm": 0.03354823216795921, "learning_rate": 5.457808552195664e-06, "loss": 0.0006, "step": 61980 }, { "epoch": 1.0476327285940021, "grad_norm": 0.07403817772865295, "learning_rate": 5.456339898271335e-06, "loss": 0.0019, "step": 61990 }, { "epoch": 1.0478017288728527, "grad_norm": 0.044637531042099, "learning_rate": 5.454871204643226e-06, "loss": 0.001, "step": 62000 }, { "epoch": 1.047970729151703, "grad_norm": 0.029047558084130287, "learning_rate": 5.453402471439117e-06, "loss": 0.0014, "step": 62010 }, { "epoch": 1.0481397294305537, "grad_norm": 0.14039036631584167, "learning_rate": 5.451933698786796e-06, "loss": 0.0011, "step": 62020 }, { "epoch": 1.048308729709404, "grad_norm": 0.0889308899641037, "learning_rate": 5.450464886814053e-06, "loss": 0.0011, "step": 62030 }, { "epoch": 1.0484777299882544, "grad_norm": 0.061940066516399384, "learning_rate": 5.448996035648682e-06, "loss": 0.0009, "step": 62040 }, { "epoch": 1.048646730267105, "grad_norm": 0.053737200796604156, "learning_rate": 5.447527145418482e-06, "loss": 0.0016, "step": 62050 }, { "epoch": 1.0488157305459553, "grad_norm": 0.04960788041353226, "learning_rate": 5.446058216251251e-06, "loss": 0.0009, "step": 62060 }, { "epoch": 1.048984730824806, "grad_norm": 0.030090223997831345, "learning_rate": 5.444589248274794e-06, "loss": 0.0011, "step": 62070 }, { "epoch": 1.0491537311036563, "grad_norm": 0.01693059131503105, "learning_rate": 5.443120241616919e-06, "loss": 0.0011, "step": 62080 }, { "epoch": 1.0493227313825069, "grad_norm": 0.24588625133037567, "learning_rate": 5.441651196405436e-06, "loss": 0.0019, "step": 62090 }, { "epoch": 1.0494917316613572, "grad_norm": 0.07555793970823288, "learning_rate": 5.4401821127681584e-06, "loss": 0.001, "step": 62100 }, { "epoch": 1.0496607319402078, "grad_norm": 0.03147921338677406, "learning_rate": 5.438712990832905e-06, "loss": 0.0017, "step": 62110 }, { "epoch": 1.0498297322190582, "grad_norm": 0.03343828022480011, "learning_rate": 5.437243830727496e-06, "loss": 0.0014, "step": 62120 }, { "epoch": 1.0499987324979085, "grad_norm": 0.08114682883024216, "learning_rate": 5.435774632579753e-06, "loss": 0.0015, "step": 62130 }, { "epoch": 1.0501677327767591, "grad_norm": 0.08246869593858719, "learning_rate": 5.434305396517508e-06, "loss": 0.0012, "step": 62140 }, { "epoch": 1.0503367330556095, "grad_norm": 0.011487971059978008, "learning_rate": 5.432836122668588e-06, "loss": 0.0007, "step": 62150 }, { "epoch": 1.05050573333446, "grad_norm": 0.019197674468159676, "learning_rate": 5.431366811160829e-06, "loss": 0.0012, "step": 62160 }, { "epoch": 1.0506747336133104, "grad_norm": 0.14968179166316986, "learning_rate": 5.4298974621220665e-06, "loss": 0.0015, "step": 62170 }, { "epoch": 1.050843733892161, "grad_norm": 0.04203634336590767, "learning_rate": 5.428428075680142e-06, "loss": 0.0011, "step": 62180 }, { "epoch": 1.0510127341710114, "grad_norm": 0.041146960109472275, "learning_rate": 5.4269586519629e-06, "loss": 0.001, "step": 62190 }, { "epoch": 1.0511817344498617, "grad_norm": 0.039256054908037186, "learning_rate": 5.425489191098187e-06, "loss": 0.0014, "step": 62200 }, { "epoch": 1.0513507347287123, "grad_norm": 0.0404156930744648, "learning_rate": 5.4240196932138515e-06, "loss": 0.0016, "step": 62210 }, { "epoch": 1.0515197350075627, "grad_norm": 0.03777492046356201, "learning_rate": 5.422550158437749e-06, "loss": 0.0008, "step": 62220 }, { "epoch": 1.0516887352864133, "grad_norm": 0.015118823386728764, "learning_rate": 5.421080586897736e-06, "loss": 0.0005, "step": 62230 }, { "epoch": 1.0518577355652636, "grad_norm": 0.10300882160663605, "learning_rate": 5.419610978721669e-06, "loss": 0.0006, "step": 62240 }, { "epoch": 1.0520267358441142, "grad_norm": 0.08029638230800629, "learning_rate": 5.418141334037415e-06, "loss": 0.0004, "step": 62250 }, { "epoch": 1.0521957361229646, "grad_norm": 0.165598064661026, "learning_rate": 5.416671652972841e-06, "loss": 0.001, "step": 62260 }, { "epoch": 1.0523647364018152, "grad_norm": 0.0262772124260664, "learning_rate": 5.415201935655813e-06, "loss": 0.0019, "step": 62270 }, { "epoch": 1.0525337366806655, "grad_norm": 0.05175478383898735, "learning_rate": 5.413732182214205e-06, "loss": 0.0006, "step": 62280 }, { "epoch": 1.0527027369595159, "grad_norm": 0.2423478364944458, "learning_rate": 5.412262392775893e-06, "loss": 0.0011, "step": 62290 }, { "epoch": 1.0528717372383665, "grad_norm": 0.04322667792439461, "learning_rate": 5.410792567468755e-06, "loss": 0.0015, "step": 62300 }, { "epoch": 1.0530407375172168, "grad_norm": 0.11982213705778122, "learning_rate": 5.409322706420673e-06, "loss": 0.0009, "step": 62310 }, { "epoch": 1.0532097377960674, "grad_norm": 0.054979804903268814, "learning_rate": 5.4078528097595325e-06, "loss": 0.0014, "step": 62320 }, { "epoch": 1.0533787380749178, "grad_norm": 0.0444863960146904, "learning_rate": 5.406382877613221e-06, "loss": 0.0011, "step": 62330 }, { "epoch": 1.0535477383537684, "grad_norm": 0.07523641735315323, "learning_rate": 5.404912910109631e-06, "loss": 0.0018, "step": 62340 }, { "epoch": 1.0537167386326187, "grad_norm": 0.08459766954183578, "learning_rate": 5.403442907376656e-06, "loss": 0.0013, "step": 62350 }, { "epoch": 1.0538857389114693, "grad_norm": 0.11866951733827591, "learning_rate": 5.401972869542193e-06, "loss": 0.0013, "step": 62360 }, { "epoch": 1.0540547391903197, "grad_norm": 0.028530221432447433, "learning_rate": 5.400502796734143e-06, "loss": 0.0021, "step": 62370 }, { "epoch": 1.05422373946917, "grad_norm": 0.0645567923784256, "learning_rate": 5.399032689080409e-06, "loss": 0.0012, "step": 62380 }, { "epoch": 1.0543927397480206, "grad_norm": 0.05980142951011658, "learning_rate": 5.397562546708898e-06, "loss": 0.0014, "step": 62390 }, { "epoch": 1.054561740026871, "grad_norm": 0.05293196439743042, "learning_rate": 5.396092369747517e-06, "loss": 0.0019, "step": 62400 }, { "epoch": 1.0547307403057216, "grad_norm": 0.07668273895978928, "learning_rate": 5.394622158324183e-06, "loss": 0.0014, "step": 62410 }, { "epoch": 1.054899740584572, "grad_norm": 0.04001685231924057, "learning_rate": 5.393151912566809e-06, "loss": 0.0028, "step": 62420 }, { "epoch": 1.0550687408634225, "grad_norm": 0.0355171374976635, "learning_rate": 5.391681632603313e-06, "loss": 0.0009, "step": 62430 }, { "epoch": 1.0552377411422729, "grad_norm": 0.023094194009900093, "learning_rate": 5.390211318561618e-06, "loss": 0.0013, "step": 62440 }, { "epoch": 1.0554067414211235, "grad_norm": 0.021117648109793663, "learning_rate": 5.388740970569647e-06, "loss": 0.0013, "step": 62450 }, { "epoch": 1.0555757416999738, "grad_norm": 0.0840069055557251, "learning_rate": 5.387270588755329e-06, "loss": 0.0013, "step": 62460 }, { "epoch": 1.0557447419788242, "grad_norm": 0.056771233677864075, "learning_rate": 5.385800173246592e-06, "loss": 0.0013, "step": 62470 }, { "epoch": 1.0559137422576748, "grad_norm": 0.06988224387168884, "learning_rate": 5.3843297241713735e-06, "loss": 0.0013, "step": 62480 }, { "epoch": 1.0560827425365251, "grad_norm": 0.07018313556909561, "learning_rate": 5.382859241657605e-06, "loss": 0.0009, "step": 62490 }, { "epoch": 1.0562517428153757, "grad_norm": 0.00574153708294034, "learning_rate": 5.381388725833227e-06, "loss": 0.0016, "step": 62500 }, { "epoch": 1.056420743094226, "grad_norm": 0.06866547465324402, "learning_rate": 5.379918176826182e-06, "loss": 0.0014, "step": 62510 }, { "epoch": 1.0565897433730767, "grad_norm": 0.039436422288417816, "learning_rate": 5.3784475947644156e-06, "loss": 0.0012, "step": 62520 }, { "epoch": 1.056758743651927, "grad_norm": 0.022123584523797035, "learning_rate": 5.3769769797758745e-06, "loss": 0.0013, "step": 62530 }, { "epoch": 1.0569277439307774, "grad_norm": 0.047539424151182175, "learning_rate": 5.375506331988509e-06, "loss": 0.0017, "step": 62540 }, { "epoch": 1.057096744209628, "grad_norm": 0.11156386882066727, "learning_rate": 5.3740356515302735e-06, "loss": 0.002, "step": 62550 }, { "epoch": 1.0572657444884783, "grad_norm": 0.06007852777838707, "learning_rate": 5.372564938529126e-06, "loss": 0.002, "step": 62560 }, { "epoch": 1.057434744767329, "grad_norm": 0.02162274345755577, "learning_rate": 5.371094193113022e-06, "loss": 0.0015, "step": 62570 }, { "epoch": 1.0576037450461793, "grad_norm": 0.099835604429245, "learning_rate": 5.369623415409926e-06, "loss": 0.0025, "step": 62580 }, { "epoch": 1.0577727453250299, "grad_norm": 0.016269249841570854, "learning_rate": 5.3681526055478e-06, "loss": 0.0012, "step": 62590 }, { "epoch": 1.0579417456038802, "grad_norm": 0.041425686329603195, "learning_rate": 5.3666817636546165e-06, "loss": 0.0006, "step": 62600 }, { "epoch": 1.0581107458827308, "grad_norm": 0.11686749756336212, "learning_rate": 5.36521088985834e-06, "loss": 0.0017, "step": 62610 }, { "epoch": 1.0582797461615812, "grad_norm": 0.02760067768394947, "learning_rate": 5.363739984286949e-06, "loss": 0.0007, "step": 62620 }, { "epoch": 1.0584487464404315, "grad_norm": 0.060067202895879745, "learning_rate": 5.362269047068416e-06, "loss": 0.0014, "step": 62630 }, { "epoch": 1.058617746719282, "grad_norm": 0.06818683445453644, "learning_rate": 5.36079807833072e-06, "loss": 0.0009, "step": 62640 }, { "epoch": 1.0587867469981325, "grad_norm": 0.028272630646824837, "learning_rate": 5.3593270782018436e-06, "loss": 0.0011, "step": 62650 }, { "epoch": 1.058955747276983, "grad_norm": 0.38542062044143677, "learning_rate": 5.35785604680977e-06, "loss": 0.0033, "step": 62660 }, { "epoch": 1.0591247475558334, "grad_norm": 0.0479482002556324, "learning_rate": 5.356384984282488e-06, "loss": 0.0007, "step": 62670 }, { "epoch": 1.059293747834684, "grad_norm": 0.031087413430213928, "learning_rate": 5.354913890747985e-06, "loss": 0.0009, "step": 62680 }, { "epoch": 1.0594627481135344, "grad_norm": 0.26890724897384644, "learning_rate": 5.353442766334253e-06, "loss": 0.0019, "step": 62690 }, { "epoch": 1.059631748392385, "grad_norm": 0.05777981877326965, "learning_rate": 5.351971611169289e-06, "loss": 0.0011, "step": 62700 }, { "epoch": 1.0598007486712353, "grad_norm": 0.06634936481714249, "learning_rate": 5.350500425381089e-06, "loss": 0.0012, "step": 62710 }, { "epoch": 1.0599697489500857, "grad_norm": 0.07184673845767975, "learning_rate": 5.349029209097654e-06, "loss": 0.0013, "step": 62720 }, { "epoch": 1.0601387492289363, "grad_norm": 0.0021499255672097206, "learning_rate": 5.347557962446985e-06, "loss": 0.0007, "step": 62730 }, { "epoch": 1.0603077495077866, "grad_norm": 0.14570766687393188, "learning_rate": 5.346086685557091e-06, "loss": 0.0016, "step": 62740 }, { "epoch": 1.0604767497866372, "grad_norm": 0.13053706288337708, "learning_rate": 5.34461537855598e-06, "loss": 0.0013, "step": 62750 }, { "epoch": 1.0606457500654876, "grad_norm": 0.033001068979501724, "learning_rate": 5.34314404157166e-06, "loss": 0.0019, "step": 62760 }, { "epoch": 1.0608147503443381, "grad_norm": 0.09939400106668472, "learning_rate": 5.341672674732145e-06, "loss": 0.0009, "step": 62770 }, { "epoch": 1.0609837506231885, "grad_norm": 0.025106241926550865, "learning_rate": 5.3402012781654525e-06, "loss": 0.0007, "step": 62780 }, { "epoch": 1.061152750902039, "grad_norm": 0.030804786831140518, "learning_rate": 5.3387298519996e-06, "loss": 0.0018, "step": 62790 }, { "epoch": 1.0613217511808894, "grad_norm": 0.06594132632017136, "learning_rate": 5.33725839636261e-06, "loss": 0.0013, "step": 62800 }, { "epoch": 1.0614907514597398, "grad_norm": 0.054694630205631256, "learning_rate": 5.335786911382504e-06, "loss": 0.001, "step": 62810 }, { "epoch": 1.0616597517385904, "grad_norm": 0.03607044741511345, "learning_rate": 5.334315397187311e-06, "loss": 0.0016, "step": 62820 }, { "epoch": 1.0618287520174408, "grad_norm": 0.1117258295416832, "learning_rate": 5.332843853905059e-06, "loss": 0.0014, "step": 62830 }, { "epoch": 1.0619977522962913, "grad_norm": 0.03165534511208534, "learning_rate": 5.331372281663778e-06, "loss": 0.0017, "step": 62840 }, { "epoch": 1.0621667525751417, "grad_norm": 0.04908168315887451, "learning_rate": 5.329900680591505e-06, "loss": 0.0016, "step": 62850 }, { "epoch": 1.0623357528539923, "grad_norm": 0.07058288902044296, "learning_rate": 5.328429050816272e-06, "loss": 0.0011, "step": 62860 }, { "epoch": 1.0625047531328426, "grad_norm": 0.04213688522577286, "learning_rate": 5.326957392466121e-06, "loss": 0.0023, "step": 62870 }, { "epoch": 1.0626737534116932, "grad_norm": 0.06736316531896591, "learning_rate": 5.3254857056690936e-06, "loss": 0.001, "step": 62880 }, { "epoch": 1.0628427536905436, "grad_norm": 0.12017694860696793, "learning_rate": 5.3240139905532314e-06, "loss": 0.0015, "step": 62890 }, { "epoch": 1.063011753969394, "grad_norm": 0.06744863092899323, "learning_rate": 5.322542247246583e-06, "loss": 0.0013, "step": 62900 }, { "epoch": 1.0631807542482445, "grad_norm": 0.045404065400362015, "learning_rate": 5.321070475877196e-06, "loss": 0.0018, "step": 62910 }, { "epoch": 1.063349754527095, "grad_norm": 0.06868870556354523, "learning_rate": 5.319598676573121e-06, "loss": 0.0014, "step": 62920 }, { "epoch": 1.0635187548059455, "grad_norm": 0.04988713935017586, "learning_rate": 5.318126849462414e-06, "loss": 0.0009, "step": 62930 }, { "epoch": 1.0636877550847958, "grad_norm": 0.05224590376019478, "learning_rate": 5.31665499467313e-06, "loss": 0.0023, "step": 62940 }, { "epoch": 1.0638567553636464, "grad_norm": 0.05000672861933708, "learning_rate": 5.315183112333326e-06, "loss": 0.0014, "step": 62950 }, { "epoch": 1.0640257556424968, "grad_norm": 0.011630984954535961, "learning_rate": 5.313711202571065e-06, "loss": 0.001, "step": 62960 }, { "epoch": 1.0641947559213474, "grad_norm": 0.07782811671495438, "learning_rate": 5.312239265514409e-06, "loss": 0.0012, "step": 62970 }, { "epoch": 1.0643637562001977, "grad_norm": 0.16299135982990265, "learning_rate": 5.310767301291425e-06, "loss": 0.0027, "step": 62980 }, { "epoch": 1.064532756479048, "grad_norm": 0.05368863046169281, "learning_rate": 5.309295310030179e-06, "loss": 0.0017, "step": 62990 }, { "epoch": 1.0647017567578987, "grad_norm": 0.06027548760175705, "learning_rate": 5.307823291858743e-06, "loss": 0.0012, "step": 63000 }, { "epoch": 1.064870757036749, "grad_norm": 0.022448888048529625, "learning_rate": 5.306351246905188e-06, "loss": 0.0012, "step": 63010 }, { "epoch": 1.0650397573155996, "grad_norm": 0.06651446968317032, "learning_rate": 5.304879175297592e-06, "loss": 0.002, "step": 63020 }, { "epoch": 1.06520875759445, "grad_norm": 0.07847457379102707, "learning_rate": 5.30340707716403e-06, "loss": 0.0009, "step": 63030 }, { "epoch": 1.0653777578733006, "grad_norm": 0.02626158483326435, "learning_rate": 5.301934952632583e-06, "loss": 0.0014, "step": 63040 }, { "epoch": 1.065546758152151, "grad_norm": 0.0006106089567765594, "learning_rate": 5.300462801831331e-06, "loss": 0.0011, "step": 63050 }, { "epoch": 1.0657157584310015, "grad_norm": 0.022541480138897896, "learning_rate": 5.298990624888359e-06, "loss": 0.0013, "step": 63060 }, { "epoch": 1.0658847587098519, "grad_norm": 0.012469821609556675, "learning_rate": 5.297518421931755e-06, "loss": 0.0007, "step": 63070 }, { "epoch": 1.0660537589887022, "grad_norm": 0.025536231696605682, "learning_rate": 5.296046193089607e-06, "loss": 0.0007, "step": 63080 }, { "epoch": 1.0662227592675528, "grad_norm": 0.014045816846191883, "learning_rate": 5.2945739384900045e-06, "loss": 0.0006, "step": 63090 }, { "epoch": 1.0663917595464032, "grad_norm": 0.024714739993214607, "learning_rate": 5.293101658261043e-06, "loss": 0.0009, "step": 63100 }, { "epoch": 1.0665607598252538, "grad_norm": 0.05849656090140343, "learning_rate": 5.291629352530817e-06, "loss": 0.0009, "step": 63110 }, { "epoch": 1.0667297601041041, "grad_norm": 0.03162253275513649, "learning_rate": 5.290157021427424e-06, "loss": 0.0008, "step": 63120 }, { "epoch": 1.0668987603829547, "grad_norm": 0.046496231108903885, "learning_rate": 5.288684665078966e-06, "loss": 0.0011, "step": 63130 }, { "epoch": 1.067067760661805, "grad_norm": 0.09088483452796936, "learning_rate": 5.287212283613542e-06, "loss": 0.0008, "step": 63140 }, { "epoch": 1.0672367609406554, "grad_norm": 0.028946420177817345, "learning_rate": 5.285739877159258e-06, "loss": 0.0011, "step": 63150 }, { "epoch": 1.067405761219506, "grad_norm": 0.07098438590765, "learning_rate": 5.28426744584422e-06, "loss": 0.0016, "step": 63160 }, { "epoch": 1.0675747614983564, "grad_norm": 0.007547794375568628, "learning_rate": 5.282794989796536e-06, "loss": 0.0006, "step": 63170 }, { "epoch": 1.067743761777207, "grad_norm": 0.013502542860805988, "learning_rate": 5.281322509144319e-06, "loss": 0.0008, "step": 63180 }, { "epoch": 1.0679127620560573, "grad_norm": 0.02497120201587677, "learning_rate": 5.279850004015681e-06, "loss": 0.0009, "step": 63190 }, { "epoch": 1.068081762334908, "grad_norm": 0.14901821315288544, "learning_rate": 5.278377474538735e-06, "loss": 0.0035, "step": 63200 }, { "epoch": 1.0682507626137583, "grad_norm": 0.02553505264222622, "learning_rate": 5.276904920841601e-06, "loss": 0.001, "step": 63210 }, { "epoch": 1.0684197628926089, "grad_norm": 0.014704728499054909, "learning_rate": 5.275432343052398e-06, "loss": 0.0006, "step": 63220 }, { "epoch": 1.0685887631714592, "grad_norm": 0.026760470122098923, "learning_rate": 5.273959741299246e-06, "loss": 0.0011, "step": 63230 }, { "epoch": 1.0687577634503096, "grad_norm": 0.14907385408878326, "learning_rate": 5.27248711571027e-06, "loss": 0.0008, "step": 63240 }, { "epoch": 1.0689267637291602, "grad_norm": 0.03087923489511013, "learning_rate": 5.271014466413593e-06, "loss": 0.002, "step": 63250 }, { "epoch": 1.0690957640080105, "grad_norm": 0.019032321870326996, "learning_rate": 5.269541793537346e-06, "loss": 0.0005, "step": 63260 }, { "epoch": 1.0692647642868611, "grad_norm": 0.017722396180033684, "learning_rate": 5.268069097209656e-06, "loss": 0.0014, "step": 63270 }, { "epoch": 1.0694337645657115, "grad_norm": 0.06067734956741333, "learning_rate": 5.266596377558656e-06, "loss": 0.001, "step": 63280 }, { "epoch": 1.069602764844562, "grad_norm": 0.03219889476895332, "learning_rate": 5.265123634712478e-06, "loss": 0.0011, "step": 63290 }, { "epoch": 1.0697717651234124, "grad_norm": 0.04848824813961983, "learning_rate": 5.263650868799261e-06, "loss": 0.0012, "step": 63300 }, { "epoch": 1.069940765402263, "grad_norm": 0.09955284744501114, "learning_rate": 5.262178079947141e-06, "loss": 0.0008, "step": 63310 }, { "epoch": 1.0701097656811134, "grad_norm": 0.1374005228281021, "learning_rate": 5.260705268284258e-06, "loss": 0.0013, "step": 63320 }, { "epoch": 1.0702787659599637, "grad_norm": 0.03295893967151642, "learning_rate": 5.259232433938752e-06, "loss": 0.0006, "step": 63330 }, { "epoch": 1.0704477662388143, "grad_norm": 0.012826895341277122, "learning_rate": 5.2577595770387705e-06, "loss": 0.001, "step": 63340 }, { "epoch": 1.0706167665176647, "grad_norm": 0.01251635979861021, "learning_rate": 5.256286697712455e-06, "loss": 0.0014, "step": 63350 }, { "epoch": 1.0707857667965153, "grad_norm": 0.07876032590866089, "learning_rate": 5.254813796087956e-06, "loss": 0.0027, "step": 63360 }, { "epoch": 1.0709547670753656, "grad_norm": 0.04843059927225113, "learning_rate": 5.253340872293421e-06, "loss": 0.0034, "step": 63370 }, { "epoch": 1.0711237673542162, "grad_norm": 0.0048154802061617374, "learning_rate": 5.251867926457003e-06, "loss": 0.0009, "step": 63380 }, { "epoch": 1.0712927676330666, "grad_norm": 0.02693239226937294, "learning_rate": 5.250394958706856e-06, "loss": 0.0008, "step": 63390 }, { "epoch": 1.071461767911917, "grad_norm": 0.17818357050418854, "learning_rate": 5.248921969171134e-06, "loss": 0.0038, "step": 63400 }, { "epoch": 1.0716307681907675, "grad_norm": 0.06279647350311279, "learning_rate": 5.247448957977994e-06, "loss": 0.0013, "step": 63410 }, { "epoch": 1.0717997684696179, "grad_norm": 0.00845323409885168, "learning_rate": 5.245975925255596e-06, "loss": 0.0005, "step": 63420 }, { "epoch": 1.0719687687484685, "grad_norm": 0.11213696748018265, "learning_rate": 5.2445028711321e-06, "loss": 0.0014, "step": 63430 }, { "epoch": 1.0721377690273188, "grad_norm": 0.005753799341619015, "learning_rate": 5.243029795735671e-06, "loss": 0.0004, "step": 63440 }, { "epoch": 1.0723067693061694, "grad_norm": 0.03215279430150986, "learning_rate": 5.2415566991944725e-06, "loss": 0.001, "step": 63450 }, { "epoch": 1.0724757695850198, "grad_norm": 0.11824256181716919, "learning_rate": 5.24008358163667e-06, "loss": 0.0016, "step": 63460 }, { "epoch": 1.0726447698638704, "grad_norm": 0.023332377895712852, "learning_rate": 5.2386104431904325e-06, "loss": 0.0009, "step": 63470 }, { "epoch": 1.0728137701427207, "grad_norm": 0.026816241443157196, "learning_rate": 5.2371372839839305e-06, "loss": 0.0016, "step": 63480 }, { "epoch": 1.072982770421571, "grad_norm": 0.07007268816232681, "learning_rate": 5.235664104145335e-06, "loss": 0.0008, "step": 63490 }, { "epoch": 1.0731517707004217, "grad_norm": 0.09333419799804688, "learning_rate": 5.2341909038028216e-06, "loss": 0.0019, "step": 63500 }, { "epoch": 1.073320770979272, "grad_norm": 0.028545813634991646, "learning_rate": 5.232717683084565e-06, "loss": 0.0011, "step": 63510 }, { "epoch": 1.0734897712581226, "grad_norm": 0.030302291736006737, "learning_rate": 5.231244442118742e-06, "loss": 0.001, "step": 63520 }, { "epoch": 1.073658771536973, "grad_norm": 0.04001889377832413, "learning_rate": 5.229771181033534e-06, "loss": 0.0015, "step": 63530 }, { "epoch": 1.0738277718158236, "grad_norm": 0.02755117230117321, "learning_rate": 5.228297899957117e-06, "loss": 0.001, "step": 63540 }, { "epoch": 1.073996772094674, "grad_norm": 0.009755623526871204, "learning_rate": 5.226824599017679e-06, "loss": 0.0008, "step": 63550 }, { "epoch": 1.0741657723735245, "grad_norm": 0.01657235063612461, "learning_rate": 5.225351278343401e-06, "loss": 0.0012, "step": 63560 }, { "epoch": 1.0743347726523749, "grad_norm": 0.027861325070261955, "learning_rate": 5.223877938062471e-06, "loss": 0.001, "step": 63570 }, { "epoch": 1.0745037729312252, "grad_norm": 0.021976448595523834, "learning_rate": 5.222404578303072e-06, "loss": 0.0012, "step": 63580 }, { "epoch": 1.0746727732100758, "grad_norm": 0.058850470930337906, "learning_rate": 5.2209311991934e-06, "loss": 0.0014, "step": 63590 }, { "epoch": 1.0748417734889262, "grad_norm": 0.013691349886357784, "learning_rate": 5.2194578008616426e-06, "loss": 0.001, "step": 63600 }, { "epoch": 1.0750107737677768, "grad_norm": 0.03913870081305504, "learning_rate": 5.217984383435995e-06, "loss": 0.0013, "step": 63610 }, { "epoch": 1.0751797740466271, "grad_norm": 0.019784534350037575, "learning_rate": 5.216510947044647e-06, "loss": 0.0009, "step": 63620 }, { "epoch": 1.0753487743254777, "grad_norm": 0.027006670832633972, "learning_rate": 5.2150374918158e-06, "loss": 0.001, "step": 63630 }, { "epoch": 1.075517774604328, "grad_norm": 0.0347067154943943, "learning_rate": 5.213564017877648e-06, "loss": 0.0017, "step": 63640 }, { "epoch": 1.0756867748831787, "grad_norm": 0.08511506021022797, "learning_rate": 5.212090525358392e-06, "loss": 0.001, "step": 63650 }, { "epoch": 1.075855775162029, "grad_norm": 0.050413474440574646, "learning_rate": 5.2106170143862324e-06, "loss": 0.001, "step": 63660 }, { "epoch": 1.0760247754408794, "grad_norm": 0.020677484571933746, "learning_rate": 5.209143485089372e-06, "loss": 0.0013, "step": 63670 }, { "epoch": 1.07619377571973, "grad_norm": 0.03208847716450691, "learning_rate": 5.207669937596014e-06, "loss": 0.0011, "step": 63680 }, { "epoch": 1.0763627759985803, "grad_norm": 0.08395759016275406, "learning_rate": 5.206196372034367e-06, "loss": 0.0012, "step": 63690 }, { "epoch": 1.076531776277431, "grad_norm": 0.028258448466658592, "learning_rate": 5.204722788532637e-06, "loss": 0.0013, "step": 63700 }, { "epoch": 1.0767007765562813, "grad_norm": 0.16747726500034332, "learning_rate": 5.203249187219032e-06, "loss": 0.0014, "step": 63710 }, { "epoch": 1.0768697768351319, "grad_norm": 0.016199585050344467, "learning_rate": 5.201775568221762e-06, "loss": 0.0015, "step": 63720 }, { "epoch": 1.0770387771139822, "grad_norm": 0.1536611169576645, "learning_rate": 5.20030193166904e-06, "loss": 0.0014, "step": 63730 }, { "epoch": 1.0772077773928328, "grad_norm": 0.10717665404081345, "learning_rate": 5.198828277689081e-06, "loss": 0.0015, "step": 63740 }, { "epoch": 1.0773767776716832, "grad_norm": 0.018150193616747856, "learning_rate": 5.197354606410098e-06, "loss": 0.0007, "step": 63750 }, { "epoch": 1.0775457779505335, "grad_norm": 0.017907075583934784, "learning_rate": 5.195880917960307e-06, "loss": 0.0011, "step": 63760 }, { "epoch": 1.077714778229384, "grad_norm": 0.06702237576246262, "learning_rate": 5.1944072124679265e-06, "loss": 0.0011, "step": 63770 }, { "epoch": 1.0778837785082345, "grad_norm": 0.006694977171719074, "learning_rate": 5.192933490061179e-06, "loss": 0.0014, "step": 63780 }, { "epoch": 1.078052778787085, "grad_norm": 0.033626001328229904, "learning_rate": 5.191459750868282e-06, "loss": 0.001, "step": 63790 }, { "epoch": 1.0782217790659354, "grad_norm": 0.059294719249010086, "learning_rate": 5.189985995017458e-06, "loss": 0.0019, "step": 63800 }, { "epoch": 1.078390779344786, "grad_norm": 0.05930497124791145, "learning_rate": 5.188512222636933e-06, "loss": 0.001, "step": 63810 }, { "epoch": 1.0785597796236364, "grad_norm": 0.0449826642870903, "learning_rate": 5.187038433854932e-06, "loss": 0.0009, "step": 63820 }, { "epoch": 1.078728779902487, "grad_norm": 0.05867033451795578, "learning_rate": 5.1855646287996795e-06, "loss": 0.0011, "step": 63830 }, { "epoch": 1.0788977801813373, "grad_norm": 0.030678899958729744, "learning_rate": 5.1840908075994065e-06, "loss": 0.0012, "step": 63840 }, { "epoch": 1.0790667804601877, "grad_norm": 0.0650521069765091, "learning_rate": 5.18261697038234e-06, "loss": 0.0015, "step": 63850 }, { "epoch": 1.0792357807390383, "grad_norm": 0.07857781648635864, "learning_rate": 5.181143117276712e-06, "loss": 0.002, "step": 63860 }, { "epoch": 1.0794047810178886, "grad_norm": 0.07133198529481888, "learning_rate": 5.179669248410757e-06, "loss": 0.0014, "step": 63870 }, { "epoch": 1.0795737812967392, "grad_norm": 0.08174729347229004, "learning_rate": 5.178195363912706e-06, "loss": 0.0006, "step": 63880 }, { "epoch": 1.0797427815755896, "grad_norm": 0.015073689632117748, "learning_rate": 5.176721463910795e-06, "loss": 0.0015, "step": 63890 }, { "epoch": 1.0799117818544401, "grad_norm": 0.02266828902065754, "learning_rate": 5.17524754853326e-06, "loss": 0.0003, "step": 63900 }, { "epoch": 1.0800807821332905, "grad_norm": 0.02230769768357277, "learning_rate": 5.17377361790834e-06, "loss": 0.0008, "step": 63910 }, { "epoch": 1.080249782412141, "grad_norm": 0.05522468313574791, "learning_rate": 5.172299672164273e-06, "loss": 0.001, "step": 63920 }, { "epoch": 1.0804187826909915, "grad_norm": 0.023726046085357666, "learning_rate": 5.170825711429298e-06, "loss": 0.0015, "step": 63930 }, { "epoch": 1.0805877829698418, "grad_norm": 0.03070794604718685, "learning_rate": 5.16935173583166e-06, "loss": 0.0003, "step": 63940 }, { "epoch": 1.0807567832486924, "grad_norm": 0.002701481804251671, "learning_rate": 5.167877745499601e-06, "loss": 0.0009, "step": 63950 }, { "epoch": 1.0809257835275428, "grad_norm": 0.02209090068936348, "learning_rate": 5.166403740561363e-06, "loss": 0.0007, "step": 63960 }, { "epoch": 1.0810947838063933, "grad_norm": 0.06571850180625916, "learning_rate": 5.164929721145194e-06, "loss": 0.0017, "step": 63970 }, { "epoch": 1.0812637840852437, "grad_norm": 0.09760637581348419, "learning_rate": 5.163455687379341e-06, "loss": 0.0015, "step": 63980 }, { "epoch": 1.0814327843640943, "grad_norm": 0.04042598232626915, "learning_rate": 5.161981639392051e-06, "loss": 0.0009, "step": 63990 }, { "epoch": 1.0816017846429447, "grad_norm": 0.016159027814865112, "learning_rate": 5.160507577311573e-06, "loss": 0.0012, "step": 64000 }, { "epoch": 1.0817707849217952, "grad_norm": 0.012882671318948269, "learning_rate": 5.1590335012661584e-06, "loss": 0.0013, "step": 64010 }, { "epoch": 1.0819397852006456, "grad_norm": 0.025966521352529526, "learning_rate": 5.15755941138406e-06, "loss": 0.0005, "step": 64020 }, { "epoch": 1.082108785479496, "grad_norm": 0.0443761870265007, "learning_rate": 5.156085307793528e-06, "loss": 0.0013, "step": 64030 }, { "epoch": 1.0822777857583465, "grad_norm": 0.05958549305796623, "learning_rate": 5.154611190622818e-06, "loss": 0.0018, "step": 64040 }, { "epoch": 1.082446786037197, "grad_norm": 0.1546487808227539, "learning_rate": 5.1531370600001855e-06, "loss": 0.0012, "step": 64050 }, { "epoch": 1.0826157863160475, "grad_norm": 0.04316802695393562, "learning_rate": 5.151662916053886e-06, "loss": 0.0006, "step": 64060 }, { "epoch": 1.0827847865948979, "grad_norm": 0.07794240862131119, "learning_rate": 5.1501887589121794e-06, "loss": 0.0008, "step": 64070 }, { "epoch": 1.0829537868737484, "grad_norm": 0.1040305644273758, "learning_rate": 5.1487145887033214e-06, "loss": 0.0021, "step": 64080 }, { "epoch": 1.0831227871525988, "grad_norm": 0.07008879631757736, "learning_rate": 5.147240405555574e-06, "loss": 0.0009, "step": 64090 }, { "epoch": 1.0832917874314492, "grad_norm": 0.019183754920959473, "learning_rate": 5.145766209597199e-06, "loss": 0.0008, "step": 64100 }, { "epoch": 1.0834607877102997, "grad_norm": 0.00969164352864027, "learning_rate": 5.1442920009564576e-06, "loss": 0.0009, "step": 64110 }, { "epoch": 1.08362978798915, "grad_norm": 0.023686842992901802, "learning_rate": 5.142817779761613e-06, "loss": 0.0011, "step": 64120 }, { "epoch": 1.0837987882680007, "grad_norm": 0.12426068633794785, "learning_rate": 5.141343546140929e-06, "loss": 0.0018, "step": 64130 }, { "epoch": 1.083967788546851, "grad_norm": 0.02402712032198906, "learning_rate": 5.139869300222672e-06, "loss": 0.0007, "step": 64140 }, { "epoch": 1.0841367888257016, "grad_norm": 0.04177405312657356, "learning_rate": 5.138395042135107e-06, "loss": 0.0006, "step": 64150 }, { "epoch": 1.084305789104552, "grad_norm": 0.021412570029497147, "learning_rate": 5.136920772006504e-06, "loss": 0.0009, "step": 64160 }, { "epoch": 1.0844747893834026, "grad_norm": 0.061743564903736115, "learning_rate": 5.13544648996513e-06, "loss": 0.0005, "step": 64170 }, { "epoch": 1.084643789662253, "grad_norm": 0.050587743520736694, "learning_rate": 5.133972196139256e-06, "loss": 0.0023, "step": 64180 }, { "epoch": 1.0848127899411033, "grad_norm": 0.0479382649064064, "learning_rate": 5.1324978906571495e-06, "loss": 0.0015, "step": 64190 }, { "epoch": 1.084981790219954, "grad_norm": 0.06690307706594467, "learning_rate": 5.131023573647087e-06, "loss": 0.0012, "step": 64200 }, { "epoch": 1.0851507904988043, "grad_norm": 0.0020674539264291525, "learning_rate": 5.129549245237337e-06, "loss": 0.0014, "step": 64210 }, { "epoch": 1.0853197907776548, "grad_norm": 0.031590402126312256, "learning_rate": 5.128074905556177e-06, "loss": 0.0008, "step": 64220 }, { "epoch": 1.0854887910565052, "grad_norm": 0.18099823594093323, "learning_rate": 5.126600554731878e-06, "loss": 0.0014, "step": 64230 }, { "epoch": 1.0856577913353558, "grad_norm": 0.018791290000081062, "learning_rate": 5.125126192892719e-06, "loss": 0.0011, "step": 64240 }, { "epoch": 1.0858267916142061, "grad_norm": 0.0774259865283966, "learning_rate": 5.123651820166973e-06, "loss": 0.0011, "step": 64250 }, { "epoch": 1.0859957918930565, "grad_norm": 0.03627191111445427, "learning_rate": 5.12217743668292e-06, "loss": 0.001, "step": 64260 }, { "epoch": 1.086164792171907, "grad_norm": 0.05594772845506668, "learning_rate": 5.120703042568838e-06, "loss": 0.0026, "step": 64270 }, { "epoch": 1.0863337924507575, "grad_norm": 0.06517887860536575, "learning_rate": 5.119228637953007e-06, "loss": 0.0008, "step": 64280 }, { "epoch": 1.086502792729608, "grad_norm": 0.012375900521874428, "learning_rate": 5.117754222963708e-06, "loss": 0.0021, "step": 64290 }, { "epoch": 1.0866717930084584, "grad_norm": 0.051953885704278946, "learning_rate": 5.11627979772922e-06, "loss": 0.0009, "step": 64300 }, { "epoch": 1.086840793287309, "grad_norm": 0.019279394298791885, "learning_rate": 5.114805362377826e-06, "loss": 0.0014, "step": 64310 }, { "epoch": 1.0870097935661593, "grad_norm": 0.04088306799530983, "learning_rate": 5.11333091703781e-06, "loss": 0.0036, "step": 64320 }, { "epoch": 1.08717879384501, "grad_norm": 0.10127560049295425, "learning_rate": 5.111856461837454e-06, "loss": 0.0009, "step": 64330 }, { "epoch": 1.0873477941238603, "grad_norm": 0.019220391288399696, "learning_rate": 5.110381996905044e-06, "loss": 0.0008, "step": 64340 }, { "epoch": 1.0875167944027107, "grad_norm": 0.03603460267186165, "learning_rate": 5.108907522368865e-06, "loss": 0.0015, "step": 64350 }, { "epoch": 1.0876857946815612, "grad_norm": 0.0060151806101202965, "learning_rate": 5.107433038357205e-06, "loss": 0.0008, "step": 64360 }, { "epoch": 1.0878547949604116, "grad_norm": 0.04247978329658508, "learning_rate": 5.10595854499835e-06, "loss": 0.0016, "step": 64370 }, { "epoch": 1.0880237952392622, "grad_norm": 0.002622088650241494, "learning_rate": 5.104484042420588e-06, "loss": 0.0004, "step": 64380 }, { "epoch": 1.0881927955181125, "grad_norm": 0.05791708454489708, "learning_rate": 5.103009530752209e-06, "loss": 0.0031, "step": 64390 }, { "epoch": 1.0883617957969631, "grad_norm": 0.0468166321516037, "learning_rate": 5.1015350101215e-06, "loss": 0.0017, "step": 64400 }, { "epoch": 1.0885307960758135, "grad_norm": 0.056557219475507736, "learning_rate": 5.100060480656754e-06, "loss": 0.0013, "step": 64410 }, { "epoch": 1.088699796354664, "grad_norm": 0.05838380381464958, "learning_rate": 5.098585942486262e-06, "loss": 0.0013, "step": 64420 }, { "epoch": 1.0888687966335144, "grad_norm": 0.04393266513943672, "learning_rate": 5.0971113957383135e-06, "loss": 0.003, "step": 64430 }, { "epoch": 1.0890377969123648, "grad_norm": 0.05707855895161629, "learning_rate": 5.095636840541204e-06, "loss": 0.0011, "step": 64440 }, { "epoch": 1.0892067971912154, "grad_norm": 0.13437679409980774, "learning_rate": 5.094162277023225e-06, "loss": 0.002, "step": 64450 }, { "epoch": 1.0893757974700657, "grad_norm": 0.12905579805374146, "learning_rate": 5.092687705312673e-06, "loss": 0.0016, "step": 64460 }, { "epoch": 1.0895447977489163, "grad_norm": 0.0845353752374649, "learning_rate": 5.091213125537842e-06, "loss": 0.0015, "step": 64470 }, { "epoch": 1.0897137980277667, "grad_norm": 0.017485635355114937, "learning_rate": 5.089738537827027e-06, "loss": 0.0013, "step": 64480 }, { "epoch": 1.0898827983066173, "grad_norm": 0.04234213009476662, "learning_rate": 5.088263942308523e-06, "loss": 0.0026, "step": 64490 }, { "epoch": 1.0900517985854676, "grad_norm": 0.02672339603304863, "learning_rate": 5.086789339110631e-06, "loss": 0.0013, "step": 64500 }, { "epoch": 1.0902207988643182, "grad_norm": 0.22146789729595184, "learning_rate": 5.085314728361644e-06, "loss": 0.0026, "step": 64510 }, { "epoch": 1.0903897991431686, "grad_norm": 0.008131207898259163, "learning_rate": 5.083840110189863e-06, "loss": 0.0007, "step": 64520 }, { "epoch": 1.090558799422019, "grad_norm": 0.25171422958374023, "learning_rate": 5.082365484723586e-06, "loss": 0.0013, "step": 64530 }, { "epoch": 1.0907277997008695, "grad_norm": 0.06383270770311356, "learning_rate": 5.080890852091111e-06, "loss": 0.001, "step": 64540 }, { "epoch": 1.0908967999797199, "grad_norm": 0.03389836847782135, "learning_rate": 5.079416212420742e-06, "loss": 0.0009, "step": 64550 }, { "epoch": 1.0910658002585705, "grad_norm": 0.030062347650527954, "learning_rate": 5.077941565840777e-06, "loss": 0.0013, "step": 64560 }, { "epoch": 1.0912348005374208, "grad_norm": 0.02097107470035553, "learning_rate": 5.076466912479519e-06, "loss": 0.002, "step": 64570 }, { "epoch": 1.0914038008162714, "grad_norm": 0.0992652028799057, "learning_rate": 5.074992252465268e-06, "loss": 0.0014, "step": 64580 }, { "epoch": 1.0915728010951218, "grad_norm": 0.04203925281763077, "learning_rate": 5.073517585926328e-06, "loss": 0.0013, "step": 64590 }, { "epoch": 1.0917418013739724, "grad_norm": 0.03911573812365532, "learning_rate": 5.072042912991003e-06, "loss": 0.001, "step": 64600 }, { "epoch": 1.0919108016528227, "grad_norm": 0.028423482552170753, "learning_rate": 5.070568233787595e-06, "loss": 0.0016, "step": 64610 }, { "epoch": 1.092079801931673, "grad_norm": 0.03550324961543083, "learning_rate": 5.069093548444408e-06, "loss": 0.0012, "step": 64620 }, { "epoch": 1.0922488022105237, "grad_norm": 0.09978547692298889, "learning_rate": 5.067618857089747e-06, "loss": 0.001, "step": 64630 }, { "epoch": 1.092417802489374, "grad_norm": 0.060302022844552994, "learning_rate": 5.066144159851919e-06, "loss": 0.0012, "step": 64640 }, { "epoch": 1.0925868027682246, "grad_norm": 0.08676223456859589, "learning_rate": 5.064669456859228e-06, "loss": 0.0009, "step": 64650 }, { "epoch": 1.092755803047075, "grad_norm": 0.04188084974884987, "learning_rate": 5.0631947482399815e-06, "loss": 0.0008, "step": 64660 }, { "epoch": 1.0929248033259256, "grad_norm": 0.15830457210540771, "learning_rate": 5.061720034122486e-06, "loss": 0.002, "step": 64670 }, { "epoch": 1.093093803604776, "grad_norm": 0.03596467152237892, "learning_rate": 5.060245314635049e-06, "loss": 0.0013, "step": 64680 }, { "epoch": 1.0932628038836265, "grad_norm": 0.019881989806890488, "learning_rate": 5.058770589905976e-06, "loss": 0.0007, "step": 64690 }, { "epoch": 1.0934318041624769, "grad_norm": 0.07606848329305649, "learning_rate": 5.057295860063575e-06, "loss": 0.0008, "step": 64700 }, { "epoch": 1.0936008044413272, "grad_norm": 0.11718254536390305, "learning_rate": 5.05582112523616e-06, "loss": 0.0012, "step": 64710 }, { "epoch": 1.0937698047201778, "grad_norm": 0.04764671251177788, "learning_rate": 5.054346385552036e-06, "loss": 0.0014, "step": 64720 }, { "epoch": 1.0939388049990282, "grad_norm": 0.10151364654302597, "learning_rate": 5.0528716411395126e-06, "loss": 0.0014, "step": 64730 }, { "epoch": 1.0941078052778788, "grad_norm": 0.02287687174975872, "learning_rate": 5.0513968921269006e-06, "loss": 0.0007, "step": 64740 }, { "epoch": 1.0942768055567291, "grad_norm": 0.11843504756689072, "learning_rate": 5.04992213864251e-06, "loss": 0.0007, "step": 64750 }, { "epoch": 1.0944458058355797, "grad_norm": 0.000851675751619041, "learning_rate": 5.048447380814652e-06, "loss": 0.0009, "step": 64760 }, { "epoch": 1.09461480611443, "grad_norm": 0.04916158318519592, "learning_rate": 5.0469726187716365e-06, "loss": 0.0019, "step": 64770 }, { "epoch": 1.0947838063932807, "grad_norm": 0.03001948446035385, "learning_rate": 5.045497852641775e-06, "loss": 0.0008, "step": 64780 }, { "epoch": 1.094952806672131, "grad_norm": 0.021211637184023857, "learning_rate": 5.044023082553381e-06, "loss": 0.0012, "step": 64790 }, { "epoch": 1.0951218069509814, "grad_norm": 0.05235179513692856, "learning_rate": 5.042548308634765e-06, "loss": 0.0007, "step": 64800 }, { "epoch": 1.095290807229832, "grad_norm": 0.1343381553888321, "learning_rate": 5.04107353101424e-06, "loss": 0.0012, "step": 64810 }, { "epoch": 1.0954598075086823, "grad_norm": 0.009196394123136997, "learning_rate": 5.039598749820119e-06, "loss": 0.0007, "step": 64820 }, { "epoch": 1.095628807787533, "grad_norm": 0.03460807725787163, "learning_rate": 5.038123965180713e-06, "loss": 0.0008, "step": 64830 }, { "epoch": 1.0957978080663833, "grad_norm": 0.4813380837440491, "learning_rate": 5.036649177224339e-06, "loss": 0.0008, "step": 64840 }, { "epoch": 1.0959668083452339, "grad_norm": 0.04702454060316086, "learning_rate": 5.03517438607931e-06, "loss": 0.0018, "step": 64850 }, { "epoch": 1.0961358086240842, "grad_norm": 0.12171593308448792, "learning_rate": 5.033699591873939e-06, "loss": 0.0012, "step": 64860 }, { "epoch": 1.0963048089029348, "grad_norm": 0.13131020963191986, "learning_rate": 5.032224794736539e-06, "loss": 0.0011, "step": 64870 }, { "epoch": 1.0964738091817852, "grad_norm": 0.0072104958817362785, "learning_rate": 5.030749994795426e-06, "loss": 0.001, "step": 64880 }, { "epoch": 1.0966428094606355, "grad_norm": 0.028363553807139397, "learning_rate": 5.029275192178914e-06, "loss": 0.0013, "step": 64890 }, { "epoch": 1.096811809739486, "grad_norm": 0.16457876563072205, "learning_rate": 5.027800387015319e-06, "loss": 0.0009, "step": 64900 }, { "epoch": 1.0969808100183365, "grad_norm": 0.07283145189285278, "learning_rate": 5.026325579432954e-06, "loss": 0.0019, "step": 64910 }, { "epoch": 1.097149810297187, "grad_norm": 0.03155135735869408, "learning_rate": 5.024850769560139e-06, "loss": 0.0006, "step": 64920 }, { "epoch": 1.0973188105760374, "grad_norm": 0.031150473281741142, "learning_rate": 5.023375957525185e-06, "loss": 0.0009, "step": 64930 }, { "epoch": 1.097487810854888, "grad_norm": 0.024125665426254272, "learning_rate": 5.021901143456409e-06, "loss": 0.0014, "step": 64940 }, { "epoch": 1.0976568111337384, "grad_norm": 0.01180550642311573, "learning_rate": 5.0204263274821275e-06, "loss": 0.0008, "step": 64950 }, { "epoch": 1.097825811412589, "grad_norm": 0.06741633266210556, "learning_rate": 5.018951509730657e-06, "loss": 0.0009, "step": 64960 }, { "epoch": 1.0979948116914393, "grad_norm": 0.013154249638319016, "learning_rate": 5.017476690330314e-06, "loss": 0.0006, "step": 64970 }, { "epoch": 1.0981638119702897, "grad_norm": 0.005113786086440086, "learning_rate": 5.016001869409414e-06, "loss": 0.0012, "step": 64980 }, { "epoch": 1.0983328122491403, "grad_norm": 0.038728177547454834, "learning_rate": 5.014527047096273e-06, "loss": 0.0014, "step": 64990 }, { "epoch": 1.0985018125279906, "grad_norm": 0.0678655281662941, "learning_rate": 5.0130522235192095e-06, "loss": 0.0012, "step": 65000 }, { "epoch": 1.0986708128068412, "grad_norm": 0.0647139623761177, "learning_rate": 5.011577398806539e-06, "loss": 0.002, "step": 65010 }, { "epoch": 1.0988398130856916, "grad_norm": 0.04998895153403282, "learning_rate": 5.010102573086577e-06, "loss": 0.0012, "step": 65020 }, { "epoch": 1.0990088133645421, "grad_norm": 0.08676137030124664, "learning_rate": 5.008627746487644e-06, "loss": 0.0007, "step": 65030 }, { "epoch": 1.0991778136433925, "grad_norm": 0.05025715008378029, "learning_rate": 5.0071529191380555e-06, "loss": 0.0007, "step": 65040 }, { "epoch": 1.0993468139222429, "grad_norm": 0.03282903879880905, "learning_rate": 5.005678091166128e-06, "loss": 0.001, "step": 65050 }, { "epoch": 1.0995158142010935, "grad_norm": 0.04838985204696655, "learning_rate": 5.004203262700179e-06, "loss": 0.0014, "step": 65060 }, { "epoch": 1.0996848144799438, "grad_norm": 0.050552740693092346, "learning_rate": 5.002728433868525e-06, "loss": 0.001, "step": 65070 }, { "epoch": 1.0998538147587944, "grad_norm": 0.08461810648441315, "learning_rate": 5.001253604799485e-06, "loss": 0.0014, "step": 65080 }, { "epoch": 1.1000228150376448, "grad_norm": 0.06626548618078232, "learning_rate": 4.999778775621375e-06, "loss": 0.0021, "step": 65090 }, { "epoch": 1.1001918153164953, "grad_norm": 0.09405693411827087, "learning_rate": 4.998303946462514e-06, "loss": 0.0015, "step": 65100 }, { "epoch": 1.1003608155953457, "grad_norm": 0.013804643414914608, "learning_rate": 4.996829117451215e-06, "loss": 0.001, "step": 65110 }, { "epoch": 1.1005298158741963, "grad_norm": 0.0818895548582077, "learning_rate": 4.9953542887158e-06, "loss": 0.0011, "step": 65120 }, { "epoch": 1.1006988161530467, "grad_norm": 0.04563478007912636, "learning_rate": 4.993879460384583e-06, "loss": 0.0008, "step": 65130 }, { "epoch": 1.100867816431897, "grad_norm": 0.08573456853628159, "learning_rate": 4.992404632585885e-06, "loss": 0.0013, "step": 65140 }, { "epoch": 1.1010368167107476, "grad_norm": 0.09419383853673935, "learning_rate": 4.990929805448018e-06, "loss": 0.0028, "step": 65150 }, { "epoch": 1.101205816989598, "grad_norm": 0.014462495222687721, "learning_rate": 4.989454979099305e-06, "loss": 0.0009, "step": 65160 }, { "epoch": 1.1013748172684485, "grad_norm": 0.057236574590206146, "learning_rate": 4.987980153668057e-06, "loss": 0.0028, "step": 65170 }, { "epoch": 1.101543817547299, "grad_norm": 0.05355300381779671, "learning_rate": 4.986505329282596e-06, "loss": 0.001, "step": 65180 }, { "epoch": 1.1017128178261495, "grad_norm": 0.03428352624177933, "learning_rate": 4.985030506071235e-06, "loss": 0.002, "step": 65190 }, { "epoch": 1.1018818181049999, "grad_norm": 0.001353323576040566, "learning_rate": 4.983555684162294e-06, "loss": 0.0024, "step": 65200 }, { "epoch": 1.1020508183838502, "grad_norm": 0.017991015687584877, "learning_rate": 4.982080863684087e-06, "loss": 0.0014, "step": 65210 }, { "epoch": 1.1022198186627008, "grad_norm": 0.057966869324445724, "learning_rate": 4.980606044764932e-06, "loss": 0.0007, "step": 65220 }, { "epoch": 1.1023888189415512, "grad_norm": 0.02501198649406433, "learning_rate": 4.979131227533145e-06, "loss": 0.0007, "step": 65230 }, { "epoch": 1.1025578192204017, "grad_norm": 0.07594097405672073, "learning_rate": 4.9776564121170435e-06, "loss": 0.0019, "step": 65240 }, { "epoch": 1.102726819499252, "grad_norm": 0.025860225781798363, "learning_rate": 4.9761815986449405e-06, "loss": 0.0008, "step": 65250 }, { "epoch": 1.1028958197781027, "grad_norm": 0.03838937729597092, "learning_rate": 4.974706787245156e-06, "loss": 0.0014, "step": 65260 }, { "epoch": 1.103064820056953, "grad_norm": 0.03614159673452377, "learning_rate": 4.973231978046001e-06, "loss": 0.0008, "step": 65270 }, { "epoch": 1.1032338203358036, "grad_norm": 0.015224494971334934, "learning_rate": 4.971757171175797e-06, "loss": 0.0011, "step": 65280 }, { "epoch": 1.103402820614654, "grad_norm": 0.059320464730262756, "learning_rate": 4.9702823667628526e-06, "loss": 0.0012, "step": 65290 }, { "epoch": 1.1035718208935044, "grad_norm": 0.06934010982513428, "learning_rate": 4.968807564935487e-06, "loss": 0.0011, "step": 65300 }, { "epoch": 1.103740821172355, "grad_norm": 0.1251693069934845, "learning_rate": 4.967332765822014e-06, "loss": 0.0016, "step": 65310 }, { "epoch": 1.1039098214512053, "grad_norm": 0.08632376044988632, "learning_rate": 4.9658579695507515e-06, "loss": 0.0011, "step": 65320 }, { "epoch": 1.104078821730056, "grad_norm": 0.07911311835050583, "learning_rate": 4.964383176250008e-06, "loss": 0.0011, "step": 65330 }, { "epoch": 1.1042478220089063, "grad_norm": 0.02994832582771778, "learning_rate": 4.9629083860481005e-06, "loss": 0.0012, "step": 65340 }, { "epoch": 1.1044168222877568, "grad_norm": 0.0312307458370924, "learning_rate": 4.9614335990733455e-06, "loss": 0.0012, "step": 65350 }, { "epoch": 1.1045858225666072, "grad_norm": 0.06177021190524101, "learning_rate": 4.959958815454053e-06, "loss": 0.0011, "step": 65360 }, { "epoch": 1.1047548228454578, "grad_norm": 0.036960311233997345, "learning_rate": 4.9584840353185384e-06, "loss": 0.001, "step": 65370 }, { "epoch": 1.1049238231243081, "grad_norm": 0.019146258011460304, "learning_rate": 4.957009258795113e-06, "loss": 0.0014, "step": 65380 }, { "epoch": 1.1050928234031585, "grad_norm": 0.05155621096491814, "learning_rate": 4.955534486012092e-06, "loss": 0.0014, "step": 65390 }, { "epoch": 1.105261823682009, "grad_norm": 0.11039842665195465, "learning_rate": 4.954059717097783e-06, "loss": 0.0017, "step": 65400 }, { "epoch": 1.1054308239608595, "grad_norm": 0.08442549407482147, "learning_rate": 4.952584952180504e-06, "loss": 0.0011, "step": 65410 }, { "epoch": 1.10559982423971, "grad_norm": 0.02455849200487137, "learning_rate": 4.951110191388562e-06, "loss": 0.0004, "step": 65420 }, { "epoch": 1.1057688245185604, "grad_norm": 0.06026019901037216, "learning_rate": 4.949635434850272e-06, "loss": 0.0007, "step": 65430 }, { "epoch": 1.105937824797411, "grad_norm": 0.0009374105138704181, "learning_rate": 4.948160682693941e-06, "loss": 0.0007, "step": 65440 }, { "epoch": 1.1061068250762613, "grad_norm": 0.026579247787594795, "learning_rate": 4.946685935047884e-06, "loss": 0.0007, "step": 65450 }, { "epoch": 1.106275825355112, "grad_norm": 0.04251579940319061, "learning_rate": 4.945211192040408e-06, "loss": 0.0006, "step": 65460 }, { "epoch": 1.1064448256339623, "grad_norm": 0.02869006060063839, "learning_rate": 4.943736453799824e-06, "loss": 0.0006, "step": 65470 }, { "epoch": 1.1066138259128127, "grad_norm": 0.1354653239250183, "learning_rate": 4.94226172045444e-06, "loss": 0.0011, "step": 65480 }, { "epoch": 1.1067828261916632, "grad_norm": 0.06362824887037277, "learning_rate": 4.940786992132568e-06, "loss": 0.0017, "step": 65490 }, { "epoch": 1.1069518264705136, "grad_norm": 0.02064809948205948, "learning_rate": 4.939312268962513e-06, "loss": 0.0011, "step": 65500 }, { "epoch": 1.1071208267493642, "grad_norm": 0.01066114567220211, "learning_rate": 4.9378375510725856e-06, "loss": 0.0015, "step": 65510 }, { "epoch": 1.1072898270282145, "grad_norm": 0.01520493533462286, "learning_rate": 4.936362838591091e-06, "loss": 0.0014, "step": 65520 }, { "epoch": 1.1074588273070651, "grad_norm": 0.0660543218255043, "learning_rate": 4.9348881316463406e-06, "loss": 0.0009, "step": 65530 }, { "epoch": 1.1076278275859155, "grad_norm": 0.02784154936671257, "learning_rate": 4.9334134303666355e-06, "loss": 0.0011, "step": 65540 }, { "epoch": 1.107796827864766, "grad_norm": 0.06158173829317093, "learning_rate": 4.931938734880287e-06, "loss": 0.0009, "step": 65550 }, { "epoch": 1.1079658281436164, "grad_norm": 0.02641584351658821, "learning_rate": 4.9304640453155956e-06, "loss": 0.001, "step": 65560 }, { "epoch": 1.1081348284224668, "grad_norm": 0.0512726865708828, "learning_rate": 4.928989361800871e-06, "loss": 0.0009, "step": 65570 }, { "epoch": 1.1083038287013174, "grad_norm": 0.04487888887524605, "learning_rate": 4.927514684464415e-06, "loss": 0.0016, "step": 65580 }, { "epoch": 1.1084728289801677, "grad_norm": 0.007292418275028467, "learning_rate": 4.926040013434532e-06, "loss": 0.0005, "step": 65590 }, { "epoch": 1.1086418292590183, "grad_norm": 0.14908470213413239, "learning_rate": 4.924565348839528e-06, "loss": 0.002, "step": 65600 }, { "epoch": 1.1088108295378687, "grad_norm": 0.12314429134130478, "learning_rate": 4.923090690807701e-06, "loss": 0.0017, "step": 65610 }, { "epoch": 1.1089798298167193, "grad_norm": 0.028657900169491768, "learning_rate": 4.9216160394673605e-06, "loss": 0.0008, "step": 65620 }, { "epoch": 1.1091488300955696, "grad_norm": 0.011674246750772, "learning_rate": 4.920141394946802e-06, "loss": 0.0013, "step": 65630 }, { "epoch": 1.1093178303744202, "grad_norm": 0.05232135206460953, "learning_rate": 4.918666757374331e-06, "loss": 0.001, "step": 65640 }, { "epoch": 1.1094868306532706, "grad_norm": 0.059035900980234146, "learning_rate": 4.917192126878244e-06, "loss": 0.0015, "step": 65650 }, { "epoch": 1.109655830932121, "grad_norm": 0.03378590941429138, "learning_rate": 4.9157175035868455e-06, "loss": 0.0022, "step": 65660 }, { "epoch": 1.1098248312109715, "grad_norm": 0.10433820635080338, "learning_rate": 4.91424288762843e-06, "loss": 0.0019, "step": 65670 }, { "epoch": 1.109993831489822, "grad_norm": 0.05964815244078636, "learning_rate": 4.9127682791313e-06, "loss": 0.0034, "step": 65680 }, { "epoch": 1.1101628317686725, "grad_norm": 0.03746423125267029, "learning_rate": 4.911293678223753e-06, "loss": 0.0016, "step": 65690 }, { "epoch": 1.1103318320475228, "grad_norm": 0.043141648173332214, "learning_rate": 4.909819085034085e-06, "loss": 0.001, "step": 65700 }, { "epoch": 1.1105008323263734, "grad_norm": 0.025546276941895485, "learning_rate": 4.9083444996905926e-06, "loss": 0.0006, "step": 65710 }, { "epoch": 1.1106698326052238, "grad_norm": 0.0015741289826110005, "learning_rate": 4.9068699223215756e-06, "loss": 0.0012, "step": 65720 }, { "epoch": 1.1108388328840744, "grad_norm": 0.05023641139268875, "learning_rate": 4.905395353055323e-06, "loss": 0.0013, "step": 65730 }, { "epoch": 1.1110078331629247, "grad_norm": 0.06063641980290413, "learning_rate": 4.903920792020136e-06, "loss": 0.0005, "step": 65740 }, { "epoch": 1.111176833441775, "grad_norm": 0.06451669335365295, "learning_rate": 4.902446239344305e-06, "loss": 0.0011, "step": 65750 }, { "epoch": 1.1113458337206257, "grad_norm": 0.03882955014705658, "learning_rate": 4.900971695156124e-06, "loss": 0.0014, "step": 65760 }, { "epoch": 1.111514833999476, "grad_norm": 0.048457760363817215, "learning_rate": 4.899497159583883e-06, "loss": 0.0015, "step": 65770 }, { "epoch": 1.1116838342783266, "grad_norm": 0.02654680795967579, "learning_rate": 4.898022632755878e-06, "loss": 0.0016, "step": 65780 }, { "epoch": 1.111852834557177, "grad_norm": 0.051069073379039764, "learning_rate": 4.896548114800397e-06, "loss": 0.0016, "step": 65790 }, { "epoch": 1.1120218348360276, "grad_norm": 0.013700217008590698, "learning_rate": 4.895073605845733e-06, "loss": 0.0009, "step": 65800 }, { "epoch": 1.112190835114878, "grad_norm": 0.03329027071595192, "learning_rate": 4.893599106020172e-06, "loss": 0.0013, "step": 65810 }, { "epoch": 1.1123598353937285, "grad_norm": 0.07942863553762436, "learning_rate": 4.892124615452007e-06, "loss": 0.0009, "step": 65820 }, { "epoch": 1.1125288356725789, "grad_norm": 0.07885333895683289, "learning_rate": 4.890650134269519e-06, "loss": 0.0008, "step": 65830 }, { "epoch": 1.1126978359514292, "grad_norm": 0.055787790566682816, "learning_rate": 4.8891756626010035e-06, "loss": 0.0012, "step": 65840 }, { "epoch": 1.1128668362302798, "grad_norm": 0.006853477098047733, "learning_rate": 4.887701200574739e-06, "loss": 0.0009, "step": 65850 }, { "epoch": 1.1130358365091302, "grad_norm": 0.016508853062987328, "learning_rate": 4.886226748319014e-06, "loss": 0.0008, "step": 65860 }, { "epoch": 1.1132048367879808, "grad_norm": 0.0349777527153492, "learning_rate": 4.884752305962115e-06, "loss": 0.0019, "step": 65870 }, { "epoch": 1.1133738370668311, "grad_norm": 0.0494348369538784, "learning_rate": 4.883277873632323e-06, "loss": 0.0012, "step": 65880 }, { "epoch": 1.1135428373456817, "grad_norm": 0.02532506175339222, "learning_rate": 4.881803451457922e-06, "loss": 0.0008, "step": 65890 }, { "epoch": 1.113711837624532, "grad_norm": 0.01580948196351528, "learning_rate": 4.8803290395671916e-06, "loss": 0.0008, "step": 65900 }, { "epoch": 1.1138808379033824, "grad_norm": 0.06238386780023575, "learning_rate": 4.8788546380884175e-06, "loss": 0.0018, "step": 65910 }, { "epoch": 1.114049838182233, "grad_norm": 0.04577694088220596, "learning_rate": 4.877380247149874e-06, "loss": 0.001, "step": 65920 }, { "epoch": 1.1142188384610834, "grad_norm": 0.03396439179778099, "learning_rate": 4.875905866879846e-06, "loss": 0.0009, "step": 65930 }, { "epoch": 1.114387838739934, "grad_norm": 0.03604747727513313, "learning_rate": 4.874431497406607e-06, "loss": 0.0013, "step": 65940 }, { "epoch": 1.1145568390187843, "grad_norm": 0.09017330408096313, "learning_rate": 4.8729571388584365e-06, "loss": 0.0011, "step": 65950 }, { "epoch": 1.114725839297635, "grad_norm": 0.48425760865211487, "learning_rate": 4.87148279136361e-06, "loss": 0.001, "step": 65960 }, { "epoch": 1.1148948395764853, "grad_norm": 0.06308889389038086, "learning_rate": 4.870008455050404e-06, "loss": 0.0013, "step": 65970 }, { "epoch": 1.1150638398553359, "grad_norm": 0.00627787783741951, "learning_rate": 4.868534130047092e-06, "loss": 0.0009, "step": 65980 }, { "epoch": 1.1152328401341862, "grad_norm": 0.042335394769907, "learning_rate": 4.867059816481948e-06, "loss": 0.0012, "step": 65990 }, { "epoch": 1.1154018404130366, "grad_norm": 0.033770300447940826, "learning_rate": 4.865585514483243e-06, "loss": 0.0014, "step": 66000 }, { "epoch": 1.1155708406918872, "grad_norm": 0.016967343166470528, "learning_rate": 4.864111224179251e-06, "loss": 0.0006, "step": 66010 }, { "epoch": 1.1157398409707375, "grad_norm": 0.014699154533445835, "learning_rate": 4.862636945698239e-06, "loss": 0.0012, "step": 66020 }, { "epoch": 1.1159088412495881, "grad_norm": 0.019063862040638924, "learning_rate": 4.861162679168481e-06, "loss": 0.0022, "step": 66030 }, { "epoch": 1.1160778415284385, "grad_norm": 0.04024651646614075, "learning_rate": 4.85968842471824e-06, "loss": 0.0005, "step": 66040 }, { "epoch": 1.116246841807289, "grad_norm": 0.03585030138492584, "learning_rate": 4.858214182475786e-06, "loss": 0.0015, "step": 66050 }, { "epoch": 1.1164158420861394, "grad_norm": 0.07168002426624298, "learning_rate": 4.856739952569386e-06, "loss": 0.0007, "step": 66060 }, { "epoch": 1.11658484236499, "grad_norm": 0.09482478350400925, "learning_rate": 4.855265735127305e-06, "loss": 0.0014, "step": 66070 }, { "epoch": 1.1167538426438404, "grad_norm": 0.03974776715040207, "learning_rate": 4.853791530277804e-06, "loss": 0.0006, "step": 66080 }, { "epoch": 1.1169228429226907, "grad_norm": 0.037464916706085205, "learning_rate": 4.852317338149151e-06, "loss": 0.0008, "step": 66090 }, { "epoch": 1.1170918432015413, "grad_norm": 0.14354050159454346, "learning_rate": 4.850843158869603e-06, "loss": 0.0013, "step": 66100 }, { "epoch": 1.1172608434803917, "grad_norm": 0.04343586042523384, "learning_rate": 4.849368992567422e-06, "loss": 0.0007, "step": 66110 }, { "epoch": 1.1174298437592423, "grad_norm": 0.05415260046720505, "learning_rate": 4.847894839370872e-06, "loss": 0.0016, "step": 66120 }, { "epoch": 1.1175988440380926, "grad_norm": 0.0037539496552199125, "learning_rate": 4.846420699408205e-06, "loss": 0.0007, "step": 66130 }, { "epoch": 1.1177678443169432, "grad_norm": 0.12358132749795914, "learning_rate": 4.844946572807684e-06, "loss": 0.0013, "step": 66140 }, { "epoch": 1.1179368445957936, "grad_norm": 0.024757402017712593, "learning_rate": 4.84347245969756e-06, "loss": 0.0013, "step": 66150 }, { "epoch": 1.118105844874644, "grad_norm": 0.13422614336013794, "learning_rate": 4.841998360206091e-06, "loss": 0.0008, "step": 66160 }, { "epoch": 1.1182748451534945, "grad_norm": 0.011023541912436485, "learning_rate": 4.840524274461531e-06, "loss": 0.0011, "step": 66170 }, { "epoch": 1.1184438454323449, "grad_norm": 0.025453826412558556, "learning_rate": 4.839050202592131e-06, "loss": 0.0016, "step": 66180 }, { "epoch": 1.1186128457111955, "grad_norm": 0.09494690597057343, "learning_rate": 4.837576144726142e-06, "loss": 0.001, "step": 66190 }, { "epoch": 1.1187818459900458, "grad_norm": 0.16201642155647278, "learning_rate": 4.836102100991818e-06, "loss": 0.0014, "step": 66200 }, { "epoch": 1.1189508462688964, "grad_norm": 0.026667365804314613, "learning_rate": 4.8346280715174034e-06, "loss": 0.0012, "step": 66210 }, { "epoch": 1.1191198465477468, "grad_norm": 0.06574511528015137, "learning_rate": 4.83315405643115e-06, "loss": 0.0015, "step": 66220 }, { "epoch": 1.1192888468265974, "grad_norm": 0.05048028379678726, "learning_rate": 4.8316800558612995e-06, "loss": 0.0021, "step": 66230 }, { "epoch": 1.1194578471054477, "grad_norm": 0.03519992530345917, "learning_rate": 4.830206069936102e-06, "loss": 0.0007, "step": 66240 }, { "epoch": 1.119626847384298, "grad_norm": 0.025307748466730118, "learning_rate": 4.828732098783796e-06, "loss": 0.0021, "step": 66250 }, { "epoch": 1.1197958476631487, "grad_norm": 0.03679641708731651, "learning_rate": 4.827258142532629e-06, "loss": 0.0006, "step": 66260 }, { "epoch": 1.119964847941999, "grad_norm": 0.12949617207050323, "learning_rate": 4.82578420131084e-06, "loss": 0.0009, "step": 66270 }, { "epoch": 1.1201338482208496, "grad_norm": 0.03277931734919548, "learning_rate": 4.824310275246671e-06, "loss": 0.0012, "step": 66280 }, { "epoch": 1.1203028484997, "grad_norm": 0.05668530985713005, "learning_rate": 4.8228363644683575e-06, "loss": 0.0009, "step": 66290 }, { "epoch": 1.1204718487785505, "grad_norm": 0.024053629487752914, "learning_rate": 4.821362469104141e-06, "loss": 0.0012, "step": 66300 }, { "epoch": 1.120640849057401, "grad_norm": 0.07685207575559616, "learning_rate": 4.819888589282254e-06, "loss": 0.0012, "step": 66310 }, { "epoch": 1.1208098493362515, "grad_norm": 0.18044798076152802, "learning_rate": 4.818414725130933e-06, "loss": 0.0008, "step": 66320 }, { "epoch": 1.1209788496151019, "grad_norm": 0.0692492127418518, "learning_rate": 4.816940876778409e-06, "loss": 0.001, "step": 66330 }, { "epoch": 1.1211478498939522, "grad_norm": 0.038930077105760574, "learning_rate": 4.8154670443529165e-06, "loss": 0.0011, "step": 66340 }, { "epoch": 1.1213168501728028, "grad_norm": 0.0401238352060318, "learning_rate": 4.813993227982685e-06, "loss": 0.0004, "step": 66350 }, { "epoch": 1.1214858504516532, "grad_norm": 0.034223757684230804, "learning_rate": 4.812519427795944e-06, "loss": 0.0016, "step": 66360 }, { "epoch": 1.1216548507305037, "grad_norm": 0.05561797693371773, "learning_rate": 4.811045643920921e-06, "loss": 0.001, "step": 66370 }, { "epoch": 1.1218238510093541, "grad_norm": 0.06368566304445267, "learning_rate": 4.809571876485842e-06, "loss": 0.0012, "step": 66380 }, { "epoch": 1.1219928512882047, "grad_norm": 0.07481633871793747, "learning_rate": 4.808098125618934e-06, "loss": 0.0016, "step": 66390 }, { "epoch": 1.122161851567055, "grad_norm": 0.033868514001369476, "learning_rate": 4.8066243914484175e-06, "loss": 0.0005, "step": 66400 }, { "epoch": 1.1223308518459056, "grad_norm": 1.8786673545837402, "learning_rate": 4.805150674102518e-06, "loss": 0.0009, "step": 66410 }, { "epoch": 1.122499852124756, "grad_norm": 0.06060846894979477, "learning_rate": 4.803676973709451e-06, "loss": 0.0006, "step": 66420 }, { "epoch": 1.1226688524036064, "grad_norm": 0.037026919424533844, "learning_rate": 4.802203290397441e-06, "loss": 0.0014, "step": 66430 }, { "epoch": 1.122837852682457, "grad_norm": 0.03700173646211624, "learning_rate": 4.800729624294701e-06, "loss": 0.001, "step": 66440 }, { "epoch": 1.1230068529613073, "grad_norm": 0.014215175062417984, "learning_rate": 4.799255975529451e-06, "loss": 0.0011, "step": 66450 }, { "epoch": 1.123175853240158, "grad_norm": 0.18786773085594177, "learning_rate": 4.797782344229902e-06, "loss": 0.0011, "step": 66460 }, { "epoch": 1.1233448535190083, "grad_norm": 0.014328324235975742, "learning_rate": 4.7963087305242705e-06, "loss": 0.0008, "step": 66470 }, { "epoch": 1.1235138537978588, "grad_norm": 0.07028224319219589, "learning_rate": 4.794835134540764e-06, "loss": 0.0007, "step": 66480 }, { "epoch": 1.1236828540767092, "grad_norm": 0.031667813658714294, "learning_rate": 4.793361556407598e-06, "loss": 0.001, "step": 66490 }, { "epoch": 1.1238518543555598, "grad_norm": 0.10028263181447983, "learning_rate": 4.791887996252976e-06, "loss": 0.0016, "step": 66500 }, { "epoch": 1.1240208546344101, "grad_norm": 0.01621779054403305, "learning_rate": 4.7904144542051065e-06, "loss": 0.001, "step": 66510 }, { "epoch": 1.1241898549132605, "grad_norm": 0.06411411613225937, "learning_rate": 4.788940930392195e-06, "loss": 0.0012, "step": 66520 }, { "epoch": 1.124358855192111, "grad_norm": 0.08739247918128967, "learning_rate": 4.787467424942446e-06, "loss": 0.002, "step": 66530 }, { "epoch": 1.1245278554709615, "grad_norm": 0.13845498859882355, "learning_rate": 4.78599393798406e-06, "loss": 0.0014, "step": 66540 }, { "epoch": 1.124696855749812, "grad_norm": 0.029364168643951416, "learning_rate": 4.7845204696452385e-06, "loss": 0.0018, "step": 66550 }, { "epoch": 1.1248658560286624, "grad_norm": 0.14498049020767212, "learning_rate": 4.783047020054179e-06, "loss": 0.0009, "step": 66560 }, { "epoch": 1.125034856307513, "grad_norm": 0.013374504633247852, "learning_rate": 4.7815735893390824e-06, "loss": 0.0012, "step": 66570 }, { "epoch": 1.1252038565863633, "grad_norm": 0.021575110033154488, "learning_rate": 4.78010017762814e-06, "loss": 0.0012, "step": 66580 }, { "epoch": 1.125372856865214, "grad_norm": 0.0907462015748024, "learning_rate": 4.778626785049548e-06, "loss": 0.0016, "step": 66590 }, { "epoch": 1.1255418571440643, "grad_norm": 0.010888309217989445, "learning_rate": 4.777153411731498e-06, "loss": 0.0011, "step": 66600 }, { "epoch": 1.1257108574229147, "grad_norm": 0.018221450969576836, "learning_rate": 4.775680057802181e-06, "loss": 0.001, "step": 66610 }, { "epoch": 1.1258798577017652, "grad_norm": 0.0347580760717392, "learning_rate": 4.774206723389787e-06, "loss": 0.0016, "step": 66620 }, { "epoch": 1.1260488579806156, "grad_norm": 0.005065588746219873, "learning_rate": 4.7727334086225e-06, "loss": 0.0017, "step": 66630 }, { "epoch": 1.1262178582594662, "grad_norm": 0.0021246259566396475, "learning_rate": 4.771260113628509e-06, "loss": 0.0006, "step": 66640 }, { "epoch": 1.1263868585383165, "grad_norm": 0.07427023351192474, "learning_rate": 4.769786838535996e-06, "loss": 0.0006, "step": 66650 }, { "epoch": 1.1265558588171671, "grad_norm": 0.10658226162195206, "learning_rate": 4.768313583473144e-06, "loss": 0.0015, "step": 66660 }, { "epoch": 1.1267248590960175, "grad_norm": 0.022666780278086662, "learning_rate": 4.7668403485681305e-06, "loss": 0.0014, "step": 66670 }, { "epoch": 1.126893859374868, "grad_norm": 0.051752593368291855, "learning_rate": 4.76536713394914e-06, "loss": 0.0014, "step": 66680 }, { "epoch": 1.1270628596537184, "grad_norm": 0.08772103488445282, "learning_rate": 4.763893939744343e-06, "loss": 0.0022, "step": 66690 }, { "epoch": 1.1272318599325688, "grad_norm": 0.04196293652057648, "learning_rate": 4.76242076608192e-06, "loss": 0.0016, "step": 66700 }, { "epoch": 1.1274008602114194, "grad_norm": 0.008633043617010117, "learning_rate": 4.760947613090038e-06, "loss": 0.0005, "step": 66710 }, { "epoch": 1.1275698604902697, "grad_norm": 0.04248087853193283, "learning_rate": 4.7594744808968746e-06, "loss": 0.0013, "step": 66720 }, { "epoch": 1.1277388607691203, "grad_norm": 0.050820015370845795, "learning_rate": 4.758001369630594e-06, "loss": 0.0011, "step": 66730 }, { "epoch": 1.1279078610479707, "grad_norm": 0.16319343447685242, "learning_rate": 4.756528279419369e-06, "loss": 0.0014, "step": 66740 }, { "epoch": 1.1280768613268213, "grad_norm": 0.09688375890254974, "learning_rate": 4.755055210391362e-06, "loss": 0.0014, "step": 66750 }, { "epoch": 1.1282458616056716, "grad_norm": 0.025112776085734367, "learning_rate": 4.75358216267474e-06, "loss": 0.0009, "step": 66760 }, { "epoch": 1.1284148618845222, "grad_norm": 0.045800063759088516, "learning_rate": 4.7521091363976615e-06, "loss": 0.0009, "step": 66770 }, { "epoch": 1.1285838621633726, "grad_norm": 0.0996677353978157, "learning_rate": 4.750636131688292e-06, "loss": 0.0007, "step": 66780 }, { "epoch": 1.128752862442223, "grad_norm": 0.06506549566984177, "learning_rate": 4.7491631486747845e-06, "loss": 0.0012, "step": 66790 }, { "epoch": 1.1289218627210735, "grad_norm": 0.03400241583585739, "learning_rate": 4.747690187485301e-06, "loss": 0.0009, "step": 66800 }, { "epoch": 1.129090862999924, "grad_norm": 0.013674917630851269, "learning_rate": 4.746217248247992e-06, "loss": 0.0011, "step": 66810 }, { "epoch": 1.1292598632787745, "grad_norm": 0.026519564911723137, "learning_rate": 4.7447443310910125e-06, "loss": 0.0008, "step": 66820 }, { "epoch": 1.1294288635576248, "grad_norm": 0.03270822390913963, "learning_rate": 4.7432714361425126e-06, "loss": 0.0007, "step": 66830 }, { "epoch": 1.1295978638364754, "grad_norm": 0.0035512656904757023, "learning_rate": 4.7417985635306425e-06, "loss": 0.0025, "step": 66840 }, { "epoch": 1.1297668641153258, "grad_norm": 0.26910850405693054, "learning_rate": 4.740325713383546e-06, "loss": 0.0023, "step": 66850 }, { "epoch": 1.1299358643941764, "grad_norm": 0.028381630778312683, "learning_rate": 4.7388528858293746e-06, "loss": 0.0011, "step": 66860 }, { "epoch": 1.1301048646730267, "grad_norm": 0.052497945725917816, "learning_rate": 4.7373800809962635e-06, "loss": 0.0006, "step": 66870 }, { "epoch": 1.130273864951877, "grad_norm": 0.003387107513844967, "learning_rate": 4.735907299012358e-06, "loss": 0.0012, "step": 66880 }, { "epoch": 1.1304428652307277, "grad_norm": 0.04416866600513458, "learning_rate": 4.7344345400058e-06, "loss": 0.0013, "step": 66890 }, { "epoch": 1.130611865509578, "grad_norm": 0.08238162845373154, "learning_rate": 4.732961804104721e-06, "loss": 0.0004, "step": 66900 }, { "epoch": 1.1307808657884286, "grad_norm": 0.10110866278409958, "learning_rate": 4.731489091437262e-06, "loss": 0.0018, "step": 66910 }, { "epoch": 1.130949866067279, "grad_norm": 0.030607158318161964, "learning_rate": 4.73001640213155e-06, "loss": 0.0022, "step": 66920 }, { "epoch": 1.1311188663461293, "grad_norm": 0.045541878789663315, "learning_rate": 4.7285437363157205e-06, "loss": 0.0011, "step": 66930 }, { "epoch": 1.13128786662498, "grad_norm": 0.12421415001153946, "learning_rate": 4.727071094117901e-06, "loss": 0.0018, "step": 66940 }, { "epoch": 1.1314568669038305, "grad_norm": 0.14488165080547333, "learning_rate": 4.725598475666218e-06, "loss": 0.0013, "step": 66950 }, { "epoch": 1.1316258671826809, "grad_norm": 0.05202499404549599, "learning_rate": 4.7241258810887966e-06, "loss": 0.0013, "step": 66960 }, { "epoch": 1.1317948674615312, "grad_norm": 0.011785942129790783, "learning_rate": 4.722653310513763e-06, "loss": 0.001, "step": 66970 }, { "epoch": 1.1319638677403818, "grad_norm": 0.10218577831983566, "learning_rate": 4.721180764069232e-06, "loss": 0.0015, "step": 66980 }, { "epoch": 1.1321328680192322, "grad_norm": 0.0605245977640152, "learning_rate": 4.719708241883329e-06, "loss": 0.0011, "step": 66990 }, { "epoch": 1.1323018682980828, "grad_norm": 0.03460155054926872, "learning_rate": 4.718235744084164e-06, "loss": 0.0008, "step": 67000 }, { "epoch": 1.1324708685769331, "grad_norm": 0.023951146751642227, "learning_rate": 4.716763270799856e-06, "loss": 0.0014, "step": 67010 }, { "epoch": 1.1326398688557835, "grad_norm": 0.04681273549795151, "learning_rate": 4.715290822158514e-06, "loss": 0.0012, "step": 67020 }, { "epoch": 1.132808869134634, "grad_norm": 0.03391709923744202, "learning_rate": 4.713818398288251e-06, "loss": 0.001, "step": 67030 }, { "epoch": 1.1329778694134844, "grad_norm": 0.0008612428791821003, "learning_rate": 4.7123459993171735e-06, "loss": 0.0013, "step": 67040 }, { "epoch": 1.133146869692335, "grad_norm": 0.04772978276014328, "learning_rate": 4.710873625373389e-06, "loss": 0.0012, "step": 67050 }, { "epoch": 1.1333158699711854, "grad_norm": 0.06175963208079338, "learning_rate": 4.709401276584998e-06, "loss": 0.0012, "step": 67060 }, { "epoch": 1.133484870250036, "grad_norm": 0.09109710901975632, "learning_rate": 4.707928953080106e-06, "loss": 0.0017, "step": 67070 }, { "epoch": 1.1336538705288863, "grad_norm": 0.021391067653894424, "learning_rate": 4.706456654986809e-06, "loss": 0.0008, "step": 67080 }, { "epoch": 1.133822870807737, "grad_norm": 0.04566289111971855, "learning_rate": 4.704984382433207e-06, "loss": 0.0011, "step": 67090 }, { "epoch": 1.1339918710865873, "grad_norm": 0.08351574093103409, "learning_rate": 4.70351213554739e-06, "loss": 0.0011, "step": 67100 }, { "epoch": 1.1341608713654376, "grad_norm": 0.05140083283185959, "learning_rate": 4.702039914457456e-06, "loss": 0.0007, "step": 67110 }, { "epoch": 1.1343298716442882, "grad_norm": 0.016718655824661255, "learning_rate": 4.700567719291493e-06, "loss": 0.0013, "step": 67120 }, { "epoch": 1.1344988719231386, "grad_norm": 0.14465704560279846, "learning_rate": 4.699095550177587e-06, "loss": 0.0007, "step": 67130 }, { "epoch": 1.1346678722019892, "grad_norm": 0.04567522555589676, "learning_rate": 4.697623407243827e-06, "loss": 0.0013, "step": 67140 }, { "epoch": 1.1348368724808395, "grad_norm": 0.012041511945426464, "learning_rate": 4.696151290618296e-06, "loss": 0.0011, "step": 67150 }, { "epoch": 1.1350058727596901, "grad_norm": 0.012735763564705849, "learning_rate": 4.6946792004290765e-06, "loss": 0.0008, "step": 67160 }, { "epoch": 1.1351748730385405, "grad_norm": 0.05037974193692207, "learning_rate": 4.693207136804244e-06, "loss": 0.0009, "step": 67170 }, { "epoch": 1.135343873317391, "grad_norm": 0.022585468366742134, "learning_rate": 4.691735099871878e-06, "loss": 0.0008, "step": 67180 }, { "epoch": 1.1355128735962414, "grad_norm": 0.06173543259501457, "learning_rate": 4.690263089760051e-06, "loss": 0.0011, "step": 67190 }, { "epoch": 1.1356818738750918, "grad_norm": 0.020044632256031036, "learning_rate": 4.688791106596837e-06, "loss": 0.0007, "step": 67200 }, { "epoch": 1.1358508741539424, "grad_norm": 0.15761317312717438, "learning_rate": 4.687319150510304e-06, "loss": 0.0036, "step": 67210 }, { "epoch": 1.1360198744327927, "grad_norm": 0.006815786939114332, "learning_rate": 4.68584722162852e-06, "loss": 0.0006, "step": 67220 }, { "epoch": 1.1361888747116433, "grad_norm": 0.04175468906760216, "learning_rate": 4.684375320079548e-06, "loss": 0.0016, "step": 67230 }, { "epoch": 1.1363578749904937, "grad_norm": 0.24287596344947815, "learning_rate": 4.682903445991456e-06, "loss": 0.0011, "step": 67240 }, { "epoch": 1.1365268752693443, "grad_norm": 0.05699858069419861, "learning_rate": 4.681431599492297e-06, "loss": 0.0008, "step": 67250 }, { "epoch": 1.1366958755481946, "grad_norm": 0.06530001759529114, "learning_rate": 4.679959780710136e-06, "loss": 0.0012, "step": 67260 }, { "epoch": 1.1368648758270452, "grad_norm": 0.08941524475812912, "learning_rate": 4.6784879897730215e-06, "loss": 0.0018, "step": 67270 }, { "epoch": 1.1370338761058956, "grad_norm": 0.04054303839802742, "learning_rate": 4.677016226809012e-06, "loss": 0.0015, "step": 67280 }, { "epoch": 1.137202876384746, "grad_norm": 0.061838019639253616, "learning_rate": 4.675544491946154e-06, "loss": 0.0008, "step": 67290 }, { "epoch": 1.1373718766635965, "grad_norm": 0.1300504505634308, "learning_rate": 4.674072785312497e-06, "loss": 0.0033, "step": 67300 }, { "epoch": 1.1375408769424469, "grad_norm": 0.010322902351617813, "learning_rate": 4.672601107036088e-06, "loss": 0.0005, "step": 67310 }, { "epoch": 1.1377098772212975, "grad_norm": 0.04041688144207001, "learning_rate": 4.671129457244968e-06, "loss": 0.0007, "step": 67320 }, { "epoch": 1.1378788775001478, "grad_norm": 0.09284889698028564, "learning_rate": 4.6696578360671785e-06, "loss": 0.0006, "step": 67330 }, { "epoch": 1.1380478777789984, "grad_norm": 0.0779462456703186, "learning_rate": 4.66818624363076e-06, "loss": 0.0008, "step": 67340 }, { "epoch": 1.1382168780578488, "grad_norm": 0.06471201032400131, "learning_rate": 4.666714680063743e-06, "loss": 0.0013, "step": 67350 }, { "epoch": 1.1383858783366994, "grad_norm": 0.0035908652935177088, "learning_rate": 4.665243145494167e-06, "loss": 0.0012, "step": 67360 }, { "epoch": 1.1385548786155497, "grad_norm": 0.1354219913482666, "learning_rate": 4.663771640050056e-06, "loss": 0.0016, "step": 67370 }, { "epoch": 1.1387238788944, "grad_norm": 0.08462052792310715, "learning_rate": 4.662300163859442e-06, "loss": 0.0006, "step": 67380 }, { "epoch": 1.1388928791732507, "grad_norm": 0.00776013545691967, "learning_rate": 4.660828717050352e-06, "loss": 0.0005, "step": 67390 }, { "epoch": 1.139061879452101, "grad_norm": 0.03530941531062126, "learning_rate": 4.659357299750804e-06, "loss": 0.0005, "step": 67400 }, { "epoch": 1.1392308797309516, "grad_norm": 0.022199373692274094, "learning_rate": 4.657885912088824e-06, "loss": 0.0013, "step": 67410 }, { "epoch": 1.139399880009802, "grad_norm": 0.005407856311649084, "learning_rate": 4.656414554192426e-06, "loss": 0.0012, "step": 67420 }, { "epoch": 1.1395688802886526, "grad_norm": 0.04724668711423874, "learning_rate": 4.654943226189627e-06, "loss": 0.0027, "step": 67430 }, { "epoch": 1.139737880567503, "grad_norm": 0.05699315667152405, "learning_rate": 4.653471928208437e-06, "loss": 0.0013, "step": 67440 }, { "epoch": 1.1399068808463535, "grad_norm": 0.0339827723801136, "learning_rate": 4.652000660376872e-06, "loss": 0.0011, "step": 67450 }, { "epoch": 1.1400758811252039, "grad_norm": 0.02883787266910076, "learning_rate": 4.650529422822932e-06, "loss": 0.001, "step": 67460 }, { "epoch": 1.1402448814040542, "grad_norm": 0.009684630669653416, "learning_rate": 4.6490582156746285e-06, "loss": 0.0011, "step": 67470 }, { "epoch": 1.1404138816829048, "grad_norm": 0.023157119750976562, "learning_rate": 4.647587039059958e-06, "loss": 0.0016, "step": 67480 }, { "epoch": 1.1405828819617552, "grad_norm": 0.07178868353366852, "learning_rate": 4.646115893106926e-06, "loss": 0.0015, "step": 67490 }, { "epoch": 1.1407518822406058, "grad_norm": 0.16174933314323425, "learning_rate": 4.644644777943522e-06, "loss": 0.0017, "step": 67500 }, { "epoch": 1.1409208825194561, "grad_norm": 0.06819438189268112, "learning_rate": 4.643173693697747e-06, "loss": 0.0012, "step": 67510 }, { "epoch": 1.1410898827983067, "grad_norm": 0.12232200801372528, "learning_rate": 4.641702640497587e-06, "loss": 0.0012, "step": 67520 }, { "epoch": 1.141258883077157, "grad_norm": 0.00984139647334814, "learning_rate": 4.640231618471036e-06, "loss": 0.001, "step": 67530 }, { "epoch": 1.1414278833560076, "grad_norm": 0.05852857604622841, "learning_rate": 4.638760627746075e-06, "loss": 0.0015, "step": 67540 }, { "epoch": 1.141596883634858, "grad_norm": 0.03056730516254902, "learning_rate": 4.637289668450692e-06, "loss": 0.0007, "step": 67550 }, { "epoch": 1.1417658839137084, "grad_norm": 0.062109462916851044, "learning_rate": 4.6358187407128625e-06, "loss": 0.0011, "step": 67560 }, { "epoch": 1.141934884192559, "grad_norm": 0.006442485377192497, "learning_rate": 4.63434784466057e-06, "loss": 0.0018, "step": 67570 }, { "epoch": 1.1421038844714093, "grad_norm": 0.06605667620897293, "learning_rate": 4.6328769804217835e-06, "loss": 0.0004, "step": 67580 }, { "epoch": 1.14227288475026, "grad_norm": 0.06362783908843994, "learning_rate": 4.63140614812448e-06, "loss": 0.001, "step": 67590 }, { "epoch": 1.1424418850291103, "grad_norm": 0.040291350334882736, "learning_rate": 4.6299353478966275e-06, "loss": 0.001, "step": 67600 }, { "epoch": 1.1426108853079608, "grad_norm": 0.15025590360164642, "learning_rate": 4.628464579866192e-06, "loss": 0.0021, "step": 67610 }, { "epoch": 1.1427798855868112, "grad_norm": 0.07118073850870132, "learning_rate": 4.626993844161139e-06, "loss": 0.0008, "step": 67620 }, { "epoch": 1.1429488858656618, "grad_norm": 0.29182419180870056, "learning_rate": 4.625523140909427e-06, "loss": 0.001, "step": 67630 }, { "epoch": 1.1431178861445122, "grad_norm": 0.013745423406362534, "learning_rate": 4.624052470239019e-06, "loss": 0.0014, "step": 67640 }, { "epoch": 1.1432868864233625, "grad_norm": 0.08487940579652786, "learning_rate": 4.6225818322778655e-06, "loss": 0.0009, "step": 67650 }, { "epoch": 1.143455886702213, "grad_norm": 0.04990014806389809, "learning_rate": 4.6211112271539235e-06, "loss": 0.0008, "step": 67660 }, { "epoch": 1.1436248869810635, "grad_norm": 0.011304205283522606, "learning_rate": 4.619640654995138e-06, "loss": 0.0009, "step": 67670 }, { "epoch": 1.143793887259914, "grad_norm": 0.09348660707473755, "learning_rate": 4.6181701159294605e-06, "loss": 0.0016, "step": 67680 }, { "epoch": 1.1439628875387644, "grad_norm": 0.016546789556741714, "learning_rate": 4.616699610084831e-06, "loss": 0.0009, "step": 67690 }, { "epoch": 1.144131887817615, "grad_norm": 0.03539736196398735, "learning_rate": 4.615229137589193e-06, "loss": 0.0017, "step": 67700 }, { "epoch": 1.1443008880964654, "grad_norm": 0.09660420566797256, "learning_rate": 4.613758698570485e-06, "loss": 0.0025, "step": 67710 }, { "epoch": 1.144469888375316, "grad_norm": 0.01964586041867733, "learning_rate": 4.612288293156642e-06, "loss": 0.0008, "step": 67720 }, { "epoch": 1.1446388886541663, "grad_norm": 0.10755769908428192, "learning_rate": 4.610817921475595e-06, "loss": 0.0008, "step": 67730 }, { "epoch": 1.1448078889330167, "grad_norm": 0.040303055197000504, "learning_rate": 4.609347583655275e-06, "loss": 0.0009, "step": 67740 }, { "epoch": 1.1449768892118672, "grad_norm": 0.04203404486179352, "learning_rate": 4.607877279823607e-06, "loss": 0.0009, "step": 67750 }, { "epoch": 1.1451458894907176, "grad_norm": 0.044235777109861374, "learning_rate": 4.606407010108518e-06, "loss": 0.001, "step": 67760 }, { "epoch": 1.1453148897695682, "grad_norm": 0.061796385794878006, "learning_rate": 4.604936774637923e-06, "loss": 0.0009, "step": 67770 }, { "epoch": 1.1454838900484186, "grad_norm": 0.03359169140458107, "learning_rate": 4.603466573539745e-06, "loss": 0.0007, "step": 67780 }, { "epoch": 1.1456528903272691, "grad_norm": 0.012582000344991684, "learning_rate": 4.601996406941895e-06, "loss": 0.0012, "step": 67790 }, { "epoch": 1.1458218906061195, "grad_norm": 0.07218701392412186, "learning_rate": 4.600526274972287e-06, "loss": 0.0011, "step": 67800 }, { "epoch": 1.14599089088497, "grad_norm": 0.033153899013996124, "learning_rate": 4.599056177758827e-06, "loss": 0.0009, "step": 67810 }, { "epoch": 1.1461598911638204, "grad_norm": 0.08208203315734863, "learning_rate": 4.597586115429424e-06, "loss": 0.0015, "step": 67820 }, { "epoch": 1.1463288914426708, "grad_norm": 0.05478335916996002, "learning_rate": 4.596116088111977e-06, "loss": 0.0006, "step": 67830 }, { "epoch": 1.1464978917215214, "grad_norm": 0.021915815770626068, "learning_rate": 4.594646095934389e-06, "loss": 0.0017, "step": 67840 }, { "epoch": 1.1466668920003718, "grad_norm": 0.023021148517727852, "learning_rate": 4.593176139024553e-06, "loss": 0.0032, "step": 67850 }, { "epoch": 1.1468358922792223, "grad_norm": 0.0049620214849710464, "learning_rate": 4.591706217510366e-06, "loss": 0.0011, "step": 67860 }, { "epoch": 1.1470048925580727, "grad_norm": 0.05627528950572014, "learning_rate": 4.590236331519714e-06, "loss": 0.0007, "step": 67870 }, { "epoch": 1.147173892836923, "grad_norm": 0.061663877218961716, "learning_rate": 4.588766481180487e-06, "loss": 0.0011, "step": 67880 }, { "epoch": 1.1473428931157736, "grad_norm": 0.08341017365455627, "learning_rate": 4.587296666620569e-06, "loss": 0.0021, "step": 67890 }, { "epoch": 1.147511893394624, "grad_norm": 0.012174475006759167, "learning_rate": 4.585826887967841e-06, "loss": 0.0006, "step": 67900 }, { "epoch": 1.1476808936734746, "grad_norm": 0.04059426859021187, "learning_rate": 4.584357145350181e-06, "loss": 0.0018, "step": 67910 }, { "epoch": 1.147849893952325, "grad_norm": 0.05350024253129959, "learning_rate": 4.5828874388954605e-06, "loss": 0.0009, "step": 67920 }, { "epoch": 1.1480188942311755, "grad_norm": 0.04600991681218147, "learning_rate": 4.581417768731558e-06, "loss": 0.0011, "step": 67930 }, { "epoch": 1.148187894510026, "grad_norm": 0.014942926354706287, "learning_rate": 4.579948134986334e-06, "loss": 0.0008, "step": 67940 }, { "epoch": 1.1483568947888765, "grad_norm": 0.02997707575559616, "learning_rate": 4.578478537787659e-06, "loss": 0.0012, "step": 67950 }, { "epoch": 1.1485258950677268, "grad_norm": 0.0712277963757515, "learning_rate": 4.577008977263393e-06, "loss": 0.0006, "step": 67960 }, { "epoch": 1.1486948953465772, "grad_norm": 0.020164404064416885, "learning_rate": 4.5755394535413976e-06, "loss": 0.0009, "step": 67970 }, { "epoch": 1.1488638956254278, "grad_norm": 0.06729643791913986, "learning_rate": 4.574069966749523e-06, "loss": 0.0012, "step": 67980 }, { "epoch": 1.1490328959042782, "grad_norm": 0.00353659363463521, "learning_rate": 4.572600517015627e-06, "loss": 0.0006, "step": 67990 }, { "epoch": 1.1492018961831287, "grad_norm": 0.09982866048812866, "learning_rate": 4.571131104467555e-06, "loss": 0.0013, "step": 68000 }, { "epoch": 1.149370896461979, "grad_norm": 0.016557859256863594, "learning_rate": 4.569661729233158e-06, "loss": 0.0009, "step": 68010 }, { "epoch": 1.1495398967408297, "grad_norm": 0.028532059863209724, "learning_rate": 4.568192391440272e-06, "loss": 0.0007, "step": 68020 }, { "epoch": 1.14970889701968, "grad_norm": 0.027837703004479408, "learning_rate": 4.566723091216743e-06, "loss": 0.0011, "step": 68030 }, { "epoch": 1.1498778972985306, "grad_norm": 0.049436330795288086, "learning_rate": 4.565253828690402e-06, "loss": 0.0011, "step": 68040 }, { "epoch": 1.150046897577381, "grad_norm": 0.039981693029403687, "learning_rate": 4.563784603989087e-06, "loss": 0.0009, "step": 68050 }, { "epoch": 1.1502158978562314, "grad_norm": 0.007820505648851395, "learning_rate": 4.562315417240622e-06, "loss": 0.0006, "step": 68060 }, { "epoch": 1.150384898135082, "grad_norm": 0.0816367045044899, "learning_rate": 4.560846268572838e-06, "loss": 0.001, "step": 68070 }, { "epoch": 1.1505538984139323, "grad_norm": 0.0554218627512455, "learning_rate": 4.559377158113557e-06, "loss": 0.0007, "step": 68080 }, { "epoch": 1.1507228986927829, "grad_norm": 0.002819223329424858, "learning_rate": 4.557908085990597e-06, "loss": 0.005, "step": 68090 }, { "epoch": 1.1508918989716332, "grad_norm": 0.04873186722397804, "learning_rate": 4.556439052331775e-06, "loss": 0.0014, "step": 68100 }, { "epoch": 1.1510608992504838, "grad_norm": 0.07898057997226715, "learning_rate": 4.554970057264907e-06, "loss": 0.0011, "step": 68110 }, { "epoch": 1.1512298995293342, "grad_norm": 0.04096459597349167, "learning_rate": 4.5535011009177965e-06, "loss": 0.0011, "step": 68120 }, { "epoch": 1.1513988998081848, "grad_norm": 0.050151173025369644, "learning_rate": 4.552032183418257e-06, "loss": 0.001, "step": 68130 }, { "epoch": 1.1515679000870351, "grad_norm": 0.06796563416719437, "learning_rate": 4.550563304894086e-06, "loss": 0.0009, "step": 68140 }, { "epoch": 1.1517369003658855, "grad_norm": 0.02465634234249592, "learning_rate": 4.549094465473085e-06, "loss": 0.0011, "step": 68150 }, { "epoch": 1.151905900644736, "grad_norm": 0.033169254660606384, "learning_rate": 4.547625665283051e-06, "loss": 0.0004, "step": 68160 }, { "epoch": 1.1520749009235864, "grad_norm": 0.018418651074171066, "learning_rate": 4.546156904451775e-06, "loss": 0.0018, "step": 68170 }, { "epoch": 1.152243901202437, "grad_norm": 0.0853998214006424, "learning_rate": 4.544688183107048e-06, "loss": 0.0009, "step": 68180 }, { "epoch": 1.1524129014812874, "grad_norm": 0.01995234563946724, "learning_rate": 4.5432195013766555e-06, "loss": 0.0019, "step": 68190 }, { "epoch": 1.152581901760138, "grad_norm": 0.0021944534964859486, "learning_rate": 4.541750859388379e-06, "loss": 0.0009, "step": 68200 }, { "epoch": 1.1527509020389883, "grad_norm": 0.06678059697151184, "learning_rate": 4.5402822572699976e-06, "loss": 0.0013, "step": 68210 }, { "epoch": 1.152919902317839, "grad_norm": 0.03664885088801384, "learning_rate": 4.538813695149289e-06, "loss": 0.0009, "step": 68220 }, { "epoch": 1.1530889025966893, "grad_norm": 0.05517364665865898, "learning_rate": 4.537345173154021e-06, "loss": 0.0006, "step": 68230 }, { "epoch": 1.1532579028755396, "grad_norm": 0.06198059394955635, "learning_rate": 4.535876691411967e-06, "loss": 0.0019, "step": 68240 }, { "epoch": 1.1534269031543902, "grad_norm": 0.012155015021562576, "learning_rate": 4.5344082500508874e-06, "loss": 0.0007, "step": 68250 }, { "epoch": 1.1535959034332406, "grad_norm": 0.04442523047327995, "learning_rate": 4.532939849198547e-06, "loss": 0.0009, "step": 68260 }, { "epoch": 1.1537649037120912, "grad_norm": 0.0031372327357530594, "learning_rate": 4.531471488982702e-06, "loss": 0.0013, "step": 68270 }, { "epoch": 1.1539339039909415, "grad_norm": 0.037043992429971695, "learning_rate": 4.530003169531108e-06, "loss": 0.0006, "step": 68280 }, { "epoch": 1.1541029042697921, "grad_norm": 0.018792105838656425, "learning_rate": 4.528534890971515e-06, "loss": 0.001, "step": 68290 }, { "epoch": 1.1542719045486425, "grad_norm": 0.053301285952329636, "learning_rate": 4.527066653431672e-06, "loss": 0.0012, "step": 68300 }, { "epoch": 1.154440904827493, "grad_norm": 0.18253421783447266, "learning_rate": 4.525598457039319e-06, "loss": 0.0008, "step": 68310 }, { "epoch": 1.1546099051063434, "grad_norm": 0.04951026290655136, "learning_rate": 4.524130301922203e-06, "loss": 0.0018, "step": 68320 }, { "epoch": 1.1547789053851938, "grad_norm": 0.06628241389989853, "learning_rate": 4.522662188208052e-06, "loss": 0.0008, "step": 68330 }, { "epoch": 1.1549479056640444, "grad_norm": 0.018393343314528465, "learning_rate": 4.521194116024607e-06, "loss": 0.0006, "step": 68340 }, { "epoch": 1.1551169059428947, "grad_norm": 0.10031246393918991, "learning_rate": 4.519726085499591e-06, "loss": 0.0017, "step": 68350 }, { "epoch": 1.1552859062217453, "grad_norm": 0.012092884629964828, "learning_rate": 4.518258096760734e-06, "loss": 0.001, "step": 68360 }, { "epoch": 1.1554549065005957, "grad_norm": 0.08911342918872833, "learning_rate": 4.5167901499357565e-06, "loss": 0.0015, "step": 68370 }, { "epoch": 1.1556239067794463, "grad_norm": 0.07730668783187866, "learning_rate": 4.515322245152377e-06, "loss": 0.001, "step": 68380 }, { "epoch": 1.1557929070582966, "grad_norm": 0.05149351805448532, "learning_rate": 4.513854382538309e-06, "loss": 0.0011, "step": 68390 }, { "epoch": 1.1559619073371472, "grad_norm": 0.05476520210504532, "learning_rate": 4.512386562221266e-06, "loss": 0.0018, "step": 68400 }, { "epoch": 1.1561309076159976, "grad_norm": 1.653435230255127, "learning_rate": 4.510918784328956e-06, "loss": 0.0016, "step": 68410 }, { "epoch": 1.156299907894848, "grad_norm": 0.015039338730275631, "learning_rate": 4.50945104898908e-06, "loss": 0.0009, "step": 68420 }, { "epoch": 1.1564689081736985, "grad_norm": 0.03901722654700279, "learning_rate": 4.507983356329341e-06, "loss": 0.0008, "step": 68430 }, { "epoch": 1.1566379084525489, "grad_norm": 0.07246369868516922, "learning_rate": 4.506515706477433e-06, "loss": 0.0007, "step": 68440 }, { "epoch": 1.1568069087313995, "grad_norm": 0.252845823764801, "learning_rate": 4.50504809956105e-06, "loss": 0.0006, "step": 68450 }, { "epoch": 1.1569759090102498, "grad_norm": 0.028666259720921516, "learning_rate": 4.503580535707879e-06, "loss": 0.0013, "step": 68460 }, { "epoch": 1.1571449092891004, "grad_norm": 0.02905077487230301, "learning_rate": 4.502113015045608e-06, "loss": 0.0007, "step": 68470 }, { "epoch": 1.1573139095679508, "grad_norm": 0.037265345454216, "learning_rate": 4.500645537701915e-06, "loss": 0.0006, "step": 68480 }, { "epoch": 1.1574829098468014, "grad_norm": 0.03981657326221466, "learning_rate": 4.499178103804483e-06, "loss": 0.0011, "step": 68490 }, { "epoch": 1.1576519101256517, "grad_norm": 0.0667591542005539, "learning_rate": 4.4977107134809796e-06, "loss": 0.0009, "step": 68500 }, { "epoch": 1.157820910404502, "grad_norm": 0.025540033355355263, "learning_rate": 4.49624336685908e-06, "loss": 0.0008, "step": 68510 }, { "epoch": 1.1579899106833527, "grad_norm": 0.03754876181483269, "learning_rate": 4.4947760640664465e-06, "loss": 0.0007, "step": 68520 }, { "epoch": 1.158158910962203, "grad_norm": 0.07566549628973007, "learning_rate": 4.493308805230745e-06, "loss": 0.0014, "step": 68530 }, { "epoch": 1.1583279112410536, "grad_norm": 0.043285906314849854, "learning_rate": 4.49184159047963e-06, "loss": 0.0004, "step": 68540 }, { "epoch": 1.158496911519904, "grad_norm": 0.008623643778264523, "learning_rate": 4.49037441994076e-06, "loss": 0.0007, "step": 68550 }, { "epoch": 1.1586659117987546, "grad_norm": 0.09939569234848022, "learning_rate": 4.488907293741785e-06, "loss": 0.0007, "step": 68560 }, { "epoch": 1.158834912077605, "grad_norm": 0.05292747914791107, "learning_rate": 4.487440212010352e-06, "loss": 0.0016, "step": 68570 }, { "epoch": 1.1590039123564555, "grad_norm": 0.03995686396956444, "learning_rate": 4.485973174874102e-06, "loss": 0.0006, "step": 68580 }, { "epoch": 1.1591729126353059, "grad_norm": 0.02431139536201954, "learning_rate": 4.484506182460679e-06, "loss": 0.0013, "step": 68590 }, { "epoch": 1.1593419129141562, "grad_norm": 0.07645460963249207, "learning_rate": 4.483039234897713e-06, "loss": 0.002, "step": 68600 }, { "epoch": 1.1595109131930068, "grad_norm": 0.012741954997181892, "learning_rate": 4.481572332312842e-06, "loss": 0.0016, "step": 68610 }, { "epoch": 1.1596799134718572, "grad_norm": 0.02713189460337162, "learning_rate": 4.480105474833686e-06, "loss": 0.0015, "step": 68620 }, { "epoch": 1.1598489137507078, "grad_norm": 0.07144239544868469, "learning_rate": 4.478638662587876e-06, "loss": 0.0028, "step": 68630 }, { "epoch": 1.1600179140295581, "grad_norm": 0.018587511032819748, "learning_rate": 4.477171895703026e-06, "loss": 0.0016, "step": 68640 }, { "epoch": 1.1601869143084087, "grad_norm": 0.13099528849124908, "learning_rate": 4.475705174306754e-06, "loss": 0.0011, "step": 68650 }, { "epoch": 1.160355914587259, "grad_norm": 0.0970952957868576, "learning_rate": 4.474238498526673e-06, "loss": 0.0012, "step": 68660 }, { "epoch": 1.1605249148661096, "grad_norm": 0.014326502569019794, "learning_rate": 4.47277186849039e-06, "loss": 0.0008, "step": 68670 }, { "epoch": 1.16069391514496, "grad_norm": 0.02737031690776348, "learning_rate": 4.47130528432551e-06, "loss": 0.0009, "step": 68680 }, { "epoch": 1.1608629154238104, "grad_norm": 0.04608790948987007, "learning_rate": 4.469838746159629e-06, "loss": 0.0017, "step": 68690 }, { "epoch": 1.161031915702661, "grad_norm": 0.0980270653963089, "learning_rate": 4.468372254120349e-06, "loss": 0.0017, "step": 68700 }, { "epoch": 1.1612009159815113, "grad_norm": 0.012178586795926094, "learning_rate": 4.466905808335256e-06, "loss": 0.0017, "step": 68710 }, { "epoch": 1.161369916260362, "grad_norm": 0.04473332688212395, "learning_rate": 4.465439408931943e-06, "loss": 0.0008, "step": 68720 }, { "epoch": 1.1615389165392123, "grad_norm": 0.006338580511510372, "learning_rate": 4.46397305603799e-06, "loss": 0.0008, "step": 68730 }, { "epoch": 1.1617079168180628, "grad_norm": 0.04377632215619087, "learning_rate": 4.4625067497809795e-06, "loss": 0.0012, "step": 68740 }, { "epoch": 1.1618769170969132, "grad_norm": 0.017771463841199875, "learning_rate": 4.4610404902884845e-06, "loss": 0.0008, "step": 68750 }, { "epoch": 1.1620459173757638, "grad_norm": 0.013027802109718323, "learning_rate": 4.459574277688078e-06, "loss": 0.001, "step": 68760 }, { "epoch": 1.1622149176546142, "grad_norm": 0.01781844161450863, "learning_rate": 4.458108112107328e-06, "loss": 0.0008, "step": 68770 }, { "epoch": 1.1623839179334645, "grad_norm": 0.03677886351943016, "learning_rate": 4.4566419936737995e-06, "loss": 0.0006, "step": 68780 }, { "epoch": 1.162552918212315, "grad_norm": 0.03237110748887062, "learning_rate": 4.455175922515048e-06, "loss": 0.0019, "step": 68790 }, { "epoch": 1.1627219184911655, "grad_norm": 0.1242651417851448, "learning_rate": 4.453709898758633e-06, "loss": 0.0006, "step": 68800 }, { "epoch": 1.162890918770016, "grad_norm": 0.18245325982570648, "learning_rate": 4.452243922532101e-06, "loss": 0.0018, "step": 68810 }, { "epoch": 1.1630599190488664, "grad_norm": 0.018882328644394875, "learning_rate": 4.450777993963004e-06, "loss": 0.0007, "step": 68820 }, { "epoch": 1.1632289193277168, "grad_norm": 0.03818240389227867, "learning_rate": 4.449312113178882e-06, "loss": 0.001, "step": 68830 }, { "epoch": 1.1633979196065674, "grad_norm": 0.06863964349031448, "learning_rate": 4.447846280307274e-06, "loss": 0.0014, "step": 68840 }, { "epoch": 1.1635669198854177, "grad_norm": 0.03688272461295128, "learning_rate": 4.446380495475715e-06, "loss": 0.0013, "step": 68850 }, { "epoch": 1.1637359201642683, "grad_norm": 0.2076423168182373, "learning_rate": 4.444914758811735e-06, "loss": 0.0015, "step": 68860 }, { "epoch": 1.1639049204431187, "grad_norm": 0.0038024107925593853, "learning_rate": 4.44344907044286e-06, "loss": 0.001, "step": 68870 }, { "epoch": 1.1640739207219692, "grad_norm": 0.021800760179758072, "learning_rate": 4.441983430496614e-06, "loss": 0.001, "step": 68880 }, { "epoch": 1.1642429210008196, "grad_norm": 0.007418881636112928, "learning_rate": 4.440517839100512e-06, "loss": 0.0013, "step": 68890 }, { "epoch": 1.1644119212796702, "grad_norm": 0.05220571160316467, "learning_rate": 4.43905229638207e-06, "loss": 0.0012, "step": 68900 }, { "epoch": 1.1645809215585206, "grad_norm": 0.010897345840930939, "learning_rate": 4.437586802468794e-06, "loss": 0.0006, "step": 68910 }, { "epoch": 1.164749921837371, "grad_norm": 0.14263969659805298, "learning_rate": 4.436121357488191e-06, "loss": 0.0031, "step": 68920 }, { "epoch": 1.1649189221162215, "grad_norm": 0.01010989025235176, "learning_rate": 4.434655961567764e-06, "loss": 0.0013, "step": 68930 }, { "epoch": 1.1650879223950719, "grad_norm": 0.027407299727201462, "learning_rate": 4.433190614835006e-06, "loss": 0.0011, "step": 68940 }, { "epoch": 1.1652569226739224, "grad_norm": 0.07728515565395355, "learning_rate": 4.431725317417412e-06, "loss": 0.0015, "step": 68950 }, { "epoch": 1.1654259229527728, "grad_norm": 0.03701232746243477, "learning_rate": 4.430260069442467e-06, "loss": 0.0006, "step": 68960 }, { "epoch": 1.1655949232316234, "grad_norm": 0.029807789251208305, "learning_rate": 4.428794871037659e-06, "loss": 0.0007, "step": 68970 }, { "epoch": 1.1657639235104738, "grad_norm": 0.10100337117910385, "learning_rate": 4.427329722330462e-06, "loss": 0.0011, "step": 68980 }, { "epoch": 1.1659329237893243, "grad_norm": 0.041717350482940674, "learning_rate": 4.425864623448357e-06, "loss": 0.0012, "step": 68990 }, { "epoch": 1.1661019240681747, "grad_norm": 0.3187407851219177, "learning_rate": 4.4243995745188076e-06, "loss": 0.0034, "step": 69000 }, { "epoch": 1.166270924347025, "grad_norm": 0.05901302397251129, "learning_rate": 4.4229345756692875e-06, "loss": 0.0008, "step": 69010 }, { "epoch": 1.1664399246258756, "grad_norm": 0.04284448176622391, "learning_rate": 4.421469627027253e-06, "loss": 0.0008, "step": 69020 }, { "epoch": 1.166608924904726, "grad_norm": 0.019676562398672104, "learning_rate": 4.4200047287201654e-06, "loss": 0.0022, "step": 69030 }, { "epoch": 1.1667779251835766, "grad_norm": 0.0075326645746827126, "learning_rate": 4.418539880875476e-06, "loss": 0.001, "step": 69040 }, { "epoch": 1.166946925462427, "grad_norm": 0.06856024265289307, "learning_rate": 4.4170750836206345e-06, "loss": 0.0014, "step": 69050 }, { "epoch": 1.1671159257412775, "grad_norm": 0.09046686440706253, "learning_rate": 4.415610337083084e-06, "loss": 0.0014, "step": 69060 }, { "epoch": 1.167284926020128, "grad_norm": 0.013177769258618355, "learning_rate": 4.4141456413902676e-06, "loss": 0.0014, "step": 69070 }, { "epoch": 1.1674539262989785, "grad_norm": 0.11559558659791946, "learning_rate": 4.412680996669616e-06, "loss": 0.0012, "step": 69080 }, { "epoch": 1.1676229265778288, "grad_norm": 0.08094480633735657, "learning_rate": 4.411216403048567e-06, "loss": 0.0007, "step": 69090 }, { "epoch": 1.1677919268566792, "grad_norm": 0.05502336844801903, "learning_rate": 4.4097518606545404e-06, "loss": 0.0011, "step": 69100 }, { "epoch": 1.1679609271355298, "grad_norm": 0.049272000789642334, "learning_rate": 4.408287369614965e-06, "loss": 0.0011, "step": 69110 }, { "epoch": 1.1681299274143802, "grad_norm": 0.10433492809534073, "learning_rate": 4.406822930057252e-06, "loss": 0.001, "step": 69120 }, { "epoch": 1.1682989276932307, "grad_norm": 0.009764991700649261, "learning_rate": 4.405358542108819e-06, "loss": 0.0008, "step": 69130 }, { "epoch": 1.168467927972081, "grad_norm": 0.016498325392603874, "learning_rate": 4.4038942058970735e-06, "loss": 0.0006, "step": 69140 }, { "epoch": 1.1686369282509317, "grad_norm": 0.06452282518148422, "learning_rate": 4.402429921549423e-06, "loss": 0.0013, "step": 69150 }, { "epoch": 1.168805928529782, "grad_norm": 0.0777597650885582, "learning_rate": 4.400965689193262e-06, "loss": 0.0008, "step": 69160 }, { "epoch": 1.1689749288086326, "grad_norm": 0.04923969507217407, "learning_rate": 4.399501508955988e-06, "loss": 0.0018, "step": 69170 }, { "epoch": 1.169143929087483, "grad_norm": 0.06249144300818443, "learning_rate": 4.398037380964995e-06, "loss": 0.0011, "step": 69180 }, { "epoch": 1.1693129293663334, "grad_norm": 0.015166563913226128, "learning_rate": 4.396573305347663e-06, "loss": 0.0012, "step": 69190 }, { "epoch": 1.169481929645184, "grad_norm": 0.03178955987095833, "learning_rate": 4.395109282231381e-06, "loss": 0.0006, "step": 69200 }, { "epoch": 1.1696509299240343, "grad_norm": 0.04976994916796684, "learning_rate": 4.393645311743519e-06, "loss": 0.0008, "step": 69210 }, { "epoch": 1.1698199302028849, "grad_norm": 0.02770996280014515, "learning_rate": 4.3921813940114545e-06, "loss": 0.001, "step": 69220 }, { "epoch": 1.1699889304817352, "grad_norm": 0.07976898550987244, "learning_rate": 4.390717529162553e-06, "loss": 0.0009, "step": 69230 }, { "epoch": 1.1701579307605858, "grad_norm": 0.012683488428592682, "learning_rate": 4.389253717324178e-06, "loss": 0.0006, "step": 69240 }, { "epoch": 1.1703269310394362, "grad_norm": 0.006705684121698141, "learning_rate": 4.387789958623689e-06, "loss": 0.001, "step": 69250 }, { "epoch": 1.1704959313182868, "grad_norm": 0.03242988511919975, "learning_rate": 4.386326253188441e-06, "loss": 0.0003, "step": 69260 }, { "epoch": 1.1706649315971371, "grad_norm": 0.04196963831782341, "learning_rate": 4.384862601145781e-06, "loss": 0.0004, "step": 69270 }, { "epoch": 1.1708339318759875, "grad_norm": 0.07474924623966217, "learning_rate": 4.383399002623057e-06, "loss": 0.0011, "step": 69280 }, { "epoch": 1.171002932154838, "grad_norm": 0.01058933325111866, "learning_rate": 4.3819354577476045e-06, "loss": 0.0003, "step": 69290 }, { "epoch": 1.1711719324336884, "grad_norm": 0.08922499418258667, "learning_rate": 4.380471966646765e-06, "loss": 0.0016, "step": 69300 }, { "epoch": 1.171340932712539, "grad_norm": 0.0907360389828682, "learning_rate": 4.3790085294478626e-06, "loss": 0.0014, "step": 69310 }, { "epoch": 1.1715099329913894, "grad_norm": 0.08829468488693237, "learning_rate": 4.377545146278228e-06, "loss": 0.0012, "step": 69320 }, { "epoch": 1.17167893327024, "grad_norm": 0.02312985248863697, "learning_rate": 4.376081817265182e-06, "loss": 0.0012, "step": 69330 }, { "epoch": 1.1718479335490903, "grad_norm": 0.010803410783410072, "learning_rate": 4.3746185425360416e-06, "loss": 0.0007, "step": 69340 }, { "epoch": 1.172016933827941, "grad_norm": 0.23178300261497498, "learning_rate": 4.373155322218116e-06, "loss": 0.0015, "step": 69350 }, { "epoch": 1.1721859341067913, "grad_norm": 0.01983078569173813, "learning_rate": 4.371692156438717e-06, "loss": 0.0005, "step": 69360 }, { "epoch": 1.1723549343856416, "grad_norm": 0.014242745004594326, "learning_rate": 4.370229045325142e-06, "loss": 0.0009, "step": 69370 }, { "epoch": 1.1725239346644922, "grad_norm": 0.03428000211715698, "learning_rate": 4.368765989004695e-06, "loss": 0.0006, "step": 69380 }, { "epoch": 1.1726929349433426, "grad_norm": 0.015881333500146866, "learning_rate": 4.3673029876046625e-06, "loss": 0.0012, "step": 69390 }, { "epoch": 1.1728619352221932, "grad_norm": 0.022723102942109108, "learning_rate": 4.3658400412523375e-06, "loss": 0.0011, "step": 69400 }, { "epoch": 1.1730309355010435, "grad_norm": 0.024130703881382942, "learning_rate": 4.364377150074998e-06, "loss": 0.0006, "step": 69410 }, { "epoch": 1.1731999357798941, "grad_norm": 0.06383432447910309, "learning_rate": 4.362914314199928e-06, "loss": 0.0039, "step": 69420 }, { "epoch": 1.1733689360587445, "grad_norm": 0.016523024067282677, "learning_rate": 4.3614515337544e-06, "loss": 0.001, "step": 69430 }, { "epoch": 1.173537936337595, "grad_norm": 0.016466936096549034, "learning_rate": 4.359988808865682e-06, "loss": 0.0011, "step": 69440 }, { "epoch": 1.1737069366164454, "grad_norm": 0.023523833602666855, "learning_rate": 4.358526139661039e-06, "loss": 0.0011, "step": 69450 }, { "epoch": 1.1738759368952958, "grad_norm": 0.052278563380241394, "learning_rate": 4.357063526267729e-06, "loss": 0.0009, "step": 69460 }, { "epoch": 1.1740449371741464, "grad_norm": 0.11861348897218704, "learning_rate": 4.355600968813009e-06, "loss": 0.0014, "step": 69470 }, { "epoch": 1.1742139374529967, "grad_norm": 0.028317276388406754, "learning_rate": 4.354138467424125e-06, "loss": 0.0013, "step": 69480 }, { "epoch": 1.1743829377318473, "grad_norm": 0.04897003620862961, "learning_rate": 4.352676022228326e-06, "loss": 0.0012, "step": 69490 }, { "epoch": 1.1745519380106977, "grad_norm": 0.029942458495497704, "learning_rate": 4.351213633352846e-06, "loss": 0.0017, "step": 69500 }, { "epoch": 1.1747209382895483, "grad_norm": 0.026986481621861458, "learning_rate": 4.349751300924926e-06, "loss": 0.001, "step": 69510 }, { "epoch": 1.1748899385683986, "grad_norm": 0.03264378383755684, "learning_rate": 4.348289025071792e-06, "loss": 0.0009, "step": 69520 }, { "epoch": 1.1750589388472492, "grad_norm": 0.038534559309482574, "learning_rate": 4.346826805920671e-06, "loss": 0.0012, "step": 69530 }, { "epoch": 1.1752279391260996, "grad_norm": 0.05850238725543022, "learning_rate": 4.345364643598782e-06, "loss": 0.0012, "step": 69540 }, { "epoch": 1.17539693940495, "grad_norm": 0.04611220955848694, "learning_rate": 4.343902538233342e-06, "loss": 0.0009, "step": 69550 }, { "epoch": 1.1755659396838005, "grad_norm": 0.03227304667234421, "learning_rate": 4.3424404899515584e-06, "loss": 0.0008, "step": 69560 }, { "epoch": 1.1757349399626509, "grad_norm": 0.051997821778059006, "learning_rate": 4.34097849888064e-06, "loss": 0.001, "step": 69570 }, { "epoch": 1.1759039402415015, "grad_norm": 0.055068809539079666, "learning_rate": 4.339516565147783e-06, "loss": 0.0008, "step": 69580 }, { "epoch": 1.1760729405203518, "grad_norm": 0.01576688513159752, "learning_rate": 4.3380546888801875e-06, "loss": 0.0017, "step": 69590 }, { "epoch": 1.1762419407992024, "grad_norm": 0.024856723845005035, "learning_rate": 4.3365928702050395e-06, "loss": 0.0008, "step": 69600 }, { "epoch": 1.1764109410780528, "grad_norm": 0.020589660853147507, "learning_rate": 4.335131109249527e-06, "loss": 0.0011, "step": 69610 }, { "epoch": 1.1765799413569034, "grad_norm": 0.02228492684662342, "learning_rate": 4.333669406140828e-06, "loss": 0.0008, "step": 69620 }, { "epoch": 1.1767489416357537, "grad_norm": 0.033601656556129456, "learning_rate": 4.332207761006121e-06, "loss": 0.0013, "step": 69630 }, { "epoch": 1.176917941914604, "grad_norm": 0.03049769066274166, "learning_rate": 4.330746173972573e-06, "loss": 0.0007, "step": 69640 }, { "epoch": 1.1770869421934547, "grad_norm": 0.07383257150650024, "learning_rate": 4.329284645167351e-06, "loss": 0.0002, "step": 69650 }, { "epoch": 1.177255942472305, "grad_norm": 0.0007665101438760757, "learning_rate": 4.327823174717614e-06, "loss": 0.0011, "step": 69660 }, { "epoch": 1.1774249427511556, "grad_norm": 0.03920375555753708, "learning_rate": 4.326361762750519e-06, "loss": 0.0006, "step": 69670 }, { "epoch": 1.177593943030006, "grad_norm": 0.018308712169528008, "learning_rate": 4.324900409393212e-06, "loss": 0.0007, "step": 69680 }, { "epoch": 1.1777629433088563, "grad_norm": 0.030627572908997536, "learning_rate": 4.3234391147728415e-06, "loss": 0.0018, "step": 69690 }, { "epoch": 1.177931943587707, "grad_norm": 0.042753156274557114, "learning_rate": 4.321977879016547e-06, "loss": 0.0007, "step": 69700 }, { "epoch": 1.1781009438665575, "grad_norm": 0.0701015368103981, "learning_rate": 4.320516702251461e-06, "loss": 0.001, "step": 69710 }, { "epoch": 1.1782699441454079, "grad_norm": 0.031348250806331635, "learning_rate": 4.319055584604714e-06, "loss": 0.0013, "step": 69720 }, { "epoch": 1.1784389444242582, "grad_norm": 0.01220119558274746, "learning_rate": 4.31759452620343e-06, "loss": 0.0008, "step": 69730 }, { "epoch": 1.1786079447031088, "grad_norm": 0.05721985921263695, "learning_rate": 4.316133527174731e-06, "loss": 0.0011, "step": 69740 }, { "epoch": 1.1787769449819592, "grad_norm": 0.04866541922092438, "learning_rate": 4.314672587645726e-06, "loss": 0.0012, "step": 69750 }, { "epoch": 1.1789459452608098, "grad_norm": 0.11219864338636398, "learning_rate": 4.313211707743529e-06, "loss": 0.0018, "step": 69760 }, { "epoch": 1.1791149455396601, "grad_norm": 0.010870439931750298, "learning_rate": 4.311750887595238e-06, "loss": 0.0007, "step": 69770 }, { "epoch": 1.1792839458185105, "grad_norm": 0.032270606607198715, "learning_rate": 4.310290127327957e-06, "loss": 0.0009, "step": 69780 }, { "epoch": 1.179452946097361, "grad_norm": 0.11621737480163574, "learning_rate": 4.308829427068775e-06, "loss": 0.001, "step": 69790 }, { "epoch": 1.1796219463762114, "grad_norm": 0.027848485857248306, "learning_rate": 4.307368786944782e-06, "loss": 0.0013, "step": 69800 }, { "epoch": 1.179790946655062, "grad_norm": 0.015552806667983532, "learning_rate": 4.3059082070830604e-06, "loss": 0.0008, "step": 69810 }, { "epoch": 1.1799599469339124, "grad_norm": 0.09121126681566238, "learning_rate": 4.3044476876106876e-06, "loss": 0.0009, "step": 69820 }, { "epoch": 1.180128947212763, "grad_norm": 0.037971653044223785, "learning_rate": 4.302987228654735e-06, "loss": 0.0008, "step": 69830 }, { "epoch": 1.1802979474916133, "grad_norm": 0.09327095001935959, "learning_rate": 4.301526830342274e-06, "loss": 0.001, "step": 69840 }, { "epoch": 1.180466947770464, "grad_norm": 0.09236549586057663, "learning_rate": 4.3000664928003594e-06, "loss": 0.0009, "step": 69850 }, { "epoch": 1.1806359480493143, "grad_norm": 0.027757450938224792, "learning_rate": 4.2986062161560534e-06, "loss": 0.0012, "step": 69860 }, { "epoch": 1.1808049483281646, "grad_norm": 0.3103329837322235, "learning_rate": 4.297146000536403e-06, "loss": 0.001, "step": 69870 }, { "epoch": 1.1809739486070152, "grad_norm": 0.035489924252033234, "learning_rate": 4.295685846068459e-06, "loss": 0.0005, "step": 69880 }, { "epoch": 1.1811429488858656, "grad_norm": 0.029097232967615128, "learning_rate": 4.2942257528792555e-06, "loss": 0.0014, "step": 69890 }, { "epoch": 1.1813119491647162, "grad_norm": 0.24144315719604492, "learning_rate": 4.292765721095833e-06, "loss": 0.0019, "step": 69900 }, { "epoch": 1.1814809494435665, "grad_norm": 0.022012850269675255, "learning_rate": 4.2913057508452175e-06, "loss": 0.0015, "step": 69910 }, { "epoch": 1.181649949722417, "grad_norm": 0.03142053633928299, "learning_rate": 4.289845842254438e-06, "loss": 0.0017, "step": 69920 }, { "epoch": 1.1818189500012675, "grad_norm": 0.040227316319942474, "learning_rate": 4.2883859954505085e-06, "loss": 0.0012, "step": 69930 }, { "epoch": 1.181987950280118, "grad_norm": 0.055836841464042664, "learning_rate": 4.286926210560446e-06, "loss": 0.0013, "step": 69940 }, { "epoch": 1.1821569505589684, "grad_norm": 0.04443049803376198, "learning_rate": 4.2854664877112595e-06, "loss": 0.0013, "step": 69950 }, { "epoch": 1.1823259508378188, "grad_norm": 0.013825136236846447, "learning_rate": 4.284006827029949e-06, "loss": 0.0016, "step": 69960 }, { "epoch": 1.1824949511166694, "grad_norm": 0.05497096851468086, "learning_rate": 4.2825472286435145e-06, "loss": 0.0007, "step": 69970 }, { "epoch": 1.1826639513955197, "grad_norm": 0.024187199771404266, "learning_rate": 4.281087692678946e-06, "loss": 0.0005, "step": 69980 }, { "epoch": 1.1828329516743703, "grad_norm": 0.05082495138049126, "learning_rate": 4.279628219263235e-06, "loss": 0.001, "step": 69990 }, { "epoch": 1.1830019519532207, "grad_norm": 0.13151904940605164, "learning_rate": 4.278168808523355e-06, "loss": 0.0024, "step": 70000 }, { "epoch": 1.1831709522320712, "grad_norm": 0.004281432367861271, "learning_rate": 4.2767094605862875e-06, "loss": 0.0015, "step": 70010 }, { "epoch": 1.1833399525109216, "grad_norm": 0.01116862054914236, "learning_rate": 4.275250175579e-06, "loss": 0.0016, "step": 70020 }, { "epoch": 1.1835089527897722, "grad_norm": 0.12776902318000793, "learning_rate": 4.273790953628462e-06, "loss": 0.0021, "step": 70030 }, { "epoch": 1.1836779530686226, "grad_norm": 0.002477040281519294, "learning_rate": 4.272331794861627e-06, "loss": 0.0007, "step": 70040 }, { "epoch": 1.183846953347473, "grad_norm": 0.02622845210134983, "learning_rate": 4.270872699405454e-06, "loss": 0.0009, "step": 70050 }, { "epoch": 1.1840159536263235, "grad_norm": 0.04474570229649544, "learning_rate": 4.2694136673868855e-06, "loss": 0.0011, "step": 70060 }, { "epoch": 1.1841849539051739, "grad_norm": 0.037882525473833084, "learning_rate": 4.267954698932871e-06, "loss": 0.001, "step": 70070 }, { "epoch": 1.1843539541840244, "grad_norm": 0.12966550886631012, "learning_rate": 4.266495794170342e-06, "loss": 0.0012, "step": 70080 }, { "epoch": 1.1845229544628748, "grad_norm": 0.023467622697353363, "learning_rate": 4.2650369532262335e-06, "loss": 0.0008, "step": 70090 }, { "epoch": 1.1846919547417254, "grad_norm": 0.07388714700937271, "learning_rate": 4.263578176227471e-06, "loss": 0.002, "step": 70100 }, { "epoch": 1.1848609550205758, "grad_norm": 0.07243242114782333, "learning_rate": 4.2621194633009764e-06, "loss": 0.0008, "step": 70110 }, { "epoch": 1.1850299552994263, "grad_norm": 0.016735875979065895, "learning_rate": 4.260660814573662e-06, "loss": 0.0006, "step": 70120 }, { "epoch": 1.1851989555782767, "grad_norm": 0.05063145235180855, "learning_rate": 4.25920223017244e-06, "loss": 0.0009, "step": 70130 }, { "epoch": 1.185367955857127, "grad_norm": 0.05517589673399925, "learning_rate": 4.257743710224212e-06, "loss": 0.0011, "step": 70140 }, { "epoch": 1.1855369561359776, "grad_norm": 0.004348905757069588, "learning_rate": 4.25628525485588e-06, "loss": 0.0011, "step": 70150 }, { "epoch": 1.185705956414828, "grad_norm": 0.17707771062850952, "learning_rate": 4.254826864194332e-06, "loss": 0.0015, "step": 70160 }, { "epoch": 1.1858749566936786, "grad_norm": 0.0887473076581955, "learning_rate": 4.253368538366458e-06, "loss": 0.0013, "step": 70170 }, { "epoch": 1.186043956972529, "grad_norm": 0.021793309599161148, "learning_rate": 4.251910277499138e-06, "loss": 0.0012, "step": 70180 }, { "epoch": 1.1862129572513795, "grad_norm": 0.031324952840805054, "learning_rate": 4.250452081719248e-06, "loss": 0.0014, "step": 70190 }, { "epoch": 1.18638195753023, "grad_norm": 0.028805961832404137, "learning_rate": 4.2489939511536595e-06, "loss": 0.0009, "step": 70200 }, { "epoch": 1.1865509578090805, "grad_norm": 0.009609505534172058, "learning_rate": 4.247535885929235e-06, "loss": 0.0006, "step": 70210 }, { "epoch": 1.1867199580879308, "grad_norm": 0.023490216583013535, "learning_rate": 4.2460778861728366e-06, "loss": 0.0008, "step": 70220 }, { "epoch": 1.1868889583667812, "grad_norm": 0.0962447077035904, "learning_rate": 4.244619952011312e-06, "loss": 0.0008, "step": 70230 }, { "epoch": 1.1870579586456318, "grad_norm": 0.04374274984002113, "learning_rate": 4.243162083571514e-06, "loss": 0.0004, "step": 70240 }, { "epoch": 1.1872269589244822, "grad_norm": 0.05494653433561325, "learning_rate": 4.24170428098028e-06, "loss": 0.0009, "step": 70250 }, { "epoch": 1.1873959592033327, "grad_norm": 0.017951268702745438, "learning_rate": 4.240246544364449e-06, "loss": 0.0025, "step": 70260 }, { "epoch": 1.187564959482183, "grad_norm": 0.03345295041799545, "learning_rate": 4.238788873850848e-06, "loss": 0.001, "step": 70270 }, { "epoch": 1.1877339597610337, "grad_norm": 0.06794202327728271, "learning_rate": 4.237331269566304e-06, "loss": 0.0011, "step": 70280 }, { "epoch": 1.187902960039884, "grad_norm": 0.2515491247177124, "learning_rate": 4.2358737316376355e-06, "loss": 0.0021, "step": 70290 }, { "epoch": 1.1880719603187346, "grad_norm": 0.03184161335229874, "learning_rate": 4.234416260191654e-06, "loss": 0.0013, "step": 70300 }, { "epoch": 1.188240960597585, "grad_norm": 0.11049087345600128, "learning_rate": 4.232958855355166e-06, "loss": 0.001, "step": 70310 }, { "epoch": 1.1884099608764354, "grad_norm": 0.06749023497104645, "learning_rate": 4.231501517254977e-06, "loss": 0.0006, "step": 70320 }, { "epoch": 1.188578961155286, "grad_norm": 0.014731463976204395, "learning_rate": 4.2300442460178766e-06, "loss": 0.0012, "step": 70330 }, { "epoch": 1.1887479614341363, "grad_norm": 0.02106318809092045, "learning_rate": 4.228587041770659e-06, "loss": 0.0011, "step": 70340 }, { "epoch": 1.1889169617129869, "grad_norm": 0.06880449503660202, "learning_rate": 4.227129904640105e-06, "loss": 0.001, "step": 70350 }, { "epoch": 1.1890859619918372, "grad_norm": 0.04685996472835541, "learning_rate": 4.225672834752996e-06, "loss": 0.001, "step": 70360 }, { "epoch": 1.1892549622706878, "grad_norm": 0.07952843606472015, "learning_rate": 4.224215832236099e-06, "loss": 0.0011, "step": 70370 }, { "epoch": 1.1894239625495382, "grad_norm": 0.1698327362537384, "learning_rate": 4.222758897216186e-06, "loss": 0.0014, "step": 70380 }, { "epoch": 1.1895929628283888, "grad_norm": 0.09234167635440826, "learning_rate": 4.221302029820013e-06, "loss": 0.0011, "step": 70390 }, { "epoch": 1.1897619631072391, "grad_norm": 0.05689745023846626, "learning_rate": 4.219845230174338e-06, "loss": 0.0011, "step": 70400 }, { "epoch": 1.1899309633860895, "grad_norm": 0.08529359847307205, "learning_rate": 4.2183884984059055e-06, "loss": 0.0009, "step": 70410 }, { "epoch": 1.19009996366494, "grad_norm": 0.009383260272443295, "learning_rate": 4.2169318346414634e-06, "loss": 0.0009, "step": 70420 }, { "epoch": 1.1902689639437904, "grad_norm": 0.07850052416324615, "learning_rate": 4.215475239007744e-06, "loss": 0.0012, "step": 70430 }, { "epoch": 1.190437964222641, "grad_norm": 0.06158560886979103, "learning_rate": 4.214018711631479e-06, "loss": 0.0011, "step": 70440 }, { "epoch": 1.1906069645014914, "grad_norm": 0.04493800550699234, "learning_rate": 4.212562252639397e-06, "loss": 0.0012, "step": 70450 }, { "epoch": 1.190775964780342, "grad_norm": 0.023765722289681435, "learning_rate": 4.211105862158212e-06, "loss": 0.0013, "step": 70460 }, { "epoch": 1.1909449650591923, "grad_norm": 0.022813871502876282, "learning_rate": 4.2096495403146414e-06, "loss": 0.0005, "step": 70470 }, { "epoch": 1.191113965338043, "grad_norm": 0.14413927495479584, "learning_rate": 4.2081932872353884e-06, "loss": 0.0012, "step": 70480 }, { "epoch": 1.1912829656168933, "grad_norm": 0.5556153059005737, "learning_rate": 4.206737103047156e-06, "loss": 0.0008, "step": 70490 }, { "epoch": 1.1914519658957436, "grad_norm": 0.057843245565891266, "learning_rate": 4.205280987876638e-06, "loss": 0.0014, "step": 70500 }, { "epoch": 1.1916209661745942, "grad_norm": 0.01688377559185028, "learning_rate": 4.203824941850527e-06, "loss": 0.0013, "step": 70510 }, { "epoch": 1.1917899664534446, "grad_norm": 0.04101600497961044, "learning_rate": 4.202368965095502e-06, "loss": 0.0013, "step": 70520 }, { "epoch": 1.1919589667322952, "grad_norm": 0.10525278747081757, "learning_rate": 4.2009130577382435e-06, "loss": 0.0009, "step": 70530 }, { "epoch": 1.1921279670111455, "grad_norm": 0.006513623986393213, "learning_rate": 4.199457219905418e-06, "loss": 0.0009, "step": 70540 }, { "epoch": 1.1922969672899961, "grad_norm": 0.03790920600295067, "learning_rate": 4.198001451723696e-06, "loss": 0.0005, "step": 70550 }, { "epoch": 1.1924659675688465, "grad_norm": 0.00907865073531866, "learning_rate": 4.196545753319731e-06, "loss": 0.0004, "step": 70560 }, { "epoch": 1.192634967847697, "grad_norm": 0.0359298475086689, "learning_rate": 4.1950901248201795e-06, "loss": 0.0006, "step": 70570 }, { "epoch": 1.1928039681265474, "grad_norm": 0.008871739730238914, "learning_rate": 4.193634566351687e-06, "loss": 0.0004, "step": 70580 }, { "epoch": 1.1929729684053978, "grad_norm": 0.06354521214962006, "learning_rate": 4.192179078040893e-06, "loss": 0.0009, "step": 70590 }, { "epoch": 1.1931419686842484, "grad_norm": 0.047149330377578735, "learning_rate": 4.190723660014434e-06, "loss": 0.0008, "step": 70600 }, { "epoch": 1.1933109689630987, "grad_norm": 0.08436896651983261, "learning_rate": 4.189268312398938e-06, "loss": 0.0017, "step": 70610 }, { "epoch": 1.1934799692419493, "grad_norm": 0.050835512578487396, "learning_rate": 4.187813035321026e-06, "loss": 0.0009, "step": 70620 }, { "epoch": 1.1936489695207997, "grad_norm": 0.07753974944353104, "learning_rate": 4.186357828907317e-06, "loss": 0.0009, "step": 70630 }, { "epoch": 1.19381796979965, "grad_norm": 0.05515003204345703, "learning_rate": 4.184902693284417e-06, "loss": 0.0009, "step": 70640 }, { "epoch": 1.1939869700785006, "grad_norm": 0.10628211498260498, "learning_rate": 4.1834476285789335e-06, "loss": 0.0011, "step": 70650 }, { "epoch": 1.194155970357351, "grad_norm": 0.02622995153069496, "learning_rate": 4.1819926349174605e-06, "loss": 0.001, "step": 70660 }, { "epoch": 1.1943249706362016, "grad_norm": 0.06788217276334763, "learning_rate": 4.180537712426593e-06, "loss": 0.0011, "step": 70670 }, { "epoch": 1.194493970915052, "grad_norm": 0.0016145723639056087, "learning_rate": 4.179082861232914e-06, "loss": 0.0008, "step": 70680 }, { "epoch": 1.1946629711939025, "grad_norm": 0.012697342783212662, "learning_rate": 4.177628081463005e-06, "loss": 0.001, "step": 70690 }, { "epoch": 1.1948319714727529, "grad_norm": 0.010623389855027199, "learning_rate": 4.176173373243436e-06, "loss": 0.0012, "step": 70700 }, { "epoch": 1.1950009717516035, "grad_norm": 0.04370303824543953, "learning_rate": 4.174718736700774e-06, "loss": 0.002, "step": 70710 }, { "epoch": 1.1951699720304538, "grad_norm": 0.04976026713848114, "learning_rate": 4.173264171961584e-06, "loss": 0.0008, "step": 70720 }, { "epoch": 1.1953389723093042, "grad_norm": 0.04589367285370827, "learning_rate": 4.171809679152414e-06, "loss": 0.0027, "step": 70730 }, { "epoch": 1.1955079725881548, "grad_norm": 0.014074057340621948, "learning_rate": 4.1703552583998165e-06, "loss": 0.0019, "step": 70740 }, { "epoch": 1.1956769728670051, "grad_norm": 0.10194515436887741, "learning_rate": 4.168900909830329e-06, "loss": 0.0012, "step": 70750 }, { "epoch": 1.1958459731458557, "grad_norm": 0.0017360273050144315, "learning_rate": 4.167446633570492e-06, "loss": 0.0009, "step": 70760 }, { "epoch": 1.196014973424706, "grad_norm": 0.05988433584570885, "learning_rate": 4.165992429746829e-06, "loss": 0.0015, "step": 70770 }, { "epoch": 1.1961839737035567, "grad_norm": 0.010143890045583248, "learning_rate": 4.164538298485866e-06, "loss": 0.0015, "step": 70780 }, { "epoch": 1.196352973982407, "grad_norm": 0.023697197437286377, "learning_rate": 4.163084239914119e-06, "loss": 0.001, "step": 70790 }, { "epoch": 1.1965219742612576, "grad_norm": 0.06591352820396423, "learning_rate": 4.1616302541581e-06, "loss": 0.0012, "step": 70800 }, { "epoch": 1.196690974540108, "grad_norm": 0.11869916319847107, "learning_rate": 4.160176341344308e-06, "loss": 0.0018, "step": 70810 }, { "epoch": 1.1968599748189583, "grad_norm": 0.02505866438150406, "learning_rate": 4.158722501599246e-06, "loss": 0.0007, "step": 70820 }, { "epoch": 1.197028975097809, "grad_norm": 0.04185836389660835, "learning_rate": 4.1572687350494e-06, "loss": 0.0011, "step": 70830 }, { "epoch": 1.1971979753766593, "grad_norm": 0.05058329924941063, "learning_rate": 4.155815041821259e-06, "loss": 0.0006, "step": 70840 }, { "epoch": 1.1973669756555099, "grad_norm": 0.013063745573163033, "learning_rate": 4.154361422041298e-06, "loss": 0.0009, "step": 70850 }, { "epoch": 1.1975359759343602, "grad_norm": 0.06879516690969467, "learning_rate": 4.152907875835992e-06, "loss": 0.0014, "step": 70860 }, { "epoch": 1.1977049762132108, "grad_norm": 0.04310673847794533, "learning_rate": 4.151454403331803e-06, "loss": 0.0006, "step": 70870 }, { "epoch": 1.1978739764920612, "grad_norm": 0.03932936117053032, "learning_rate": 4.150001004655195e-06, "loss": 0.001, "step": 70880 }, { "epoch": 1.1980429767709118, "grad_norm": 0.08106826990842819, "learning_rate": 4.1485476799326155e-06, "loss": 0.001, "step": 70890 }, { "epoch": 1.1982119770497621, "grad_norm": 0.03531781956553459, "learning_rate": 4.147094429290516e-06, "loss": 0.0012, "step": 70900 }, { "epoch": 1.1983809773286125, "grad_norm": 0.03120686672627926, "learning_rate": 4.145641252855331e-06, "loss": 0.0009, "step": 70910 }, { "epoch": 1.198549977607463, "grad_norm": 0.010923722758889198, "learning_rate": 4.144188150753498e-06, "loss": 0.0018, "step": 70920 }, { "epoch": 1.1987189778863134, "grad_norm": 0.06399686634540558, "learning_rate": 4.142735123111441e-06, "loss": 0.0008, "step": 70930 }, { "epoch": 1.198887978165164, "grad_norm": 0.024457722902297974, "learning_rate": 4.141282170055583e-06, "loss": 0.0016, "step": 70940 }, { "epoch": 1.1990569784440144, "grad_norm": 0.11252973973751068, "learning_rate": 4.139829291712336e-06, "loss": 0.0013, "step": 70950 }, { "epoch": 1.199225978722865, "grad_norm": 0.061953283846378326, "learning_rate": 4.138376488208108e-06, "loss": 0.0011, "step": 70960 }, { "epoch": 1.1993949790017153, "grad_norm": 0.04622013121843338, "learning_rate": 4.1369237596693005e-06, "loss": 0.0011, "step": 70970 }, { "epoch": 1.199563979280566, "grad_norm": 0.03799670934677124, "learning_rate": 4.135471106222307e-06, "loss": 0.001, "step": 70980 }, { "epoch": 1.1997329795594163, "grad_norm": 0.04052281007170677, "learning_rate": 4.134018527993518e-06, "loss": 0.001, "step": 70990 }, { "epoch": 1.1999019798382666, "grad_norm": 0.1576308012008667, "learning_rate": 4.132566025109311e-06, "loss": 0.0013, "step": 71000 }, { "epoch": 1.2000709801171172, "grad_norm": 0.03853791952133179, "learning_rate": 4.131113597696064e-06, "loss": 0.0011, "step": 71010 }, { "epoch": 1.2002399803959676, "grad_norm": 0.04681782424449921, "learning_rate": 4.129661245880143e-06, "loss": 0.001, "step": 71020 }, { "epoch": 1.2004089806748182, "grad_norm": 0.05324620380997658, "learning_rate": 4.128208969787911e-06, "loss": 0.0007, "step": 71030 }, { "epoch": 1.2005779809536685, "grad_norm": 0.023558132350444794, "learning_rate": 4.1267567695457215e-06, "loss": 0.0015, "step": 71040 }, { "epoch": 1.200746981232519, "grad_norm": 0.04994186758995056, "learning_rate": 4.125304645279925e-06, "loss": 0.0009, "step": 71050 }, { "epoch": 1.2009159815113695, "grad_norm": 0.014957442879676819, "learning_rate": 4.123852597116862e-06, "loss": 0.0022, "step": 71060 }, { "epoch": 1.20108498179022, "grad_norm": 0.019813815131783485, "learning_rate": 4.122400625182868e-06, "loss": 0.0008, "step": 71070 }, { "epoch": 1.2012539820690704, "grad_norm": 0.08933929353952408, "learning_rate": 4.1209487296042715e-06, "loss": 0.0015, "step": 71080 }, { "epoch": 1.2014229823479208, "grad_norm": 0.15247413516044617, "learning_rate": 4.119496910507397e-06, "loss": 0.0028, "step": 71090 }, { "epoch": 1.2015919826267714, "grad_norm": 0.017764583230018616, "learning_rate": 4.118045168018554e-06, "loss": 0.0009, "step": 71100 }, { "epoch": 1.2017609829056217, "grad_norm": 0.017530683428049088, "learning_rate": 4.116593502264057e-06, "loss": 0.0005, "step": 71110 }, { "epoch": 1.2019299831844723, "grad_norm": 0.016762100160121918, "learning_rate": 4.115141913370203e-06, "loss": 0.0007, "step": 71120 }, { "epoch": 1.2020989834633227, "grad_norm": 0.07180909067392349, "learning_rate": 4.113690401463293e-06, "loss": 0.0015, "step": 71130 }, { "epoch": 1.2022679837421733, "grad_norm": 0.005118554458022118, "learning_rate": 4.1122389666696095e-06, "loss": 0.0012, "step": 71140 }, { "epoch": 1.2024369840210236, "grad_norm": 0.03537483513355255, "learning_rate": 4.110787609115439e-06, "loss": 0.0006, "step": 71150 }, { "epoch": 1.2026059842998742, "grad_norm": 0.031238961964845657, "learning_rate": 4.109336328927052e-06, "loss": 0.0012, "step": 71160 }, { "epoch": 1.2027749845787246, "grad_norm": 0.00018730736337602139, "learning_rate": 4.107885126230724e-06, "loss": 0.001, "step": 71170 }, { "epoch": 1.202943984857575, "grad_norm": 0.030509380623698235, "learning_rate": 4.106434001152708e-06, "loss": 0.0014, "step": 71180 }, { "epoch": 1.2031129851364255, "grad_norm": 0.029428528621792793, "learning_rate": 4.104982953819267e-06, "loss": 0.0005, "step": 71190 }, { "epoch": 1.2032819854152759, "grad_norm": 0.07416586577892303, "learning_rate": 4.103531984356644e-06, "loss": 0.0008, "step": 71200 }, { "epoch": 1.2034509856941265, "grad_norm": 0.021068410947918892, "learning_rate": 4.102081092891081e-06, "loss": 0.0009, "step": 71210 }, { "epoch": 1.2036199859729768, "grad_norm": 0.01348723191767931, "learning_rate": 4.100630279548815e-06, "loss": 0.0007, "step": 71220 }, { "epoch": 1.2037889862518274, "grad_norm": 0.037767812609672546, "learning_rate": 4.099179544456071e-06, "loss": 0.001, "step": 71230 }, { "epoch": 1.2039579865306778, "grad_norm": 0.0031448148656636477, "learning_rate": 4.097728887739073e-06, "loss": 0.0006, "step": 71240 }, { "epoch": 1.2041269868095283, "grad_norm": 0.023646043613553047, "learning_rate": 4.096278309524031e-06, "loss": 0.0008, "step": 71250 }, { "epoch": 1.2042959870883787, "grad_norm": 0.08618577569723129, "learning_rate": 4.094827809937156e-06, "loss": 0.0009, "step": 71260 }, { "epoch": 1.204464987367229, "grad_norm": 0.2516619861125946, "learning_rate": 4.093377389104646e-06, "loss": 0.0033, "step": 71270 }, { "epoch": 1.2046339876460797, "grad_norm": 0.028300069272518158, "learning_rate": 4.091927047152698e-06, "loss": 0.0005, "step": 71280 }, { "epoch": 1.20480298792493, "grad_norm": 0.0396931953728199, "learning_rate": 4.090476784207495e-06, "loss": 0.0011, "step": 71290 }, { "epoch": 1.2049719882037806, "grad_norm": 0.009809630922973156, "learning_rate": 4.0890266003952206e-06, "loss": 0.0013, "step": 71300 }, { "epoch": 1.205140988482631, "grad_norm": 0.06519563496112823, "learning_rate": 4.087576495842043e-06, "loss": 0.0012, "step": 71310 }, { "epoch": 1.2053099887614815, "grad_norm": 0.010368295945227146, "learning_rate": 4.0861264706741334e-06, "loss": 0.0012, "step": 71320 }, { "epoch": 1.205478989040332, "grad_norm": 0.0036968374624848366, "learning_rate": 4.0846765250176466e-06, "loss": 0.0034, "step": 71330 }, { "epoch": 1.2056479893191825, "grad_norm": 0.0545491985976696, "learning_rate": 4.083226658998738e-06, "loss": 0.001, "step": 71340 }, { "epoch": 1.2058169895980329, "grad_norm": 0.023208390921354294, "learning_rate": 4.081776872743552e-06, "loss": 0.0009, "step": 71350 }, { "epoch": 1.2059859898768832, "grad_norm": 0.012889409437775612, "learning_rate": 4.080327166378227e-06, "loss": 0.0006, "step": 71360 }, { "epoch": 1.2061549901557338, "grad_norm": 0.26329225301742554, "learning_rate": 4.078877540028893e-06, "loss": 0.0008, "step": 71370 }, { "epoch": 1.2063239904345842, "grad_norm": 0.0013760413276031613, "learning_rate": 4.077427993821678e-06, "loss": 0.0012, "step": 71380 }, { "epoch": 1.2064929907134347, "grad_norm": 0.01563761942088604, "learning_rate": 4.075978527882696e-06, "loss": 0.0008, "step": 71390 }, { "epoch": 1.206661990992285, "grad_norm": 0.16220055520534515, "learning_rate": 4.074529142338061e-06, "loss": 0.0015, "step": 71400 }, { "epoch": 1.2068309912711357, "grad_norm": 0.016545753926038742, "learning_rate": 4.073079837313873e-06, "loss": 0.001, "step": 71410 }, { "epoch": 1.206999991549986, "grad_norm": 0.1064077764749527, "learning_rate": 4.0716306129362295e-06, "loss": 0.0011, "step": 71420 }, { "epoch": 1.2071689918288366, "grad_norm": 0.03617030382156372, "learning_rate": 4.070181469331222e-06, "loss": 0.0007, "step": 71430 }, { "epoch": 1.207337992107687, "grad_norm": 0.046915020793676376, "learning_rate": 4.068732406624932e-06, "loss": 0.0009, "step": 71440 }, { "epoch": 1.2075069923865374, "grad_norm": 0.02355279214680195, "learning_rate": 4.067283424943434e-06, "loss": 0.0006, "step": 71450 }, { "epoch": 1.207675992665388, "grad_norm": 0.0377669483423233, "learning_rate": 4.065834524412796e-06, "loss": 0.0006, "step": 71460 }, { "epoch": 1.2078449929442383, "grad_norm": 0.00027997931465506554, "learning_rate": 4.064385705159083e-06, "loss": 0.0006, "step": 71470 }, { "epoch": 1.2080139932230889, "grad_norm": 0.05463608354330063, "learning_rate": 4.062936967308345e-06, "loss": 0.0009, "step": 71480 }, { "epoch": 1.2081829935019393, "grad_norm": 0.045695651322603226, "learning_rate": 4.061488310986633e-06, "loss": 0.0011, "step": 71490 }, { "epoch": 1.2083519937807898, "grad_norm": 0.053160663694143295, "learning_rate": 4.060039736319982e-06, "loss": 0.0011, "step": 71500 }, { "epoch": 1.2085209940596402, "grad_norm": 0.04242270439863205, "learning_rate": 4.0585912434344314e-06, "loss": 0.0018, "step": 71510 }, { "epoch": 1.2086899943384908, "grad_norm": 0.005821699742227793, "learning_rate": 4.057142832456001e-06, "loss": 0.0007, "step": 71520 }, { "epoch": 1.2088589946173411, "grad_norm": 0.04350055754184723, "learning_rate": 4.055694503510715e-06, "loss": 0.001, "step": 71530 }, { "epoch": 1.2090279948961915, "grad_norm": 0.03617621213197708, "learning_rate": 4.054246256724581e-06, "loss": 0.0012, "step": 71540 }, { "epoch": 1.209196995175042, "grad_norm": 0.14375846087932587, "learning_rate": 4.0527980922236055e-06, "loss": 0.0007, "step": 71550 }, { "epoch": 1.2093659954538925, "grad_norm": 0.08785324543714523, "learning_rate": 4.051350010133784e-06, "loss": 0.0011, "step": 71560 }, { "epoch": 1.209534995732743, "grad_norm": 0.10095148533582687, "learning_rate": 4.049902010581111e-06, "loss": 0.0008, "step": 71570 }, { "epoch": 1.2097039960115934, "grad_norm": 0.08452757447957993, "learning_rate": 4.048454093691563e-06, "loss": 0.0013, "step": 71580 }, { "epoch": 1.2098729962904438, "grad_norm": 0.051274657249450684, "learning_rate": 4.047006259591122e-06, "loss": 0.0014, "step": 71590 }, { "epoch": 1.2100419965692943, "grad_norm": 0.015805142000317574, "learning_rate": 4.0455585084057505e-06, "loss": 0.0009, "step": 71600 }, { "epoch": 1.2102109968481447, "grad_norm": 0.033467043191194534, "learning_rate": 4.044110840261417e-06, "loss": 0.0008, "step": 71610 }, { "epoch": 1.2103799971269953, "grad_norm": 0.09230538457632065, "learning_rate": 4.042663255284068e-06, "loss": 0.0011, "step": 71620 }, { "epoch": 1.2105489974058457, "grad_norm": 0.039076339453458786, "learning_rate": 4.041215753599656e-06, "loss": 0.0005, "step": 71630 }, { "epoch": 1.2107179976846962, "grad_norm": 0.05938393250107765, "learning_rate": 4.039768335334119e-06, "loss": 0.0008, "step": 71640 }, { "epoch": 1.2108869979635466, "grad_norm": 0.11329591274261475, "learning_rate": 4.03832100061339e-06, "loss": 0.0016, "step": 71650 }, { "epoch": 1.2110559982423972, "grad_norm": 0.05184169113636017, "learning_rate": 4.036873749563391e-06, "loss": 0.0015, "step": 71660 }, { "epoch": 1.2112249985212475, "grad_norm": 0.016765085980296135, "learning_rate": 4.035426582310045e-06, "loss": 0.0004, "step": 71670 }, { "epoch": 1.211393998800098, "grad_norm": 0.3198559582233429, "learning_rate": 4.033979498979258e-06, "loss": 0.0009, "step": 71680 }, { "epoch": 1.2115629990789485, "grad_norm": 0.012108954600989819, "learning_rate": 4.032532499696936e-06, "loss": 0.0009, "step": 71690 }, { "epoch": 1.2117319993577989, "grad_norm": 0.10403885692358017, "learning_rate": 4.031085584588974e-06, "loss": 0.0008, "step": 71700 }, { "epoch": 1.2119009996366494, "grad_norm": 0.10198567807674408, "learning_rate": 4.02963875378126e-06, "loss": 0.0006, "step": 71710 }, { "epoch": 1.2120699999154998, "grad_norm": 0.03292039781808853, "learning_rate": 4.028192007399676e-06, "loss": 0.0012, "step": 71720 }, { "epoch": 1.2122390001943504, "grad_norm": 0.05963369458913803, "learning_rate": 4.026745345570096e-06, "loss": 0.0015, "step": 71730 }, { "epoch": 1.2124080004732007, "grad_norm": 0.004073624033480883, "learning_rate": 4.025298768418386e-06, "loss": 0.0011, "step": 71740 }, { "epoch": 1.2125770007520513, "grad_norm": 0.05115113779902458, "learning_rate": 4.023852276070405e-06, "loss": 0.0007, "step": 71750 }, { "epoch": 1.2127460010309017, "grad_norm": 0.09283419698476791, "learning_rate": 4.022405868652008e-06, "loss": 0.0009, "step": 71760 }, { "epoch": 1.212915001309752, "grad_norm": 0.07475849241018295, "learning_rate": 4.020959546289035e-06, "loss": 0.0004, "step": 71770 }, { "epoch": 1.2130840015886026, "grad_norm": 0.021336553618311882, "learning_rate": 4.019513309107327e-06, "loss": 0.0012, "step": 71780 }, { "epoch": 1.213253001867453, "grad_norm": 0.07587479054927826, "learning_rate": 4.018067157232709e-06, "loss": 0.0009, "step": 71790 }, { "epoch": 1.2134220021463036, "grad_norm": 0.056975387036800385, "learning_rate": 4.016621090791008e-06, "loss": 0.001, "step": 71800 }, { "epoch": 1.213591002425154, "grad_norm": 0.0031521848868578672, "learning_rate": 4.015175109908036e-06, "loss": 0.0014, "step": 71810 }, { "epoch": 1.2137600027040045, "grad_norm": 0.04272615164518356, "learning_rate": 4.013729214709601e-06, "loss": 0.0011, "step": 71820 }, { "epoch": 1.2139290029828549, "grad_norm": 0.015306469984352589, "learning_rate": 4.012283405321504e-06, "loss": 0.0014, "step": 71830 }, { "epoch": 1.2140980032617055, "grad_norm": 0.1312740296125412, "learning_rate": 4.010837681869535e-06, "loss": 0.001, "step": 71840 }, { "epoch": 1.2142670035405558, "grad_norm": 0.05064448341727257, "learning_rate": 4.0093920444794796e-06, "loss": 0.0011, "step": 71850 }, { "epoch": 1.2144360038194062, "grad_norm": 0.018574142828583717, "learning_rate": 4.007946493277118e-06, "loss": 0.0006, "step": 71860 }, { "epoch": 1.2146050040982568, "grad_norm": 0.04546702653169632, "learning_rate": 4.006501028388215e-06, "loss": 0.0014, "step": 71870 }, { "epoch": 1.2147740043771071, "grad_norm": 0.053701866418123245, "learning_rate": 4.00505564993854e-06, "loss": 0.0011, "step": 71880 }, { "epoch": 1.2149430046559577, "grad_norm": 0.05961509048938751, "learning_rate": 4.003610358053841e-06, "loss": 0.0006, "step": 71890 }, { "epoch": 1.215112004934808, "grad_norm": 0.044699832797050476, "learning_rate": 4.002165152859871e-06, "loss": 0.0009, "step": 71900 }, { "epoch": 1.2152810052136587, "grad_norm": 0.06642137467861176, "learning_rate": 4.000720034482365e-06, "loss": 0.0005, "step": 71910 }, { "epoch": 1.215450005492509, "grad_norm": 0.0566544309258461, "learning_rate": 3.999275003047059e-06, "loss": 0.0011, "step": 71920 }, { "epoch": 1.2156190057713596, "grad_norm": 0.0494023896753788, "learning_rate": 3.997830058679675e-06, "loss": 0.001, "step": 71930 }, { "epoch": 1.21578800605021, "grad_norm": 0.04000876098871231, "learning_rate": 3.996385201505933e-06, "loss": 0.0009, "step": 71940 }, { "epoch": 1.2159570063290603, "grad_norm": 0.008254399523139, "learning_rate": 3.994940431651541e-06, "loss": 0.001, "step": 71950 }, { "epoch": 1.216126006607911, "grad_norm": 0.09354092180728912, "learning_rate": 3.993495749242201e-06, "loss": 0.0007, "step": 71960 }, { "epoch": 1.2162950068867613, "grad_norm": 0.032391320914030075, "learning_rate": 3.992051154403606e-06, "loss": 0.001, "step": 71970 }, { "epoch": 1.2164640071656119, "grad_norm": 0.00970939826220274, "learning_rate": 3.990606647261445e-06, "loss": 0.0009, "step": 71980 }, { "epoch": 1.2166330074444622, "grad_norm": 0.011265194043517113, "learning_rate": 3.989162227941397e-06, "loss": 0.0004, "step": 71990 }, { "epoch": 1.2168020077233128, "grad_norm": 0.12789775431156158, "learning_rate": 3.987717896569132e-06, "loss": 0.0011, "step": 72000 }, { "epoch": 1.2169710080021632, "grad_norm": 0.016483772546052933, "learning_rate": 3.986273653270315e-06, "loss": 0.0018, "step": 72010 }, { "epoch": 1.2171400082810138, "grad_norm": 0.04038587212562561, "learning_rate": 3.984829498170602e-06, "loss": 0.0007, "step": 72020 }, { "epoch": 1.2173090085598641, "grad_norm": 0.01905478537082672, "learning_rate": 3.983385431395641e-06, "loss": 0.0005, "step": 72030 }, { "epoch": 1.2174780088387145, "grad_norm": 0.01647147536277771, "learning_rate": 3.981941453071072e-06, "loss": 0.0004, "step": 72040 }, { "epoch": 1.217647009117565, "grad_norm": 0.06504178792238235, "learning_rate": 3.9804975633225315e-06, "loss": 0.001, "step": 72050 }, { "epoch": 1.2178160093964154, "grad_norm": 0.0017739549512043595, "learning_rate": 3.979053762275641e-06, "loss": 0.0011, "step": 72060 }, { "epoch": 1.217985009675266, "grad_norm": 0.05531751736998558, "learning_rate": 3.977610050056021e-06, "loss": 0.002, "step": 72070 }, { "epoch": 1.2181540099541164, "grad_norm": 0.090264230966568, "learning_rate": 3.976166426789279e-06, "loss": 0.0012, "step": 72080 }, { "epoch": 1.218323010232967, "grad_norm": 0.02547847293317318, "learning_rate": 3.97472289260102e-06, "loss": 0.0017, "step": 72090 }, { "epoch": 1.2184920105118173, "grad_norm": 0.018822619691491127, "learning_rate": 3.973279447616834e-06, "loss": 0.0013, "step": 72100 }, { "epoch": 1.218661010790668, "grad_norm": 0.08224533498287201, "learning_rate": 3.971836091962314e-06, "loss": 0.001, "step": 72110 }, { "epoch": 1.2188300110695183, "grad_norm": 0.07459520548582077, "learning_rate": 3.9703928257630325e-06, "loss": 0.001, "step": 72120 }, { "epoch": 1.2189990113483686, "grad_norm": 0.024879854172468185, "learning_rate": 3.968949649144566e-06, "loss": 0.001, "step": 72130 }, { "epoch": 1.2191680116272192, "grad_norm": 0.041853051632642746, "learning_rate": 3.9675065622324746e-06, "loss": 0.0007, "step": 72140 }, { "epoch": 1.2193370119060696, "grad_norm": 0.00012978816812392324, "learning_rate": 3.966063565152316e-06, "loss": 0.0012, "step": 72150 }, { "epoch": 1.2195060121849202, "grad_norm": 0.19226211309432983, "learning_rate": 3.964620658029635e-06, "loss": 0.0015, "step": 72160 }, { "epoch": 1.2196750124637705, "grad_norm": 0.0453561507165432, "learning_rate": 3.963177840989975e-06, "loss": 0.0009, "step": 72170 }, { "epoch": 1.219844012742621, "grad_norm": 0.049020733684301376, "learning_rate": 3.961735114158864e-06, "loss": 0.0011, "step": 72180 }, { "epoch": 1.2200130130214715, "grad_norm": 0.07267977297306061, "learning_rate": 3.960292477661831e-06, "loss": 0.0011, "step": 72190 }, { "epoch": 1.220182013300322, "grad_norm": 0.00011391971202101558, "learning_rate": 3.958849931624389e-06, "loss": 0.0023, "step": 72200 }, { "epoch": 1.2203510135791724, "grad_norm": 0.006536595989018679, "learning_rate": 3.957407476172047e-06, "loss": 0.001, "step": 72210 }, { "epoch": 1.2205200138580228, "grad_norm": 0.12899255752563477, "learning_rate": 3.955965111430306e-06, "loss": 0.0025, "step": 72220 }, { "epoch": 1.2206890141368734, "grad_norm": 0.02313540130853653, "learning_rate": 3.954522837524658e-06, "loss": 0.0009, "step": 72230 }, { "epoch": 1.2208580144157237, "grad_norm": 0.020590925589203835, "learning_rate": 3.953080654580591e-06, "loss": 0.0012, "step": 72240 }, { "epoch": 1.2210270146945743, "grad_norm": 0.08852645754814148, "learning_rate": 3.951638562723577e-06, "loss": 0.0011, "step": 72250 }, { "epoch": 1.2211960149734247, "grad_norm": 0.15627345442771912, "learning_rate": 3.950196562079091e-06, "loss": 0.001, "step": 72260 }, { "epoch": 1.2213650152522753, "grad_norm": 0.0558914989233017, "learning_rate": 3.948754652772587e-06, "loss": 0.0012, "step": 72270 }, { "epoch": 1.2215340155311256, "grad_norm": 0.1008479967713356, "learning_rate": 3.947312834929524e-06, "loss": 0.001, "step": 72280 }, { "epoch": 1.2217030158099762, "grad_norm": 0.04521102085709572, "learning_rate": 3.945871108675342e-06, "loss": 0.002, "step": 72290 }, { "epoch": 1.2218720160888266, "grad_norm": 0.09771153330802917, "learning_rate": 3.944429474135484e-06, "loss": 0.0013, "step": 72300 }, { "epoch": 1.222041016367677, "grad_norm": 0.03358219936490059, "learning_rate": 3.942987931435374e-06, "loss": 0.0017, "step": 72310 }, { "epoch": 1.2222100166465275, "grad_norm": 0.06623164564371109, "learning_rate": 3.9415464807004364e-06, "loss": 0.002, "step": 72320 }, { "epoch": 1.2223790169253779, "grad_norm": 0.01089906133711338, "learning_rate": 3.940105122056082e-06, "loss": 0.0009, "step": 72330 }, { "epoch": 1.2225480172042285, "grad_norm": 0.0035987806040793657, "learning_rate": 3.938663855627719e-06, "loss": 0.0006, "step": 72340 }, { "epoch": 1.2227170174830788, "grad_norm": 0.08150385320186615, "learning_rate": 3.937222681540741e-06, "loss": 0.0014, "step": 72350 }, { "epoch": 1.2228860177619294, "grad_norm": 0.10174530744552612, "learning_rate": 3.935781599920541e-06, "loss": 0.0007, "step": 72360 }, { "epoch": 1.2230550180407798, "grad_norm": 0.09403951466083527, "learning_rate": 3.934340610892497e-06, "loss": 0.0015, "step": 72370 }, { "epoch": 1.2232240183196303, "grad_norm": 0.018554072827100754, "learning_rate": 3.932899714581984e-06, "loss": 0.0009, "step": 72380 }, { "epoch": 1.2233930185984807, "grad_norm": 0.025134406983852386, "learning_rate": 3.931458911114364e-06, "loss": 0.0013, "step": 72390 }, { "epoch": 1.223562018877331, "grad_norm": 0.01571774110198021, "learning_rate": 3.930018200614998e-06, "loss": 0.001, "step": 72400 }, { "epoch": 1.2237310191561817, "grad_norm": 0.062236469238996506, "learning_rate": 3.928577583209231e-06, "loss": 0.0005, "step": 72410 }, { "epoch": 1.223900019435032, "grad_norm": 0.0811903327703476, "learning_rate": 3.927137059022407e-06, "loss": 0.0008, "step": 72420 }, { "epoch": 1.2240690197138826, "grad_norm": 0.05504392087459564, "learning_rate": 3.925696628179856e-06, "loss": 0.0014, "step": 72430 }, { "epoch": 1.224238019992733, "grad_norm": 0.08194303512573242, "learning_rate": 3.924256290806905e-06, "loss": 0.0016, "step": 72440 }, { "epoch": 1.2244070202715833, "grad_norm": 0.03738373890519142, "learning_rate": 3.922816047028866e-06, "loss": 0.0009, "step": 72450 }, { "epoch": 1.224576020550434, "grad_norm": 0.03901316598057747, "learning_rate": 3.921375896971053e-06, "loss": 0.005, "step": 72460 }, { "epoch": 1.2247450208292845, "grad_norm": 0.15741872787475586, "learning_rate": 3.919935840758761e-06, "loss": 0.0006, "step": 72470 }, { "epoch": 1.2249140211081349, "grad_norm": 0.025328345596790314, "learning_rate": 3.918495878517284e-06, "loss": 0.0011, "step": 72480 }, { "epoch": 1.2250830213869852, "grad_norm": 0.03609572350978851, "learning_rate": 3.917056010371906e-06, "loss": 0.0013, "step": 72490 }, { "epoch": 1.2252520216658358, "grad_norm": 0.04493815079331398, "learning_rate": 3.915616236447902e-06, "loss": 0.0007, "step": 72500 }, { "epoch": 1.2254210219446862, "grad_norm": 0.052706584334373474, "learning_rate": 3.91417655687054e-06, "loss": 0.0006, "step": 72510 }, { "epoch": 1.2255900222235367, "grad_norm": 0.02652081288397312, "learning_rate": 3.912736971765077e-06, "loss": 0.0003, "step": 72520 }, { "epoch": 1.225759022502387, "grad_norm": 0.14209231734275818, "learning_rate": 3.911297481256767e-06, "loss": 0.0015, "step": 72530 }, { "epoch": 1.2259280227812375, "grad_norm": 0.1366054266691208, "learning_rate": 3.909858085470849e-06, "loss": 0.0012, "step": 72540 }, { "epoch": 1.226097023060088, "grad_norm": 0.17087505757808685, "learning_rate": 3.908418784532562e-06, "loss": 0.0015, "step": 72550 }, { "epoch": 1.2262660233389384, "grad_norm": 0.041053686290979385, "learning_rate": 3.906979578567128e-06, "loss": 0.0014, "step": 72560 }, { "epoch": 1.226435023617789, "grad_norm": 0.034434977918863297, "learning_rate": 3.9055404676997674e-06, "loss": 0.0008, "step": 72570 }, { "epoch": 1.2266040238966394, "grad_norm": 0.06338217854499817, "learning_rate": 3.9041014520556875e-06, "loss": 0.001, "step": 72580 }, { "epoch": 1.22677302417549, "grad_norm": 0.11102625727653503, "learning_rate": 3.902662531760092e-06, "loss": 0.0014, "step": 72590 }, { "epoch": 1.2269420244543403, "grad_norm": 0.036118436604738235, "learning_rate": 3.901223706938172e-06, "loss": 0.0015, "step": 72600 }, { "epoch": 1.227111024733191, "grad_norm": 0.15463878214359283, "learning_rate": 3.8997849777151156e-06, "loss": 0.0013, "step": 72610 }, { "epoch": 1.2272800250120413, "grad_norm": 0.06801795214414597, "learning_rate": 3.898346344216094e-06, "loss": 0.0006, "step": 72620 }, { "epoch": 1.2274490252908916, "grad_norm": 0.029515203088521957, "learning_rate": 3.896907806566281e-06, "loss": 0.0012, "step": 72630 }, { "epoch": 1.2276180255697422, "grad_norm": 0.14273680746555328, "learning_rate": 3.89546936489083e-06, "loss": 0.0009, "step": 72640 }, { "epoch": 1.2277870258485926, "grad_norm": 0.07829339057207108, "learning_rate": 3.894031019314899e-06, "loss": 0.0007, "step": 72650 }, { "epoch": 1.2279560261274431, "grad_norm": 0.07639341056346893, "learning_rate": 3.8925927699636255e-06, "loss": 0.0015, "step": 72660 }, { "epoch": 1.2281250264062935, "grad_norm": 0.012791531160473824, "learning_rate": 3.891154616962148e-06, "loss": 0.001, "step": 72670 }, { "epoch": 1.228294026685144, "grad_norm": 0.03249354660511017, "learning_rate": 3.889716560435591e-06, "loss": 0.0008, "step": 72680 }, { "epoch": 1.2284630269639945, "grad_norm": 0.009360888972878456, "learning_rate": 3.888278600509072e-06, "loss": 0.0024, "step": 72690 }, { "epoch": 1.228632027242845, "grad_norm": 0.11578324437141418, "learning_rate": 3.886840737307701e-06, "loss": 0.0009, "step": 72700 }, { "epoch": 1.2288010275216954, "grad_norm": 0.014583390206098557, "learning_rate": 3.885402970956581e-06, "loss": 0.0005, "step": 72710 }, { "epoch": 1.2289700278005458, "grad_norm": 0.029843565076589584, "learning_rate": 3.883965301580801e-06, "loss": 0.0028, "step": 72720 }, { "epoch": 1.2291390280793963, "grad_norm": 0.028233207762241364, "learning_rate": 3.882527729305448e-06, "loss": 0.001, "step": 72730 }, { "epoch": 1.2293080283582467, "grad_norm": 0.12675841152668, "learning_rate": 3.881090254255596e-06, "loss": 0.0013, "step": 72740 }, { "epoch": 1.2294770286370973, "grad_norm": 0.01370705384761095, "learning_rate": 3.8796528765563135e-06, "loss": 0.0012, "step": 72750 }, { "epoch": 1.2296460289159477, "grad_norm": 0.01915006898343563, "learning_rate": 3.87821559633266e-06, "loss": 0.001, "step": 72760 }, { "epoch": 1.2298150291947982, "grad_norm": 0.046408962458372116, "learning_rate": 3.876778413709683e-06, "loss": 0.002, "step": 72770 }, { "epoch": 1.2299840294736486, "grad_norm": 0.030629215762019157, "learning_rate": 3.875341328812427e-06, "loss": 0.0009, "step": 72780 }, { "epoch": 1.2301530297524992, "grad_norm": 0.023360850289463997, "learning_rate": 3.873904341765925e-06, "loss": 0.0025, "step": 72790 }, { "epoch": 1.2303220300313495, "grad_norm": 0.008983196690678596, "learning_rate": 3.872467452695201e-06, "loss": 0.0009, "step": 72800 }, { "epoch": 1.2304910303102, "grad_norm": 0.03672792389988899, "learning_rate": 3.871030661725271e-06, "loss": 0.0014, "step": 72810 }, { "epoch": 1.2306600305890505, "grad_norm": 0.06108082830905914, "learning_rate": 3.869593968981145e-06, "loss": 0.0012, "step": 72820 }, { "epoch": 1.2308290308679009, "grad_norm": 0.15927709639072418, "learning_rate": 3.868157374587819e-06, "loss": 0.0015, "step": 72830 }, { "epoch": 1.2309980311467514, "grad_norm": 0.05433737114071846, "learning_rate": 3.866720878670287e-06, "loss": 0.0009, "step": 72840 }, { "epoch": 1.2311670314256018, "grad_norm": 0.01710580289363861, "learning_rate": 3.865284481353527e-06, "loss": 0.0012, "step": 72850 }, { "epoch": 1.2313360317044524, "grad_norm": 0.08657790720462799, "learning_rate": 3.8638481827625175e-06, "loss": 0.001, "step": 72860 }, { "epoch": 1.2315050319833027, "grad_norm": 0.042510438710451126, "learning_rate": 3.862411983022219e-06, "loss": 0.0012, "step": 72870 }, { "epoch": 1.2316740322621533, "grad_norm": 0.06462328881025314, "learning_rate": 3.860975882257591e-06, "loss": 0.0012, "step": 72880 }, { "epoch": 1.2318430325410037, "grad_norm": 0.022037969902157784, "learning_rate": 3.859539880593578e-06, "loss": 0.0007, "step": 72890 }, { "epoch": 1.232012032819854, "grad_norm": 0.04847249761223793, "learning_rate": 3.858103978155124e-06, "loss": 0.0017, "step": 72900 }, { "epoch": 1.2321810330987046, "grad_norm": 0.0007479141931980848, "learning_rate": 3.856668175067154e-06, "loss": 0.0011, "step": 72910 }, { "epoch": 1.232350033377555, "grad_norm": 0.0014906972646713257, "learning_rate": 3.855232471454594e-06, "loss": 0.0012, "step": 72920 }, { "epoch": 1.2325190336564056, "grad_norm": 0.0506197065114975, "learning_rate": 3.8537968674423545e-06, "loss": 0.001, "step": 72930 }, { "epoch": 1.232688033935256, "grad_norm": 0.03149702027440071, "learning_rate": 3.852361363155342e-06, "loss": 0.0006, "step": 72940 }, { "epoch": 1.2328570342141065, "grad_norm": 0.002010585507377982, "learning_rate": 3.85092595871845e-06, "loss": 0.0011, "step": 72950 }, { "epoch": 1.233026034492957, "grad_norm": 0.35226213932037354, "learning_rate": 3.8494906542565675e-06, "loss": 0.002, "step": 72960 }, { "epoch": 1.2331950347718075, "grad_norm": 0.13743427395820618, "learning_rate": 3.848055449894573e-06, "loss": 0.0006, "step": 72970 }, { "epoch": 1.2333640350506578, "grad_norm": 0.01349994819611311, "learning_rate": 3.846620345757335e-06, "loss": 0.002, "step": 72980 }, { "epoch": 1.2335330353295082, "grad_norm": 0.14405393600463867, "learning_rate": 3.845185341969715e-06, "loss": 0.0017, "step": 72990 }, { "epoch": 1.2337020356083588, "grad_norm": 0.12188602983951569, "learning_rate": 3.8437504386565636e-06, "loss": 0.0015, "step": 73000 }, { "epoch": 1.2338710358872091, "grad_norm": 0.04898308590054512, "learning_rate": 3.84231563594273e-06, "loss": 0.0008, "step": 73010 }, { "epoch": 1.2340400361660597, "grad_norm": 0.017351139336824417, "learning_rate": 3.840880933953043e-06, "loss": 0.001, "step": 73020 }, { "epoch": 1.23420903644491, "grad_norm": 0.03371371328830719, "learning_rate": 3.8394463328123325e-06, "loss": 0.0009, "step": 73030 }, { "epoch": 1.2343780367237607, "grad_norm": 0.09450079500675201, "learning_rate": 3.838011832645412e-06, "loss": 0.001, "step": 73040 }, { "epoch": 1.234547037002611, "grad_norm": 0.1007283478975296, "learning_rate": 3.836577433577094e-06, "loss": 0.0009, "step": 73050 }, { "epoch": 1.2347160372814616, "grad_norm": 0.03049352392554283, "learning_rate": 3.835143135732175e-06, "loss": 0.001, "step": 73060 }, { "epoch": 1.234885037560312, "grad_norm": 0.06046221777796745, "learning_rate": 3.833708939235446e-06, "loss": 0.0013, "step": 73070 }, { "epoch": 1.2350540378391623, "grad_norm": 0.03119039349257946, "learning_rate": 3.832274844211692e-06, "loss": 0.0011, "step": 73080 }, { "epoch": 1.235223038118013, "grad_norm": 0.05875023454427719, "learning_rate": 3.8308408507856844e-06, "loss": 0.0024, "step": 73090 }, { "epoch": 1.2353920383968633, "grad_norm": 0.053951092064380646, "learning_rate": 3.8294069590821856e-06, "loss": 0.0005, "step": 73100 }, { "epoch": 1.2355610386757139, "grad_norm": 0.09349772334098816, "learning_rate": 3.8279731692259545e-06, "loss": 0.0008, "step": 73110 }, { "epoch": 1.2357300389545642, "grad_norm": 0.01611015386879444, "learning_rate": 3.8265394813417355e-06, "loss": 0.001, "step": 73120 }, { "epoch": 1.2358990392334148, "grad_norm": 0.012593476101756096, "learning_rate": 3.825105895554269e-06, "loss": 0.0007, "step": 73130 }, { "epoch": 1.2360680395122652, "grad_norm": 0.1291211098432541, "learning_rate": 3.823672411988279e-06, "loss": 0.0011, "step": 73140 }, { "epoch": 1.2362370397911158, "grad_norm": 0.0016636535292491317, "learning_rate": 3.822239030768492e-06, "loss": 0.0012, "step": 73150 }, { "epoch": 1.2364060400699661, "grad_norm": 0.044153131544589996, "learning_rate": 3.820805752019613e-06, "loss": 0.0007, "step": 73160 }, { "epoch": 1.2365750403488165, "grad_norm": 0.04015633836388588, "learning_rate": 3.819372575866348e-06, "loss": 0.0012, "step": 73170 }, { "epoch": 1.236744040627667, "grad_norm": 0.0036272837314754725, "learning_rate": 3.8179395024333885e-06, "loss": 0.0009, "step": 73180 }, { "epoch": 1.2369130409065174, "grad_norm": 0.15639354288578033, "learning_rate": 3.816506531845421e-06, "loss": 0.0025, "step": 73190 }, { "epoch": 1.237082041185368, "grad_norm": 0.08149423450231552, "learning_rate": 3.815073664227118e-06, "loss": 0.0009, "step": 73200 }, { "epoch": 1.2372510414642184, "grad_norm": 0.15775452554225922, "learning_rate": 3.8136408997031483e-06, "loss": 0.0019, "step": 73210 }, { "epoch": 1.237420041743069, "grad_norm": 0.36730340123176575, "learning_rate": 3.8122082383981666e-06, "loss": 0.0021, "step": 73220 }, { "epoch": 1.2375890420219193, "grad_norm": 0.06837258487939835, "learning_rate": 3.8107756804368258e-06, "loss": 0.0013, "step": 73230 }, { "epoch": 1.23775804230077, "grad_norm": 0.030986608937382698, "learning_rate": 3.80934322594376e-06, "loss": 0.0008, "step": 73240 }, { "epoch": 1.2379270425796203, "grad_norm": 0.0400238111615181, "learning_rate": 3.8079108750436033e-06, "loss": 0.0009, "step": 73250 }, { "epoch": 1.2380960428584706, "grad_norm": 0.030002465471625328, "learning_rate": 3.8064786278609768e-06, "loss": 0.001, "step": 73260 }, { "epoch": 1.2382650431373212, "grad_norm": 0.1184472143650055, "learning_rate": 3.805046484520492e-06, "loss": 0.0008, "step": 73270 }, { "epoch": 1.2384340434161716, "grad_norm": 0.03609028086066246, "learning_rate": 3.803614445146753e-06, "loss": 0.0007, "step": 73280 }, { "epoch": 1.2386030436950222, "grad_norm": 0.02869199775159359, "learning_rate": 3.8021825098643533e-06, "loss": 0.0009, "step": 73290 }, { "epoch": 1.2387720439738725, "grad_norm": 0.0353069044649601, "learning_rate": 3.800750678797881e-06, "loss": 0.0004, "step": 73300 }, { "epoch": 1.2389410442527231, "grad_norm": 0.10248451679944992, "learning_rate": 3.799318952071907e-06, "loss": 0.0011, "step": 73310 }, { "epoch": 1.2391100445315735, "grad_norm": 0.010547821410000324, "learning_rate": 3.7978873298110046e-06, "loss": 0.0007, "step": 73320 }, { "epoch": 1.239279044810424, "grad_norm": 0.014905786141753197, "learning_rate": 3.7964558121397268e-06, "loss": 0.0016, "step": 73330 }, { "epoch": 1.2394480450892744, "grad_norm": 0.026339823380112648, "learning_rate": 3.7950243991826264e-06, "loss": 0.0013, "step": 73340 }, { "epoch": 1.2396170453681248, "grad_norm": 0.06282757967710495, "learning_rate": 3.7935930910642394e-06, "loss": 0.0013, "step": 73350 }, { "epoch": 1.2397860456469754, "grad_norm": 0.01360777486115694, "learning_rate": 3.7921618879090995e-06, "loss": 0.0003, "step": 73360 }, { "epoch": 1.2399550459258257, "grad_norm": 0.0012074375990778208, "learning_rate": 3.790730789841727e-06, "loss": 0.0006, "step": 73370 }, { "epoch": 1.2401240462046763, "grad_norm": 0.0037419984582811594, "learning_rate": 3.7892997969866362e-06, "loss": 0.0011, "step": 73380 }, { "epoch": 1.2402930464835267, "grad_norm": 0.015408365055918694, "learning_rate": 3.7878689094683274e-06, "loss": 0.0006, "step": 73390 }, { "epoch": 1.240462046762377, "grad_norm": 0.02896169200539589, "learning_rate": 3.7864381274112982e-06, "loss": 0.0011, "step": 73400 }, { "epoch": 1.2406310470412276, "grad_norm": 0.16518239676952362, "learning_rate": 3.7850074509400303e-06, "loss": 0.0011, "step": 73410 }, { "epoch": 1.240800047320078, "grad_norm": 0.11798775941133499, "learning_rate": 3.783576880179003e-06, "loss": 0.001, "step": 73420 }, { "epoch": 1.2409690475989286, "grad_norm": 0.04104173928499222, "learning_rate": 3.7821464152526776e-06, "loss": 0.0014, "step": 73430 }, { "epoch": 1.241138047877779, "grad_norm": 0.05344299599528313, "learning_rate": 3.7807160562855173e-06, "loss": 0.0018, "step": 73440 }, { "epoch": 1.2413070481566295, "grad_norm": 0.09682608395814896, "learning_rate": 3.7792858034019665e-06, "loss": 0.0011, "step": 73450 }, { "epoch": 1.2414760484354799, "grad_norm": 0.002755597699433565, "learning_rate": 3.777855656726466e-06, "loss": 0.0005, "step": 73460 }, { "epoch": 1.2416450487143305, "grad_norm": 0.07560652494430542, "learning_rate": 3.7764256163834435e-06, "loss": 0.0009, "step": 73470 }, { "epoch": 1.2418140489931808, "grad_norm": 0.011091392487287521, "learning_rate": 3.7749956824973223e-06, "loss": 0.0016, "step": 73480 }, { "epoch": 1.2419830492720312, "grad_norm": 0.02337898127734661, "learning_rate": 3.7735658551925103e-06, "loss": 0.0015, "step": 73490 }, { "epoch": 1.2421520495508818, "grad_norm": 0.029667360708117485, "learning_rate": 3.7721361345934138e-06, "loss": 0.0004, "step": 73500 }, { "epoch": 1.2423210498297321, "grad_norm": 0.064625084400177, "learning_rate": 3.77070652082442e-06, "loss": 0.0015, "step": 73510 }, { "epoch": 1.2424900501085827, "grad_norm": 0.010021107271313667, "learning_rate": 3.769277014009915e-06, "loss": 0.0012, "step": 73520 }, { "epoch": 1.242659050387433, "grad_norm": 0.1499299556016922, "learning_rate": 3.7678476142742746e-06, "loss": 0.0026, "step": 73530 }, { "epoch": 1.2428280506662837, "grad_norm": 0.19507773220539093, "learning_rate": 3.7664183217418597e-06, "loss": 0.001, "step": 73540 }, { "epoch": 1.242997050945134, "grad_norm": 0.015503432601690292, "learning_rate": 3.7649891365370294e-06, "loss": 0.001, "step": 73550 }, { "epoch": 1.2431660512239846, "grad_norm": 0.03430059552192688, "learning_rate": 3.763560058784127e-06, "loss": 0.0005, "step": 73560 }, { "epoch": 1.243335051502835, "grad_norm": 0.07987324893474579, "learning_rate": 3.7621310886074912e-06, "loss": 0.0012, "step": 73570 }, { "epoch": 1.2435040517816853, "grad_norm": 0.16322799026966095, "learning_rate": 3.7607022261314465e-06, "loss": 0.0015, "step": 73580 }, { "epoch": 1.243673052060536, "grad_norm": 0.09445358067750931, "learning_rate": 3.759273471480315e-06, "loss": 0.0017, "step": 73590 }, { "epoch": 1.2438420523393863, "grad_norm": 0.03201274573802948, "learning_rate": 3.7578448247784006e-06, "loss": 0.0011, "step": 73600 }, { "epoch": 1.2440110526182369, "grad_norm": 0.059998683631420135, "learning_rate": 3.7564162861500076e-06, "loss": 0.0007, "step": 73610 }, { "epoch": 1.2441800528970872, "grad_norm": 0.0846458300948143, "learning_rate": 3.7549878557194204e-06, "loss": 0.001, "step": 73620 }, { "epoch": 1.2443490531759378, "grad_norm": 0.007185360882431269, "learning_rate": 3.753559533610924e-06, "loss": 0.0009, "step": 73630 }, { "epoch": 1.2445180534547882, "grad_norm": 0.043200962245464325, "learning_rate": 3.752131319948785e-06, "loss": 0.0006, "step": 73640 }, { "epoch": 1.2446870537336387, "grad_norm": 0.026572002097964287, "learning_rate": 3.7507032148572684e-06, "loss": 0.0003, "step": 73650 }, { "epoch": 1.244856054012489, "grad_norm": 0.04520958662033081, "learning_rate": 3.7492752184606253e-06, "loss": 0.0003, "step": 73660 }, { "epoch": 1.2450250542913395, "grad_norm": 0.025674309581518173, "learning_rate": 3.7478473308830986e-06, "loss": 0.0006, "step": 73670 }, { "epoch": 1.24519405457019, "grad_norm": 0.020547570660710335, "learning_rate": 3.7464195522489193e-06, "loss": 0.0024, "step": 73680 }, { "epoch": 1.2453630548490404, "grad_norm": 0.01594940386712551, "learning_rate": 3.7449918826823152e-06, "loss": 0.0013, "step": 73690 }, { "epoch": 1.245532055127891, "grad_norm": 0.005270745139569044, "learning_rate": 3.7435643223074954e-06, "loss": 0.0014, "step": 73700 }, { "epoch": 1.2457010554067414, "grad_norm": 0.10284716635942459, "learning_rate": 3.7421368712486696e-06, "loss": 0.0007, "step": 73710 }, { "epoch": 1.245870055685592, "grad_norm": 0.010431215167045593, "learning_rate": 3.740709529630028e-06, "loss": 0.0009, "step": 73720 }, { "epoch": 1.2460390559644423, "grad_norm": 0.045114561915397644, "learning_rate": 3.7392822975757596e-06, "loss": 0.0016, "step": 73730 }, { "epoch": 1.246208056243293, "grad_norm": 0.06875897198915482, "learning_rate": 3.73785517521004e-06, "loss": 0.0014, "step": 73740 }, { "epoch": 1.2463770565221433, "grad_norm": 0.008149543777108192, "learning_rate": 3.736428162657035e-06, "loss": 0.0012, "step": 73750 }, { "epoch": 1.2465460568009936, "grad_norm": 0.04132377728819847, "learning_rate": 3.735001260040902e-06, "loss": 0.0006, "step": 73760 }, { "epoch": 1.2467150570798442, "grad_norm": 0.048792049288749695, "learning_rate": 3.7335744674857877e-06, "loss": 0.0007, "step": 73770 }, { "epoch": 1.2468840573586946, "grad_norm": 0.02528366446495056, "learning_rate": 3.732147785115833e-06, "loss": 0.0012, "step": 73780 }, { "epoch": 1.2470530576375451, "grad_norm": 0.08471834659576416, "learning_rate": 3.7307212130551612e-06, "loss": 0.0007, "step": 73790 }, { "epoch": 1.2472220579163955, "grad_norm": 0.00016127926937770098, "learning_rate": 3.7292947514278966e-06, "loss": 0.0006, "step": 73800 }, { "epoch": 1.247391058195246, "grad_norm": 0.0032390491105616093, "learning_rate": 3.727868400358142e-06, "loss": 0.0004, "step": 73810 }, { "epoch": 1.2475600584740965, "grad_norm": 0.034975565969944, "learning_rate": 3.726442159970004e-06, "loss": 0.0012, "step": 73820 }, { "epoch": 1.247729058752947, "grad_norm": 0.07279594242572784, "learning_rate": 3.725016030387565e-06, "loss": 0.001, "step": 73830 }, { "epoch": 1.2478980590317974, "grad_norm": 0.04227467626333237, "learning_rate": 3.723590011734911e-06, "loss": 0.0011, "step": 73840 }, { "epoch": 1.2480670593106478, "grad_norm": 0.10138219594955444, "learning_rate": 3.7221641041361084e-06, "loss": 0.0013, "step": 73850 }, { "epoch": 1.2482360595894983, "grad_norm": 0.14254963397979736, "learning_rate": 3.7207383077152227e-06, "loss": 0.0011, "step": 73860 }, { "epoch": 1.2484050598683487, "grad_norm": 0.04934743046760559, "learning_rate": 3.7193126225962993e-06, "loss": 0.0009, "step": 73870 }, { "epoch": 1.2485740601471993, "grad_norm": 0.11442594975233078, "learning_rate": 3.7178870489033857e-06, "loss": 0.0011, "step": 73880 }, { "epoch": 1.2487430604260497, "grad_norm": 0.0025327573530375957, "learning_rate": 3.716461586760509e-06, "loss": 0.0004, "step": 73890 }, { "epoch": 1.2489120607049002, "grad_norm": 0.07135838270187378, "learning_rate": 3.7150362362916946e-06, "loss": 0.0014, "step": 73900 }, { "epoch": 1.2490810609837506, "grad_norm": 0.1056516021490097, "learning_rate": 3.713610997620952e-06, "loss": 0.0009, "step": 73910 }, { "epoch": 1.2492500612626012, "grad_norm": 0.026004845276474953, "learning_rate": 3.712185870872286e-06, "loss": 0.001, "step": 73920 }, { "epoch": 1.2494190615414515, "grad_norm": 0.00045875273644924164, "learning_rate": 3.710760856169689e-06, "loss": 0.0003, "step": 73930 }, { "epoch": 1.249588061820302, "grad_norm": 0.055750828236341476, "learning_rate": 3.7093359536371438e-06, "loss": 0.0009, "step": 73940 }, { "epoch": 1.2497570620991525, "grad_norm": 0.019487323239445686, "learning_rate": 3.707911163398623e-06, "loss": 0.0009, "step": 73950 }, { "epoch": 1.2499260623780029, "grad_norm": 0.05430120974779129, "learning_rate": 3.7064864855780936e-06, "loss": 0.001, "step": 73960 }, { "epoch": 1.2500950626568534, "grad_norm": 0.08097860962152481, "learning_rate": 3.7050619202995048e-06, "loss": 0.0009, "step": 73970 }, { "epoch": 1.2502640629357038, "grad_norm": 0.07676412165164948, "learning_rate": 3.7036374676868053e-06, "loss": 0.0011, "step": 73980 }, { "epoch": 1.2504330632145544, "grad_norm": 0.04987164959311485, "learning_rate": 3.702213127863925e-06, "loss": 0.001, "step": 73990 }, { "epoch": 1.2506020634934047, "grad_norm": 0.1494833081960678, "learning_rate": 3.7007889009547927e-06, "loss": 0.0007, "step": 74000 }, { "epoch": 1.2507710637722553, "grad_norm": 0.04136582463979721, "learning_rate": 3.6993647870833184e-06, "loss": 0.0005, "step": 74010 }, { "epoch": 1.2509400640511057, "grad_norm": 0.03510259464383125, "learning_rate": 3.6979407863734095e-06, "loss": 0.0004, "step": 74020 }, { "epoch": 1.251109064329956, "grad_norm": 0.09876205772161484, "learning_rate": 3.696516898948962e-06, "loss": 0.001, "step": 74030 }, { "epoch": 1.2512780646088066, "grad_norm": 0.10713042318820953, "learning_rate": 3.6950931249338594e-06, "loss": 0.0013, "step": 74040 }, { "epoch": 1.251447064887657, "grad_norm": 0.02865634858608246, "learning_rate": 3.6936694644519777e-06, "loss": 0.0004, "step": 74050 }, { "epoch": 1.2516160651665076, "grad_norm": 0.005249988753348589, "learning_rate": 3.6922459176271813e-06, "loss": 0.0023, "step": 74060 }, { "epoch": 1.251785065445358, "grad_norm": 0.07062037289142609, "learning_rate": 3.690822484583328e-06, "loss": 0.0006, "step": 74070 }, { "epoch": 1.2519540657242083, "grad_norm": 0.056166257709264755, "learning_rate": 3.6893991654442595e-06, "loss": 0.0007, "step": 74080 }, { "epoch": 1.252123066003059, "grad_norm": 0.11034413427114487, "learning_rate": 3.6879759603338154e-06, "loss": 0.0009, "step": 74090 }, { "epoch": 1.2522920662819095, "grad_norm": 0.027039768174290657, "learning_rate": 3.686552869375818e-06, "loss": 0.001, "step": 74100 }, { "epoch": 1.2524610665607598, "grad_norm": 0.03380032628774643, "learning_rate": 3.685129892694087e-06, "loss": 0.0013, "step": 74110 }, { "epoch": 1.2526300668396102, "grad_norm": 0.02615695632994175, "learning_rate": 3.6837070304124236e-06, "loss": 0.0013, "step": 74120 }, { "epoch": 1.2527990671184608, "grad_norm": 0.023271802812814713, "learning_rate": 3.6822842826546267e-06, "loss": 0.0012, "step": 74130 }, { "epoch": 1.2529680673973111, "grad_norm": 0.06563130021095276, "learning_rate": 3.6808616495444816e-06, "loss": 0.0007, "step": 74140 }, { "epoch": 1.2531370676761617, "grad_norm": 0.05916164442896843, "learning_rate": 3.6794391312057664e-06, "loss": 0.0007, "step": 74150 }, { "epoch": 1.253306067955012, "grad_norm": 0.006687621120363474, "learning_rate": 3.6780167277622424e-06, "loss": 0.0023, "step": 74160 }, { "epoch": 1.2534750682338625, "grad_norm": 0.027524176985025406, "learning_rate": 3.676594439337671e-06, "loss": 0.0009, "step": 74170 }, { "epoch": 1.253644068512713, "grad_norm": 0.055483873933553696, "learning_rate": 3.675172266055792e-06, "loss": 0.0012, "step": 74180 }, { "epoch": 1.2538130687915636, "grad_norm": 0.07914508134126663, "learning_rate": 3.673750208040348e-06, "loss": 0.0013, "step": 74190 }, { "epoch": 1.253982069070414, "grad_norm": 0.02009778842329979, "learning_rate": 3.672328265415059e-06, "loss": 0.0004, "step": 74200 }, { "epoch": 1.2541510693492643, "grad_norm": 0.011674889363348484, "learning_rate": 3.6709064383036454e-06, "loss": 0.0006, "step": 74210 }, { "epoch": 1.254320069628115, "grad_norm": 0.008638451807200909, "learning_rate": 3.6694847268298107e-06, "loss": 0.001, "step": 74220 }, { "epoch": 1.2544890699069653, "grad_norm": 0.08116303384304047, "learning_rate": 3.668063131117252e-06, "loss": 0.0009, "step": 74230 }, { "epoch": 1.2546580701858159, "grad_norm": 0.014926801435649395, "learning_rate": 3.6666416512896524e-06, "loss": 0.001, "step": 74240 }, { "epoch": 1.2548270704646662, "grad_norm": 0.03558624908328056, "learning_rate": 3.665220287470692e-06, "loss": 0.0012, "step": 74250 }, { "epoch": 1.2549960707435166, "grad_norm": 0.13499194383621216, "learning_rate": 3.663799039784032e-06, "loss": 0.0014, "step": 74260 }, { "epoch": 1.2551650710223672, "grad_norm": 0.04808972403407097, "learning_rate": 3.6623779083533286e-06, "loss": 0.0011, "step": 74270 }, { "epoch": 1.2553340713012178, "grad_norm": 0.04176962003111839, "learning_rate": 3.6609568933022307e-06, "loss": 0.0005, "step": 74280 }, { "epoch": 1.2555030715800681, "grad_norm": 0.07730311900377274, "learning_rate": 3.6595359947543686e-06, "loss": 0.0007, "step": 74290 }, { "epoch": 1.2556720718589185, "grad_norm": 0.07153651118278503, "learning_rate": 3.658115212833372e-06, "loss": 0.0012, "step": 74300 }, { "epoch": 1.255841072137769, "grad_norm": 0.041518520563840866, "learning_rate": 3.6566945476628514e-06, "loss": 0.0013, "step": 74310 }, { "epoch": 1.2560100724166194, "grad_norm": 0.037388209253549576, "learning_rate": 3.6552739993664144e-06, "loss": 0.0014, "step": 74320 }, { "epoch": 1.25617907269547, "grad_norm": 0.0725250393152237, "learning_rate": 3.6538535680676534e-06, "loss": 0.001, "step": 74330 }, { "epoch": 1.2563480729743204, "grad_norm": 0.006527617108076811, "learning_rate": 3.6524332538901573e-06, "loss": 0.001, "step": 74340 }, { "epoch": 1.2565170732531707, "grad_norm": 0.0347895473241806, "learning_rate": 3.6510130569574943e-06, "loss": 0.0007, "step": 74350 }, { "epoch": 1.2566860735320213, "grad_norm": 0.06007776036858559, "learning_rate": 3.6495929773932338e-06, "loss": 0.0011, "step": 74360 }, { "epoch": 1.256855073810872, "grad_norm": 0.09146324545145035, "learning_rate": 3.648173015320925e-06, "loss": 0.0005, "step": 74370 }, { "epoch": 1.2570240740897223, "grad_norm": 0.043120142072439194, "learning_rate": 3.6467531708641156e-06, "loss": 0.0005, "step": 74380 }, { "epoch": 1.2571930743685726, "grad_norm": 0.05117538943886757, "learning_rate": 3.645333444146335e-06, "loss": 0.0011, "step": 74390 }, { "epoch": 1.2573620746474232, "grad_norm": 0.09959695488214493, "learning_rate": 3.64391383529111e-06, "loss": 0.0009, "step": 74400 }, { "epoch": 1.2575310749262736, "grad_norm": 0.05299863964319229, "learning_rate": 3.642494344421951e-06, "loss": 0.001, "step": 74410 }, { "epoch": 1.2577000752051242, "grad_norm": 0.044101472944021225, "learning_rate": 3.6410749716623604e-06, "loss": 0.0006, "step": 74420 }, { "epoch": 1.2578690754839745, "grad_norm": 0.05830482393503189, "learning_rate": 3.6396557171358317e-06, "loss": 0.0005, "step": 74430 }, { "epoch": 1.258038075762825, "grad_norm": 0.014905164949595928, "learning_rate": 3.6382365809658483e-06, "loss": 0.0005, "step": 74440 }, { "epoch": 1.2582070760416755, "grad_norm": 0.009240568615496159, "learning_rate": 3.6368175632758786e-06, "loss": 0.0019, "step": 74450 }, { "epoch": 1.258376076320526, "grad_norm": 0.0460660383105278, "learning_rate": 3.6353986641893866e-06, "loss": 0.0009, "step": 74460 }, { "epoch": 1.2585450765993764, "grad_norm": 0.07785634696483612, "learning_rate": 3.633979883829821e-06, "loss": 0.0026, "step": 74470 }, { "epoch": 1.2587140768782268, "grad_norm": 0.00043464999180287123, "learning_rate": 3.6325612223206264e-06, "loss": 0.001, "step": 74480 }, { "epoch": 1.2588830771570774, "grad_norm": 0.024840181693434715, "learning_rate": 3.631142679785229e-06, "loss": 0.0001, "step": 74490 }, { "epoch": 1.2590520774359277, "grad_norm": 0.07785341143608093, "learning_rate": 3.6297242563470515e-06, "loss": 0.0005, "step": 74500 }, { "epoch": 1.2592210777147783, "grad_norm": 0.07119804620742798, "learning_rate": 3.6283059521295016e-06, "loss": 0.001, "step": 74510 }, { "epoch": 1.2593900779936287, "grad_norm": 0.0005774807068519294, "learning_rate": 3.6268877672559816e-06, "loss": 0.0007, "step": 74520 }, { "epoch": 1.259559078272479, "grad_norm": 0.049248334020376205, "learning_rate": 3.6254697018498777e-06, "loss": 0.0017, "step": 74530 }, { "epoch": 1.2597280785513296, "grad_norm": 0.02326933853328228, "learning_rate": 3.624051756034568e-06, "loss": 0.0005, "step": 74540 }, { "epoch": 1.2598970788301802, "grad_norm": 0.05022501200437546, "learning_rate": 3.6226339299334256e-06, "loss": 0.0008, "step": 74550 }, { "epoch": 1.2600660791090306, "grad_norm": 0.02887713722884655, "learning_rate": 3.6212162236698017e-06, "loss": 0.0014, "step": 74560 }, { "epoch": 1.260235079387881, "grad_norm": 0.020068364217877388, "learning_rate": 3.619798637367049e-06, "loss": 0.0009, "step": 74570 }, { "epoch": 1.2604040796667315, "grad_norm": 0.04366261512041092, "learning_rate": 3.6183811711485005e-06, "loss": 0.0005, "step": 74580 }, { "epoch": 1.2605730799455819, "grad_norm": 0.054522644728422165, "learning_rate": 3.616963825137486e-06, "loss": 0.0009, "step": 74590 }, { "epoch": 1.2607420802244325, "grad_norm": 0.006772140506654978, "learning_rate": 3.615546599457318e-06, "loss": 0.0004, "step": 74600 }, { "epoch": 1.2609110805032828, "grad_norm": 0.07526401430368423, "learning_rate": 3.6141294942313043e-06, "loss": 0.0013, "step": 74610 }, { "epoch": 1.2610800807821332, "grad_norm": 0.002872828394174576, "learning_rate": 3.612712509582738e-06, "loss": 0.0006, "step": 74620 }, { "epoch": 1.2612490810609838, "grad_norm": 0.026724405586719513, "learning_rate": 3.6112956456349073e-06, "loss": 0.0008, "step": 74630 }, { "epoch": 1.2614180813398341, "grad_norm": 0.019419973716139793, "learning_rate": 3.609878902511082e-06, "loss": 0.0009, "step": 74640 }, { "epoch": 1.2615870816186847, "grad_norm": 0.012238175608217716, "learning_rate": 3.608462280334529e-06, "loss": 0.0018, "step": 74650 }, { "epoch": 1.261756081897535, "grad_norm": 0.1558408886194229, "learning_rate": 3.6070457792284987e-06, "loss": 0.0008, "step": 74660 }, { "epoch": 1.2619250821763857, "grad_norm": 0.06568319350481033, "learning_rate": 3.6056293993162362e-06, "loss": 0.0008, "step": 74670 }, { "epoch": 1.262094082455236, "grad_norm": 0.025579141452908516, "learning_rate": 3.604213140720969e-06, "loss": 0.0006, "step": 74680 }, { "epoch": 1.2622630827340866, "grad_norm": 0.09482182562351227, "learning_rate": 3.6027970035659233e-06, "loss": 0.0013, "step": 74690 }, { "epoch": 1.262432083012937, "grad_norm": 0.0515873022377491, "learning_rate": 3.6013809879743074e-06, "loss": 0.0008, "step": 74700 }, { "epoch": 1.2626010832917873, "grad_norm": 0.03804394602775574, "learning_rate": 3.599965094069322e-06, "loss": 0.0011, "step": 74710 }, { "epoch": 1.262770083570638, "grad_norm": 0.05061568692326546, "learning_rate": 3.598549321974156e-06, "loss": 0.0013, "step": 74720 }, { "epoch": 1.2629390838494883, "grad_norm": 0.06825969368219376, "learning_rate": 3.597133671811991e-06, "loss": 0.0015, "step": 74730 }, { "epoch": 1.2631080841283389, "grad_norm": 0.009210345335304737, "learning_rate": 3.595718143705992e-06, "loss": 0.0006, "step": 74740 }, { "epoch": 1.2632770844071892, "grad_norm": 0.08739422261714935, "learning_rate": 3.59430273777932e-06, "loss": 0.0008, "step": 74750 }, { "epoch": 1.2634460846860398, "grad_norm": 0.011182409711182117, "learning_rate": 3.5928874541551184e-06, "loss": 0.0009, "step": 74760 }, { "epoch": 1.2636150849648902, "grad_norm": 0.0437384769320488, "learning_rate": 3.591472292956528e-06, "loss": 0.0014, "step": 74770 }, { "epoch": 1.2637840852437408, "grad_norm": 0.08981582522392273, "learning_rate": 3.5900572543066707e-06, "loss": 0.0009, "step": 74780 }, { "epoch": 1.2639530855225911, "grad_norm": 0.02562858909368515, "learning_rate": 3.588642338328664e-06, "loss": 0.0007, "step": 74790 }, { "epoch": 1.2641220858014415, "grad_norm": 0.008729532361030579, "learning_rate": 3.587227545145612e-06, "loss": 0.0008, "step": 74800 }, { "epoch": 1.264291086080292, "grad_norm": 0.03705538436770439, "learning_rate": 3.5858128748806097e-06, "loss": 0.0009, "step": 74810 }, { "epoch": 1.2644600863591424, "grad_norm": 0.017782073467969894, "learning_rate": 3.5843983276567384e-06, "loss": 0.0007, "step": 74820 }, { "epoch": 1.264629086637993, "grad_norm": 0.011385198682546616, "learning_rate": 3.582983903597071e-06, "loss": 0.0006, "step": 74830 }, { "epoch": 1.2647980869168434, "grad_norm": 0.05284975469112396, "learning_rate": 3.5815696028246715e-06, "loss": 0.0005, "step": 74840 }, { "epoch": 1.264967087195694, "grad_norm": 0.018762778490781784, "learning_rate": 3.580155425462587e-06, "loss": 0.0009, "step": 74850 }, { "epoch": 1.2651360874745443, "grad_norm": 0.09084206819534302, "learning_rate": 3.5787413716338614e-06, "loss": 0.0016, "step": 74860 }, { "epoch": 1.265305087753395, "grad_norm": 0.019888978451490402, "learning_rate": 3.5773274414615207e-06, "loss": 0.0008, "step": 74870 }, { "epoch": 1.2654740880322453, "grad_norm": 0.036904774606227875, "learning_rate": 3.5759136350685876e-06, "loss": 0.0015, "step": 74880 }, { "epoch": 1.2656430883110956, "grad_norm": 0.10370167344808578, "learning_rate": 3.5744999525780666e-06, "loss": 0.0013, "step": 74890 }, { "epoch": 1.2658120885899462, "grad_norm": 0.09771838784217834, "learning_rate": 3.5730863941129566e-06, "loss": 0.0016, "step": 74900 }, { "epoch": 1.2659810888687966, "grad_norm": 0.032277822494506836, "learning_rate": 3.5716729597962435e-06, "loss": 0.001, "step": 74910 }, { "epoch": 1.2661500891476472, "grad_norm": 0.03454013541340828, "learning_rate": 3.5702596497509053e-06, "loss": 0.0018, "step": 74920 }, { "epoch": 1.2663190894264975, "grad_norm": 0.0273564625531435, "learning_rate": 3.568846464099902e-06, "loss": 0.0008, "step": 74930 }, { "epoch": 1.266488089705348, "grad_norm": 0.04307074472308159, "learning_rate": 3.5674334029661937e-06, "loss": 0.0012, "step": 74940 }, { "epoch": 1.2666570899841985, "grad_norm": 0.05055421218276024, "learning_rate": 3.566020466472717e-06, "loss": 0.0008, "step": 74950 }, { "epoch": 1.266826090263049, "grad_norm": 0.0491347461938858, "learning_rate": 3.5646076547424106e-06, "loss": 0.0008, "step": 74960 }, { "epoch": 1.2669950905418994, "grad_norm": 0.02737373672425747, "learning_rate": 3.5631949678981904e-06, "loss": 0.0011, "step": 74970 }, { "epoch": 1.2671640908207498, "grad_norm": 0.05359240621328354, "learning_rate": 3.561782406062971e-06, "loss": 0.0007, "step": 74980 }, { "epoch": 1.2673330910996004, "grad_norm": 0.040238119661808014, "learning_rate": 3.560369969359649e-06, "loss": 0.0009, "step": 74990 }, { "epoch": 1.2675020913784507, "grad_norm": 0.05049556493759155, "learning_rate": 3.5589576579111167e-06, "loss": 0.0016, "step": 75000 }, { "epoch": 1.2676710916573013, "grad_norm": 0.023869765922427177, "learning_rate": 3.557545471840248e-06, "loss": 0.0008, "step": 75010 }, { "epoch": 1.2678400919361517, "grad_norm": 0.020744100213050842, "learning_rate": 3.5561334112699154e-06, "loss": 0.001, "step": 75020 }, { "epoch": 1.268009092215002, "grad_norm": 0.07367073744535446, "learning_rate": 3.5547214763229686e-06, "loss": 0.0012, "step": 75030 }, { "epoch": 1.2681780924938526, "grad_norm": 0.06526787579059601, "learning_rate": 3.5533096671222556e-06, "loss": 0.0018, "step": 75040 }, { "epoch": 1.2683470927727032, "grad_norm": 0.11561532318592072, "learning_rate": 3.5518979837906136e-06, "loss": 0.001, "step": 75050 }, { "epoch": 1.2685160930515536, "grad_norm": 0.06659328937530518, "learning_rate": 3.550486426450861e-06, "loss": 0.003, "step": 75060 }, { "epoch": 1.268685093330404, "grad_norm": 0.019424647092819214, "learning_rate": 3.549074995225815e-06, "loss": 0.002, "step": 75070 }, { "epoch": 1.2688540936092545, "grad_norm": 0.009543772786855698, "learning_rate": 3.547663690238271e-06, "loss": 0.0004, "step": 75080 }, { "epoch": 1.2690230938881049, "grad_norm": 0.03657336160540581, "learning_rate": 3.5462525116110246e-06, "loss": 0.0013, "step": 75090 }, { "epoch": 1.2691920941669554, "grad_norm": 0.17695069313049316, "learning_rate": 3.5448414594668524e-06, "loss": 0.0014, "step": 75100 }, { "epoch": 1.2693610944458058, "grad_norm": 0.0062867943197488785, "learning_rate": 3.543430533928525e-06, "loss": 0.0018, "step": 75110 }, { "epoch": 1.2695300947246562, "grad_norm": 0.04585151746869087, "learning_rate": 3.5420197351187966e-06, "loss": 0.0008, "step": 75120 }, { "epoch": 1.2696990950035068, "grad_norm": 0.10385863482952118, "learning_rate": 3.540609063160418e-06, "loss": 0.0009, "step": 75130 }, { "epoch": 1.2698680952823573, "grad_norm": 0.03469540923833847, "learning_rate": 3.539198518176119e-06, "loss": 0.0008, "step": 75140 }, { "epoch": 1.2700370955612077, "grad_norm": 0.06419689953327179, "learning_rate": 3.5377881002886293e-06, "loss": 0.0005, "step": 75150 }, { "epoch": 1.270206095840058, "grad_norm": 0.05026857554912567, "learning_rate": 3.536377809620657e-06, "loss": 0.0007, "step": 75160 }, { "epoch": 1.2703750961189086, "grad_norm": 0.015963979065418243, "learning_rate": 3.534967646294908e-06, "loss": 0.0007, "step": 75170 }, { "epoch": 1.270544096397759, "grad_norm": 0.08267915993928909, "learning_rate": 3.5335576104340715e-06, "loss": 0.0006, "step": 75180 }, { "epoch": 1.2707130966766096, "grad_norm": 0.2071213573217392, "learning_rate": 3.532147702160828e-06, "loss": 0.0013, "step": 75190 }, { "epoch": 1.27088209695546, "grad_norm": 0.054513413459062576, "learning_rate": 3.5307379215978453e-06, "loss": 0.0006, "step": 75200 }, { "epoch": 1.2710510972343103, "grad_norm": 0.06152530387043953, "learning_rate": 3.5293282688677843e-06, "loss": 0.0006, "step": 75210 }, { "epoch": 1.271220097513161, "grad_norm": 0.05172324180603027, "learning_rate": 3.5279187440932883e-06, "loss": 0.0011, "step": 75220 }, { "epoch": 1.2713890977920115, "grad_norm": 0.020962441340088844, "learning_rate": 3.5265093473969948e-06, "loss": 0.0007, "step": 75230 }, { "epoch": 1.2715580980708618, "grad_norm": 0.04996019974350929, "learning_rate": 3.5251000789015255e-06, "loss": 0.0009, "step": 75240 }, { "epoch": 1.2717270983497122, "grad_norm": 0.03674384206533432, "learning_rate": 3.523690938729498e-06, "loss": 0.0007, "step": 75250 }, { "epoch": 1.2718960986285628, "grad_norm": 0.08864463865756989, "learning_rate": 3.522281927003509e-06, "loss": 0.0007, "step": 75260 }, { "epoch": 1.2720650989074131, "grad_norm": 0.03137887269258499, "learning_rate": 3.5208730438461535e-06, "loss": 0.0004, "step": 75270 }, { "epoch": 1.2722340991862637, "grad_norm": 0.004854999948292971, "learning_rate": 3.519464289380009e-06, "loss": 0.0012, "step": 75280 }, { "epoch": 1.272403099465114, "grad_norm": 0.01660696417093277, "learning_rate": 3.5180556637276454e-06, "loss": 0.0007, "step": 75290 }, { "epoch": 1.2725720997439645, "grad_norm": 0.1040768101811409, "learning_rate": 3.5166471670116188e-06, "loss": 0.0007, "step": 75300 }, { "epoch": 1.272741100022815, "grad_norm": 0.06876754760742188, "learning_rate": 3.5152387993544753e-06, "loss": 0.0011, "step": 75310 }, { "epoch": 1.2729101003016656, "grad_norm": 0.004151761531829834, "learning_rate": 3.5138305608787514e-06, "loss": 0.0008, "step": 75320 }, { "epoch": 1.273079100580516, "grad_norm": 0.003927671350538731, "learning_rate": 3.5124224517069683e-06, "loss": 0.0015, "step": 75330 }, { "epoch": 1.2732481008593663, "grad_norm": 0.05192035809159279, "learning_rate": 3.5110144719616408e-06, "loss": 0.0007, "step": 75340 }, { "epoch": 1.273417101138217, "grad_norm": 0.011413088999688625, "learning_rate": 3.5096066217652668e-06, "loss": 0.0005, "step": 75350 }, { "epoch": 1.2735861014170673, "grad_norm": 0.020405396819114685, "learning_rate": 3.5081989012403395e-06, "loss": 0.0011, "step": 75360 }, { "epoch": 1.2737551016959179, "grad_norm": 0.048676881939172745, "learning_rate": 3.5067913105093337e-06, "loss": 0.0005, "step": 75370 }, { "epoch": 1.2739241019747682, "grad_norm": 0.09745845198631287, "learning_rate": 3.50538384969472e-06, "loss": 0.0014, "step": 75380 }, { "epoch": 1.2740931022536186, "grad_norm": 0.0010723049053922296, "learning_rate": 3.5039765189189515e-06, "loss": 0.0011, "step": 75390 }, { "epoch": 1.2742621025324692, "grad_norm": 0.12608641386032104, "learning_rate": 3.5025693183044766e-06, "loss": 0.0009, "step": 75400 }, { "epoch": 1.2744311028113198, "grad_norm": 0.04926927015185356, "learning_rate": 3.501162247973724e-06, "loss": 0.0006, "step": 75410 }, { "epoch": 1.2746001030901701, "grad_norm": 0.07403943687677383, "learning_rate": 3.4997553080491203e-06, "loss": 0.002, "step": 75420 }, { "epoch": 1.2747691033690205, "grad_norm": 0.05200944095849991, "learning_rate": 3.4983484986530713e-06, "loss": 0.0011, "step": 75430 }, { "epoch": 1.274938103647871, "grad_norm": 0.02016104944050312, "learning_rate": 3.496941819907981e-06, "loss": 0.0005, "step": 75440 }, { "epoch": 1.2751071039267214, "grad_norm": 0.0014036950888112187, "learning_rate": 3.4955352719362323e-06, "loss": 0.0014, "step": 75450 }, { "epoch": 1.275276104205572, "grad_norm": 0.04418787732720375, "learning_rate": 3.4941288548602056e-06, "loss": 0.0007, "step": 75460 }, { "epoch": 1.2754451044844224, "grad_norm": 0.023052405565977097, "learning_rate": 3.492722568802265e-06, "loss": 0.0015, "step": 75470 }, { "epoch": 1.2756141047632727, "grad_norm": 0.02741224505007267, "learning_rate": 3.491316413884763e-06, "loss": 0.0012, "step": 75480 }, { "epoch": 1.2757831050421233, "grad_norm": 0.00827124435454607, "learning_rate": 3.489910390230042e-06, "loss": 0.0012, "step": 75490 }, { "epoch": 1.275952105320974, "grad_norm": 0.008974979631602764, "learning_rate": 3.4885044979604352e-06, "loss": 0.0009, "step": 75500 }, { "epoch": 1.2761211055998243, "grad_norm": 0.021223215386271477, "learning_rate": 3.487098737198259e-06, "loss": 0.0008, "step": 75510 }, { "epoch": 1.2762901058786746, "grad_norm": 0.01636037603020668, "learning_rate": 3.485693108065825e-06, "loss": 0.0015, "step": 75520 }, { "epoch": 1.2764591061575252, "grad_norm": 0.07580610364675522, "learning_rate": 3.484287610685425e-06, "loss": 0.001, "step": 75530 }, { "epoch": 1.2766281064363756, "grad_norm": 0.04372125118970871, "learning_rate": 3.482882245179349e-06, "loss": 0.0011, "step": 75540 }, { "epoch": 1.2767971067152262, "grad_norm": 0.041074804961681366, "learning_rate": 3.4814770116698665e-06, "loss": 0.0008, "step": 75550 }, { "epoch": 1.2769661069940765, "grad_norm": 0.04173261299729347, "learning_rate": 3.4800719102792412e-06, "loss": 0.0006, "step": 75560 }, { "epoch": 1.277135107272927, "grad_norm": 0.02142047882080078, "learning_rate": 3.478666941129725e-06, "loss": 0.0009, "step": 75570 }, { "epoch": 1.2773041075517775, "grad_norm": 0.10476000607013702, "learning_rate": 3.477262104343555e-06, "loss": 0.0009, "step": 75580 }, { "epoch": 1.2774731078306278, "grad_norm": 0.04013489559292793, "learning_rate": 3.475857400042961e-06, "loss": 0.0006, "step": 75590 }, { "epoch": 1.2776421081094784, "grad_norm": 0.03097931295633316, "learning_rate": 3.4744528283501566e-06, "loss": 0.0005, "step": 75600 }, { "epoch": 1.2778111083883288, "grad_norm": 0.01865074224770069, "learning_rate": 3.4730483893873496e-06, "loss": 0.0005, "step": 75610 }, { "epoch": 1.2779801086671794, "grad_norm": 0.0692799761891365, "learning_rate": 3.471644083276729e-06, "loss": 0.0008, "step": 75620 }, { "epoch": 1.2781491089460297, "grad_norm": 0.020755227655172348, "learning_rate": 3.47023991014048e-06, "loss": 0.0008, "step": 75630 }, { "epoch": 1.2783181092248803, "grad_norm": 0.10251430422067642, "learning_rate": 3.4688358701007686e-06, "loss": 0.0006, "step": 75640 }, { "epoch": 1.2784871095037307, "grad_norm": 0.07248004525899887, "learning_rate": 3.467431963279756e-06, "loss": 0.0004, "step": 75650 }, { "epoch": 1.278656109782581, "grad_norm": 0.05969158187508583, "learning_rate": 3.4660281897995885e-06, "loss": 0.0004, "step": 75660 }, { "epoch": 1.2788251100614316, "grad_norm": 0.00034028541995212436, "learning_rate": 3.4646245497824004e-06, "loss": 0.0004, "step": 75670 }, { "epoch": 1.278994110340282, "grad_norm": 0.10661555081605911, "learning_rate": 3.463221043350315e-06, "loss": 0.0012, "step": 75680 }, { "epoch": 1.2791631106191326, "grad_norm": 0.02095131203532219, "learning_rate": 3.4618176706254466e-06, "loss": 0.0019, "step": 75690 }, { "epoch": 1.279332110897983, "grad_norm": 0.06987152993679047, "learning_rate": 3.460414431729891e-06, "loss": 0.0011, "step": 75700 }, { "epoch": 1.2795011111768335, "grad_norm": 0.060856495052576065, "learning_rate": 3.4590113267857417e-06, "loss": 0.0021, "step": 75710 }, { "epoch": 1.2796701114556839, "grad_norm": 0.06990408152341843, "learning_rate": 3.457608355915071e-06, "loss": 0.0009, "step": 75720 }, { "epoch": 1.2798391117345345, "grad_norm": 0.06350047141313553, "learning_rate": 3.4562055192399486e-06, "loss": 0.0011, "step": 75730 }, { "epoch": 1.2800081120133848, "grad_norm": 0.005618101917207241, "learning_rate": 3.4548028168824237e-06, "loss": 0.0012, "step": 75740 }, { "epoch": 1.2801771122922352, "grad_norm": 0.04957776144146919, "learning_rate": 3.4534002489645413e-06, "loss": 0.0013, "step": 75750 }, { "epoch": 1.2803461125710858, "grad_norm": 0.06811799854040146, "learning_rate": 3.45199781560833e-06, "loss": 0.0006, "step": 75760 }, { "epoch": 1.2805151128499361, "grad_norm": 0.024274202063679695, "learning_rate": 3.4505955169358108e-06, "loss": 0.001, "step": 75770 }, { "epoch": 1.2806841131287867, "grad_norm": 0.07120922207832336, "learning_rate": 3.449193353068987e-06, "loss": 0.0007, "step": 75780 }, { "epoch": 1.280853113407637, "grad_norm": 0.06161806359887123, "learning_rate": 3.447791324129857e-06, "loss": 0.0015, "step": 75790 }, { "epoch": 1.2810221136864877, "grad_norm": 0.03518872708082199, "learning_rate": 3.4463894302404004e-06, "loss": 0.0013, "step": 75800 }, { "epoch": 1.281191113965338, "grad_norm": 0.016944030299782753, "learning_rate": 3.444987671522591e-06, "loss": 0.0006, "step": 75810 }, { "epoch": 1.2813601142441886, "grad_norm": 0.04125964269042015, "learning_rate": 3.4435860480983907e-06, "loss": 0.0009, "step": 75820 }, { "epoch": 1.281529114523039, "grad_norm": 0.035951171070337296, "learning_rate": 3.4421845600897425e-06, "loss": 0.0011, "step": 75830 }, { "epoch": 1.2816981148018893, "grad_norm": 0.05894112214446068, "learning_rate": 3.4407832076185876e-06, "loss": 0.0017, "step": 75840 }, { "epoch": 1.28186711508074, "grad_norm": 0.012359118089079857, "learning_rate": 3.439381990806846e-06, "loss": 0.0006, "step": 75850 }, { "epoch": 1.2820361153595903, "grad_norm": 0.030763106420636177, "learning_rate": 3.4379809097764336e-06, "loss": 0.0007, "step": 75860 }, { "epoch": 1.2822051156384409, "grad_norm": 0.034162361174821854, "learning_rate": 3.43657996464925e-06, "loss": 0.0014, "step": 75870 }, { "epoch": 1.2823741159172912, "grad_norm": 0.09156583249568939, "learning_rate": 3.435179155547186e-06, "loss": 0.0009, "step": 75880 }, { "epoch": 1.2825431161961416, "grad_norm": 0.03484535962343216, "learning_rate": 3.433778482592115e-06, "loss": 0.0007, "step": 75890 }, { "epoch": 1.2827121164749922, "grad_norm": 0.033580485731363297, "learning_rate": 3.432377945905906e-06, "loss": 0.0003, "step": 75900 }, { "epoch": 1.2828811167538428, "grad_norm": 0.07795017957687378, "learning_rate": 3.43097754561041e-06, "loss": 0.0008, "step": 75910 }, { "epoch": 1.2830501170326931, "grad_norm": 0.03439640253782272, "learning_rate": 3.429577281827471e-06, "loss": 0.0008, "step": 75920 }, { "epoch": 1.2832191173115435, "grad_norm": 0.10929620265960693, "learning_rate": 3.4281771546789155e-06, "loss": 0.001, "step": 75930 }, { "epoch": 1.283388117590394, "grad_norm": 0.018059978261590004, "learning_rate": 3.4267771642865645e-06, "loss": 0.0011, "step": 75940 }, { "epoch": 1.2835571178692444, "grad_norm": 0.0037063744384795427, "learning_rate": 3.425377310772222e-06, "loss": 0.0004, "step": 75950 }, { "epoch": 1.283726118148095, "grad_norm": 0.01876419596374035, "learning_rate": 3.4239775942576835e-06, "loss": 0.0013, "step": 75960 }, { "epoch": 1.2838951184269454, "grad_norm": 0.06629638373851776, "learning_rate": 3.4225780148647285e-06, "loss": 0.0012, "step": 75970 }, { "epoch": 1.2840641187057957, "grad_norm": 0.0014231489039957523, "learning_rate": 3.4211785727151314e-06, "loss": 0.0007, "step": 75980 }, { "epoch": 1.2842331189846463, "grad_norm": 0.025891413912177086, "learning_rate": 3.4197792679306462e-06, "loss": 0.0004, "step": 75990 }, { "epoch": 1.284402119263497, "grad_norm": 0.13072508573532104, "learning_rate": 3.418380100633023e-06, "loss": 0.0016, "step": 76000 }, { "epoch": 1.2845711195423473, "grad_norm": 0.0022091337013989687, "learning_rate": 3.416981070943992e-06, "loss": 0.0011, "step": 76010 }, { "epoch": 1.2847401198211976, "grad_norm": 0.060467787086963654, "learning_rate": 3.4155821789852796e-06, "loss": 0.0011, "step": 76020 }, { "epoch": 1.2849091201000482, "grad_norm": 0.061618030071258545, "learning_rate": 3.414183424878592e-06, "loss": 0.0011, "step": 76030 }, { "epoch": 1.2850781203788986, "grad_norm": 0.022405657917261124, "learning_rate": 3.412784808745632e-06, "loss": 0.0011, "step": 76040 }, { "epoch": 1.2852471206577492, "grad_norm": 0.007900248281657696, "learning_rate": 3.411386330708082e-06, "loss": 0.0009, "step": 76050 }, { "epoch": 1.2854161209365995, "grad_norm": 0.03440115228295326, "learning_rate": 3.40998799088762e-06, "loss": 0.0005, "step": 76060 }, { "epoch": 1.2855851212154499, "grad_norm": 0.004934143740683794, "learning_rate": 3.4085897894059054e-06, "loss": 0.0007, "step": 76070 }, { "epoch": 1.2857541214943005, "grad_norm": 0.07494954019784927, "learning_rate": 3.4071917263845894e-06, "loss": 0.0018, "step": 76080 }, { "epoch": 1.285923121773151, "grad_norm": 0.03333232179284096, "learning_rate": 3.405793801945313e-06, "loss": 0.0009, "step": 76090 }, { "epoch": 1.2860921220520014, "grad_norm": 0.07441503554582596, "learning_rate": 3.404396016209697e-06, "loss": 0.0005, "step": 76100 }, { "epoch": 1.2862611223308518, "grad_norm": 0.03767404705286026, "learning_rate": 3.4029983692993607e-06, "loss": 0.0008, "step": 76110 }, { "epoch": 1.2864301226097024, "grad_norm": 0.08625879883766174, "learning_rate": 3.401600861335902e-06, "loss": 0.0008, "step": 76120 }, { "epoch": 1.2865991228885527, "grad_norm": 0.03797721117734909, "learning_rate": 3.4002034924409154e-06, "loss": 0.0007, "step": 76130 }, { "epoch": 1.2867681231674033, "grad_norm": 0.009482428431510925, "learning_rate": 3.398806262735973e-06, "loss": 0.001, "step": 76140 }, { "epoch": 1.2869371234462537, "grad_norm": 0.395962119102478, "learning_rate": 3.397409172342646e-06, "loss": 0.0014, "step": 76150 }, { "epoch": 1.287106123725104, "grad_norm": 0.007825309410691261, "learning_rate": 3.3960122213824836e-06, "loss": 0.0005, "step": 76160 }, { "epoch": 1.2872751240039546, "grad_norm": 0.02285987325012684, "learning_rate": 3.394615409977032e-06, "loss": 0.0017, "step": 76170 }, { "epoch": 1.2874441242828052, "grad_norm": 0.06611663848161697, "learning_rate": 3.393218738247816e-06, "loss": 0.0015, "step": 76180 }, { "epoch": 1.2876131245616556, "grad_norm": 0.03683452680706978, "learning_rate": 3.391822206316357e-06, "loss": 0.0012, "step": 76190 }, { "epoch": 1.287782124840506, "grad_norm": 0.0007462403154931962, "learning_rate": 3.3904258143041556e-06, "loss": 0.0018, "step": 76200 }, { "epoch": 1.2879511251193565, "grad_norm": 0.0829184278845787, "learning_rate": 3.3890295623327086e-06, "loss": 0.0008, "step": 76210 }, { "epoch": 1.2881201253982069, "grad_norm": 0.19146932661533356, "learning_rate": 3.387633450523493e-06, "loss": 0.001, "step": 76220 }, { "epoch": 1.2882891256770574, "grad_norm": 0.00407722732052207, "learning_rate": 3.386237478997981e-06, "loss": 0.0021, "step": 76230 }, { "epoch": 1.2884581259559078, "grad_norm": 0.002176871057599783, "learning_rate": 3.384841647877626e-06, "loss": 0.0005, "step": 76240 }, { "epoch": 1.2886271262347582, "grad_norm": 0.07185068726539612, "learning_rate": 3.3834459572838753e-06, "loss": 0.0011, "step": 76250 }, { "epoch": 1.2887961265136088, "grad_norm": 0.09929092973470688, "learning_rate": 3.382050407338156e-06, "loss": 0.001, "step": 76260 }, { "epoch": 1.2889651267924593, "grad_norm": 0.06979778409004211, "learning_rate": 3.380654998161893e-06, "loss": 0.0011, "step": 76270 }, { "epoch": 1.2891341270713097, "grad_norm": 0.05053688585758209, "learning_rate": 3.3792597298764884e-06, "loss": 0.0014, "step": 76280 }, { "epoch": 1.28930312735016, "grad_norm": 0.022292081266641617, "learning_rate": 3.377864602603343e-06, "loss": 0.0006, "step": 76290 }, { "epoch": 1.2894721276290106, "grad_norm": 0.04626830667257309, "learning_rate": 3.376469616463834e-06, "loss": 0.001, "step": 76300 }, { "epoch": 1.289641127907861, "grad_norm": 0.023113220930099487, "learning_rate": 3.375074771579335e-06, "loss": 0.0006, "step": 76310 }, { "epoch": 1.2898101281867116, "grad_norm": 0.07600270211696625, "learning_rate": 3.373680068071205e-06, "loss": 0.0011, "step": 76320 }, { "epoch": 1.289979128465562, "grad_norm": 0.18534424901008606, "learning_rate": 3.3722855060607874e-06, "loss": 0.0025, "step": 76330 }, { "epoch": 1.2901481287444123, "grad_norm": 0.03411588445305824, "learning_rate": 3.3708910856694177e-06, "loss": 0.0009, "step": 76340 }, { "epoch": 1.290317129023263, "grad_norm": 0.04233044013381004, "learning_rate": 3.3694968070184162e-06, "loss": 0.0006, "step": 76350 }, { "epoch": 1.2904861293021135, "grad_norm": 0.028893014416098595, "learning_rate": 3.368102670229094e-06, "loss": 0.001, "step": 76360 }, { "epoch": 1.2906551295809638, "grad_norm": 0.05490986257791519, "learning_rate": 3.366708675422745e-06, "loss": 0.0006, "step": 76370 }, { "epoch": 1.2908241298598142, "grad_norm": 0.009264852851629257, "learning_rate": 3.365314822720657e-06, "loss": 0.0009, "step": 76380 }, { "epoch": 1.2909931301386648, "grad_norm": 0.15807749330997467, "learning_rate": 3.3639211122440963e-06, "loss": 0.001, "step": 76390 }, { "epoch": 1.2911621304175152, "grad_norm": 0.06726587563753128, "learning_rate": 3.362527544114329e-06, "loss": 0.0009, "step": 76400 }, { "epoch": 1.2913311306963657, "grad_norm": 0.009346742182970047, "learning_rate": 3.3611341184525957e-06, "loss": 0.0014, "step": 76410 }, { "epoch": 1.291500130975216, "grad_norm": 0.037689968943595886, "learning_rate": 3.359740835380137e-06, "loss": 0.0008, "step": 76420 }, { "epoch": 1.2916691312540665, "grad_norm": 0.013817720115184784, "learning_rate": 3.3583476950181727e-06, "loss": 0.001, "step": 76430 }, { "epoch": 1.291838131532917, "grad_norm": 0.043633535504341125, "learning_rate": 3.356954697487912e-06, "loss": 0.0012, "step": 76440 }, { "epoch": 1.2920071318117674, "grad_norm": 0.08570516854524612, "learning_rate": 3.355561842910553e-06, "loss": 0.0009, "step": 76450 }, { "epoch": 1.292176132090618, "grad_norm": 0.12532709538936615, "learning_rate": 3.3541691314072835e-06, "loss": 0.0011, "step": 76460 }, { "epoch": 1.2923451323694684, "grad_norm": 0.035797711461782455, "learning_rate": 3.3527765630992715e-06, "loss": 0.0007, "step": 76470 }, { "epoch": 1.292514132648319, "grad_norm": 0.06022035330533981, "learning_rate": 3.3513841381076812e-06, "loss": 0.0013, "step": 76480 }, { "epoch": 1.2926831329271693, "grad_norm": 0.06282058358192444, "learning_rate": 3.3499918565536565e-06, "loss": 0.001, "step": 76490 }, { "epoch": 1.2928521332060199, "grad_norm": 0.022315459325909615, "learning_rate": 3.3485997185583375e-06, "loss": 0.001, "step": 76500 }, { "epoch": 1.2930211334848702, "grad_norm": 0.02336900681257248, "learning_rate": 3.3472077242428414e-06, "loss": 0.001, "step": 76510 }, { "epoch": 1.2931901337637206, "grad_norm": 0.042133525013923645, "learning_rate": 3.3458158737282824e-06, "loss": 0.0008, "step": 76520 }, { "epoch": 1.2933591340425712, "grad_norm": 0.02696537785232067, "learning_rate": 3.3444241671357563e-06, "loss": 0.0011, "step": 76530 }, { "epoch": 1.2935281343214216, "grad_norm": 0.05946613848209381, "learning_rate": 3.3430326045863515e-06, "loss": 0.0006, "step": 76540 }, { "epoch": 1.2936971346002721, "grad_norm": 0.07703051716089249, "learning_rate": 3.3416411862011356e-06, "loss": 0.001, "step": 76550 }, { "epoch": 1.2938661348791225, "grad_norm": 0.05500364303588867, "learning_rate": 3.3402499121011734e-06, "loss": 0.0015, "step": 76560 }, { "epoch": 1.294035135157973, "grad_norm": 0.041644442826509476, "learning_rate": 3.3388587824075094e-06, "loss": 0.0008, "step": 76570 }, { "epoch": 1.2942041354368234, "grad_norm": 0.12322068214416504, "learning_rate": 3.337467797241179e-06, "loss": 0.0006, "step": 76580 }, { "epoch": 1.294373135715674, "grad_norm": 0.02212311513721943, "learning_rate": 3.3360769567232077e-06, "loss": 0.0009, "step": 76590 }, { "epoch": 1.2945421359945244, "grad_norm": 0.09738665819168091, "learning_rate": 3.3346862609746005e-06, "loss": 0.0012, "step": 76600 }, { "epoch": 1.2947111362733748, "grad_norm": 0.11395315825939178, "learning_rate": 3.3332957101163597e-06, "loss": 0.0028, "step": 76610 }, { "epoch": 1.2948801365522253, "grad_norm": 0.04635939002037048, "learning_rate": 3.3319053042694653e-06, "loss": 0.001, "step": 76620 }, { "epoch": 1.2950491368310757, "grad_norm": 0.020105794072151184, "learning_rate": 3.3305150435548927e-06, "loss": 0.0008, "step": 76630 }, { "epoch": 1.2952181371099263, "grad_norm": 0.01157683227211237, "learning_rate": 3.3291249280935995e-06, "loss": 0.0004, "step": 76640 }, { "epoch": 1.2953871373887766, "grad_norm": 0.0161611158400774, "learning_rate": 3.3277349580065343e-06, "loss": 0.0009, "step": 76650 }, { "epoch": 1.2955561376676272, "grad_norm": 0.03194257989525795, "learning_rate": 3.326345133414629e-06, "loss": 0.0005, "step": 76660 }, { "epoch": 1.2957251379464776, "grad_norm": 0.04428780823945999, "learning_rate": 3.3249554544388074e-06, "loss": 0.0008, "step": 76670 }, { "epoch": 1.2958941382253282, "grad_norm": 0.02459871396422386, "learning_rate": 3.3235659211999753e-06, "loss": 0.0008, "step": 76680 }, { "epoch": 1.2960631385041785, "grad_norm": 0.12832576036453247, "learning_rate": 3.3221765338190326e-06, "loss": 0.0013, "step": 76690 }, { "epoch": 1.296232138783029, "grad_norm": 0.04543474689126015, "learning_rate": 3.320787292416859e-06, "loss": 0.001, "step": 76700 }, { "epoch": 1.2964011390618795, "grad_norm": 0.044971760362386703, "learning_rate": 3.3193981971143275e-06, "loss": 0.0009, "step": 76710 }, { "epoch": 1.2965701393407298, "grad_norm": 0.0427575409412384, "learning_rate": 3.3180092480322943e-06, "loss": 0.0006, "step": 76720 }, { "epoch": 1.2967391396195804, "grad_norm": 0.06516630202531815, "learning_rate": 3.3166204452916085e-06, "loss": 0.0011, "step": 76730 }, { "epoch": 1.2969081398984308, "grad_norm": 0.12783502042293549, "learning_rate": 3.315231789013098e-06, "loss": 0.001, "step": 76740 }, { "epoch": 1.2970771401772814, "grad_norm": 0.05215373635292053, "learning_rate": 3.313843279317587e-06, "loss": 0.0013, "step": 76750 }, { "epoch": 1.2972461404561317, "grad_norm": 0.0056024095974862576, "learning_rate": 3.3124549163258777e-06, "loss": 0.0013, "step": 76760 }, { "epoch": 1.2974151407349823, "grad_norm": 0.05509374663233757, "learning_rate": 3.3110667001587694e-06, "loss": 0.0003, "step": 76770 }, { "epoch": 1.2975841410138327, "grad_norm": 0.02442927099764347, "learning_rate": 3.3096786309370387e-06, "loss": 0.0008, "step": 76780 }, { "epoch": 1.297753141292683, "grad_norm": 0.04514923319220543, "learning_rate": 3.3082907087814585e-06, "loss": 0.001, "step": 76790 }, { "epoch": 1.2979221415715336, "grad_norm": 0.005436763167381287, "learning_rate": 3.306902933812783e-06, "loss": 0.001, "step": 76800 }, { "epoch": 1.298091141850384, "grad_norm": 0.023903531953692436, "learning_rate": 3.305515306151756e-06, "loss": 0.0014, "step": 76810 }, { "epoch": 1.2982601421292346, "grad_norm": 0.06894169002771378, "learning_rate": 3.3041278259191056e-06, "loss": 0.0011, "step": 76820 }, { "epoch": 1.298429142408085, "grad_norm": 0.05309809744358063, "learning_rate": 3.302740493235551e-06, "loss": 0.0015, "step": 76830 }, { "epoch": 1.2985981426869353, "grad_norm": 0.05424584448337555, "learning_rate": 3.3013533082217997e-06, "loss": 0.0014, "step": 76840 }, { "epoch": 1.2987671429657859, "grad_norm": 0.0013488342519849539, "learning_rate": 3.299966270998538e-06, "loss": 0.0013, "step": 76850 }, { "epoch": 1.2989361432446365, "grad_norm": 0.01820383593440056, "learning_rate": 3.2985793816864496e-06, "loss": 0.0009, "step": 76860 }, { "epoch": 1.2991051435234868, "grad_norm": 0.017999805510044098, "learning_rate": 3.297192640406197e-06, "loss": 0.0006, "step": 76870 }, { "epoch": 1.2992741438023372, "grad_norm": 0.07281816750764847, "learning_rate": 3.295806047278437e-06, "loss": 0.0007, "step": 76880 }, { "epoch": 1.2994431440811878, "grad_norm": 0.12261531502008438, "learning_rate": 3.2944196024238052e-06, "loss": 0.0013, "step": 76890 }, { "epoch": 1.2996121443600381, "grad_norm": 0.0003672442398965359, "learning_rate": 3.293033305962934e-06, "loss": 0.0005, "step": 76900 }, { "epoch": 1.2997811446388887, "grad_norm": 0.010020782239735126, "learning_rate": 3.2916471580164343e-06, "loss": 0.0017, "step": 76910 }, { "epoch": 1.299950144917739, "grad_norm": 0.031483955681324005, "learning_rate": 3.2902611587049093e-06, "loss": 0.0012, "step": 76920 }, { "epoch": 1.3001191451965894, "grad_norm": 0.06877174973487854, "learning_rate": 3.2888753081489467e-06, "loss": 0.0011, "step": 76930 }, { "epoch": 1.30028814547544, "grad_norm": 0.06118880957365036, "learning_rate": 3.2874896064691246e-06, "loss": 0.0007, "step": 76940 }, { "epoch": 1.3004571457542906, "grad_norm": 0.06608627736568451, "learning_rate": 3.2861040537860025e-06, "loss": 0.0004, "step": 76950 }, { "epoch": 1.300626146033141, "grad_norm": 0.15344126522541046, "learning_rate": 3.2847186502201335e-06, "loss": 0.0019, "step": 76960 }, { "epoch": 1.3007951463119913, "grad_norm": 0.03516198322176933, "learning_rate": 3.28333339589205e-06, "loss": 0.0009, "step": 76970 }, { "epoch": 1.300964146590842, "grad_norm": 0.002182116499170661, "learning_rate": 3.281948290922281e-06, "loss": 0.0007, "step": 76980 }, { "epoch": 1.3011331468696923, "grad_norm": 0.023147817701101303, "learning_rate": 3.2805633354313334e-06, "loss": 0.0015, "step": 76990 }, { "epoch": 1.3013021471485429, "grad_norm": 0.129550501704216, "learning_rate": 3.279178529539707e-06, "loss": 0.0006, "step": 77000 }, { "epoch": 1.3014711474273932, "grad_norm": 0.018386520445346832, "learning_rate": 3.277793873367885e-06, "loss": 0.0008, "step": 77010 }, { "epoch": 1.3016401477062436, "grad_norm": 0.04771873354911804, "learning_rate": 3.2764093670363422e-06, "loss": 0.0009, "step": 77020 }, { "epoch": 1.3018091479850942, "grad_norm": 0.07971066981554031, "learning_rate": 3.2750250106655336e-06, "loss": 0.001, "step": 77030 }, { "epoch": 1.3019781482639448, "grad_norm": 0.04912357032299042, "learning_rate": 3.2736408043759095e-06, "loss": 0.0006, "step": 77040 }, { "epoch": 1.3021471485427951, "grad_norm": 0.02283174730837345, "learning_rate": 3.2722567482878966e-06, "loss": 0.0007, "step": 77050 }, { "epoch": 1.3023161488216455, "grad_norm": 0.052379060536623, "learning_rate": 3.2708728425219204e-06, "loss": 0.0005, "step": 77060 }, { "epoch": 1.302485149100496, "grad_norm": 0.000811600242741406, "learning_rate": 3.2694890871983824e-06, "loss": 0.0007, "step": 77070 }, { "epoch": 1.3026541493793464, "grad_norm": 0.002285461872816086, "learning_rate": 3.268105482437679e-06, "loss": 0.0014, "step": 77080 }, { "epoch": 1.302823149658197, "grad_norm": 0.031205566599965096, "learning_rate": 3.266722028360191e-06, "loss": 0.0008, "step": 77090 }, { "epoch": 1.3029921499370474, "grad_norm": 0.008281203918159008, "learning_rate": 3.2653387250862827e-06, "loss": 0.0012, "step": 77100 }, { "epoch": 1.3031611502158977, "grad_norm": 0.08469940721988678, "learning_rate": 3.2639555727363115e-06, "loss": 0.0009, "step": 77110 }, { "epoch": 1.3033301504947483, "grad_norm": 0.10907159000635147, "learning_rate": 3.262572571430615e-06, "loss": 0.0009, "step": 77120 }, { "epoch": 1.303499150773599, "grad_norm": 0.004431854467839003, "learning_rate": 3.261189721289525e-06, "loss": 0.0019, "step": 77130 }, { "epoch": 1.3036681510524493, "grad_norm": 0.04053468629717827, "learning_rate": 3.259807022433352e-06, "loss": 0.0016, "step": 77140 }, { "epoch": 1.3038371513312996, "grad_norm": 0.1110965833067894, "learning_rate": 3.2584244749824024e-06, "loss": 0.0011, "step": 77150 }, { "epoch": 1.3040061516101502, "grad_norm": 0.06742440909147263, "learning_rate": 3.2570420790569585e-06, "loss": 0.0008, "step": 77160 }, { "epoch": 1.3041751518890006, "grad_norm": 0.01611647941172123, "learning_rate": 3.2556598347773003e-06, "loss": 0.0014, "step": 77170 }, { "epoch": 1.3043441521678512, "grad_norm": 0.04131495580077171, "learning_rate": 3.254277742263687e-06, "loss": 0.0008, "step": 77180 }, { "epoch": 1.3045131524467015, "grad_norm": 0.04060171917080879, "learning_rate": 3.252895801636369e-06, "loss": 0.0006, "step": 77190 }, { "epoch": 1.3046821527255519, "grad_norm": 0.0038544689305126667, "learning_rate": 3.2515140130155808e-06, "loss": 0.0007, "step": 77200 }, { "epoch": 1.3048511530044025, "grad_norm": 0.027534153312444687, "learning_rate": 3.2501323765215454e-06, "loss": 0.0008, "step": 77210 }, { "epoch": 1.305020153283253, "grad_norm": 0.054691337049007416, "learning_rate": 3.2487508922744703e-06, "loss": 0.0012, "step": 77220 }, { "epoch": 1.3051891535621034, "grad_norm": 0.14498290419578552, "learning_rate": 3.2473695603945553e-06, "loss": 0.0015, "step": 77230 }, { "epoch": 1.3053581538409538, "grad_norm": 0.028799815103411674, "learning_rate": 3.245988381001978e-06, "loss": 0.0012, "step": 77240 }, { "epoch": 1.3055271541198044, "grad_norm": 0.0017579590203240514, "learning_rate": 3.244607354216911e-06, "loss": 0.0007, "step": 77250 }, { "epoch": 1.3056961543986547, "grad_norm": 0.06550420075654984, "learning_rate": 3.2432264801595082e-06, "loss": 0.0011, "step": 77260 }, { "epoch": 1.3058651546775053, "grad_norm": 0.030126405879855156, "learning_rate": 3.2418457589499155e-06, "loss": 0.0009, "step": 77270 }, { "epoch": 1.3060341549563557, "grad_norm": 0.008543896488845348, "learning_rate": 3.2404651907082575e-06, "loss": 0.0021, "step": 77280 }, { "epoch": 1.306203155235206, "grad_norm": 0.09629712253808975, "learning_rate": 3.239084775554654e-06, "loss": 0.0006, "step": 77290 }, { "epoch": 1.3063721555140566, "grad_norm": 0.003114005085080862, "learning_rate": 3.2377045136092065e-06, "loss": 0.0012, "step": 77300 }, { "epoch": 1.3065411557929072, "grad_norm": 0.5669236779212952, "learning_rate": 3.2363244049920063e-06, "loss": 0.0009, "step": 77310 }, { "epoch": 1.3067101560717576, "grad_norm": 0.014506954699754715, "learning_rate": 3.234944449823126e-06, "loss": 0.0006, "step": 77320 }, { "epoch": 1.306879156350608, "grad_norm": 0.07578347623348236, "learning_rate": 3.2335646482226336e-06, "loss": 0.0006, "step": 77330 }, { "epoch": 1.3070481566294585, "grad_norm": 0.005560994613915682, "learning_rate": 3.2321850003105726e-06, "loss": 0.0007, "step": 77340 }, { "epoch": 1.3072171569083089, "grad_norm": 0.044672898948192596, "learning_rate": 3.2308055062069817e-06, "loss": 0.0013, "step": 77350 }, { "epoch": 1.3073861571871594, "grad_norm": 0.11879739165306091, "learning_rate": 3.229426166031886e-06, "loss": 0.0009, "step": 77360 }, { "epoch": 1.3075551574660098, "grad_norm": 0.015471025370061398, "learning_rate": 3.22804697990529e-06, "loss": 0.0006, "step": 77370 }, { "epoch": 1.3077241577448602, "grad_norm": 0.02590048499405384, "learning_rate": 3.226667947947193e-06, "loss": 0.001, "step": 77380 }, { "epoch": 1.3078931580237108, "grad_norm": 0.019585004076361656, "learning_rate": 3.2252890702775776e-06, "loss": 0.0012, "step": 77390 }, { "epoch": 1.3080621583025611, "grad_norm": 0.02094372734427452, "learning_rate": 3.22391034701641e-06, "loss": 0.001, "step": 77400 }, { "epoch": 1.3082311585814117, "grad_norm": 0.02087017148733139, "learning_rate": 3.222531778283648e-06, "loss": 0.0006, "step": 77410 }, { "epoch": 1.308400158860262, "grad_norm": 0.09364385157823563, "learning_rate": 3.2211533641992353e-06, "loss": 0.0009, "step": 77420 }, { "epoch": 1.3085691591391126, "grad_norm": 0.008463174104690552, "learning_rate": 3.219775104883096e-06, "loss": 0.0006, "step": 77430 }, { "epoch": 1.308738159417963, "grad_norm": 0.017918633297085762, "learning_rate": 3.2183970004551503e-06, "loss": 0.001, "step": 77440 }, { "epoch": 1.3089071596968136, "grad_norm": 0.02036973461508751, "learning_rate": 3.217019051035295e-06, "loss": 0.0013, "step": 77450 }, { "epoch": 1.309076159975664, "grad_norm": 0.0491136871278286, "learning_rate": 3.215641256743424e-06, "loss": 0.0008, "step": 77460 }, { "epoch": 1.3092451602545143, "grad_norm": 0.005848989821970463, "learning_rate": 3.214263617699407e-06, "loss": 0.0012, "step": 77470 }, { "epoch": 1.309414160533365, "grad_norm": 0.06523486971855164, "learning_rate": 3.2128861340231076e-06, "loss": 0.0005, "step": 77480 }, { "epoch": 1.3095831608122153, "grad_norm": 0.0042354641482234, "learning_rate": 3.2115088058343725e-06, "loss": 0.0014, "step": 77490 }, { "epoch": 1.3097521610910658, "grad_norm": 0.02519715204834938, "learning_rate": 3.2101316332530387e-06, "loss": 0.0007, "step": 77500 }, { "epoch": 1.3099211613699162, "grad_norm": 0.06626711040735245, "learning_rate": 3.2087546163989235e-06, "loss": 0.0008, "step": 77510 }, { "epoch": 1.3100901616487668, "grad_norm": 0.13485555350780487, "learning_rate": 3.2073777553918373e-06, "loss": 0.0012, "step": 77520 }, { "epoch": 1.3102591619276172, "grad_norm": 0.02375718578696251, "learning_rate": 3.206001050351569e-06, "loss": 0.0007, "step": 77530 }, { "epoch": 1.3104281622064677, "grad_norm": 0.06585943698883057, "learning_rate": 3.2046245013979043e-06, "loss": 0.001, "step": 77540 }, { "epoch": 1.310597162485318, "grad_norm": 0.10992880910634995, "learning_rate": 3.2032481086506044e-06, "loss": 0.001, "step": 77550 }, { "epoch": 1.3107661627641685, "grad_norm": 0.009292932227253914, "learning_rate": 3.2018718722294255e-06, "loss": 0.0011, "step": 77560 }, { "epoch": 1.310935163043019, "grad_norm": 0.08823945373296738, "learning_rate": 3.2004957922541057e-06, "loss": 0.0011, "step": 77570 }, { "epoch": 1.3111041633218694, "grad_norm": 0.023845108225941658, "learning_rate": 3.1991198688443712e-06, "loss": 0.0008, "step": 77580 }, { "epoch": 1.31127316360072, "grad_norm": 0.04529750347137451, "learning_rate": 3.197744102119933e-06, "loss": 0.0012, "step": 77590 }, { "epoch": 1.3114421638795704, "grad_norm": 0.04554332047700882, "learning_rate": 3.196368492200489e-06, "loss": 0.0009, "step": 77600 }, { "epoch": 1.311611164158421, "grad_norm": 0.05958491936326027, "learning_rate": 3.1949930392057275e-06, "loss": 0.0008, "step": 77610 }, { "epoch": 1.3117801644372713, "grad_norm": 0.013546890579164028, "learning_rate": 3.193617743255315e-06, "loss": 0.0004, "step": 77620 }, { "epoch": 1.3119491647161219, "grad_norm": 0.06428714096546173, "learning_rate": 3.1922426044689133e-06, "loss": 0.0011, "step": 77630 }, { "epoch": 1.3121181649949722, "grad_norm": 0.027526091784238815, "learning_rate": 3.1908676229661612e-06, "loss": 0.0005, "step": 77640 }, { "epoch": 1.3122871652738226, "grad_norm": 0.04166054353117943, "learning_rate": 3.1894927988666935e-06, "loss": 0.0017, "step": 77650 }, { "epoch": 1.3124561655526732, "grad_norm": 0.020531712099909782, "learning_rate": 3.188118132290123e-06, "loss": 0.0011, "step": 77660 }, { "epoch": 1.3126251658315236, "grad_norm": 0.11892349272966385, "learning_rate": 3.186743623356054e-06, "loss": 0.0011, "step": 77670 }, { "epoch": 1.3127941661103741, "grad_norm": 0.05005523934960365, "learning_rate": 3.1853692721840755e-06, "loss": 0.0008, "step": 77680 }, { "epoch": 1.3129631663892245, "grad_norm": 0.10019006580114365, "learning_rate": 3.1839950788937626e-06, "loss": 0.001, "step": 77690 }, { "epoch": 1.313132166668075, "grad_norm": 0.024045975878834724, "learning_rate": 3.1826210436046752e-06, "loss": 0.0009, "step": 77700 }, { "epoch": 1.3133011669469254, "grad_norm": 0.029203811660408974, "learning_rate": 3.181247166436364e-06, "loss": 0.0011, "step": 77710 }, { "epoch": 1.313470167225776, "grad_norm": 0.013211055658757687, "learning_rate": 3.1798734475083606e-06, "loss": 0.0003, "step": 77720 }, { "epoch": 1.3136391675046264, "grad_norm": 0.06947149336338043, "learning_rate": 3.1784998869401875e-06, "loss": 0.0005, "step": 77730 }, { "epoch": 1.3138081677834768, "grad_norm": 0.08241991698741913, "learning_rate": 3.1771264848513473e-06, "loss": 0.001, "step": 77740 }, { "epoch": 1.3139771680623273, "grad_norm": 0.11345503479242325, "learning_rate": 3.175753241361337e-06, "loss": 0.0007, "step": 77750 }, { "epoch": 1.3141461683411777, "grad_norm": 0.1144309788942337, "learning_rate": 3.1743801565896316e-06, "loss": 0.0014, "step": 77760 }, { "epoch": 1.3143151686200283, "grad_norm": 0.07751011848449707, "learning_rate": 3.1730072306556985e-06, "loss": 0.0016, "step": 77770 }, { "epoch": 1.3144841688988786, "grad_norm": 0.05221608653664589, "learning_rate": 3.1716344636789876e-06, "loss": 0.0011, "step": 77780 }, { "epoch": 1.314653169177729, "grad_norm": 0.004115086980164051, "learning_rate": 3.170261855778939e-06, "loss": 0.0012, "step": 77790 }, { "epoch": 1.3148221694565796, "grad_norm": 0.0636073425412178, "learning_rate": 3.1688894070749722e-06, "loss": 0.0015, "step": 77800 }, { "epoch": 1.3149911697354302, "grad_norm": 0.012179211713373661, "learning_rate": 3.167517117686501e-06, "loss": 0.0009, "step": 77810 }, { "epoch": 1.3151601700142805, "grad_norm": 0.0035941177047789097, "learning_rate": 3.166144987732917e-06, "loss": 0.0013, "step": 77820 }, { "epoch": 1.315329170293131, "grad_norm": 0.03358374163508415, "learning_rate": 3.1647730173336065e-06, "loss": 0.0009, "step": 77830 }, { "epoch": 1.3154981705719815, "grad_norm": 0.011771938763558865, "learning_rate": 3.1634012066079333e-06, "loss": 0.0004, "step": 77840 }, { "epoch": 1.3156671708508318, "grad_norm": 0.015004804357886314, "learning_rate": 3.1620295556752535e-06, "loss": 0.0006, "step": 77850 }, { "epoch": 1.3158361711296824, "grad_norm": 0.018552575260400772, "learning_rate": 3.1606580646549094e-06, "loss": 0.0009, "step": 77860 }, { "epoch": 1.3160051714085328, "grad_norm": 0.006084776483476162, "learning_rate": 3.1592867336662236e-06, "loss": 0.0007, "step": 77870 }, { "epoch": 1.3161741716873832, "grad_norm": 0.0012030642246827483, "learning_rate": 3.157915562828512e-06, "loss": 0.0013, "step": 77880 }, { "epoch": 1.3163431719662337, "grad_norm": 0.050810765475034714, "learning_rate": 3.15654455226107e-06, "loss": 0.0009, "step": 77890 }, { "epoch": 1.3165121722450843, "grad_norm": 0.014970553107559681, "learning_rate": 3.155173702083186e-06, "loss": 0.0016, "step": 77900 }, { "epoch": 1.3166811725239347, "grad_norm": 0.08558324724435806, "learning_rate": 3.153803012414126e-06, "loss": 0.001, "step": 77910 }, { "epoch": 1.316850172802785, "grad_norm": 0.05313260853290558, "learning_rate": 3.1524324833731513e-06, "loss": 0.0011, "step": 77920 }, { "epoch": 1.3170191730816356, "grad_norm": 0.017633775249123573, "learning_rate": 3.1510621150794997e-06, "loss": 0.0015, "step": 77930 }, { "epoch": 1.317188173360486, "grad_norm": 0.012275348417460918, "learning_rate": 3.1496919076524048e-06, "loss": 0.0013, "step": 77940 }, { "epoch": 1.3173571736393366, "grad_norm": 0.02711687795817852, "learning_rate": 3.148321861211077e-06, "loss": 0.0014, "step": 77950 }, { "epoch": 1.317526173918187, "grad_norm": 0.0425824373960495, "learning_rate": 3.146951975874719e-06, "loss": 0.0009, "step": 77960 }, { "epoch": 1.3176951741970373, "grad_norm": 0.018861930817365646, "learning_rate": 3.145582251762517e-06, "loss": 0.0008, "step": 77970 }, { "epoch": 1.3178641744758879, "grad_norm": 0.03422432765364647, "learning_rate": 3.1442126889936456e-06, "loss": 0.0005, "step": 77980 }, { "epoch": 1.3180331747547385, "grad_norm": 0.03603680804371834, "learning_rate": 3.1428432876872607e-06, "loss": 0.0006, "step": 77990 }, { "epoch": 1.3182021750335888, "grad_norm": 0.09233968704938889, "learning_rate": 3.141474047962509e-06, "loss": 0.0009, "step": 78000 }, { "epoch": 1.3183711753124392, "grad_norm": 0.08342853933572769, "learning_rate": 3.140104969938518e-06, "loss": 0.0007, "step": 78010 }, { "epoch": 1.3185401755912898, "grad_norm": 0.06904515624046326, "learning_rate": 3.138736053734408e-06, "loss": 0.0013, "step": 78020 }, { "epoch": 1.3187091758701401, "grad_norm": 0.039718836545944214, "learning_rate": 3.1373672994692777e-06, "loss": 0.0007, "step": 78030 }, { "epoch": 1.3188781761489907, "grad_norm": 0.15891526639461517, "learning_rate": 3.135998707262218e-06, "loss": 0.0021, "step": 78040 }, { "epoch": 1.319047176427841, "grad_norm": 0.15801583230495453, "learning_rate": 3.134630277232302e-06, "loss": 0.0006, "step": 78050 }, { "epoch": 1.3192161767066914, "grad_norm": 0.06909686326980591, "learning_rate": 3.1332620094985893e-06, "loss": 0.0008, "step": 78060 }, { "epoch": 1.319385176985542, "grad_norm": 0.013529068790376186, "learning_rate": 3.1318939041801253e-06, "loss": 0.0011, "step": 78070 }, { "epoch": 1.3195541772643926, "grad_norm": 0.10030799359083176, "learning_rate": 3.130525961395946e-06, "loss": 0.001, "step": 78080 }, { "epoch": 1.319723177543243, "grad_norm": 0.021510707214474678, "learning_rate": 3.1291581812650627e-06, "loss": 0.0005, "step": 78090 }, { "epoch": 1.3198921778220933, "grad_norm": 0.08789089322090149, "learning_rate": 3.1277905639064825e-06, "loss": 0.0009, "step": 78100 }, { "epoch": 1.320061178100944, "grad_norm": 0.004744492471218109, "learning_rate": 3.126423109439196e-06, "loss": 0.0008, "step": 78110 }, { "epoch": 1.3202301783797943, "grad_norm": 0.08623579144477844, "learning_rate": 3.1250558179821754e-06, "loss": 0.0017, "step": 78120 }, { "epoch": 1.3203991786586449, "grad_norm": 0.00471165357157588, "learning_rate": 3.1236886896543844e-06, "loss": 0.0019, "step": 78130 }, { "epoch": 1.3205681789374952, "grad_norm": 0.04368991404771805, "learning_rate": 3.1223217245747667e-06, "loss": 0.0012, "step": 78140 }, { "epoch": 1.3207371792163456, "grad_norm": 0.018874410539865494, "learning_rate": 3.120954922862257e-06, "loss": 0.0011, "step": 78150 }, { "epoch": 1.3209061794951962, "grad_norm": 0.014562326483428478, "learning_rate": 3.119588284635774e-06, "loss": 0.0011, "step": 78160 }, { "epoch": 1.3210751797740468, "grad_norm": 0.06037477031350136, "learning_rate": 3.11822181001422e-06, "loss": 0.0006, "step": 78170 }, { "epoch": 1.3212441800528971, "grad_norm": 0.037516556680202484, "learning_rate": 3.1168554991164855e-06, "loss": 0.0004, "step": 78180 }, { "epoch": 1.3214131803317475, "grad_norm": 0.10493247210979462, "learning_rate": 3.115489352061448e-06, "loss": 0.001, "step": 78190 }, { "epoch": 1.321582180610598, "grad_norm": 0.0857461616396904, "learning_rate": 3.114123368967967e-06, "loss": 0.0008, "step": 78200 }, { "epoch": 1.3217511808894484, "grad_norm": 0.04702319577336311, "learning_rate": 3.1127575499548913e-06, "loss": 0.0018, "step": 78210 }, { "epoch": 1.321920181168299, "grad_norm": 0.030510196462273598, "learning_rate": 3.1113918951410504e-06, "loss": 0.0004, "step": 78220 }, { "epoch": 1.3220891814471494, "grad_norm": 0.067301906645298, "learning_rate": 3.110026404645267e-06, "loss": 0.0009, "step": 78230 }, { "epoch": 1.3222581817259997, "grad_norm": 0.0009512768010608852, "learning_rate": 3.1086610785863425e-06, "loss": 0.0007, "step": 78240 }, { "epoch": 1.3224271820048503, "grad_norm": 0.024270126596093178, "learning_rate": 3.107295917083068e-06, "loss": 0.0015, "step": 78250 }, { "epoch": 1.322596182283701, "grad_norm": 0.04789629578590393, "learning_rate": 3.105930920254219e-06, "loss": 0.0008, "step": 78260 }, { "epoch": 1.3227651825625513, "grad_norm": 0.0892537534236908, "learning_rate": 3.104566088218558e-06, "loss": 0.0006, "step": 78270 }, { "epoch": 1.3229341828414016, "grad_norm": 0.13257360458374023, "learning_rate": 3.1032014210948293e-06, "loss": 0.0007, "step": 78280 }, { "epoch": 1.3231031831202522, "grad_norm": 0.0010018249740824103, "learning_rate": 3.1018369190017685e-06, "loss": 0.0006, "step": 78290 }, { "epoch": 1.3232721833991026, "grad_norm": 0.03295399248600006, "learning_rate": 3.1004725820580917e-06, "loss": 0.0007, "step": 78300 }, { "epoch": 1.3234411836779532, "grad_norm": 0.010809667408466339, "learning_rate": 3.0991084103825047e-06, "loss": 0.0009, "step": 78310 }, { "epoch": 1.3236101839568035, "grad_norm": 0.00015891263319645077, "learning_rate": 3.0977444040936943e-06, "loss": 0.0021, "step": 78320 }, { "epoch": 1.3237791842356539, "grad_norm": 0.0351998396217823, "learning_rate": 3.0963805633103385e-06, "loss": 0.0008, "step": 78330 }, { "epoch": 1.3239481845145045, "grad_norm": 0.07310052961111069, "learning_rate": 3.0950168881510974e-06, "loss": 0.001, "step": 78340 }, { "epoch": 1.3241171847933548, "grad_norm": 0.004926603753119707, "learning_rate": 3.093653378734616e-06, "loss": 0.001, "step": 78350 }, { "epoch": 1.3242861850722054, "grad_norm": 0.038179848343133926, "learning_rate": 3.092290035179527e-06, "loss": 0.0006, "step": 78360 }, { "epoch": 1.3244551853510558, "grad_norm": 0.02062433399260044, "learning_rate": 3.090926857604447e-06, "loss": 0.0007, "step": 78370 }, { "epoch": 1.3246241856299064, "grad_norm": 0.021784713491797447, "learning_rate": 3.0895638461279833e-06, "loss": 0.0005, "step": 78380 }, { "epoch": 1.3247931859087567, "grad_norm": 0.009822769090533257, "learning_rate": 3.088201000868718e-06, "loss": 0.0007, "step": 78390 }, { "epoch": 1.3249621861876073, "grad_norm": 0.06367561221122742, "learning_rate": 3.0868383219452314e-06, "loss": 0.0007, "step": 78400 }, { "epoch": 1.3251311864664577, "grad_norm": 0.028616730123758316, "learning_rate": 3.0854758094760784e-06, "loss": 0.0005, "step": 78410 }, { "epoch": 1.325300186745308, "grad_norm": 0.030751261860132217, "learning_rate": 3.0841134635798077e-06, "loss": 0.0007, "step": 78420 }, { "epoch": 1.3254691870241586, "grad_norm": 0.13814707100391388, "learning_rate": 3.0827512843749457e-06, "loss": 0.001, "step": 78430 }, { "epoch": 1.325638187303009, "grad_norm": 0.06487669795751572, "learning_rate": 3.0813892719800133e-06, "loss": 0.0015, "step": 78440 }, { "epoch": 1.3258071875818596, "grad_norm": 0.1860516518354416, "learning_rate": 3.0800274265135094e-06, "loss": 0.0009, "step": 78450 }, { "epoch": 1.32597618786071, "grad_norm": 0.027568094432353973, "learning_rate": 3.078665748093922e-06, "loss": 0.0012, "step": 78460 }, { "epoch": 1.3261451881395605, "grad_norm": 0.005442751571536064, "learning_rate": 3.0773042368397233e-06, "loss": 0.0011, "step": 78470 }, { "epoch": 1.3263141884184109, "grad_norm": 0.037079572677612305, "learning_rate": 3.0759428928693724e-06, "loss": 0.0005, "step": 78480 }, { "epoch": 1.3264831886972615, "grad_norm": 0.002279214560985565, "learning_rate": 3.0745817163013116e-06, "loss": 0.0008, "step": 78490 }, { "epoch": 1.3266521889761118, "grad_norm": 0.06447796523571014, "learning_rate": 3.073220707253971e-06, "loss": 0.0009, "step": 78500 }, { "epoch": 1.3268211892549622, "grad_norm": 0.09001284092664719, "learning_rate": 3.0718598658457634e-06, "loss": 0.0011, "step": 78510 }, { "epoch": 1.3269901895338128, "grad_norm": 0.045323025435209274, "learning_rate": 3.0704991921950904e-06, "loss": 0.0012, "step": 78520 }, { "epoch": 1.3271591898126631, "grad_norm": 0.006849227007478476, "learning_rate": 3.069138686420335e-06, "loss": 0.0012, "step": 78530 }, { "epoch": 1.3273281900915137, "grad_norm": 0.10984335094690323, "learning_rate": 3.0677783486398698e-06, "loss": 0.0008, "step": 78540 }, { "epoch": 1.327497190370364, "grad_norm": 0.027667885646224022, "learning_rate": 3.066418178972049e-06, "loss": 0.0011, "step": 78550 }, { "epoch": 1.3276661906492147, "grad_norm": 0.11743637174367905, "learning_rate": 3.065058177535218e-06, "loss": 0.0005, "step": 78560 }, { "epoch": 1.327835190928065, "grad_norm": 0.033889032900333405, "learning_rate": 3.0636983444476975e-06, "loss": 0.0008, "step": 78570 }, { "epoch": 1.3280041912069156, "grad_norm": 0.012779652141034603, "learning_rate": 3.0623386798278054e-06, "loss": 0.0004, "step": 78580 }, { "epoch": 1.328173191485766, "grad_norm": 0.02534503862261772, "learning_rate": 3.060979183793834e-06, "loss": 0.0004, "step": 78590 }, { "epoch": 1.3283421917646163, "grad_norm": 0.10777483880519867, "learning_rate": 3.0596198564640706e-06, "loss": 0.0008, "step": 78600 }, { "epoch": 1.328511192043467, "grad_norm": 0.06269858032464981, "learning_rate": 3.0582606979567784e-06, "loss": 0.0005, "step": 78610 }, { "epoch": 1.3286801923223173, "grad_norm": 0.033827923238277435, "learning_rate": 3.0569017083902143e-06, "loss": 0.0009, "step": 78620 }, { "epoch": 1.3288491926011679, "grad_norm": 0.045441195368766785, "learning_rate": 3.0555428878826164e-06, "loss": 0.001, "step": 78630 }, { "epoch": 1.3290181928800182, "grad_norm": 0.10550963133573532, "learning_rate": 3.054184236552209e-06, "loss": 0.0015, "step": 78640 }, { "epoch": 1.3291871931588686, "grad_norm": 0.04889865219593048, "learning_rate": 3.0528257545172003e-06, "loss": 0.001, "step": 78650 }, { "epoch": 1.3293561934377192, "grad_norm": 0.025506243109703064, "learning_rate": 3.0514674418957842e-06, "loss": 0.0004, "step": 78660 }, { "epoch": 1.3295251937165697, "grad_norm": 0.031631048768758774, "learning_rate": 3.050109298806143e-06, "loss": 0.0008, "step": 78670 }, { "epoch": 1.32969419399542, "grad_norm": 0.04925537109375, "learning_rate": 3.048751325366439e-06, "loss": 0.0009, "step": 78680 }, { "epoch": 1.3298631942742705, "grad_norm": 0.035217687487602234, "learning_rate": 3.0473935216948257e-06, "loss": 0.0019, "step": 78690 }, { "epoch": 1.330032194553121, "grad_norm": 0.021285902708768845, "learning_rate": 3.0460358879094343e-06, "loss": 0.0019, "step": 78700 }, { "epoch": 1.3302011948319714, "grad_norm": 0.024787139147520065, "learning_rate": 3.0446784241283898e-06, "loss": 0.0009, "step": 78710 }, { "epoch": 1.330370195110822, "grad_norm": 0.015254669822752476, "learning_rate": 3.0433211304697953e-06, "loss": 0.0009, "step": 78720 }, { "epoch": 1.3305391953896724, "grad_norm": 0.043653395026922226, "learning_rate": 3.041964007051742e-06, "loss": 0.0012, "step": 78730 }, { "epoch": 1.3307081956685227, "grad_norm": 0.08998756110668182, "learning_rate": 3.040607053992307e-06, "loss": 0.0007, "step": 78740 }, { "epoch": 1.3308771959473733, "grad_norm": 0.09312793612480164, "learning_rate": 3.039250271409554e-06, "loss": 0.0008, "step": 78750 }, { "epoch": 1.3310461962262239, "grad_norm": 0.1240491271018982, "learning_rate": 3.037893659421525e-06, "loss": 0.0008, "step": 78760 }, { "epoch": 1.3312151965050742, "grad_norm": 0.04321032017469406, "learning_rate": 3.036537218146256e-06, "loss": 0.0005, "step": 78770 }, { "epoch": 1.3313841967839246, "grad_norm": 0.043809086084365845, "learning_rate": 3.0351809477017602e-06, "loss": 0.0009, "step": 78780 }, { "epoch": 1.3315531970627752, "grad_norm": 0.03958607465028763, "learning_rate": 3.033824848206044e-06, "loss": 0.001, "step": 78790 }, { "epoch": 1.3317221973416256, "grad_norm": 0.0330476239323616, "learning_rate": 3.0324689197770907e-06, "loss": 0.0008, "step": 78800 }, { "epoch": 1.3318911976204761, "grad_norm": 0.16270068287849426, "learning_rate": 3.031113162532875e-06, "loss": 0.0021, "step": 78810 }, { "epoch": 1.3320601978993265, "grad_norm": 0.033717963844537735, "learning_rate": 3.0297575765913536e-06, "loss": 0.0007, "step": 78820 }, { "epoch": 1.3322291981781769, "grad_norm": 0.013505863957107067, "learning_rate": 3.0284021620704686e-06, "loss": 0.0008, "step": 78830 }, { "epoch": 1.3323981984570274, "grad_norm": 0.018628928810358047, "learning_rate": 3.027046919088148e-06, "loss": 0.0009, "step": 78840 }, { "epoch": 1.332567198735878, "grad_norm": 0.2068597823381424, "learning_rate": 3.025691847762306e-06, "loss": 0.001, "step": 78850 }, { "epoch": 1.3327361990147284, "grad_norm": 0.028667191043496132, "learning_rate": 3.024336948210837e-06, "loss": 0.0003, "step": 78860 }, { "epoch": 1.3329051992935788, "grad_norm": 0.08055096119642258, "learning_rate": 3.0229822205516255e-06, "loss": 0.0006, "step": 78870 }, { "epoch": 1.3330741995724293, "grad_norm": 0.03845233470201492, "learning_rate": 3.021627664902543e-06, "loss": 0.0004, "step": 78880 }, { "epoch": 1.3332431998512797, "grad_norm": 0.06304703652858734, "learning_rate": 3.020273281381436e-06, "loss": 0.0008, "step": 78890 }, { "epoch": 1.3334122001301303, "grad_norm": 0.09348312020301819, "learning_rate": 3.0189190701061476e-06, "loss": 0.0007, "step": 78900 }, { "epoch": 1.3335812004089806, "grad_norm": 0.00907797459512949, "learning_rate": 3.017565031194497e-06, "loss": 0.0017, "step": 78910 }, { "epoch": 1.333750200687831, "grad_norm": 0.0017043626867234707, "learning_rate": 3.0162111647642946e-06, "loss": 0.0008, "step": 78920 }, { "epoch": 1.3339192009666816, "grad_norm": 0.11411120742559433, "learning_rate": 3.0148574709333323e-06, "loss": 0.001, "step": 78930 }, { "epoch": 1.3340882012455322, "grad_norm": 0.05680382624268532, "learning_rate": 3.0135039498193886e-06, "loss": 0.0008, "step": 78940 }, { "epoch": 1.3342572015243825, "grad_norm": 0.02499389462172985, "learning_rate": 3.0121506015402253e-06, "loss": 0.0004, "step": 78950 }, { "epoch": 1.334426201803233, "grad_norm": 0.023089343681931496, "learning_rate": 3.0107974262135923e-06, "loss": 0.001, "step": 78960 }, { "epoch": 1.3345952020820835, "grad_norm": 0.03036220371723175, "learning_rate": 3.0094444239572195e-06, "loss": 0.0005, "step": 78970 }, { "epoch": 1.3347642023609338, "grad_norm": 0.032540082931518555, "learning_rate": 3.0080915948888288e-06, "loss": 0.0011, "step": 78980 }, { "epoch": 1.3349332026397844, "grad_norm": 0.0686960443854332, "learning_rate": 3.006738939126118e-06, "loss": 0.0006, "step": 78990 }, { "epoch": 1.3351022029186348, "grad_norm": 0.015140851028263569, "learning_rate": 3.0053864567867785e-06, "loss": 0.0008, "step": 79000 }, { "epoch": 1.3352712031974852, "grad_norm": 0.031037839129567146, "learning_rate": 3.0040341479884805e-06, "loss": 0.0015, "step": 79010 }, { "epoch": 1.3354402034763357, "grad_norm": 0.029850441962480545, "learning_rate": 3.002682012848882e-06, "loss": 0.0014, "step": 79020 }, { "epoch": 1.3356092037551863, "grad_norm": 0.044762782752513885, "learning_rate": 3.001330051485626e-06, "loss": 0.0013, "step": 79030 }, { "epoch": 1.3357782040340367, "grad_norm": 0.023029036819934845, "learning_rate": 2.99997826401634e-06, "loss": 0.001, "step": 79040 }, { "epoch": 1.335947204312887, "grad_norm": 0.007485507521778345, "learning_rate": 2.9986266505586338e-06, "loss": 0.0005, "step": 79050 }, { "epoch": 1.3361162045917376, "grad_norm": 0.0779392346739769, "learning_rate": 2.9972752112301084e-06, "loss": 0.001, "step": 79060 }, { "epoch": 1.336285204870588, "grad_norm": 0.05729474872350693, "learning_rate": 2.9959239461483403e-06, "loss": 0.0007, "step": 79070 }, { "epoch": 1.3364542051494386, "grad_norm": 0.05444788187742233, "learning_rate": 2.9945728554309013e-06, "loss": 0.0013, "step": 79080 }, { "epoch": 1.336623205428289, "grad_norm": 0.0028525509405881166, "learning_rate": 2.993221939195338e-06, "loss": 0.0012, "step": 79090 }, { "epoch": 1.3367922057071393, "grad_norm": 0.09689547121524811, "learning_rate": 2.991871197559191e-06, "loss": 0.0009, "step": 79100 }, { "epoch": 1.3369612059859899, "grad_norm": 0.09920303523540497, "learning_rate": 2.990520630639977e-06, "loss": 0.0024, "step": 79110 }, { "epoch": 1.3371302062648405, "grad_norm": 0.0005344424280337989, "learning_rate": 2.9891702385552076e-06, "loss": 0.0005, "step": 79120 }, { "epoch": 1.3372992065436908, "grad_norm": 0.1610758751630783, "learning_rate": 2.9878200214223676e-06, "loss": 0.0007, "step": 79130 }, { "epoch": 1.3374682068225412, "grad_norm": 0.14027635753154755, "learning_rate": 2.9864699793589346e-06, "loss": 0.0007, "step": 79140 }, { "epoch": 1.3376372071013918, "grad_norm": 0.0003164792724419385, "learning_rate": 2.9851201124823714e-06, "loss": 0.001, "step": 79150 }, { "epoch": 1.3378062073802421, "grad_norm": 0.031022123992443085, "learning_rate": 2.983770420910119e-06, "loss": 0.0007, "step": 79160 }, { "epoch": 1.3379752076590927, "grad_norm": 0.021356869488954544, "learning_rate": 2.9824209047596107e-06, "loss": 0.0007, "step": 79170 }, { "epoch": 1.338144207937943, "grad_norm": 0.0832192450761795, "learning_rate": 2.981071564148257e-06, "loss": 0.0009, "step": 79180 }, { "epoch": 1.3383132082167934, "grad_norm": 0.03244505450129509, "learning_rate": 2.9797223991934616e-06, "loss": 0.0006, "step": 79190 }, { "epoch": 1.338482208495644, "grad_norm": 0.00012445000174921006, "learning_rate": 2.978373410012604e-06, "loss": 0.0004, "step": 79200 }, { "epoch": 1.3386512087744946, "grad_norm": 0.007832073606550694, "learning_rate": 2.9770245967230548e-06, "loss": 0.0012, "step": 79210 }, { "epoch": 1.338820209053345, "grad_norm": 0.05080963298678398, "learning_rate": 2.9756759594421667e-06, "loss": 0.0009, "step": 79220 }, { "epoch": 1.3389892093321953, "grad_norm": 0.02256394736468792, "learning_rate": 2.9743274982872806e-06, "loss": 0.0007, "step": 79230 }, { "epoch": 1.339158209611046, "grad_norm": 0.008010189980268478, "learning_rate": 2.972979213375714e-06, "loss": 0.0012, "step": 79240 }, { "epoch": 1.3393272098898963, "grad_norm": 0.1913597285747528, "learning_rate": 2.971631104824779e-06, "loss": 0.0006, "step": 79250 }, { "epoch": 1.3394962101687469, "grad_norm": 0.07293778657913208, "learning_rate": 2.9702831727517643e-06, "loss": 0.0007, "step": 79260 }, { "epoch": 1.3396652104475972, "grad_norm": 0.06996724009513855, "learning_rate": 2.968935417273949e-06, "loss": 0.0012, "step": 79270 }, { "epoch": 1.3398342107264476, "grad_norm": 0.011677944101393223, "learning_rate": 2.967587838508592e-06, "loss": 0.0006, "step": 79280 }, { "epoch": 1.3400032110052982, "grad_norm": 0.06278412789106369, "learning_rate": 2.966240436572941e-06, "loss": 0.0012, "step": 79290 }, { "epoch": 1.3401722112841485, "grad_norm": 0.07919969409704208, "learning_rate": 2.9648932115842255e-06, "loss": 0.0016, "step": 79300 }, { "epoch": 1.3403412115629991, "grad_norm": 0.002916355151683092, "learning_rate": 2.9635461636596607e-06, "loss": 0.0003, "step": 79310 }, { "epoch": 1.3405102118418495, "grad_norm": 0.007013415917754173, "learning_rate": 2.962199292916446e-06, "loss": 0.0007, "step": 79320 }, { "epoch": 1.3406792121207, "grad_norm": 0.06998938322067261, "learning_rate": 2.9608525994717686e-06, "loss": 0.001, "step": 79330 }, { "epoch": 1.3408482123995504, "grad_norm": 0.009831363335251808, "learning_rate": 2.9595060834427923e-06, "loss": 0.0036, "step": 79340 }, { "epoch": 1.341017212678401, "grad_norm": 0.029194116592407227, "learning_rate": 2.958159744946675e-06, "loss": 0.0007, "step": 79350 }, { "epoch": 1.3411862129572514, "grad_norm": 0.0019040402257815003, "learning_rate": 2.9568135841005512e-06, "loss": 0.0007, "step": 79360 }, { "epoch": 1.3413552132361017, "grad_norm": 0.045416876673698425, "learning_rate": 2.9554676010215464e-06, "loss": 0.0009, "step": 79370 }, { "epoch": 1.3415242135149523, "grad_norm": 0.07052461057901382, "learning_rate": 2.9541217958267653e-06, "loss": 0.001, "step": 79380 }, { "epoch": 1.3416932137938027, "grad_norm": 0.01906942017376423, "learning_rate": 2.9527761686333e-06, "loss": 0.0009, "step": 79390 }, { "epoch": 1.3418622140726533, "grad_norm": 0.03192806988954544, "learning_rate": 2.951430719558228e-06, "loss": 0.0008, "step": 79400 }, { "epoch": 1.3420312143515036, "grad_norm": 0.03816506266593933, "learning_rate": 2.9500854487186093e-06, "loss": 0.001, "step": 79410 }, { "epoch": 1.3422002146303542, "grad_norm": 0.011286511085927486, "learning_rate": 2.9487403562314887e-06, "loss": 0.0005, "step": 79420 }, { "epoch": 1.3423692149092046, "grad_norm": 0.052074991166591644, "learning_rate": 2.947395442213894e-06, "loss": 0.001, "step": 79430 }, { "epoch": 1.3425382151880552, "grad_norm": 0.04629841819405556, "learning_rate": 2.9460507067828437e-06, "loss": 0.0015, "step": 79440 }, { "epoch": 1.3427072154669055, "grad_norm": 0.013466979376971722, "learning_rate": 2.9447061500553308e-06, "loss": 0.0006, "step": 79450 }, { "epoch": 1.3428762157457559, "grad_norm": 0.03133407235145569, "learning_rate": 2.9433617721483433e-06, "loss": 0.0012, "step": 79460 }, { "epoch": 1.3430452160246065, "grad_norm": 0.027102619409561157, "learning_rate": 2.9420175731788443e-06, "loss": 0.0025, "step": 79470 }, { "epoch": 1.3432142163034568, "grad_norm": 0.01388012245297432, "learning_rate": 2.940673553263789e-06, "loss": 0.0004, "step": 79480 }, { "epoch": 1.3433832165823074, "grad_norm": 0.08248895406723022, "learning_rate": 2.939329712520111e-06, "loss": 0.0015, "step": 79490 }, { "epoch": 1.3435522168611578, "grad_norm": 0.04260677471756935, "learning_rate": 2.9379860510647328e-06, "loss": 0.0021, "step": 79500 }, { "epoch": 1.3437212171400084, "grad_norm": 0.023597152903676033, "learning_rate": 2.9366425690145585e-06, "loss": 0.0013, "step": 79510 }, { "epoch": 1.3438902174188587, "grad_norm": 0.07294405996799469, "learning_rate": 2.9352992664864787e-06, "loss": 0.001, "step": 79520 }, { "epoch": 1.3440592176977093, "grad_norm": 0.07735653221607208, "learning_rate": 2.933956143597365e-06, "loss": 0.0015, "step": 79530 }, { "epoch": 1.3442282179765597, "grad_norm": 0.04344133287668228, "learning_rate": 2.9326132004640793e-06, "loss": 0.0006, "step": 79540 }, { "epoch": 1.34439721825541, "grad_norm": 0.12408298999071121, "learning_rate": 2.93127043720346e-06, "loss": 0.0009, "step": 79550 }, { "epoch": 1.3445662185342606, "grad_norm": 0.06359492987394333, "learning_rate": 2.9299278539323374e-06, "loss": 0.0006, "step": 79560 }, { "epoch": 1.344735218813111, "grad_norm": 0.03914562240242958, "learning_rate": 2.928585450767519e-06, "loss": 0.0007, "step": 79570 }, { "epoch": 1.3449042190919616, "grad_norm": 0.06646328419446945, "learning_rate": 2.9272432278258045e-06, "loss": 0.003, "step": 79580 }, { "epoch": 1.345073219370812, "grad_norm": 0.009112788364291191, "learning_rate": 2.925901185223972e-06, "loss": 0.0005, "step": 79590 }, { "epoch": 1.3452422196496623, "grad_norm": 0.04493261128664017, "learning_rate": 2.924559323078785e-06, "loss": 0.0011, "step": 79600 }, { "epoch": 1.3454112199285129, "grad_norm": 0.033908944576978683, "learning_rate": 2.9232176415069913e-06, "loss": 0.001, "step": 79610 }, { "epoch": 1.3455802202073635, "grad_norm": 0.03137993440032005, "learning_rate": 2.921876140625327e-06, "loss": 0.0007, "step": 79620 }, { "epoch": 1.3457492204862138, "grad_norm": 0.015687420964241028, "learning_rate": 2.9205348205505057e-06, "loss": 0.0005, "step": 79630 }, { "epoch": 1.3459182207650642, "grad_norm": 0.0345822349190712, "learning_rate": 2.9191936813992305e-06, "loss": 0.0007, "step": 79640 }, { "epoch": 1.3460872210439148, "grad_norm": 0.0702509880065918, "learning_rate": 2.9178527232881887e-06, "loss": 0.0008, "step": 79650 }, { "epoch": 1.3462562213227651, "grad_norm": 0.11144670099020004, "learning_rate": 2.916511946334046e-06, "loss": 0.0007, "step": 79660 }, { "epoch": 1.3464252216016157, "grad_norm": 0.5236050486564636, "learning_rate": 2.9151713506534606e-06, "loss": 0.0018, "step": 79670 }, { "epoch": 1.346594221880466, "grad_norm": 0.057299088686704636, "learning_rate": 2.9138309363630666e-06, "loss": 0.0006, "step": 79680 }, { "epoch": 1.3467632221593164, "grad_norm": 0.0474783331155777, "learning_rate": 2.9124907035794916e-06, "loss": 0.0007, "step": 79690 }, { "epoch": 1.346932222438167, "grad_norm": 0.1262008249759674, "learning_rate": 2.911150652419337e-06, "loss": 0.001, "step": 79700 }, { "epoch": 1.3471012227170176, "grad_norm": 0.07362750917673111, "learning_rate": 2.909810782999199e-06, "loss": 0.0008, "step": 79710 }, { "epoch": 1.347270222995868, "grad_norm": 0.00031968977418728173, "learning_rate": 2.9084710954356477e-06, "loss": 0.0009, "step": 79720 }, { "epoch": 1.3474392232747183, "grad_norm": 0.07329060882329941, "learning_rate": 2.9071315898452447e-06, "loss": 0.0007, "step": 79730 }, { "epoch": 1.347608223553569, "grad_norm": 0.024064164608716965, "learning_rate": 2.905792266344536e-06, "loss": 0.0024, "step": 79740 }, { "epoch": 1.3477772238324193, "grad_norm": 0.06430544704198837, "learning_rate": 2.904453125050044e-06, "loss": 0.001, "step": 79750 }, { "epoch": 1.3479462241112699, "grad_norm": 0.0032056604977697134, "learning_rate": 2.9031141660782838e-06, "loss": 0.0006, "step": 79760 }, { "epoch": 1.3481152243901202, "grad_norm": 0.02469942532479763, "learning_rate": 2.9017753895457525e-06, "loss": 0.001, "step": 79770 }, { "epoch": 1.3482842246689706, "grad_norm": 0.09063442051410675, "learning_rate": 2.9004367955689266e-06, "loss": 0.0009, "step": 79780 }, { "epoch": 1.3484532249478212, "grad_norm": 0.027034692466259003, "learning_rate": 2.899098384264274e-06, "loss": 0.0007, "step": 79790 }, { "epoch": 1.3486222252266717, "grad_norm": 0.00023188340128399432, "learning_rate": 2.8977601557482393e-06, "loss": 0.001, "step": 79800 }, { "epoch": 1.348791225505522, "grad_norm": 0.09094345569610596, "learning_rate": 2.896422110137259e-06, "loss": 0.0013, "step": 79810 }, { "epoch": 1.3489602257843725, "grad_norm": 0.010485582984983921, "learning_rate": 2.8950842475477446e-06, "loss": 0.0005, "step": 79820 }, { "epoch": 1.349129226063223, "grad_norm": 0.055243708193302155, "learning_rate": 2.8937465680961013e-06, "loss": 0.0021, "step": 79830 }, { "epoch": 1.3492982263420734, "grad_norm": 0.02478218823671341, "learning_rate": 2.8924090718987096e-06, "loss": 0.0005, "step": 79840 }, { "epoch": 1.349467226620924, "grad_norm": 0.08459905534982681, "learning_rate": 2.891071759071942e-06, "loss": 0.0008, "step": 79850 }, { "epoch": 1.3496362268997744, "grad_norm": 0.043836046010255814, "learning_rate": 2.8897346297321484e-06, "loss": 0.0006, "step": 79860 }, { "epoch": 1.3498052271786247, "grad_norm": 0.10422411561012268, "learning_rate": 2.8883976839956672e-06, "loss": 0.0011, "step": 79870 }, { "epoch": 1.3499742274574753, "grad_norm": 0.15190142393112183, "learning_rate": 2.887060921978817e-06, "loss": 0.0018, "step": 79880 }, { "epoch": 1.350143227736326, "grad_norm": 0.00032463777461089194, "learning_rate": 2.885724343797904e-06, "loss": 0.0007, "step": 79890 }, { "epoch": 1.3503122280151763, "grad_norm": 0.07115165144205093, "learning_rate": 2.8843879495692185e-06, "loss": 0.0006, "step": 79900 }, { "epoch": 1.3504812282940266, "grad_norm": 0.0636988952755928, "learning_rate": 2.883051739409031e-06, "loss": 0.0003, "step": 79910 }, { "epoch": 1.3506502285728772, "grad_norm": 0.1498696357011795, "learning_rate": 2.881715713433599e-06, "loss": 0.0015, "step": 79920 }, { "epoch": 1.3508192288517276, "grad_norm": 0.034050557762384415, "learning_rate": 2.880379871759163e-06, "loss": 0.0014, "step": 79930 }, { "epoch": 1.3509882291305781, "grad_norm": 0.03661184757947922, "learning_rate": 2.879044214501947e-06, "loss": 0.0007, "step": 79940 }, { "epoch": 1.3511572294094285, "grad_norm": 0.08183200657367706, "learning_rate": 2.8777087417781614e-06, "loss": 0.0013, "step": 79950 }, { "epoch": 1.3513262296882789, "grad_norm": 0.0048887101002037525, "learning_rate": 2.876373453704e-06, "loss": 0.0005, "step": 79960 }, { "epoch": 1.3514952299671295, "grad_norm": 0.02189868502318859, "learning_rate": 2.8750383503956347e-06, "loss": 0.0009, "step": 79970 }, { "epoch": 1.35166423024598, "grad_norm": 0.028512021526694298, "learning_rate": 2.873703431969231e-06, "loss": 0.0008, "step": 79980 }, { "epoch": 1.3518332305248304, "grad_norm": 0.02241390012204647, "learning_rate": 2.8723686985409283e-06, "loss": 0.0004, "step": 79990 }, { "epoch": 1.3520022308036808, "grad_norm": 0.12110210210084915, "learning_rate": 2.87103415022686e-06, "loss": 0.0007, "step": 80000 }, { "epoch": 1.3521712310825313, "grad_norm": 0.0038254125975072384, "learning_rate": 2.8696997871431333e-06, "loss": 0.0012, "step": 80010 }, { "epoch": 1.3523402313613817, "grad_norm": 0.055296361446380615, "learning_rate": 2.8683656094058486e-06, "loss": 0.0008, "step": 80020 }, { "epoch": 1.3525092316402323, "grad_norm": 0.05945458635687828, "learning_rate": 2.867031617131083e-06, "loss": 0.0006, "step": 80030 }, { "epoch": 1.3526782319190827, "grad_norm": 0.18918436765670776, "learning_rate": 2.865697810434902e-06, "loss": 0.0014, "step": 80040 }, { "epoch": 1.352847232197933, "grad_norm": 0.0316217839717865, "learning_rate": 2.8643641894333506e-06, "loss": 0.0007, "step": 80050 }, { "epoch": 1.3530162324767836, "grad_norm": 0.078201062977314, "learning_rate": 2.8630307542424644e-06, "loss": 0.0009, "step": 80060 }, { "epoch": 1.3531852327556342, "grad_norm": 0.019307147711515427, "learning_rate": 2.8616975049782548e-06, "loss": 0.001, "step": 80070 }, { "epoch": 1.3533542330344845, "grad_norm": 0.01422625221312046, "learning_rate": 2.860364441756724e-06, "loss": 0.0009, "step": 80080 }, { "epoch": 1.353523233313335, "grad_norm": 0.00013813190162181854, "learning_rate": 2.8590315646938515e-06, "loss": 0.0008, "step": 80090 }, { "epoch": 1.3536922335921855, "grad_norm": 0.08600315451622009, "learning_rate": 2.8576988739056068e-06, "loss": 0.0013, "step": 80100 }, { "epoch": 1.3538612338710359, "grad_norm": 0.010337582789361477, "learning_rate": 2.856366369507941e-06, "loss": 0.0004, "step": 80110 }, { "epoch": 1.3540302341498864, "grad_norm": 0.05166761204600334, "learning_rate": 2.855034051616785e-06, "loss": 0.0014, "step": 80120 }, { "epoch": 1.3541992344287368, "grad_norm": 0.019852489233016968, "learning_rate": 2.85370192034806e-06, "loss": 0.0007, "step": 80130 }, { "epoch": 1.3543682347075872, "grad_norm": 0.003500432940199971, "learning_rate": 2.852369975817668e-06, "loss": 0.0008, "step": 80140 }, { "epoch": 1.3545372349864377, "grad_norm": 0.03737901151180267, "learning_rate": 2.851038218141492e-06, "loss": 0.0013, "step": 80150 }, { "epoch": 1.354706235265288, "grad_norm": 0.004693951457738876, "learning_rate": 2.8497066474354034e-06, "loss": 0.0006, "step": 80160 }, { "epoch": 1.3548752355441387, "grad_norm": 0.0182564128190279, "learning_rate": 2.8483752638152568e-06, "loss": 0.0005, "step": 80170 }, { "epoch": 1.355044235822989, "grad_norm": 0.05262609198689461, "learning_rate": 2.847044067396885e-06, "loss": 0.0007, "step": 80180 }, { "epoch": 1.3552132361018396, "grad_norm": 0.02698170579969883, "learning_rate": 2.8457130582961124e-06, "loss": 0.0008, "step": 80190 }, { "epoch": 1.35538223638069, "grad_norm": 0.08764582872390747, "learning_rate": 2.84438223662874e-06, "loss": 0.0006, "step": 80200 }, { "epoch": 1.3555512366595406, "grad_norm": 0.08219470083713531, "learning_rate": 2.843051602510558e-06, "loss": 0.0009, "step": 80210 }, { "epoch": 1.355720236938391, "grad_norm": 0.42823415994644165, "learning_rate": 2.8417211560573364e-06, "loss": 0.001, "step": 80220 }, { "epoch": 1.3558892372172413, "grad_norm": 0.011557972989976406, "learning_rate": 2.840390897384833e-06, "loss": 0.0011, "step": 80230 }, { "epoch": 1.356058237496092, "grad_norm": 0.06456495821475983, "learning_rate": 2.8390608266087834e-06, "loss": 0.001, "step": 80240 }, { "epoch": 1.3562272377749423, "grad_norm": 0.08364235609769821, "learning_rate": 2.8377309438449137e-06, "loss": 0.0007, "step": 80250 }, { "epoch": 1.3563962380537928, "grad_norm": 0.050985436886548996, "learning_rate": 2.836401249208926e-06, "loss": 0.0007, "step": 80260 }, { "epoch": 1.3565652383326432, "grad_norm": 0.04415946453809738, "learning_rate": 2.8350717428165143e-06, "loss": 0.0012, "step": 80270 }, { "epoch": 1.3567342386114938, "grad_norm": 0.026892591267824173, "learning_rate": 2.833742424783349e-06, "loss": 0.0008, "step": 80280 }, { "epoch": 1.3569032388903441, "grad_norm": 0.03614193946123123, "learning_rate": 2.8324132952250904e-06, "loss": 0.0011, "step": 80290 }, { "epoch": 1.3570722391691947, "grad_norm": 0.05073138326406479, "learning_rate": 2.8310843542573753e-06, "loss": 0.0011, "step": 80300 }, { "epoch": 1.357241239448045, "grad_norm": 0.10851094871759415, "learning_rate": 2.8297556019958293e-06, "loss": 0.0007, "step": 80310 }, { "epoch": 1.3574102397268955, "grad_norm": 0.024640558287501335, "learning_rate": 2.828427038556062e-06, "loss": 0.001, "step": 80320 }, { "epoch": 1.357579240005746, "grad_norm": 0.01880939118564129, "learning_rate": 2.8270986640536644e-06, "loss": 0.0002, "step": 80330 }, { "epoch": 1.3577482402845964, "grad_norm": 0.03751187399029732, "learning_rate": 2.8257704786042106e-06, "loss": 0.0006, "step": 80340 }, { "epoch": 1.357917240563447, "grad_norm": 0.046540506184101105, "learning_rate": 2.824442482323261e-06, "loss": 0.0008, "step": 80350 }, { "epoch": 1.3580862408422973, "grad_norm": 0.0005142322042956948, "learning_rate": 2.823114675326354e-06, "loss": 0.0009, "step": 80360 }, { "epoch": 1.358255241121148, "grad_norm": 0.030346006155014038, "learning_rate": 2.8217870577290194e-06, "loss": 0.0012, "step": 80370 }, { "epoch": 1.3584242413999983, "grad_norm": 0.0235869362950325, "learning_rate": 2.820459629646763e-06, "loss": 0.0006, "step": 80380 }, { "epoch": 1.3585932416788489, "grad_norm": 0.0027132586110383272, "learning_rate": 2.8191323911950807e-06, "loss": 0.0008, "step": 80390 }, { "epoch": 1.3587622419576992, "grad_norm": 0.007963139563798904, "learning_rate": 2.817805342489445e-06, "loss": 0.0005, "step": 80400 }, { "epoch": 1.3589312422365496, "grad_norm": 0.0007685494492761791, "learning_rate": 2.8164784836453176e-06, "loss": 0.0058, "step": 80410 }, { "epoch": 1.3591002425154002, "grad_norm": 0.06528615951538086, "learning_rate": 2.815151814778143e-06, "loss": 0.0008, "step": 80420 }, { "epoch": 1.3592692427942505, "grad_norm": 0.050124719738960266, "learning_rate": 2.8138253360033446e-06, "loss": 0.001, "step": 80430 }, { "epoch": 1.3594382430731011, "grad_norm": 0.00018325158453080803, "learning_rate": 2.812499047436336e-06, "loss": 0.0005, "step": 80440 }, { "epoch": 1.3596072433519515, "grad_norm": 0.010511750355362892, "learning_rate": 2.8111729491925076e-06, "loss": 0.0006, "step": 80450 }, { "epoch": 1.359776243630802, "grad_norm": 0.06392191350460052, "learning_rate": 2.8098470413872394e-06, "loss": 0.0006, "step": 80460 }, { "epoch": 1.3599452439096524, "grad_norm": 0.006926842965185642, "learning_rate": 2.8085213241358875e-06, "loss": 0.0014, "step": 80470 }, { "epoch": 1.360114244188503, "grad_norm": 0.0003030473308172077, "learning_rate": 2.807195797553801e-06, "loss": 0.0004, "step": 80480 }, { "epoch": 1.3602832444673534, "grad_norm": 0.033828362822532654, "learning_rate": 2.8058704617563026e-06, "loss": 0.0014, "step": 80490 }, { "epoch": 1.3604522447462037, "grad_norm": 0.020338037982583046, "learning_rate": 2.8045453168587043e-06, "loss": 0.0007, "step": 80500 }, { "epoch": 1.3606212450250543, "grad_norm": 0.025744963437318802, "learning_rate": 2.8032203629763034e-06, "loss": 0.001, "step": 80510 }, { "epoch": 1.3607902453039047, "grad_norm": 0.03640247881412506, "learning_rate": 2.8018956002243726e-06, "loss": 0.0008, "step": 80520 }, { "epoch": 1.3609592455827553, "grad_norm": 0.04088277369737625, "learning_rate": 2.800571028718174e-06, "loss": 0.0007, "step": 80530 }, { "epoch": 1.3611282458616056, "grad_norm": 0.07791779190301895, "learning_rate": 2.7992466485729543e-06, "loss": 0.0011, "step": 80540 }, { "epoch": 1.361297246140456, "grad_norm": 0.06431923806667328, "learning_rate": 2.7979224599039377e-06, "loss": 0.0011, "step": 80550 }, { "epoch": 1.3614662464193066, "grad_norm": 0.026751643046736717, "learning_rate": 2.7965984628263376e-06, "loss": 0.0005, "step": 80560 }, { "epoch": 1.3616352466981572, "grad_norm": 0.07414183765649796, "learning_rate": 2.795274657455346e-06, "loss": 0.0009, "step": 80570 }, { "epoch": 1.3618042469770075, "grad_norm": 0.033633653074502945, "learning_rate": 2.7939510439061425e-06, "loss": 0.0014, "step": 80580 }, { "epoch": 1.3619732472558579, "grad_norm": 0.005656505934894085, "learning_rate": 2.7926276222938855e-06, "loss": 0.0016, "step": 80590 }, { "epoch": 1.3621422475347085, "grad_norm": 0.0321982204914093, "learning_rate": 2.7913043927337212e-06, "loss": 0.0005, "step": 80600 }, { "epoch": 1.3623112478135588, "grad_norm": 0.06537581980228424, "learning_rate": 2.789981355340775e-06, "loss": 0.0012, "step": 80610 }, { "epoch": 1.3624802480924094, "grad_norm": 0.19757148623466492, "learning_rate": 2.78865851023016e-06, "loss": 0.0009, "step": 80620 }, { "epoch": 1.3626492483712598, "grad_norm": 0.03330465033650398, "learning_rate": 2.7873358575169674e-06, "loss": 0.0015, "step": 80630 }, { "epoch": 1.3628182486501101, "grad_norm": 0.010190580040216446, "learning_rate": 2.7860133973162773e-06, "loss": 0.0008, "step": 80640 }, { "epoch": 1.3629872489289607, "grad_norm": 0.004234407562762499, "learning_rate": 2.7846911297431474e-06, "loss": 0.001, "step": 80650 }, { "epoch": 1.3631562492078113, "grad_norm": 0.03019251488149166, "learning_rate": 2.7833690549126226e-06, "loss": 0.0016, "step": 80660 }, { "epoch": 1.3633252494866617, "grad_norm": 0.0046164970844984055, "learning_rate": 2.782047172939731e-06, "loss": 0.0005, "step": 80670 }, { "epoch": 1.363494249765512, "grad_norm": 0.017667559906840324, "learning_rate": 2.7807254839394804e-06, "loss": 0.0008, "step": 80680 }, { "epoch": 1.3636632500443626, "grad_norm": 0.048973340541124344, "learning_rate": 2.779403988026864e-06, "loss": 0.0007, "step": 80690 }, { "epoch": 1.363832250323213, "grad_norm": 0.02754290960729122, "learning_rate": 2.778082685316863e-06, "loss": 0.0009, "step": 80700 }, { "epoch": 1.3640012506020636, "grad_norm": 0.09129004180431366, "learning_rate": 2.7767615759244313e-06, "loss": 0.0015, "step": 80710 }, { "epoch": 1.364170250880914, "grad_norm": 0.036322593688964844, "learning_rate": 2.7754406599645147e-06, "loss": 0.0005, "step": 80720 }, { "epoch": 1.3643392511597643, "grad_norm": 0.07712910324335098, "learning_rate": 2.774119937552041e-06, "loss": 0.0009, "step": 80730 }, { "epoch": 1.3645082514386149, "grad_norm": 0.06588491797447205, "learning_rate": 2.772799408801915e-06, "loss": 0.0013, "step": 80740 }, { "epoch": 1.3646772517174655, "grad_norm": 0.035696469247341156, "learning_rate": 2.7714790738290333e-06, "loss": 0.0007, "step": 80750 }, { "epoch": 1.3648462519963158, "grad_norm": 0.06322583556175232, "learning_rate": 2.770158932748268e-06, "loss": 0.0005, "step": 80760 }, { "epoch": 1.3650152522751662, "grad_norm": 0.01938874088227749, "learning_rate": 2.7688389856744813e-06, "loss": 0.001, "step": 80770 }, { "epoch": 1.3651842525540168, "grad_norm": 0.027592506259679794, "learning_rate": 2.7675192327225107e-06, "loss": 0.0007, "step": 80780 }, { "epoch": 1.3653532528328671, "grad_norm": 0.06308586150407791, "learning_rate": 2.766199674007186e-06, "loss": 0.0019, "step": 80790 }, { "epoch": 1.3655222531117177, "grad_norm": 0.20949746668338776, "learning_rate": 2.764880309643311e-06, "loss": 0.001, "step": 80800 }, { "epoch": 1.365691253390568, "grad_norm": 0.02370143122971058, "learning_rate": 2.76356113974568e-06, "loss": 0.0005, "step": 80810 }, { "epoch": 1.3658602536694184, "grad_norm": 0.1368524432182312, "learning_rate": 2.7622421644290633e-06, "loss": 0.0011, "step": 80820 }, { "epoch": 1.366029253948269, "grad_norm": 0.06743310391902924, "learning_rate": 2.7609233838082222e-06, "loss": 0.0023, "step": 80830 }, { "epoch": 1.3661982542271196, "grad_norm": 0.027281638234853745, "learning_rate": 2.7596047979978935e-06, "loss": 0.001, "step": 80840 }, { "epoch": 1.36636725450597, "grad_norm": 0.10621394962072372, "learning_rate": 2.758286407112805e-06, "loss": 0.0012, "step": 80850 }, { "epoch": 1.3665362547848203, "grad_norm": 0.01406975369900465, "learning_rate": 2.756968211267658e-06, "loss": 0.0006, "step": 80860 }, { "epoch": 1.366705255063671, "grad_norm": 0.002855682745575905, "learning_rate": 2.7556502105771443e-06, "loss": 0.0008, "step": 80870 }, { "epoch": 1.3668742553425213, "grad_norm": 0.017733998596668243, "learning_rate": 2.754332405155939e-06, "loss": 0.0018, "step": 80880 }, { "epoch": 1.3670432556213719, "grad_norm": 0.037968385964632034, "learning_rate": 2.7530147951186923e-06, "loss": 0.0008, "step": 80890 }, { "epoch": 1.3672122559002222, "grad_norm": 0.09487903863191605, "learning_rate": 2.7516973805800457e-06, "loss": 0.0007, "step": 80900 }, { "epoch": 1.3673812561790726, "grad_norm": 0.0455913245677948, "learning_rate": 2.75038016165462e-06, "loss": 0.0008, "step": 80910 }, { "epoch": 1.3675502564579232, "grad_norm": 0.01593964174389839, "learning_rate": 2.749063138457022e-06, "loss": 0.0007, "step": 80920 }, { "epoch": 1.3677192567367737, "grad_norm": 0.02475392445921898, "learning_rate": 2.747746311101835e-06, "loss": 0.0012, "step": 80930 }, { "epoch": 1.367888257015624, "grad_norm": 0.00532240467146039, "learning_rate": 2.746429679703633e-06, "loss": 0.0005, "step": 80940 }, { "epoch": 1.3680572572944745, "grad_norm": 0.0069517092779278755, "learning_rate": 2.745113244376966e-06, "loss": 0.0008, "step": 80950 }, { "epoch": 1.368226257573325, "grad_norm": 0.04656779021024704, "learning_rate": 2.743797005236374e-06, "loss": 0.0044, "step": 80960 }, { "epoch": 1.3683952578521754, "grad_norm": 0.005446649622172117, "learning_rate": 2.7424809623963722e-06, "loss": 0.0013, "step": 80970 }, { "epoch": 1.368564258131026, "grad_norm": 0.1676197052001953, "learning_rate": 2.741165115971466e-06, "loss": 0.0011, "step": 80980 }, { "epoch": 1.3687332584098764, "grad_norm": 0.11519121378660202, "learning_rate": 2.7398494660761378e-06, "loss": 0.0011, "step": 80990 }, { "epoch": 1.3689022586887267, "grad_norm": 0.01789015345275402, "learning_rate": 2.7385340128248583e-06, "loss": 0.0005, "step": 81000 }, { "epoch": 1.3690712589675773, "grad_norm": 0.07445920258760452, "learning_rate": 2.737218756332075e-06, "loss": 0.0009, "step": 81010 }, { "epoch": 1.369240259246428, "grad_norm": 0.016931943595409393, "learning_rate": 2.735903696712225e-06, "loss": 0.0011, "step": 81020 }, { "epoch": 1.3694092595252783, "grad_norm": 0.004109029192477465, "learning_rate": 2.734588834079721e-06, "loss": 0.0015, "step": 81030 }, { "epoch": 1.3695782598041286, "grad_norm": 0.044804804027080536, "learning_rate": 2.733274168548967e-06, "loss": 0.001, "step": 81040 }, { "epoch": 1.3697472600829792, "grad_norm": 0.008167951367795467, "learning_rate": 2.731959700234341e-06, "loss": 0.0015, "step": 81050 }, { "epoch": 1.3699162603618296, "grad_norm": 0.21587994694709778, "learning_rate": 2.7306454292502115e-06, "loss": 0.0029, "step": 81060 }, { "epoch": 1.3700852606406801, "grad_norm": 0.04317808523774147, "learning_rate": 2.7293313557109234e-06, "loss": 0.0008, "step": 81070 }, { "epoch": 1.3702542609195305, "grad_norm": 0.00516023114323616, "learning_rate": 2.728017479730809e-06, "loss": 0.0007, "step": 81080 }, { "epoch": 1.3704232611983809, "grad_norm": 0.013336258940398693, "learning_rate": 2.726703801424182e-06, "loss": 0.0005, "step": 81090 }, { "epoch": 1.3705922614772315, "grad_norm": 0.03444129228591919, "learning_rate": 2.72539032090534e-06, "loss": 0.0007, "step": 81100 }, { "epoch": 1.3707612617560818, "grad_norm": 0.014178609475493431, "learning_rate": 2.7240770382885594e-06, "loss": 0.0008, "step": 81110 }, { "epoch": 1.3709302620349324, "grad_norm": 0.18988923728466034, "learning_rate": 2.722763953688105e-06, "loss": 0.0009, "step": 81120 }, { "epoch": 1.3710992623137828, "grad_norm": 0.1102895513176918, "learning_rate": 2.7214510672182183e-06, "loss": 0.0006, "step": 81130 }, { "epoch": 1.3712682625926333, "grad_norm": 0.016499245539307594, "learning_rate": 2.7201383789931314e-06, "loss": 0.0007, "step": 81140 }, { "epoch": 1.3714372628714837, "grad_norm": 0.02236087992787361, "learning_rate": 2.7188258891270485e-06, "loss": 0.0011, "step": 81150 }, { "epoch": 1.3716062631503343, "grad_norm": 0.06486410647630692, "learning_rate": 2.7175135977341683e-06, "loss": 0.0012, "step": 81160 }, { "epoch": 1.3717752634291847, "grad_norm": 0.04193085432052612, "learning_rate": 2.716201504928662e-06, "loss": 0.001, "step": 81170 }, { "epoch": 1.371944263708035, "grad_norm": 0.11259046196937561, "learning_rate": 2.71488961082469e-06, "loss": 0.001, "step": 81180 }, { "epoch": 1.3721132639868856, "grad_norm": 0.11563017964363098, "learning_rate": 2.7135779155363957e-06, "loss": 0.0017, "step": 81190 }, { "epoch": 1.372282264265736, "grad_norm": 0.1372109204530716, "learning_rate": 2.712266419177898e-06, "loss": 0.0011, "step": 81200 }, { "epoch": 1.3724512645445865, "grad_norm": 0.0071485610678792, "learning_rate": 2.710955121863309e-06, "loss": 0.0007, "step": 81210 }, { "epoch": 1.372620264823437, "grad_norm": 0.017509106546640396, "learning_rate": 2.709644023706713e-06, "loss": 0.0006, "step": 81220 }, { "epoch": 1.3727892651022875, "grad_norm": 0.08765660971403122, "learning_rate": 2.7083331248221855e-06, "loss": 0.0011, "step": 81230 }, { "epoch": 1.3729582653811379, "grad_norm": 0.04982509836554527, "learning_rate": 2.7070224253237775e-06, "loss": 0.0009, "step": 81240 }, { "epoch": 1.3731272656599884, "grad_norm": 0.07236922532320023, "learning_rate": 2.7057119253255306e-06, "loss": 0.001, "step": 81250 }, { "epoch": 1.3732962659388388, "grad_norm": 0.09635771811008453, "learning_rate": 2.70440162494146e-06, "loss": 0.0009, "step": 81260 }, { "epoch": 1.3734652662176892, "grad_norm": 0.10149110853672028, "learning_rate": 2.7030915242855706e-06, "loss": 0.0005, "step": 81270 }, { "epoch": 1.3736342664965397, "grad_norm": 0.019749846309423447, "learning_rate": 2.7017816234718474e-06, "loss": 0.0005, "step": 81280 }, { "epoch": 1.37380326677539, "grad_norm": 0.061225682497024536, "learning_rate": 2.70047192261426e-06, "loss": 0.0008, "step": 81290 }, { "epoch": 1.3739722670542407, "grad_norm": 0.17834830284118652, "learning_rate": 2.6991624218267553e-06, "loss": 0.0013, "step": 81300 }, { "epoch": 1.374141267333091, "grad_norm": 0.02971114031970501, "learning_rate": 2.697853121223269e-06, "loss": 0.0018, "step": 81310 }, { "epoch": 1.3743102676119416, "grad_norm": 0.0017296327278017998, "learning_rate": 2.6965440209177136e-06, "loss": 0.0008, "step": 81320 }, { "epoch": 1.374479267890792, "grad_norm": 0.02275587059557438, "learning_rate": 2.695235121023991e-06, "loss": 0.0009, "step": 81330 }, { "epoch": 1.3746482681696426, "grad_norm": 0.035394806414842606, "learning_rate": 2.6939264216559772e-06, "loss": 0.0009, "step": 81340 }, { "epoch": 1.374817268448493, "grad_norm": 0.02650953270494938, "learning_rate": 2.6926179229275407e-06, "loss": 0.0008, "step": 81350 }, { "epoch": 1.3749862687273433, "grad_norm": 0.1949848234653473, "learning_rate": 2.6913096249525217e-06, "loss": 0.0008, "step": 81360 }, { "epoch": 1.375155269006194, "grad_norm": 0.035317301750183105, "learning_rate": 2.6900015278447534e-06, "loss": 0.0007, "step": 81370 }, { "epoch": 1.3753242692850443, "grad_norm": 0.15639999508857727, "learning_rate": 2.688693631718042e-06, "loss": 0.0011, "step": 81380 }, { "epoch": 1.3754932695638948, "grad_norm": 0.023006858304142952, "learning_rate": 2.6873859366861866e-06, "loss": 0.0009, "step": 81390 }, { "epoch": 1.3756622698427452, "grad_norm": 0.18025051057338715, "learning_rate": 2.6860784428629563e-06, "loss": 0.0011, "step": 81400 }, { "epoch": 1.3758312701215956, "grad_norm": 0.001331401988863945, "learning_rate": 2.684771150362115e-06, "loss": 0.0009, "step": 81410 }, { "epoch": 1.3760002704004461, "grad_norm": 0.003098771208897233, "learning_rate": 2.683464059297399e-06, "loss": 0.001, "step": 81420 }, { "epoch": 1.3761692706792967, "grad_norm": 0.027324680238962173, "learning_rate": 2.6821571697825342e-06, "loss": 0.0007, "step": 81430 }, { "epoch": 1.376338270958147, "grad_norm": 0.009578811936080456, "learning_rate": 2.6808504819312275e-06, "loss": 0.0005, "step": 81440 }, { "epoch": 1.3765072712369975, "grad_norm": 0.016443606466054916, "learning_rate": 2.6795439958571634e-06, "loss": 0.0009, "step": 81450 }, { "epoch": 1.376676271515848, "grad_norm": 0.011706710793077946, "learning_rate": 2.6782377116740143e-06, "loss": 0.0006, "step": 81460 }, { "epoch": 1.3768452717946984, "grad_norm": 0.14949294924736023, "learning_rate": 2.6769316294954364e-06, "loss": 0.0021, "step": 81470 }, { "epoch": 1.377014272073549, "grad_norm": 0.04487974941730499, "learning_rate": 2.675625749435059e-06, "loss": 0.0007, "step": 81480 }, { "epoch": 1.3771832723523993, "grad_norm": 0.02011519856750965, "learning_rate": 2.6743200716065044e-06, "loss": 0.0002, "step": 81490 }, { "epoch": 1.3773522726312497, "grad_norm": 0.019957944750785828, "learning_rate": 2.673014596123373e-06, "loss": 0.0007, "step": 81500 }, { "epoch": 1.3775212729101003, "grad_norm": 0.00412454130128026, "learning_rate": 2.671709323099244e-06, "loss": 0.0006, "step": 81510 }, { "epoch": 1.3776902731889509, "grad_norm": 0.005910065956413746, "learning_rate": 2.670404252647687e-06, "loss": 0.002, "step": 81520 }, { "epoch": 1.3778592734678012, "grad_norm": 0.0417097806930542, "learning_rate": 2.6690993848822457e-06, "loss": 0.0007, "step": 81530 }, { "epoch": 1.3780282737466516, "grad_norm": 0.07791711390018463, "learning_rate": 2.6677947199164533e-06, "loss": 0.001, "step": 81540 }, { "epoch": 1.3781972740255022, "grad_norm": 0.058783017098903656, "learning_rate": 2.6664902578638173e-06, "loss": 0.0013, "step": 81550 }, { "epoch": 1.3783662743043525, "grad_norm": 0.10798044502735138, "learning_rate": 2.6651859988378377e-06, "loss": 0.0006, "step": 81560 }, { "epoch": 1.3785352745832031, "grad_norm": 0.01462811604142189, "learning_rate": 2.663881942951986e-06, "loss": 0.0007, "step": 81570 }, { "epoch": 1.3787042748620535, "grad_norm": 0.10999801009893417, "learning_rate": 2.6625780903197266e-06, "loss": 0.0013, "step": 81580 }, { "epoch": 1.3788732751409039, "grad_norm": 0.09494367986917496, "learning_rate": 2.6612744410544966e-06, "loss": 0.0013, "step": 81590 }, { "epoch": 1.3790422754197544, "grad_norm": 0.021898848935961723, "learning_rate": 2.6599709952697227e-06, "loss": 0.0005, "step": 81600 }, { "epoch": 1.379211275698605, "grad_norm": 0.01613488420844078, "learning_rate": 2.6586677530788087e-06, "loss": 0.0008, "step": 81610 }, { "epoch": 1.3793802759774554, "grad_norm": 0.023948589339852333, "learning_rate": 2.657364714595146e-06, "loss": 0.0009, "step": 81620 }, { "epoch": 1.3795492762563057, "grad_norm": 0.04615328833460808, "learning_rate": 2.6560618799321015e-06, "loss": 0.001, "step": 81630 }, { "epoch": 1.3797182765351563, "grad_norm": 0.046796441078186035, "learning_rate": 2.65475924920303e-06, "loss": 0.0011, "step": 81640 }, { "epoch": 1.3798872768140067, "grad_norm": 0.10986502468585968, "learning_rate": 2.6534568225212687e-06, "loss": 0.0014, "step": 81650 }, { "epoch": 1.3800562770928573, "grad_norm": 0.039961110800504684, "learning_rate": 2.6521546000001307e-06, "loss": 0.0005, "step": 81660 }, { "epoch": 1.3802252773717076, "grad_norm": 0.0009647384868003428, "learning_rate": 2.650852581752919e-06, "loss": 0.0009, "step": 81670 }, { "epoch": 1.380394277650558, "grad_norm": 0.021076450124382973, "learning_rate": 2.6495507678929144e-06, "loss": 0.0007, "step": 81680 }, { "epoch": 1.3805632779294086, "grad_norm": 0.01967843621969223, "learning_rate": 2.6482491585333823e-06, "loss": 0.0009, "step": 81690 }, { "epoch": 1.3807322782082592, "grad_norm": 0.02331704832613468, "learning_rate": 2.6469477537875665e-06, "loss": 0.0005, "step": 81700 }, { "epoch": 1.3809012784871095, "grad_norm": 0.06854772567749023, "learning_rate": 2.6456465537686993e-06, "loss": 0.0005, "step": 81710 }, { "epoch": 1.38107027876596, "grad_norm": 0.042576905339956284, "learning_rate": 2.644345558589987e-06, "loss": 0.0021, "step": 81720 }, { "epoch": 1.3812392790448105, "grad_norm": 0.0038385672960430384, "learning_rate": 2.6430447683646254e-06, "loss": 0.0007, "step": 81730 }, { "epoch": 1.3814082793236608, "grad_norm": 0.017321884632110596, "learning_rate": 2.641744183205788e-06, "loss": 0.0005, "step": 81740 }, { "epoch": 1.3815772796025114, "grad_norm": 0.06699923425912857, "learning_rate": 2.6404438032266338e-06, "loss": 0.003, "step": 81750 }, { "epoch": 1.3817462798813618, "grad_norm": 0.03266004100441933, "learning_rate": 2.639143628540299e-06, "loss": 0.0017, "step": 81760 }, { "epoch": 1.3819152801602121, "grad_norm": 0.028615793213248253, "learning_rate": 2.63784365925991e-06, "loss": 0.001, "step": 81770 }, { "epoch": 1.3820842804390627, "grad_norm": 0.004312659613788128, "learning_rate": 2.6365438954985646e-06, "loss": 0.0005, "step": 81780 }, { "epoch": 1.3822532807179133, "grad_norm": 0.05887260288000107, "learning_rate": 2.6352443373693538e-06, "loss": 0.0013, "step": 81790 }, { "epoch": 1.3824222809967637, "grad_norm": 0.02822296693921089, "learning_rate": 2.6339449849853416e-06, "loss": 0.001, "step": 81800 }, { "epoch": 1.382591281275614, "grad_norm": 0.011759044602513313, "learning_rate": 2.632645838459581e-06, "loss": 0.001, "step": 81810 }, { "epoch": 1.3827602815544646, "grad_norm": 0.05984441190958023, "learning_rate": 2.6313468979051003e-06, "loss": 0.0009, "step": 81820 }, { "epoch": 1.382929281833315, "grad_norm": 0.019043082371354103, "learning_rate": 2.630048163434917e-06, "loss": 0.0008, "step": 81830 }, { "epoch": 1.3830982821121656, "grad_norm": 0.04396101459860802, "learning_rate": 2.6287496351620273e-06, "loss": 0.0011, "step": 81840 }, { "epoch": 1.383267282391016, "grad_norm": 0.1366347372531891, "learning_rate": 2.627451313199406e-06, "loss": 0.0006, "step": 81850 }, { "epoch": 1.3834362826698663, "grad_norm": 0.08047544211149216, "learning_rate": 2.6261531976600164e-06, "loss": 0.0009, "step": 81860 }, { "epoch": 1.3836052829487169, "grad_norm": 0.053900931030511856, "learning_rate": 2.6248552886568025e-06, "loss": 0.0012, "step": 81870 }, { "epoch": 1.3837742832275675, "grad_norm": 0.032839562743902206, "learning_rate": 2.6235575863026837e-06, "loss": 0.0007, "step": 81880 }, { "epoch": 1.3839432835064178, "grad_norm": 0.10769818723201752, "learning_rate": 2.622260090710571e-06, "loss": 0.0012, "step": 81890 }, { "epoch": 1.3841122837852682, "grad_norm": 0.00828024186193943, "learning_rate": 2.6209628019933486e-06, "loss": 0.0006, "step": 81900 }, { "epoch": 1.3842812840641188, "grad_norm": 0.02269088849425316, "learning_rate": 2.6196657202638913e-06, "loss": 0.0009, "step": 81910 }, { "epoch": 1.3844502843429691, "grad_norm": 0.05940607562661171, "learning_rate": 2.6183688456350474e-06, "loss": 0.0019, "step": 81920 }, { "epoch": 1.3846192846218197, "grad_norm": 0.10371402651071548, "learning_rate": 2.6170721782196534e-06, "loss": 0.0009, "step": 81930 }, { "epoch": 1.38478828490067, "grad_norm": 0.06377759575843811, "learning_rate": 2.6157757181305276e-06, "loss": 0.0006, "step": 81940 }, { "epoch": 1.3849572851795204, "grad_norm": 0.0681513175368309, "learning_rate": 2.614479465480464e-06, "loss": 0.0007, "step": 81950 }, { "epoch": 1.385126285458371, "grad_norm": 0.008973762392997742, "learning_rate": 2.6131834203822463e-06, "loss": 0.0014, "step": 81960 }, { "epoch": 1.3852952857372216, "grad_norm": 0.03835931420326233, "learning_rate": 2.6118875829486345e-06, "loss": 0.0015, "step": 81970 }, { "epoch": 1.385464286016072, "grad_norm": 0.17669308185577393, "learning_rate": 2.610591953292375e-06, "loss": 0.0017, "step": 81980 }, { "epoch": 1.3856332862949223, "grad_norm": 0.04100028797984123, "learning_rate": 2.609296531526191e-06, "loss": 0.0005, "step": 81990 }, { "epoch": 1.385802286573773, "grad_norm": 0.10174252837896347, "learning_rate": 2.608001317762793e-06, "loss": 0.0011, "step": 82000 }, { "epoch": 1.3859712868526233, "grad_norm": 0.04097196087241173, "learning_rate": 2.6067063121148682e-06, "loss": 0.0007, "step": 82010 }, { "epoch": 1.3861402871314739, "grad_norm": 0.031012501567602158, "learning_rate": 2.605411514695093e-06, "loss": 0.001, "step": 82020 }, { "epoch": 1.3863092874103242, "grad_norm": 0.07320142537355423, "learning_rate": 2.604116925616115e-06, "loss": 0.0015, "step": 82030 }, { "epoch": 1.3864782876891746, "grad_norm": 0.011016522534191608, "learning_rate": 2.602822544990573e-06, "loss": 0.0008, "step": 82040 }, { "epoch": 1.3866472879680252, "grad_norm": 0.009944644756615162, "learning_rate": 2.601528372931085e-06, "loss": 0.0006, "step": 82050 }, { "epoch": 1.3868162882468755, "grad_norm": 0.041490208357572556, "learning_rate": 2.6002344095502507e-06, "loss": 0.0009, "step": 82060 }, { "epoch": 1.3869852885257261, "grad_norm": 0.05326971784234047, "learning_rate": 2.5989406549606477e-06, "loss": 0.0009, "step": 82070 }, { "epoch": 1.3871542888045765, "grad_norm": 0.05080737918615341, "learning_rate": 2.5976471092748436e-06, "loss": 0.0007, "step": 82080 }, { "epoch": 1.387323289083427, "grad_norm": 0.06964278221130371, "learning_rate": 2.5963537726053785e-06, "loss": 0.0009, "step": 82090 }, { "epoch": 1.3874922893622774, "grad_norm": 0.0013241906417533755, "learning_rate": 2.595060645064783e-06, "loss": 0.0005, "step": 82100 }, { "epoch": 1.387661289641128, "grad_norm": 0.03161228448152542, "learning_rate": 2.5937677267655616e-06, "loss": 0.0013, "step": 82110 }, { "epoch": 1.3878302899199784, "grad_norm": 0.04791177436709404, "learning_rate": 2.5924750178202083e-06, "loss": 0.001, "step": 82120 }, { "epoch": 1.3879992901988287, "grad_norm": 0.01146010123193264, "learning_rate": 2.5911825183411914e-06, "loss": 0.0003, "step": 82130 }, { "epoch": 1.3881682904776793, "grad_norm": 0.04580337554216385, "learning_rate": 2.5898902284409684e-06, "loss": 0.0008, "step": 82140 }, { "epoch": 1.3883372907565297, "grad_norm": 0.024946637451648712, "learning_rate": 2.588598148231971e-06, "loss": 0.0012, "step": 82150 }, { "epoch": 1.3885062910353803, "grad_norm": 0.024746665731072426, "learning_rate": 2.5873062778266194e-06, "loss": 0.0003, "step": 82160 }, { "epoch": 1.3886752913142306, "grad_norm": 0.07841707020998001, "learning_rate": 2.58601461733731e-06, "loss": 0.0006, "step": 82170 }, { "epoch": 1.3888442915930812, "grad_norm": 0.048576850444078445, "learning_rate": 2.584723166876427e-06, "loss": 0.0009, "step": 82180 }, { "epoch": 1.3890132918719316, "grad_norm": 0.014373579062521458, "learning_rate": 2.5834319265563292e-06, "loss": 0.0004, "step": 82190 }, { "epoch": 1.3891822921507821, "grad_norm": 0.030972721055150032, "learning_rate": 2.5821408964893614e-06, "loss": 0.0005, "step": 82200 }, { "epoch": 1.3893512924296325, "grad_norm": 0.054175764322280884, "learning_rate": 2.5808500767878523e-06, "loss": 0.0009, "step": 82210 }, { "epoch": 1.3895202927084829, "grad_norm": 0.007382318377494812, "learning_rate": 2.579559467564107e-06, "loss": 0.0006, "step": 82220 }, { "epoch": 1.3896892929873335, "grad_norm": 0.11436676979064941, "learning_rate": 2.5782690689304136e-06, "loss": 0.0011, "step": 82230 }, { "epoch": 1.3898582932661838, "grad_norm": 0.2287551313638687, "learning_rate": 2.5769788809990475e-06, "loss": 0.0006, "step": 82240 }, { "epoch": 1.3900272935450344, "grad_norm": 0.09457912296056747, "learning_rate": 2.5756889038822562e-06, "loss": 0.0008, "step": 82250 }, { "epoch": 1.3901962938238848, "grad_norm": 0.17385007441043854, "learning_rate": 2.574399137692277e-06, "loss": 0.0015, "step": 82260 }, { "epoch": 1.3903652941027353, "grad_norm": 0.20081576704978943, "learning_rate": 2.5731095825413267e-06, "loss": 0.001, "step": 82270 }, { "epoch": 1.3905342943815857, "grad_norm": 0.10662265121936798, "learning_rate": 2.5718202385415997e-06, "loss": 0.002, "step": 82280 }, { "epoch": 1.3907032946604363, "grad_norm": 0.038537610322237015, "learning_rate": 2.5705311058052783e-06, "loss": 0.0008, "step": 82290 }, { "epoch": 1.3908722949392867, "grad_norm": 0.0011308231623843312, "learning_rate": 2.5692421844445204e-06, "loss": 0.001, "step": 82300 }, { "epoch": 1.391041295218137, "grad_norm": 0.00529683381319046, "learning_rate": 2.567953474571471e-06, "loss": 0.0011, "step": 82310 }, { "epoch": 1.3912102954969876, "grad_norm": 0.07235697656869888, "learning_rate": 2.566664976298251e-06, "loss": 0.0033, "step": 82320 }, { "epoch": 1.391379295775838, "grad_norm": 0.07601780444383621, "learning_rate": 2.5653766897369696e-06, "loss": 0.0007, "step": 82330 }, { "epoch": 1.3915482960546885, "grad_norm": 0.04762272536754608, "learning_rate": 2.5640886149997108e-06, "loss": 0.001, "step": 82340 }, { "epoch": 1.391717296333539, "grad_norm": 0.04797697812318802, "learning_rate": 2.5628007521985467e-06, "loss": 0.0015, "step": 82350 }, { "epoch": 1.3918862966123893, "grad_norm": 0.036652568727731705, "learning_rate": 2.561513101445523e-06, "loss": 0.0007, "step": 82360 }, { "epoch": 1.3920552968912399, "grad_norm": 0.03651326522231102, "learning_rate": 2.5602256628526765e-06, "loss": 0.0006, "step": 82370 }, { "epoch": 1.3922242971700904, "grad_norm": 0.0008418612414970994, "learning_rate": 2.558938436532017e-06, "loss": 0.0011, "step": 82380 }, { "epoch": 1.3923932974489408, "grad_norm": 0.02465483732521534, "learning_rate": 2.557651422595542e-06, "loss": 0.0013, "step": 82390 }, { "epoch": 1.3925622977277912, "grad_norm": 0.05674951896071434, "learning_rate": 2.5563646211552252e-06, "loss": 0.0017, "step": 82400 }, { "epoch": 1.3927312980066417, "grad_norm": 0.04472305253148079, "learning_rate": 2.5550780323230256e-06, "loss": 0.0006, "step": 82410 }, { "epoch": 1.3929002982854921, "grad_norm": 0.02844862826168537, "learning_rate": 2.5537916562108835e-06, "loss": 0.0006, "step": 82420 }, { "epoch": 1.3930692985643427, "grad_norm": 0.04687836021184921, "learning_rate": 2.5525054929307212e-06, "loss": 0.0004, "step": 82430 }, { "epoch": 1.393238298843193, "grad_norm": 0.3177129924297333, "learning_rate": 2.5512195425944373e-06, "loss": 0.0007, "step": 82440 }, { "epoch": 1.3934072991220434, "grad_norm": 0.10119723528623581, "learning_rate": 2.5499338053139177e-06, "loss": 0.0009, "step": 82450 }, { "epoch": 1.393576299400894, "grad_norm": 0.047569334506988525, "learning_rate": 2.5486482812010303e-06, "loss": 0.0005, "step": 82460 }, { "epoch": 1.3937452996797446, "grad_norm": 0.04689168184995651, "learning_rate": 2.547362970367617e-06, "loss": 0.0007, "step": 82470 }, { "epoch": 1.393914299958595, "grad_norm": 0.019643142819404602, "learning_rate": 2.546077872925511e-06, "loss": 0.0008, "step": 82480 }, { "epoch": 1.3940833002374453, "grad_norm": 0.025296105071902275, "learning_rate": 2.5447929889865174e-06, "loss": 0.0007, "step": 82490 }, { "epoch": 1.394252300516296, "grad_norm": 0.022164635360240936, "learning_rate": 2.543508318662432e-06, "loss": 0.0008, "step": 82500 }, { "epoch": 1.3944213007951463, "grad_norm": 0.002421640558168292, "learning_rate": 2.542223862065022e-06, "loss": 0.0005, "step": 82510 }, { "epoch": 1.3945903010739968, "grad_norm": 0.05165115371346474, "learning_rate": 2.5409396193060465e-06, "loss": 0.0005, "step": 82520 }, { "epoch": 1.3947593013528472, "grad_norm": 0.07443195581436157, "learning_rate": 2.5396555904972368e-06, "loss": 0.0007, "step": 82530 }, { "epoch": 1.3949283016316976, "grad_norm": 0.03140726685523987, "learning_rate": 2.5383717757503136e-06, "loss": 0.0006, "step": 82540 }, { "epoch": 1.3950973019105481, "grad_norm": 0.04079822450876236, "learning_rate": 2.5370881751769704e-06, "loss": 0.0009, "step": 82550 }, { "epoch": 1.3952663021893987, "grad_norm": 0.013004903681576252, "learning_rate": 2.535804788888891e-06, "loss": 0.0008, "step": 82560 }, { "epoch": 1.395435302468249, "grad_norm": 0.010640832595527172, "learning_rate": 2.5345216169977325e-06, "loss": 0.0005, "step": 82570 }, { "epoch": 1.3956043027470995, "grad_norm": 0.024120008572936058, "learning_rate": 2.53323865961514e-06, "loss": 0.0007, "step": 82580 }, { "epoch": 1.39577330302595, "grad_norm": 0.026166046038269997, "learning_rate": 2.5319559168527354e-06, "loss": 0.001, "step": 82590 }, { "epoch": 1.3959423033048004, "grad_norm": 0.06554225832223892, "learning_rate": 2.5306733888221223e-06, "loss": 0.0008, "step": 82600 }, { "epoch": 1.396111303583651, "grad_norm": 0.018941501155495644, "learning_rate": 2.5293910756348916e-06, "loss": 0.0006, "step": 82610 }, { "epoch": 1.3962803038625013, "grad_norm": 0.038295380771160126, "learning_rate": 2.5281089774026056e-06, "loss": 0.0006, "step": 82620 }, { "epoch": 1.3964493041413517, "grad_norm": 0.0389002226293087, "learning_rate": 2.526827094236815e-06, "loss": 0.0006, "step": 82630 }, { "epoch": 1.3966183044202023, "grad_norm": 0.019440731033682823, "learning_rate": 2.525545426249052e-06, "loss": 0.0007, "step": 82640 }, { "epoch": 1.3967873046990529, "grad_norm": 0.4608055055141449, "learning_rate": 2.524263973550824e-06, "loss": 0.0008, "step": 82650 }, { "epoch": 1.3969563049779032, "grad_norm": 0.051159411668777466, "learning_rate": 2.5229827362536274e-06, "loss": 0.0003, "step": 82660 }, { "epoch": 1.3971253052567536, "grad_norm": 0.017769791185855865, "learning_rate": 2.5217017144689323e-06, "loss": 0.0008, "step": 82670 }, { "epoch": 1.3972943055356042, "grad_norm": 0.01922992430627346, "learning_rate": 2.5204209083081977e-06, "loss": 0.0011, "step": 82680 }, { "epoch": 1.3974633058144545, "grad_norm": 0.021616067737340927, "learning_rate": 2.5191403178828554e-06, "loss": 0.0033, "step": 82690 }, { "epoch": 1.3976323060933051, "grad_norm": 0.03175405040383339, "learning_rate": 2.517859943304326e-06, "loss": 0.0008, "step": 82700 }, { "epoch": 1.3978013063721555, "grad_norm": 0.10438563674688339, "learning_rate": 2.5165797846840097e-06, "loss": 0.0014, "step": 82710 }, { "epoch": 1.3979703066510059, "grad_norm": 0.032787639647722244, "learning_rate": 2.5152998421332827e-06, "loss": 0.0006, "step": 82720 }, { "epoch": 1.3981393069298564, "grad_norm": 0.058113645762205124, "learning_rate": 2.5140201157635098e-06, "loss": 0.0005, "step": 82730 }, { "epoch": 1.398308307208707, "grad_norm": 0.05953868478536606, "learning_rate": 2.5127406056860293e-06, "loss": 0.0007, "step": 82740 }, { "epoch": 1.3984773074875574, "grad_norm": 0.05751534551382065, "learning_rate": 2.51146131201217e-06, "loss": 0.0007, "step": 82750 }, { "epoch": 1.3986463077664077, "grad_norm": 0.06238119676709175, "learning_rate": 2.5101822348532314e-06, "loss": 0.0014, "step": 82760 }, { "epoch": 1.3988153080452583, "grad_norm": 0.040273841470479965, "learning_rate": 2.5089033743205037e-06, "loss": 0.001, "step": 82770 }, { "epoch": 1.3989843083241087, "grad_norm": 0.011359905824065208, "learning_rate": 2.5076247305252503e-06, "loss": 0.0006, "step": 82780 }, { "epoch": 1.3991533086029593, "grad_norm": 0.02924640290439129, "learning_rate": 2.5063463035787234e-06, "loss": 0.0006, "step": 82790 }, { "epoch": 1.3993223088818096, "grad_norm": 0.029478926211595535, "learning_rate": 2.5050680935921478e-06, "loss": 0.0012, "step": 82800 }, { "epoch": 1.39949130916066, "grad_norm": 0.02089492790400982, "learning_rate": 2.5037901006767363e-06, "loss": 0.0012, "step": 82810 }, { "epoch": 1.3996603094395106, "grad_norm": 0.024508384987711906, "learning_rate": 2.5025123249436807e-06, "loss": 0.0005, "step": 82820 }, { "epoch": 1.3998293097183612, "grad_norm": 0.006763719953596592, "learning_rate": 2.501234766504155e-06, "loss": 0.0004, "step": 82830 }, { "epoch": 1.3999983099972115, "grad_norm": 0.08759523183107376, "learning_rate": 2.4999574254693094e-06, "loss": 0.0007, "step": 82840 }, { "epoch": 1.400167310276062, "grad_norm": 0.03937356173992157, "learning_rate": 2.498680301950283e-06, "loss": 0.0005, "step": 82850 }, { "epoch": 1.4003363105549125, "grad_norm": 0.040558457374572754, "learning_rate": 2.497403396058188e-06, "loss": 0.0004, "step": 82860 }, { "epoch": 1.4005053108337628, "grad_norm": 0.0008733658469282091, "learning_rate": 2.496126707904124e-06, "loss": 0.0009, "step": 82870 }, { "epoch": 1.4006743111126134, "grad_norm": 0.048882171511650085, "learning_rate": 2.4948502375991664e-06, "loss": 0.0006, "step": 82880 }, { "epoch": 1.4008433113914638, "grad_norm": 0.0028946897946298122, "learning_rate": 2.4935739852543777e-06, "loss": 0.0005, "step": 82890 }, { "epoch": 1.4010123116703141, "grad_norm": 0.046913258731365204, "learning_rate": 2.4922979509807945e-06, "loss": 0.0008, "step": 82900 }, { "epoch": 1.4011813119491647, "grad_norm": 0.1057584136724472, "learning_rate": 2.4910221348894413e-06, "loss": 0.0007, "step": 82910 }, { "epoch": 1.401350312228015, "grad_norm": 0.0601949468255043, "learning_rate": 2.489746537091317e-06, "loss": 0.0012, "step": 82920 }, { "epoch": 1.4015193125068657, "grad_norm": 0.041038982570171356, "learning_rate": 2.488471157697408e-06, "loss": 0.0008, "step": 82930 }, { "epoch": 1.401688312785716, "grad_norm": 0.10405878722667694, "learning_rate": 2.4871959968186755e-06, "loss": 0.0007, "step": 82940 }, { "epoch": 1.4018573130645666, "grad_norm": 0.012591134756803513, "learning_rate": 2.485921054566067e-06, "loss": 0.0005, "step": 82950 }, { "epoch": 1.402026313343417, "grad_norm": 0.011082008481025696, "learning_rate": 2.484646331050507e-06, "loss": 0.0011, "step": 82960 }, { "epoch": 1.4021953136222676, "grad_norm": 0.07454565167427063, "learning_rate": 2.483371826382903e-06, "loss": 0.001, "step": 82970 }, { "epoch": 1.402364313901118, "grad_norm": 0.0808311477303505, "learning_rate": 2.482097540674145e-06, "loss": 0.001, "step": 82980 }, { "epoch": 1.4025333141799683, "grad_norm": 0.03571702912449837, "learning_rate": 2.4808234740350988e-06, "loss": 0.0011, "step": 82990 }, { "epoch": 1.4027023144588189, "grad_norm": 0.02523273229598999, "learning_rate": 2.479549626576616e-06, "loss": 0.0002, "step": 83000 }, { "epoch": 1.4028713147376692, "grad_norm": 0.09487809240818024, "learning_rate": 2.4782759984095277e-06, "loss": 0.0014, "step": 83010 }, { "epoch": 1.4030403150165198, "grad_norm": 0.06667988002300262, "learning_rate": 2.4770025896446477e-06, "loss": 0.0007, "step": 83020 }, { "epoch": 1.4032093152953702, "grad_norm": 0.024624986574053764, "learning_rate": 2.4757294003927647e-06, "loss": 0.0013, "step": 83030 }, { "epoch": 1.4033783155742208, "grad_norm": 0.027179623022675514, "learning_rate": 2.4744564307646555e-06, "loss": 0.0004, "step": 83040 }, { "epoch": 1.4035473158530711, "grad_norm": 0.006963491905480623, "learning_rate": 2.4731836808710717e-06, "loss": 0.0007, "step": 83050 }, { "epoch": 1.4037163161319217, "grad_norm": 0.014814918860793114, "learning_rate": 2.4719111508227523e-06, "loss": 0.0005, "step": 83060 }, { "epoch": 1.403885316410772, "grad_norm": 0.006815911270678043, "learning_rate": 2.47063884073041e-06, "loss": 0.001, "step": 83070 }, { "epoch": 1.4040543166896224, "grad_norm": 0.037115439772605896, "learning_rate": 2.4693667507047453e-06, "loss": 0.0013, "step": 83080 }, { "epoch": 1.404223316968473, "grad_norm": 0.13958847522735596, "learning_rate": 2.4680948808564327e-06, "loss": 0.0012, "step": 83090 }, { "epoch": 1.4043923172473234, "grad_norm": 0.05505302920937538, "learning_rate": 2.4668232312961345e-06, "loss": 0.0007, "step": 83100 }, { "epoch": 1.404561317526174, "grad_norm": 0.007390045560896397, "learning_rate": 2.4655518021344873e-06, "loss": 0.0012, "step": 83110 }, { "epoch": 1.4047303178050243, "grad_norm": 0.015265069901943207, "learning_rate": 2.464280593482114e-06, "loss": 0.0007, "step": 83120 }, { "epoch": 1.404899318083875, "grad_norm": 0.01129642128944397, "learning_rate": 2.463009605449614e-06, "loss": 0.0011, "step": 83130 }, { "epoch": 1.4050683183627253, "grad_norm": 0.022243892773985863, "learning_rate": 2.4617388381475715e-06, "loss": 0.0004, "step": 83140 }, { "epoch": 1.4052373186415759, "grad_norm": 0.00014853873290121555, "learning_rate": 2.4604682916865467e-06, "loss": 0.0008, "step": 83150 }, { "epoch": 1.4054063189204262, "grad_norm": 0.05525534227490425, "learning_rate": 2.4591979661770864e-06, "loss": 0.0008, "step": 83160 }, { "epoch": 1.4055753191992766, "grad_norm": 0.1048225536942482, "learning_rate": 2.457927861729712e-06, "loss": 0.0021, "step": 83170 }, { "epoch": 1.4057443194781272, "grad_norm": 0.02692188322544098, "learning_rate": 2.4566579784549303e-06, "loss": 0.0009, "step": 83180 }, { "epoch": 1.4059133197569775, "grad_norm": 0.011747177690267563, "learning_rate": 2.455388316463227e-06, "loss": 0.0014, "step": 83190 }, { "epoch": 1.4060823200358281, "grad_norm": 0.0679253339767456, "learning_rate": 2.4541188758650713e-06, "loss": 0.0005, "step": 83200 }, { "epoch": 1.4062513203146785, "grad_norm": 0.021077698096632957, "learning_rate": 2.452849656770906e-06, "loss": 0.0009, "step": 83210 }, { "epoch": 1.406420320593529, "grad_norm": 0.0737432986497879, "learning_rate": 2.4515806592911623e-06, "loss": 0.0011, "step": 83220 }, { "epoch": 1.4065893208723794, "grad_norm": 0.04319237917661667, "learning_rate": 2.4503118835362503e-06, "loss": 0.0009, "step": 83230 }, { "epoch": 1.40675832115123, "grad_norm": 0.04714735969901085, "learning_rate": 2.4490433296165563e-06, "loss": 0.0007, "step": 83240 }, { "epoch": 1.4069273214300804, "grad_norm": 0.014764646999537945, "learning_rate": 2.4477749976424537e-06, "loss": 0.0008, "step": 83250 }, { "epoch": 1.4070963217089307, "grad_norm": 0.028909694403409958, "learning_rate": 2.446506887724291e-06, "loss": 0.0008, "step": 83260 }, { "epoch": 1.4072653219877813, "grad_norm": 0.000218815024709329, "learning_rate": 2.4452389999724023e-06, "loss": 0.0006, "step": 83270 }, { "epoch": 1.4074343222666317, "grad_norm": 0.028963739052414894, "learning_rate": 2.4439713344970968e-06, "loss": 0.0024, "step": 83280 }, { "epoch": 1.4076033225454823, "grad_norm": 0.03829863294959068, "learning_rate": 2.4427038914086715e-06, "loss": 0.0007, "step": 83290 }, { "epoch": 1.4077723228243326, "grad_norm": 0.015505461022257805, "learning_rate": 2.441436670817396e-06, "loss": 0.0003, "step": 83300 }, { "epoch": 1.407941323103183, "grad_norm": 0.022600213065743446, "learning_rate": 2.4401696728335287e-06, "loss": 0.0006, "step": 83310 }, { "epoch": 1.4081103233820336, "grad_norm": 0.048072341829538345, "learning_rate": 2.4389028975673006e-06, "loss": 0.001, "step": 83320 }, { "epoch": 1.4082793236608842, "grad_norm": 0.047883547842502594, "learning_rate": 2.4376363451289313e-06, "loss": 0.001, "step": 83330 }, { "epoch": 1.4084483239397345, "grad_norm": 0.01860985904932022, "learning_rate": 2.436370015628613e-06, "loss": 0.0007, "step": 83340 }, { "epoch": 1.4086173242185849, "grad_norm": 0.04321892186999321, "learning_rate": 2.435103909176526e-06, "loss": 0.0009, "step": 83350 }, { "epoch": 1.4087863244974355, "grad_norm": 0.03282180428504944, "learning_rate": 2.4338380258828244e-06, "loss": 0.0008, "step": 83360 }, { "epoch": 1.4089553247762858, "grad_norm": 0.049251966178417206, "learning_rate": 2.4325723658576478e-06, "loss": 0.0014, "step": 83370 }, { "epoch": 1.4091243250551364, "grad_norm": 0.027250435203313828, "learning_rate": 2.431306929211117e-06, "loss": 0.0014, "step": 83380 }, { "epoch": 1.4092933253339868, "grad_norm": 0.007739432156085968, "learning_rate": 2.430041716053327e-06, "loss": 0.0006, "step": 83390 }, { "epoch": 1.4094623256128371, "grad_norm": 0.00580311706289649, "learning_rate": 2.428776726494359e-06, "loss": 0.0008, "step": 83400 }, { "epoch": 1.4096313258916877, "grad_norm": 0.023706577718257904, "learning_rate": 2.427511960644276e-06, "loss": 0.0008, "step": 83410 }, { "epoch": 1.4098003261705383, "grad_norm": 0.03796723112463951, "learning_rate": 2.4262474186131142e-06, "loss": 0.001, "step": 83420 }, { "epoch": 1.4099693264493887, "grad_norm": 0.012999052181839943, "learning_rate": 2.424983100510899e-06, "loss": 0.0006, "step": 83430 }, { "epoch": 1.410138326728239, "grad_norm": 0.03713338077068329, "learning_rate": 2.4237190064476284e-06, "loss": 0.0005, "step": 83440 }, { "epoch": 1.4103073270070896, "grad_norm": 0.02254197560250759, "learning_rate": 2.422455136533289e-06, "loss": 0.0006, "step": 83450 }, { "epoch": 1.41047632728594, "grad_norm": 0.09923889487981796, "learning_rate": 2.4211914908778387e-06, "loss": 0.0009, "step": 83460 }, { "epoch": 1.4106453275647906, "grad_norm": 0.03275029733777046, "learning_rate": 2.4199280695912235e-06, "loss": 0.0006, "step": 83470 }, { "epoch": 1.410814327843641, "grad_norm": 0.04385049268603325, "learning_rate": 2.4186648727833694e-06, "loss": 0.0007, "step": 83480 }, { "epoch": 1.4109833281224913, "grad_norm": 0.03857121989130974, "learning_rate": 2.4174019005641757e-06, "loss": 0.0008, "step": 83490 }, { "epoch": 1.4111523284013419, "grad_norm": 0.040195971727371216, "learning_rate": 2.4161391530435316e-06, "loss": 0.0005, "step": 83500 }, { "epoch": 1.4113213286801924, "grad_norm": 0.0192074254155159, "learning_rate": 2.414876630331299e-06, "loss": 0.0008, "step": 83510 }, { "epoch": 1.4114903289590428, "grad_norm": 0.07239630818367004, "learning_rate": 2.4136143325373263e-06, "loss": 0.0007, "step": 83520 }, { "epoch": 1.4116593292378932, "grad_norm": 0.0068862829357385635, "learning_rate": 2.4123522597714354e-06, "loss": 0.001, "step": 83530 }, { "epoch": 1.4118283295167438, "grad_norm": 0.04745925962924957, "learning_rate": 2.4110904121434383e-06, "loss": 0.0005, "step": 83540 }, { "epoch": 1.4119973297955941, "grad_norm": 0.012015627697110176, "learning_rate": 2.409828789763116e-06, "loss": 0.0006, "step": 83550 }, { "epoch": 1.4121663300744447, "grad_norm": 0.081517793238163, "learning_rate": 2.4085673927402416e-06, "loss": 0.0004, "step": 83560 }, { "epoch": 1.412335330353295, "grad_norm": 0.008573692291975021, "learning_rate": 2.4073062211845573e-06, "loss": 0.0004, "step": 83570 }, { "epoch": 1.4125043306321454, "grad_norm": 0.026937799528241158, "learning_rate": 2.406045275205794e-06, "loss": 0.0005, "step": 83580 }, { "epoch": 1.412673330910996, "grad_norm": 0.027086956426501274, "learning_rate": 2.4047845549136588e-06, "loss": 0.0005, "step": 83590 }, { "epoch": 1.4128423311898466, "grad_norm": 0.06727180629968643, "learning_rate": 2.4035240604178443e-06, "loss": 0.0004, "step": 83600 }, { "epoch": 1.413011331468697, "grad_norm": 0.10865618288516998, "learning_rate": 2.4022637918280144e-06, "loss": 0.0014, "step": 83610 }, { "epoch": 1.4131803317475473, "grad_norm": 0.030005959793925285, "learning_rate": 2.4010037492538235e-06, "loss": 0.0014, "step": 83620 }, { "epoch": 1.413349332026398, "grad_norm": 0.048618074506521225, "learning_rate": 2.3997439328048962e-06, "loss": 0.0007, "step": 83630 }, { "epoch": 1.4135183323052483, "grad_norm": 0.015072825364768505, "learning_rate": 2.398484342590847e-06, "loss": 0.0007, "step": 83640 }, { "epoch": 1.4136873325840988, "grad_norm": 0.024250496178865433, "learning_rate": 2.3972249787212637e-06, "loss": 0.001, "step": 83650 }, { "epoch": 1.4138563328629492, "grad_norm": 0.047752391546964645, "learning_rate": 2.3959658413057193e-06, "loss": 0.0013, "step": 83660 }, { "epoch": 1.4140253331417996, "grad_norm": 0.08491960167884827, "learning_rate": 2.394706930453762e-06, "loss": 0.0012, "step": 83670 }, { "epoch": 1.4141943334206502, "grad_norm": 0.00630682660266757, "learning_rate": 2.393448246274926e-06, "loss": 0.0008, "step": 83680 }, { "epoch": 1.4143633336995007, "grad_norm": 0.00278993952088058, "learning_rate": 2.39218978887872e-06, "loss": 0.0011, "step": 83690 }, { "epoch": 1.414532333978351, "grad_norm": 0.024969255551695824, "learning_rate": 2.3909315583746395e-06, "loss": 0.0004, "step": 83700 }, { "epoch": 1.4147013342572015, "grad_norm": 0.04617280885577202, "learning_rate": 2.3896735548721523e-06, "loss": 0.001, "step": 83710 }, { "epoch": 1.414870334536052, "grad_norm": 0.008670149371027946, "learning_rate": 2.3884157784807138e-06, "loss": 0.0012, "step": 83720 }, { "epoch": 1.4150393348149024, "grad_norm": 0.004349506925791502, "learning_rate": 2.387158229309757e-06, "loss": 0.0009, "step": 83730 }, { "epoch": 1.415208335093753, "grad_norm": 0.06572866439819336, "learning_rate": 2.385900907468693e-06, "loss": 0.0012, "step": 83740 }, { "epoch": 1.4153773353726034, "grad_norm": 0.03176647424697876, "learning_rate": 2.384643813066917e-06, "loss": 0.0014, "step": 83750 }, { "epoch": 1.4155463356514537, "grad_norm": 0.036327064037323, "learning_rate": 2.383386946213799e-06, "loss": 0.0006, "step": 83760 }, { "epoch": 1.4157153359303043, "grad_norm": 0.01255666371434927, "learning_rate": 2.382130307018694e-06, "loss": 0.0005, "step": 83770 }, { "epoch": 1.4158843362091549, "grad_norm": 0.04504786431789398, "learning_rate": 2.380873895590937e-06, "loss": 0.0011, "step": 83780 }, { "epoch": 1.4160533364880052, "grad_norm": 0.0008414179901592433, "learning_rate": 2.3796177120398427e-06, "loss": 0.0008, "step": 83790 }, { "epoch": 1.4162223367668556, "grad_norm": 0.05456938594579697, "learning_rate": 2.3783617564747016e-06, "loss": 0.001, "step": 83800 }, { "epoch": 1.4163913370457062, "grad_norm": 0.08158857375383377, "learning_rate": 2.3771060290047915e-06, "loss": 0.0015, "step": 83810 }, { "epoch": 1.4165603373245566, "grad_norm": 0.08919025212526321, "learning_rate": 2.375850529739363e-06, "loss": 0.0009, "step": 83820 }, { "epoch": 1.4167293376034071, "grad_norm": 0.07634237408638, "learning_rate": 2.374595258787655e-06, "loss": 0.0012, "step": 83830 }, { "epoch": 1.4168983378822575, "grad_norm": 0.031027309596538544, "learning_rate": 2.3733402162588782e-06, "loss": 0.0009, "step": 83840 }, { "epoch": 1.4170673381611079, "grad_norm": 0.016404179856181145, "learning_rate": 2.3720854022622303e-06, "loss": 0.0008, "step": 83850 }, { "epoch": 1.4172363384399584, "grad_norm": 0.11980027705430984, "learning_rate": 2.3708308169068832e-06, "loss": 0.0013, "step": 83860 }, { "epoch": 1.4174053387188088, "grad_norm": 0.0009762226836755872, "learning_rate": 2.369576460301996e-06, "loss": 0.0006, "step": 83870 }, { "epoch": 1.4175743389976594, "grad_norm": 0.06345545500516891, "learning_rate": 2.3683223325566997e-06, "loss": 0.0007, "step": 83880 }, { "epoch": 1.4177433392765098, "grad_norm": 0.10920125991106033, "learning_rate": 2.3670684337801126e-06, "loss": 0.0015, "step": 83890 }, { "epoch": 1.4179123395553603, "grad_norm": 0.06489748507738113, "learning_rate": 2.3658147640813267e-06, "loss": 0.0015, "step": 83900 }, { "epoch": 1.4180813398342107, "grad_norm": 0.06503353267908096, "learning_rate": 2.364561323569421e-06, "loss": 0.0014, "step": 83910 }, { "epoch": 1.4182503401130613, "grad_norm": 0.028652485460042953, "learning_rate": 2.3633081123534475e-06, "loss": 0.0008, "step": 83920 }, { "epoch": 1.4184193403919116, "grad_norm": 0.01862611249089241, "learning_rate": 2.362055130542445e-06, "loss": 0.0006, "step": 83930 }, { "epoch": 1.418588340670762, "grad_norm": 0.1063927486538887, "learning_rate": 2.360802378245426e-06, "loss": 0.0011, "step": 83940 }, { "epoch": 1.4187573409496126, "grad_norm": 0.014605813659727573, "learning_rate": 2.3595498555713865e-06, "loss": 0.0006, "step": 83950 }, { "epoch": 1.418926341228463, "grad_norm": 0.020873332396149635, "learning_rate": 2.3582975626293037e-06, "loss": 0.0002, "step": 83960 }, { "epoch": 1.4190953415073135, "grad_norm": 0.17552272975444794, "learning_rate": 2.357045499528133e-06, "loss": 0.0009, "step": 83970 }, { "epoch": 1.419264341786164, "grad_norm": 0.07404538244009018, "learning_rate": 2.355793666376808e-06, "loss": 0.0007, "step": 83980 }, { "epoch": 1.4194333420650145, "grad_norm": 0.013749707490205765, "learning_rate": 2.354542063284246e-06, "loss": 0.001, "step": 83990 }, { "epoch": 1.4196023423438648, "grad_norm": 0.04214341565966606, "learning_rate": 2.3532906903593434e-06, "loss": 0.0012, "step": 84000 }, { "epoch": 1.4197713426227154, "grad_norm": 0.008900276385247707, "learning_rate": 2.3520395477109724e-06, "loss": 0.0005, "step": 84010 }, { "epoch": 1.4199403429015658, "grad_norm": 0.05283127725124359, "learning_rate": 2.3507886354479927e-06, "loss": 0.0008, "step": 84020 }, { "epoch": 1.4201093431804162, "grad_norm": 0.02634141966700554, "learning_rate": 2.349537953679235e-06, "loss": 0.0005, "step": 84030 }, { "epoch": 1.4202783434592667, "grad_norm": 0.0013195067876949906, "learning_rate": 2.3482875025135195e-06, "loss": 0.0007, "step": 84040 }, { "epoch": 1.420447343738117, "grad_norm": 0.03893652185797691, "learning_rate": 2.347037282059637e-06, "loss": 0.0005, "step": 84050 }, { "epoch": 1.4206163440169677, "grad_norm": 0.07587000727653503, "learning_rate": 2.345787292426367e-06, "loss": 0.0012, "step": 84060 }, { "epoch": 1.420785344295818, "grad_norm": 0.0020923058036714792, "learning_rate": 2.34453753372246e-06, "loss": 0.0009, "step": 84070 }, { "epoch": 1.4209543445746686, "grad_norm": 0.03815976530313492, "learning_rate": 2.343288006056656e-06, "loss": 0.001, "step": 84080 }, { "epoch": 1.421123344853519, "grad_norm": 0.03211137279868126, "learning_rate": 2.3420387095376655e-06, "loss": 0.0013, "step": 84090 }, { "epoch": 1.4212923451323696, "grad_norm": 0.011824673973023891, "learning_rate": 2.3407896442741873e-06, "loss": 0.0008, "step": 84100 }, { "epoch": 1.42146134541122, "grad_norm": 0.06679586321115494, "learning_rate": 2.3395408103748924e-06, "loss": 0.001, "step": 84110 }, { "epoch": 1.4216303456900703, "grad_norm": 0.06375548988580704, "learning_rate": 2.338292207948439e-06, "loss": 0.0005, "step": 84120 }, { "epoch": 1.4217993459689209, "grad_norm": 0.02201116643846035, "learning_rate": 2.337043837103458e-06, "loss": 0.0005, "step": 84130 }, { "epoch": 1.4219683462477712, "grad_norm": 0.04283535107970238, "learning_rate": 2.335795697948565e-06, "loss": 0.0007, "step": 84140 }, { "epoch": 1.4221373465266218, "grad_norm": 0.013010313734412193, "learning_rate": 2.3345477905923574e-06, "loss": 0.0006, "step": 84150 }, { "epoch": 1.4223063468054722, "grad_norm": 0.06410606950521469, "learning_rate": 2.3333001151434042e-06, "loss": 0.0007, "step": 84160 }, { "epoch": 1.4224753470843228, "grad_norm": 0.01201480720192194, "learning_rate": 2.332052671710261e-06, "loss": 0.001, "step": 84170 }, { "epoch": 1.4226443473631731, "grad_norm": 0.016264183446764946, "learning_rate": 2.330805460401464e-06, "loss": 0.0011, "step": 84180 }, { "epoch": 1.4228133476420237, "grad_norm": 0.030496496707201004, "learning_rate": 2.329558481325523e-06, "loss": 0.0009, "step": 84190 }, { "epoch": 1.422982347920874, "grad_norm": 0.06798092275857925, "learning_rate": 2.328311734590934e-06, "loss": 0.0007, "step": 84200 }, { "epoch": 1.4231513481997244, "grad_norm": 0.0211226437240839, "learning_rate": 2.327065220306167e-06, "loss": 0.0005, "step": 84210 }, { "epoch": 1.423320348478575, "grad_norm": 0.03784128651022911, "learning_rate": 2.3258189385796786e-06, "loss": 0.001, "step": 84220 }, { "epoch": 1.4234893487574254, "grad_norm": 0.05676233768463135, "learning_rate": 2.3245728895198978e-06, "loss": 0.0011, "step": 84230 }, { "epoch": 1.423658349036276, "grad_norm": 0.05738433077931404, "learning_rate": 2.3233270732352377e-06, "loss": 0.0005, "step": 84240 }, { "epoch": 1.4238273493151263, "grad_norm": 0.08206577599048615, "learning_rate": 2.3220814898340927e-06, "loss": 0.0007, "step": 84250 }, { "epoch": 1.4239963495939767, "grad_norm": 0.107279472053051, "learning_rate": 2.3208361394248313e-06, "loss": 0.0011, "step": 84260 }, { "epoch": 1.4241653498728273, "grad_norm": 0.03278566524386406, "learning_rate": 2.3195910221158085e-06, "loss": 0.0006, "step": 84270 }, { "epoch": 1.4243343501516779, "grad_norm": 0.0013548015849664807, "learning_rate": 2.3183461380153522e-06, "loss": 0.0009, "step": 84280 }, { "epoch": 1.4245033504305282, "grad_norm": 0.036180540919303894, "learning_rate": 2.317101487231776e-06, "loss": 0.001, "step": 84290 }, { "epoch": 1.4246723507093786, "grad_norm": 0.037334784865379333, "learning_rate": 2.3158570698733678e-06, "loss": 0.001, "step": 84300 }, { "epoch": 1.4248413509882292, "grad_norm": 0.17496155202388763, "learning_rate": 2.3146128860484014e-06, "loss": 0.001, "step": 84310 }, { "epoch": 1.4250103512670795, "grad_norm": 0.015429135411977768, "learning_rate": 2.313368935865123e-06, "loss": 0.0005, "step": 84320 }, { "epoch": 1.4251793515459301, "grad_norm": 0.048344243317842484, "learning_rate": 2.312125219431764e-06, "loss": 0.0007, "step": 84330 }, { "epoch": 1.4253483518247805, "grad_norm": 0.004564461763948202, "learning_rate": 2.3108817368565362e-06, "loss": 0.0027, "step": 84340 }, { "epoch": 1.4255173521036308, "grad_norm": 0.01210798416286707, "learning_rate": 2.3096384882476243e-06, "loss": 0.0004, "step": 84350 }, { "epoch": 1.4256863523824814, "grad_norm": 0.027822580188512802, "learning_rate": 2.308395473713199e-06, "loss": 0.0033, "step": 84360 }, { "epoch": 1.425855352661332, "grad_norm": 0.08893080055713654, "learning_rate": 2.3071526933614103e-06, "loss": 0.0011, "step": 84370 }, { "epoch": 1.4260243529401824, "grad_norm": 0.0183196272701025, "learning_rate": 2.305910147300383e-06, "loss": 0.0006, "step": 84380 }, { "epoch": 1.4261933532190327, "grad_norm": 0.05367874726653099, "learning_rate": 2.3046678356382275e-06, "loss": 0.0006, "step": 84390 }, { "epoch": 1.4263623534978833, "grad_norm": 0.0326240099966526, "learning_rate": 2.3034257584830276e-06, "loss": 0.0008, "step": 84400 }, { "epoch": 1.4265313537767337, "grad_norm": 0.020917244255542755, "learning_rate": 2.3021839159428543e-06, "loss": 0.0006, "step": 84410 }, { "epoch": 1.4267003540555843, "grad_norm": 0.02447577938437462, "learning_rate": 2.3009423081257493e-06, "loss": 0.0008, "step": 84420 }, { "epoch": 1.4268693543344346, "grad_norm": 0.19547536969184875, "learning_rate": 2.2997009351397425e-06, "loss": 0.0013, "step": 84430 }, { "epoch": 1.427038354613285, "grad_norm": 0.027016576379537582, "learning_rate": 2.2984597970928364e-06, "loss": 0.0008, "step": 84440 }, { "epoch": 1.4272073548921356, "grad_norm": 0.0012160215992480516, "learning_rate": 2.2972188940930185e-06, "loss": 0.0005, "step": 84450 }, { "epoch": 1.4273763551709862, "grad_norm": 0.1148276999592781, "learning_rate": 2.29597822624825e-06, "loss": 0.0009, "step": 84460 }, { "epoch": 1.4275453554498365, "grad_norm": 0.013243789784610271, "learning_rate": 2.2947377936664794e-06, "loss": 0.0011, "step": 84470 }, { "epoch": 1.4277143557286869, "grad_norm": 0.0687238946557045, "learning_rate": 2.293497596455626e-06, "loss": 0.0004, "step": 84480 }, { "epoch": 1.4278833560075375, "grad_norm": 0.02749786525964737, "learning_rate": 2.2922576347235947e-06, "loss": 0.0008, "step": 84490 }, { "epoch": 1.4280523562863878, "grad_norm": 0.0005987897166050971, "learning_rate": 2.2910179085782706e-06, "loss": 0.0002, "step": 84500 }, { "epoch": 1.4282213565652384, "grad_norm": 0.039005473256111145, "learning_rate": 2.2897784181275123e-06, "loss": 0.0014, "step": 84510 }, { "epoch": 1.4283903568440888, "grad_norm": 0.04905512556433678, "learning_rate": 2.288539163479165e-06, "loss": 0.0006, "step": 84520 }, { "epoch": 1.4285593571229391, "grad_norm": 0.04409101605415344, "learning_rate": 2.2873001447410448e-06, "loss": 0.0012, "step": 84530 }, { "epoch": 1.4287283574017897, "grad_norm": 0.01638905517756939, "learning_rate": 2.2860613620209567e-06, "loss": 0.0017, "step": 84540 }, { "epoch": 1.4288973576806403, "grad_norm": 0.06980341672897339, "learning_rate": 2.2848228154266784e-06, "loss": 0.0012, "step": 84550 }, { "epoch": 1.4290663579594907, "grad_norm": 0.055803362280130386, "learning_rate": 2.283584505065973e-06, "loss": 0.0006, "step": 84560 }, { "epoch": 1.429235358238341, "grad_norm": 0.045138075947761536, "learning_rate": 2.2823464310465742e-06, "loss": 0.0009, "step": 84570 }, { "epoch": 1.4294043585171916, "grad_norm": 0.028129177168011665, "learning_rate": 2.2811085934762064e-06, "loss": 0.0008, "step": 84580 }, { "epoch": 1.429573358796042, "grad_norm": 0.16420289874076843, "learning_rate": 2.279870992462561e-06, "loss": 0.0008, "step": 84590 }, { "epoch": 1.4297423590748926, "grad_norm": 0.02660529501736164, "learning_rate": 2.2786336281133213e-06, "loss": 0.0012, "step": 84600 }, { "epoch": 1.429911359353743, "grad_norm": 0.001202136161737144, "learning_rate": 2.2773965005361397e-06, "loss": 0.0009, "step": 84610 }, { "epoch": 1.4300803596325933, "grad_norm": 0.029954062774777412, "learning_rate": 2.276159609838655e-06, "loss": 0.0008, "step": 84620 }, { "epoch": 1.4302493599114439, "grad_norm": 0.03627415746450424, "learning_rate": 2.2749229561284804e-06, "loss": 0.0005, "step": 84630 }, { "epoch": 1.4304183601902944, "grad_norm": 0.07238475233316422, "learning_rate": 2.2736865395132134e-06, "loss": 0.0008, "step": 84640 }, { "epoch": 1.4305873604691448, "grad_norm": 0.058888595551252365, "learning_rate": 2.272450360100425e-06, "loss": 0.001, "step": 84650 }, { "epoch": 1.4307563607479952, "grad_norm": 0.04588101804256439, "learning_rate": 2.2712144179976723e-06, "loss": 0.0012, "step": 84660 }, { "epoch": 1.4309253610268458, "grad_norm": 0.08574783802032471, "learning_rate": 2.2699787133124845e-06, "loss": 0.0009, "step": 84670 }, { "epoch": 1.4310943613056961, "grad_norm": 0.07861129194498062, "learning_rate": 2.2687432461523778e-06, "loss": 0.0012, "step": 84680 }, { "epoch": 1.4312633615845467, "grad_norm": 0.05322394147515297, "learning_rate": 2.2675080166248403e-06, "loss": 0.0009, "step": 84690 }, { "epoch": 1.431432361863397, "grad_norm": 0.028422387316823006, "learning_rate": 2.2662730248373465e-06, "loss": 0.001, "step": 84700 }, { "epoch": 1.4316013621422474, "grad_norm": 0.0500117689371109, "learning_rate": 2.265038270897343e-06, "loss": 0.001, "step": 84710 }, { "epoch": 1.431770362421098, "grad_norm": 0.04492614418268204, "learning_rate": 2.2638037549122605e-06, "loss": 0.0008, "step": 84720 }, { "epoch": 1.4319393626999486, "grad_norm": 0.030771596357226372, "learning_rate": 2.262569476989509e-06, "loss": 0.0005, "step": 84730 }, { "epoch": 1.432108362978799, "grad_norm": 0.03537273406982422, "learning_rate": 2.261335437236476e-06, "loss": 0.0007, "step": 84740 }, { "epoch": 1.4322773632576493, "grad_norm": 0.09645888209342957, "learning_rate": 2.260101635760531e-06, "loss": 0.0019, "step": 84750 }, { "epoch": 1.4324463635365, "grad_norm": 0.0512833297252655, "learning_rate": 2.258868072669017e-06, "loss": 0.0008, "step": 84760 }, { "epoch": 1.4326153638153503, "grad_norm": 0.006439016200602055, "learning_rate": 2.2576347480692636e-06, "loss": 0.0005, "step": 84770 }, { "epoch": 1.4327843640942008, "grad_norm": 0.04037424549460411, "learning_rate": 2.2564016620685724e-06, "loss": 0.0007, "step": 84780 }, { "epoch": 1.4329533643730512, "grad_norm": 0.004947283770889044, "learning_rate": 2.255168814774231e-06, "loss": 0.0013, "step": 84790 }, { "epoch": 1.4331223646519016, "grad_norm": 0.014233722351491451, "learning_rate": 2.2539362062935007e-06, "loss": 0.0008, "step": 84800 }, { "epoch": 1.4332913649307522, "grad_norm": 0.0038694292306900024, "learning_rate": 2.2527038367336267e-06, "loss": 0.001, "step": 84810 }, { "epoch": 1.4334603652096025, "grad_norm": 0.01896851696074009, "learning_rate": 2.251471706201828e-06, "loss": 0.0005, "step": 84820 }, { "epoch": 1.433629365488453, "grad_norm": 0.04535583034157753, "learning_rate": 2.25023981480531e-06, "loss": 0.0005, "step": 84830 }, { "epoch": 1.4337983657673035, "grad_norm": 0.0728137418627739, "learning_rate": 2.249008162651249e-06, "loss": 0.0016, "step": 84840 }, { "epoch": 1.433967366046154, "grad_norm": 0.002345575951039791, "learning_rate": 2.247776749846808e-06, "loss": 0.0004, "step": 84850 }, { "epoch": 1.4341363663250044, "grad_norm": 0.03326883539557457, "learning_rate": 2.2465455764991232e-06, "loss": 0.0005, "step": 84860 }, { "epoch": 1.434305366603855, "grad_norm": 0.006739679723978043, "learning_rate": 2.2453146427153167e-06, "loss": 0.001, "step": 84870 }, { "epoch": 1.4344743668827054, "grad_norm": 0.08603805303573608, "learning_rate": 2.24408394860248e-06, "loss": 0.0034, "step": 84880 }, { "epoch": 1.4346433671615557, "grad_norm": 0.06419666111469269, "learning_rate": 2.242853494267695e-06, "loss": 0.0006, "step": 84890 }, { "epoch": 1.4348123674404063, "grad_norm": 0.021079787984490395, "learning_rate": 2.2416232798180124e-06, "loss": 0.0009, "step": 84900 }, { "epoch": 1.4349813677192567, "grad_norm": 0.05516824498772621, "learning_rate": 2.240393305360469e-06, "loss": 0.0008, "step": 84910 }, { "epoch": 1.4351503679981072, "grad_norm": 0.0007657507085241377, "learning_rate": 2.2391635710020792e-06, "loss": 0.0012, "step": 84920 }, { "epoch": 1.4353193682769576, "grad_norm": 0.016834547743201256, "learning_rate": 2.237934076849837e-06, "loss": 0.0035, "step": 84930 }, { "epoch": 1.4354883685558082, "grad_norm": 0.020797796547412872, "learning_rate": 2.2367048230107107e-06, "loss": 0.0006, "step": 84940 }, { "epoch": 1.4356573688346586, "grad_norm": 0.020580783486366272, "learning_rate": 2.2354758095916553e-06, "loss": 0.0014, "step": 84950 }, { "epoch": 1.4358263691135091, "grad_norm": 0.04580893740057945, "learning_rate": 2.2342470366995968e-06, "loss": 0.0007, "step": 84960 }, { "epoch": 1.4359953693923595, "grad_norm": 0.05132446438074112, "learning_rate": 2.233018504441448e-06, "loss": 0.0006, "step": 84970 }, { "epoch": 1.4361643696712099, "grad_norm": 0.11026140302419662, "learning_rate": 2.2317902129240943e-06, "loss": 0.0018, "step": 84980 }, { "epoch": 1.4363333699500604, "grad_norm": 0.13513745367527008, "learning_rate": 2.230562162254406e-06, "loss": 0.0005, "step": 84990 }, { "epoch": 1.4365023702289108, "grad_norm": 0.0034779072739183903, "learning_rate": 2.229334352539226e-06, "loss": 0.0007, "step": 85000 }, { "epoch": 1.4366713705077614, "grad_norm": 0.0015267017297446728, "learning_rate": 2.2281067838853816e-06, "loss": 0.0005, "step": 85010 }, { "epoch": 1.4368403707866118, "grad_norm": 0.014659667387604713, "learning_rate": 2.2268794563996787e-06, "loss": 0.0005, "step": 85020 }, { "epoch": 1.4370093710654623, "grad_norm": 0.01734306290745735, "learning_rate": 2.2256523701888976e-06, "loss": 0.0008, "step": 85030 }, { "epoch": 1.4371783713443127, "grad_norm": 0.0024235215969383717, "learning_rate": 2.224425525359804e-06, "loss": 0.001, "step": 85040 }, { "epoch": 1.4373473716231633, "grad_norm": 0.046465914696455, "learning_rate": 2.2231989220191364e-06, "loss": 0.0013, "step": 85050 }, { "epoch": 1.4375163719020136, "grad_norm": 0.048767272382974625, "learning_rate": 2.2219725602736175e-06, "loss": 0.0003, "step": 85060 }, { "epoch": 1.437685372180864, "grad_norm": 0.015352829359471798, "learning_rate": 2.2207464402299445e-06, "loss": 0.0009, "step": 85070 }, { "epoch": 1.4378543724597146, "grad_norm": 0.06857003271579742, "learning_rate": 2.2195205619947983e-06, "loss": 0.0007, "step": 85080 }, { "epoch": 1.438023372738565, "grad_norm": 0.07974264770746231, "learning_rate": 2.218294925674834e-06, "loss": 0.0009, "step": 85090 }, { "epoch": 1.4381923730174155, "grad_norm": 0.04741501063108444, "learning_rate": 2.217069531376688e-06, "loss": 0.0015, "step": 85100 }, { "epoch": 1.438361373296266, "grad_norm": 0.05591466650366783, "learning_rate": 2.215844379206978e-06, "loss": 0.0009, "step": 85110 }, { "epoch": 1.4385303735751163, "grad_norm": 0.022825388237833977, "learning_rate": 2.2146194692722954e-06, "loss": 0.0005, "step": 85120 }, { "epoch": 1.4386993738539668, "grad_norm": 0.0009319696109741926, "learning_rate": 2.213394801679214e-06, "loss": 0.0005, "step": 85130 }, { "epoch": 1.4388683741328174, "grad_norm": 0.0101107032969594, "learning_rate": 2.2121703765342883e-06, "loss": 0.0005, "step": 85140 }, { "epoch": 1.4390373744116678, "grad_norm": 0.024927053600549698, "learning_rate": 2.2109461939440454e-06, "loss": 0.0006, "step": 85150 }, { "epoch": 1.4392063746905182, "grad_norm": 0.025109082460403442, "learning_rate": 2.2097222540149987e-06, "loss": 0.0009, "step": 85160 }, { "epoch": 1.4393753749693687, "grad_norm": 0.023030636832118034, "learning_rate": 2.2084985568536334e-06, "loss": 0.0007, "step": 85170 }, { "epoch": 1.439544375248219, "grad_norm": 0.038196589797735214, "learning_rate": 2.2072751025664204e-06, "loss": 0.0009, "step": 85180 }, { "epoch": 1.4397133755270697, "grad_norm": 0.0019264441216364503, "learning_rate": 2.206051891259803e-06, "loss": 0.002, "step": 85190 }, { "epoch": 1.43988237580592, "grad_norm": 0.047808222472667694, "learning_rate": 2.204828923040209e-06, "loss": 0.0007, "step": 85200 }, { "epoch": 1.4400513760847704, "grad_norm": 0.05627620220184326, "learning_rate": 2.203606198014041e-06, "loss": 0.0006, "step": 85210 }, { "epoch": 1.440220376363621, "grad_norm": 0.0782071202993393, "learning_rate": 2.202383716287684e-06, "loss": 0.0007, "step": 85220 }, { "epoch": 1.4403893766424716, "grad_norm": 0.2014874815940857, "learning_rate": 2.201161477967496e-06, "loss": 0.0006, "step": 85230 }, { "epoch": 1.440558376921322, "grad_norm": 0.04733043909072876, "learning_rate": 2.1999394831598225e-06, "loss": 0.0009, "step": 85240 }, { "epoch": 1.4407273772001723, "grad_norm": 0.09516479820013046, "learning_rate": 2.198717731970979e-06, "loss": 0.0005, "step": 85250 }, { "epoch": 1.4408963774790229, "grad_norm": 0.006495791953057051, "learning_rate": 2.197496224507265e-06, "loss": 0.0004, "step": 85260 }, { "epoch": 1.4410653777578732, "grad_norm": 0.05586622655391693, "learning_rate": 2.1962749608749595e-06, "loss": 0.0008, "step": 85270 }, { "epoch": 1.4412343780367238, "grad_norm": 0.057371292263269424, "learning_rate": 2.1950539411803156e-06, "loss": 0.0009, "step": 85280 }, { "epoch": 1.4414033783155742, "grad_norm": 0.06461921334266663, "learning_rate": 2.19383316552957e-06, "loss": 0.0005, "step": 85290 }, { "epoch": 1.4415723785944246, "grad_norm": 0.010810323059558868, "learning_rate": 2.1926126340289345e-06, "loss": 0.0014, "step": 85300 }, { "epoch": 1.4417413788732751, "grad_norm": 0.08385465294122696, "learning_rate": 2.191392346784601e-06, "loss": 0.0007, "step": 85310 }, { "epoch": 1.4419103791521257, "grad_norm": 0.026941556483507156, "learning_rate": 2.1901723039027417e-06, "loss": 0.0008, "step": 85320 }, { "epoch": 1.442079379430976, "grad_norm": 0.08849550038576126, "learning_rate": 2.1889525054895077e-06, "loss": 0.0006, "step": 85330 }, { "epoch": 1.4422483797098264, "grad_norm": 0.08569113165140152, "learning_rate": 2.1877329516510236e-06, "loss": 0.0007, "step": 85340 }, { "epoch": 1.442417379988677, "grad_norm": 0.01780073158442974, "learning_rate": 2.186513642493401e-06, "loss": 0.0004, "step": 85350 }, { "epoch": 1.4425863802675274, "grad_norm": 0.04532487690448761, "learning_rate": 2.1852945781227203e-06, "loss": 0.0007, "step": 85360 }, { "epoch": 1.442755380546378, "grad_norm": 0.058953043073415756, "learning_rate": 2.184075758645051e-06, "loss": 0.0009, "step": 85370 }, { "epoch": 1.4429243808252283, "grad_norm": 0.1885237693786621, "learning_rate": 2.1828571841664327e-06, "loss": 0.001, "step": 85380 }, { "epoch": 1.4430933811040787, "grad_norm": 0.020072024315595627, "learning_rate": 2.1816388547928903e-06, "loss": 0.0008, "step": 85390 }, { "epoch": 1.4432623813829293, "grad_norm": 0.010699886828660965, "learning_rate": 2.180420770630421e-06, "loss": 0.001, "step": 85400 }, { "epoch": 1.4434313816617799, "grad_norm": 0.03203423693776131, "learning_rate": 2.1792029317850077e-06, "loss": 0.0004, "step": 85410 }, { "epoch": 1.4436003819406302, "grad_norm": 0.019209813326597214, "learning_rate": 2.1779853383626043e-06, "loss": 0.0012, "step": 85420 }, { "epoch": 1.4437693822194806, "grad_norm": 0.05294565483927727, "learning_rate": 2.176767990469152e-06, "loss": 0.0008, "step": 85430 }, { "epoch": 1.4439383824983312, "grad_norm": 0.02167557179927826, "learning_rate": 2.1755508882105607e-06, "loss": 0.0007, "step": 85440 }, { "epoch": 1.4441073827771815, "grad_norm": 0.04798426106572151, "learning_rate": 2.1743340316927294e-06, "loss": 0.0012, "step": 85450 }, { "epoch": 1.4442763830560321, "grad_norm": 0.058179788291454315, "learning_rate": 2.1731174210215256e-06, "loss": 0.0006, "step": 85460 }, { "epoch": 1.4444453833348825, "grad_norm": 0.00041537615470588207, "learning_rate": 2.171901056302803e-06, "loss": 0.0006, "step": 85470 }, { "epoch": 1.4446143836137328, "grad_norm": 0.03446631133556366, "learning_rate": 2.1706849376423933e-06, "loss": 0.0006, "step": 85480 }, { "epoch": 1.4447833838925834, "grad_norm": 0.048760924488306046, "learning_rate": 2.1694690651460997e-06, "loss": 0.0008, "step": 85490 }, { "epoch": 1.444952384171434, "grad_norm": 0.01005274523049593, "learning_rate": 2.1682534389197125e-06, "loss": 0.0004, "step": 85500 }, { "epoch": 1.4451213844502844, "grad_norm": 0.001322588766925037, "learning_rate": 2.1670380590689953e-06, "loss": 0.0006, "step": 85510 }, { "epoch": 1.4452903847291347, "grad_norm": 0.045743152499198914, "learning_rate": 2.1658229256996955e-06, "loss": 0.0007, "step": 85520 }, { "epoch": 1.4454593850079853, "grad_norm": 0.06122135743498802, "learning_rate": 2.1646080389175312e-06, "loss": 0.0005, "step": 85530 }, { "epoch": 1.4456283852868357, "grad_norm": 0.03301031142473221, "learning_rate": 2.1633933988282067e-06, "loss": 0.0003, "step": 85540 }, { "epoch": 1.4457973855656863, "grad_norm": 0.029237085953354836, "learning_rate": 2.1621790055373986e-06, "loss": 0.0006, "step": 85550 }, { "epoch": 1.4459663858445366, "grad_norm": 0.06849449872970581, "learning_rate": 2.1609648591507687e-06, "loss": 0.0005, "step": 85560 }, { "epoch": 1.446135386123387, "grad_norm": 0.05567774921655655, "learning_rate": 2.159750959773949e-06, "loss": 0.0008, "step": 85570 }, { "epoch": 1.4463043864022376, "grad_norm": 0.02503601461648941, "learning_rate": 2.158537307512559e-06, "loss": 0.0009, "step": 85580 }, { "epoch": 1.4464733866810882, "grad_norm": 0.04423130676150322, "learning_rate": 2.1573239024721893e-06, "loss": 0.0006, "step": 85590 }, { "epoch": 1.4466423869599385, "grad_norm": 0.025956764817237854, "learning_rate": 2.1561107447584147e-06, "loss": 0.0008, "step": 85600 }, { "epoch": 1.4468113872387889, "grad_norm": 0.06159791350364685, "learning_rate": 2.1548978344767824e-06, "loss": 0.001, "step": 85610 }, { "epoch": 1.4469803875176395, "grad_norm": 0.1021602526307106, "learning_rate": 2.153685171732825e-06, "loss": 0.0008, "step": 85620 }, { "epoch": 1.4471493877964898, "grad_norm": 0.03043730929493904, "learning_rate": 2.152472756632047e-06, "loss": 0.0009, "step": 85630 }, { "epoch": 1.4473183880753404, "grad_norm": 0.00028773280791938305, "learning_rate": 2.151260589279937e-06, "loss": 0.001, "step": 85640 }, { "epoch": 1.4474873883541908, "grad_norm": 0.031069571152329445, "learning_rate": 2.1500486697819567e-06, "loss": 0.0007, "step": 85650 }, { "epoch": 1.4476563886330411, "grad_norm": 0.03495510667562485, "learning_rate": 2.1488369982435527e-06, "loss": 0.0007, "step": 85660 }, { "epoch": 1.4478253889118917, "grad_norm": 0.005145241506397724, "learning_rate": 2.147625574770141e-06, "loss": 0.001, "step": 85670 }, { "epoch": 1.447994389190742, "grad_norm": 0.1505383849143982, "learning_rate": 2.1464143994671257e-06, "loss": 0.0009, "step": 85680 }, { "epoch": 1.4481633894695927, "grad_norm": 0.04290211573243141, "learning_rate": 2.145203472439883e-06, "loss": 0.0009, "step": 85690 }, { "epoch": 1.448332389748443, "grad_norm": 0.05467577278614044, "learning_rate": 2.1439927937937717e-06, "loss": 0.0006, "step": 85700 }, { "epoch": 1.4485013900272936, "grad_norm": 0.04019749537110329, "learning_rate": 2.1427823636341233e-06, "loss": 0.0006, "step": 85710 }, { "epoch": 1.448670390306144, "grad_norm": 0.014146536588668823, "learning_rate": 2.141572182066255e-06, "loss": 0.0007, "step": 85720 }, { "epoch": 1.4488393905849946, "grad_norm": 0.0225143451243639, "learning_rate": 2.140362249195454e-06, "loss": 0.001, "step": 85730 }, { "epoch": 1.449008390863845, "grad_norm": 0.0570768304169178, "learning_rate": 2.1391525651269944e-06, "loss": 0.001, "step": 85740 }, { "epoch": 1.4491773911426953, "grad_norm": 0.024209171533584595, "learning_rate": 2.1379431299661215e-06, "loss": 0.0006, "step": 85750 }, { "epoch": 1.4493463914215459, "grad_norm": 0.047281306236982346, "learning_rate": 2.1367339438180625e-06, "loss": 0.0025, "step": 85760 }, { "epoch": 1.4495153917003962, "grad_norm": 0.04109838977456093, "learning_rate": 2.1355250067880252e-06, "loss": 0.0007, "step": 85770 }, { "epoch": 1.4496843919792468, "grad_norm": 0.005282451398670673, "learning_rate": 2.1343163189811896e-06, "loss": 0.0004, "step": 85780 }, { "epoch": 1.4498533922580972, "grad_norm": 0.0228683240711689, "learning_rate": 2.1331078805027206e-06, "loss": 0.0008, "step": 85790 }, { "epoch": 1.4500223925369478, "grad_norm": 0.047111328691244125, "learning_rate": 2.131899691457754e-06, "loss": 0.0012, "step": 85800 }, { "epoch": 1.4501913928157981, "grad_norm": 0.05333612114191055, "learning_rate": 2.1306917519514124e-06, "loss": 0.0007, "step": 85810 }, { "epoch": 1.4503603930946487, "grad_norm": 0.04406392201781273, "learning_rate": 2.1294840620887895e-06, "loss": 0.0013, "step": 85820 }, { "epoch": 1.450529393373499, "grad_norm": 0.027557572349905968, "learning_rate": 2.1282766219749623e-06, "loss": 0.0008, "step": 85830 }, { "epoch": 1.4506983936523494, "grad_norm": 0.012420267798006535, "learning_rate": 2.1270694317149815e-06, "loss": 0.0006, "step": 85840 }, { "epoch": 1.4508673939312, "grad_norm": 0.004347395151853561, "learning_rate": 2.125862491413881e-06, "loss": 0.0005, "step": 85850 }, { "epoch": 1.4510363942100504, "grad_norm": 0.019361618906259537, "learning_rate": 2.1246558011766676e-06, "loss": 0.0007, "step": 85860 }, { "epoch": 1.451205394488901, "grad_norm": 0.01567954197525978, "learning_rate": 2.1234493611083314e-06, "loss": 0.0007, "step": 85870 }, { "epoch": 1.4513743947677513, "grad_norm": 0.01867993175983429, "learning_rate": 2.122243171313839e-06, "loss": 0.0008, "step": 85880 }, { "epoch": 1.451543395046602, "grad_norm": 0.03657732158899307, "learning_rate": 2.1210372318981325e-06, "loss": 0.0007, "step": 85890 }, { "epoch": 1.4517123953254523, "grad_norm": 0.020141759887337685, "learning_rate": 2.1198315429661354e-06, "loss": 0.0015, "step": 85900 }, { "epoch": 1.4518813956043028, "grad_norm": 0.015410812571644783, "learning_rate": 2.1186261046227507e-06, "loss": 0.0014, "step": 85910 }, { "epoch": 1.4520503958831532, "grad_norm": 0.07391265779733658, "learning_rate": 2.1174209169728537e-06, "loss": 0.001, "step": 85920 }, { "epoch": 1.4522193961620036, "grad_norm": 0.318547785282135, "learning_rate": 2.1162159801213054e-06, "loss": 0.0006, "step": 85930 }, { "epoch": 1.4523883964408542, "grad_norm": 0.007752739824354649, "learning_rate": 2.115011294172937e-06, "loss": 0.0005, "step": 85940 }, { "epoch": 1.4525573967197045, "grad_norm": 0.07410235702991486, "learning_rate": 2.113806859232566e-06, "loss": 0.0009, "step": 85950 }, { "epoch": 1.452726396998555, "grad_norm": 0.03195637837052345, "learning_rate": 2.1126026754049804e-06, "loss": 0.0009, "step": 85960 }, { "epoch": 1.4528953972774055, "grad_norm": 0.0022371308878064156, "learning_rate": 2.1113987427949532e-06, "loss": 0.0006, "step": 85970 }, { "epoch": 1.453064397556256, "grad_norm": 0.024299968034029007, "learning_rate": 2.11019506150723e-06, "loss": 0.0019, "step": 85980 }, { "epoch": 1.4532333978351064, "grad_norm": 0.017303941771388054, "learning_rate": 2.1089916316465393e-06, "loss": 0.0008, "step": 85990 }, { "epoch": 1.453402398113957, "grad_norm": 0.02556757442653179, "learning_rate": 2.1077884533175818e-06, "loss": 0.0014, "step": 86000 }, { "epoch": 1.4535713983928074, "grad_norm": 0.09371578693389893, "learning_rate": 2.106585526625044e-06, "loss": 0.0008, "step": 86010 }, { "epoch": 1.4537403986716577, "grad_norm": 0.024883732199668884, "learning_rate": 2.1053828516735826e-06, "loss": 0.0007, "step": 86020 }, { "epoch": 1.4539093989505083, "grad_norm": 0.006892631761729717, "learning_rate": 2.1041804285678376e-06, "loss": 0.0005, "step": 86030 }, { "epoch": 1.4540783992293587, "grad_norm": 0.06672769039869308, "learning_rate": 2.102978257412428e-06, "loss": 0.0012, "step": 86040 }, { "epoch": 1.4542473995082092, "grad_norm": 0.0031189655419439077, "learning_rate": 2.101776338311944e-06, "loss": 0.0004, "step": 86050 }, { "epoch": 1.4544163997870596, "grad_norm": 0.025004854425787926, "learning_rate": 2.1005746713709614e-06, "loss": 0.0003, "step": 86060 }, { "epoch": 1.45458540006591, "grad_norm": 0.0011318206088617444, "learning_rate": 2.099373256694032e-06, "loss": 0.0018, "step": 86070 }, { "epoch": 1.4547544003447606, "grad_norm": 0.027253031730651855, "learning_rate": 2.098172094385681e-06, "loss": 0.0006, "step": 86080 }, { "epoch": 1.4549234006236111, "grad_norm": 0.013232845813035965, "learning_rate": 2.096971184550418e-06, "loss": 0.0004, "step": 86090 }, { "epoch": 1.4550924009024615, "grad_norm": 0.09382961690425873, "learning_rate": 2.0957705272927293e-06, "loss": 0.0007, "step": 86100 }, { "epoch": 1.4552614011813119, "grad_norm": 0.07172657549381256, "learning_rate": 2.0945701227170736e-06, "loss": 0.0028, "step": 86110 }, { "epoch": 1.4554304014601624, "grad_norm": 0.042001448571681976, "learning_rate": 2.0933699709278965e-06, "loss": 0.0006, "step": 86120 }, { "epoch": 1.4555994017390128, "grad_norm": 0.12435533851385117, "learning_rate": 2.0921700720296135e-06, "loss": 0.0009, "step": 86130 }, { "epoch": 1.4557684020178634, "grad_norm": 0.02972419187426567, "learning_rate": 2.090970426126624e-06, "loss": 0.0012, "step": 86140 }, { "epoch": 1.4559374022967138, "grad_norm": 0.004238105844706297, "learning_rate": 2.0897710333233008e-06, "loss": 0.0004, "step": 86150 }, { "epoch": 1.4561064025755641, "grad_norm": 0.17487910389900208, "learning_rate": 2.0885718937239995e-06, "loss": 0.0015, "step": 86160 }, { "epoch": 1.4562754028544147, "grad_norm": 0.08014755696058273, "learning_rate": 2.087373007433048e-06, "loss": 0.0005, "step": 86170 }, { "epoch": 1.4564444031332653, "grad_norm": 0.1412859410047531, "learning_rate": 2.0861743745547588e-06, "loss": 0.0008, "step": 86180 }, { "epoch": 1.4566134034121156, "grad_norm": 0.0705047994852066, "learning_rate": 2.084975995193415e-06, "loss": 0.0009, "step": 86190 }, { "epoch": 1.456782403690966, "grad_norm": 0.04238751530647278, "learning_rate": 2.0837778694532845e-06, "loss": 0.0007, "step": 86200 }, { "epoch": 1.4569514039698166, "grad_norm": 0.014505607075989246, "learning_rate": 2.082579997438608e-06, "loss": 0.0009, "step": 86210 }, { "epoch": 1.457120404248667, "grad_norm": 0.02632046863436699, "learning_rate": 2.0813823792536082e-06, "loss": 0.0005, "step": 86220 }, { "epoch": 1.4572894045275175, "grad_norm": 0.031719546765089035, "learning_rate": 2.0801850150024805e-06, "loss": 0.0006, "step": 86230 }, { "epoch": 1.457458404806368, "grad_norm": 0.015553918667137623, "learning_rate": 2.078987904789403e-06, "loss": 0.0007, "step": 86240 }, { "epoch": 1.4576274050852183, "grad_norm": 0.010944337584078312, "learning_rate": 2.0777910487185325e-06, "loss": 0.0009, "step": 86250 }, { "epoch": 1.4577964053640688, "grad_norm": 0.06787962466478348, "learning_rate": 2.076594446893998e-06, "loss": 0.0008, "step": 86260 }, { "epoch": 1.4579654056429194, "grad_norm": 0.01724882423877716, "learning_rate": 2.0753980994199103e-06, "loss": 0.0009, "step": 86270 }, { "epoch": 1.4581344059217698, "grad_norm": 0.0009835042292252183, "learning_rate": 2.0742020064003576e-06, "loss": 0.0005, "step": 86280 }, { "epoch": 1.4583034062006202, "grad_norm": 0.015828389674425125, "learning_rate": 2.0730061679394086e-06, "loss": 0.0004, "step": 86290 }, { "epoch": 1.4584724064794707, "grad_norm": 0.022186938673257828, "learning_rate": 2.071810584141103e-06, "loss": 0.0005, "step": 86300 }, { "epoch": 1.458641406758321, "grad_norm": 0.012610320001840591, "learning_rate": 2.070615255109465e-06, "loss": 0.0012, "step": 86310 }, { "epoch": 1.4588104070371717, "grad_norm": 0.05332818254828453, "learning_rate": 2.0694201809484914e-06, "loss": 0.0012, "step": 86320 }, { "epoch": 1.458979407316022, "grad_norm": 0.0006286121206358075, "learning_rate": 2.0682253617621636e-06, "loss": 0.0008, "step": 86330 }, { "epoch": 1.4591484075948724, "grad_norm": 0.028262170031666756, "learning_rate": 2.0670307976544313e-06, "loss": 0.0009, "step": 86340 }, { "epoch": 1.459317407873723, "grad_norm": 0.03445601835846901, "learning_rate": 2.0658364887292327e-06, "loss": 0.0008, "step": 86350 }, { "epoch": 1.4594864081525736, "grad_norm": 0.023881226778030396, "learning_rate": 2.0646424350904737e-06, "loss": 0.0006, "step": 86360 }, { "epoch": 1.459655408431424, "grad_norm": 0.02888556756079197, "learning_rate": 2.063448636842047e-06, "loss": 0.001, "step": 86370 }, { "epoch": 1.4598244087102743, "grad_norm": 0.04445100948214531, "learning_rate": 2.062255094087815e-06, "loss": 0.0006, "step": 86380 }, { "epoch": 1.4599934089891249, "grad_norm": 0.049608681350946426, "learning_rate": 2.061061806931625e-06, "loss": 0.0006, "step": 86390 }, { "epoch": 1.4601624092679752, "grad_norm": 0.03627485781908035, "learning_rate": 2.0598687754772957e-06, "loss": 0.0006, "step": 86400 }, { "epoch": 1.4603314095468258, "grad_norm": 0.0006325527792796493, "learning_rate": 2.05867599982863e-06, "loss": 0.0006, "step": 86410 }, { "epoch": 1.4605004098256762, "grad_norm": 0.07899534702301025, "learning_rate": 2.0574834800894018e-06, "loss": 0.001, "step": 86420 }, { "epoch": 1.4606694101045266, "grad_norm": 0.06240850314497948, "learning_rate": 2.056291216363369e-06, "loss": 0.0018, "step": 86430 }, { "epoch": 1.4608384103833771, "grad_norm": 0.02459152415394783, "learning_rate": 2.0550992087542614e-06, "loss": 0.0009, "step": 86440 }, { "epoch": 1.4610074106622277, "grad_norm": 0.06909990310668945, "learning_rate": 2.053907457365791e-06, "loss": 0.0005, "step": 86450 }, { "epoch": 1.461176410941078, "grad_norm": 0.029086565598845482, "learning_rate": 2.0527159623016457e-06, "loss": 0.0003, "step": 86460 }, { "epoch": 1.4613454112199284, "grad_norm": 0.043895281851291656, "learning_rate": 2.051524723665494e-06, "loss": 0.0004, "step": 86470 }, { "epoch": 1.461514411498779, "grad_norm": 0.09322455525398254, "learning_rate": 2.0503337415609747e-06, "loss": 0.001, "step": 86480 }, { "epoch": 1.4616834117776294, "grad_norm": 0.008567149750888348, "learning_rate": 2.0491430160917135e-06, "loss": 0.001, "step": 86490 }, { "epoch": 1.46185241205648, "grad_norm": 0.02848733961582184, "learning_rate": 2.047952547361305e-06, "loss": 0.0004, "step": 86500 }, { "epoch": 1.4620214123353303, "grad_norm": 0.03597874194383621, "learning_rate": 2.04676233547333e-06, "loss": 0.001, "step": 86510 }, { "epoch": 1.4621904126141807, "grad_norm": 0.11492574959993362, "learning_rate": 2.045572380531339e-06, "loss": 0.0011, "step": 86520 }, { "epoch": 1.4623594128930313, "grad_norm": 0.18648619949817657, "learning_rate": 2.0443826826388648e-06, "loss": 0.0006, "step": 86530 }, { "epoch": 1.4625284131718819, "grad_norm": 0.06965423375368118, "learning_rate": 2.0431932418994195e-06, "loss": 0.0014, "step": 86540 }, { "epoch": 1.4626974134507322, "grad_norm": 0.02029821090400219, "learning_rate": 2.0420040584164863e-06, "loss": 0.0012, "step": 86550 }, { "epoch": 1.4628664137295826, "grad_norm": 0.1096748486161232, "learning_rate": 2.0408151322935336e-06, "loss": 0.0027, "step": 86560 }, { "epoch": 1.4630354140084332, "grad_norm": 0.10865205526351929, "learning_rate": 2.0396264636340007e-06, "loss": 0.0006, "step": 86570 }, { "epoch": 1.4632044142872835, "grad_norm": 0.06341142952442169, "learning_rate": 2.0384380525413106e-06, "loss": 0.0005, "step": 86580 }, { "epoch": 1.4633734145661341, "grad_norm": 0.023144427686929703, "learning_rate": 2.0372498991188565e-06, "loss": 0.0009, "step": 86590 }, { "epoch": 1.4635424148449845, "grad_norm": 0.03163832426071167, "learning_rate": 2.0360620034700184e-06, "loss": 0.0013, "step": 86600 }, { "epoch": 1.4637114151238348, "grad_norm": 0.011350251734256744, "learning_rate": 2.0348743656981447e-06, "loss": 0.0006, "step": 86610 }, { "epoch": 1.4638804154026854, "grad_norm": 0.0069978744722902775, "learning_rate": 2.0336869859065694e-06, "loss": 0.0005, "step": 86620 }, { "epoch": 1.4640494156815358, "grad_norm": 0.03031867742538452, "learning_rate": 2.0324998641985966e-06, "loss": 0.0005, "step": 86630 }, { "epoch": 1.4642184159603864, "grad_norm": 0.0002373101160628721, "learning_rate": 2.0313130006775134e-06, "loss": 0.0005, "step": 86640 }, { "epoch": 1.4643874162392367, "grad_norm": 0.06348241865634918, "learning_rate": 2.030126395446583e-06, "loss": 0.0011, "step": 86650 }, { "epoch": 1.4645564165180873, "grad_norm": 0.05334573611617088, "learning_rate": 2.0289400486090465e-06, "loss": 0.0004, "step": 86660 }, { "epoch": 1.4647254167969377, "grad_norm": 0.02373746782541275, "learning_rate": 2.02775396026812e-06, "loss": 0.0012, "step": 86670 }, { "epoch": 1.4648944170757883, "grad_norm": 0.017289143055677414, "learning_rate": 2.0265681305270015e-06, "loss": 0.0011, "step": 86680 }, { "epoch": 1.4650634173546386, "grad_norm": 0.04153512045741081, "learning_rate": 2.0253825594888605e-06, "loss": 0.0006, "step": 86690 }, { "epoch": 1.465232417633489, "grad_norm": 0.03110233135521412, "learning_rate": 2.0241972472568504e-06, "loss": 0.0005, "step": 86700 }, { "epoch": 1.4654014179123396, "grad_norm": 0.00017331923299934715, "learning_rate": 2.0230121939340965e-06, "loss": 0.0009, "step": 86710 }, { "epoch": 1.46557041819119, "grad_norm": 0.10718990862369537, "learning_rate": 2.0218273996237075e-06, "loss": 0.0012, "step": 86720 }, { "epoch": 1.4657394184700405, "grad_norm": 0.03949207440018654, "learning_rate": 2.0206428644287624e-06, "loss": 0.0013, "step": 86730 }, { "epoch": 1.4659084187488909, "grad_norm": 0.03155754134058952, "learning_rate": 2.019458588452325e-06, "loss": 0.001, "step": 86740 }, { "epoch": 1.4660774190277415, "grad_norm": 0.026663416996598244, "learning_rate": 2.01827457179743e-06, "loss": 0.0009, "step": 86750 }, { "epoch": 1.4662464193065918, "grad_norm": 0.06034409627318382, "learning_rate": 2.0170908145670954e-06, "loss": 0.0009, "step": 86760 }, { "epoch": 1.4664154195854424, "grad_norm": 0.02038990706205368, "learning_rate": 2.015907316864311e-06, "loss": 0.0013, "step": 86770 }, { "epoch": 1.4665844198642928, "grad_norm": 0.017077386379241943, "learning_rate": 2.01472407879205e-06, "loss": 0.0007, "step": 86780 }, { "epoch": 1.4667534201431431, "grad_norm": 0.08096768707036972, "learning_rate": 2.013541100453256e-06, "loss": 0.0007, "step": 86790 }, { "epoch": 1.4669224204219937, "grad_norm": 0.011129752732813358, "learning_rate": 2.012358381950857e-06, "loss": 0.0012, "step": 86800 }, { "epoch": 1.467091420700844, "grad_norm": 0.02629377320408821, "learning_rate": 2.0111759233877555e-06, "loss": 0.0005, "step": 86810 }, { "epoch": 1.4672604209796947, "grad_norm": 0.0009242766536772251, "learning_rate": 2.0099937248668276e-06, "loss": 0.0006, "step": 86820 }, { "epoch": 1.467429421258545, "grad_norm": 0.02693902887403965, "learning_rate": 2.008811786490933e-06, "loss": 0.0009, "step": 86830 }, { "epoch": 1.4675984215373956, "grad_norm": 0.02461072988808155, "learning_rate": 2.0076301083629075e-06, "loss": 0.0008, "step": 86840 }, { "epoch": 1.467767421816246, "grad_norm": 0.008230219595134258, "learning_rate": 2.0064486905855583e-06, "loss": 0.0007, "step": 86850 }, { "epoch": 1.4679364220950966, "grad_norm": 0.020722288638353348, "learning_rate": 2.0052675332616785e-06, "loss": 0.0003, "step": 86860 }, { "epoch": 1.468105422373947, "grad_norm": 0.013037968426942825, "learning_rate": 2.0040866364940335e-06, "loss": 0.0005, "step": 86870 }, { "epoch": 1.4682744226527973, "grad_norm": 0.029389988631010056, "learning_rate": 2.0029060003853658e-06, "loss": 0.001, "step": 86880 }, { "epoch": 1.4684434229316479, "grad_norm": 0.0019620771054178476, "learning_rate": 2.001725625038399e-06, "loss": 0.0009, "step": 86890 }, { "epoch": 1.4686124232104982, "grad_norm": 0.023910041898489, "learning_rate": 2.0005455105558275e-06, "loss": 0.0012, "step": 86900 }, { "epoch": 1.4687814234893488, "grad_norm": 0.11860333383083344, "learning_rate": 1.999365657040331e-06, "loss": 0.0004, "step": 86910 }, { "epoch": 1.4689504237681992, "grad_norm": 0.08853831887245178, "learning_rate": 1.99818606459456e-06, "loss": 0.0008, "step": 86920 }, { "epoch": 1.4691194240470498, "grad_norm": 0.04686124622821808, "learning_rate": 1.997006733321147e-06, "loss": 0.0007, "step": 86930 }, { "epoch": 1.4692884243259001, "grad_norm": 0.020372433587908745, "learning_rate": 1.995827663322695e-06, "loss": 0.0007, "step": 86940 }, { "epoch": 1.4694574246047507, "grad_norm": 0.03424699977040291, "learning_rate": 1.994648854701795e-06, "loss": 0.0006, "step": 86950 }, { "epoch": 1.469626424883601, "grad_norm": 0.016948726028203964, "learning_rate": 1.9934703075610035e-06, "loss": 0.001, "step": 86960 }, { "epoch": 1.4697954251624514, "grad_norm": 0.02187652513384819, "learning_rate": 1.9922920220028643e-06, "loss": 0.0011, "step": 86970 }, { "epoch": 1.469964425441302, "grad_norm": 0.05699220299720764, "learning_rate": 1.99111399812989e-06, "loss": 0.0007, "step": 86980 }, { "epoch": 1.4701334257201524, "grad_norm": 0.005953615996986628, "learning_rate": 1.989936236044578e-06, "loss": 0.0012, "step": 86990 }, { "epoch": 1.470302425999003, "grad_norm": 0.009984872303903103, "learning_rate": 1.9887587358493956e-06, "loss": 0.0006, "step": 87000 }, { "epoch": 1.4704714262778533, "grad_norm": 0.012570555321872234, "learning_rate": 1.9875814976467935e-06, "loss": 0.0007, "step": 87010 }, { "epoch": 1.4706404265567037, "grad_norm": 0.03216434642672539, "learning_rate": 1.9864045215391974e-06, "loss": 0.0008, "step": 87020 }, { "epoch": 1.4708094268355543, "grad_norm": 0.025994719937443733, "learning_rate": 1.985227807629008e-06, "loss": 0.0006, "step": 87030 }, { "epoch": 1.4709784271144049, "grad_norm": 0.011998817324638367, "learning_rate": 1.9840513560186063e-06, "loss": 0.0006, "step": 87040 }, { "epoch": 1.4711474273932552, "grad_norm": 0.01960444636642933, "learning_rate": 1.9828751668103492e-06, "loss": 0.0029, "step": 87050 }, { "epoch": 1.4713164276721056, "grad_norm": 0.03137729689478874, "learning_rate": 1.9816992401065726e-06, "loss": 0.0007, "step": 87060 }, { "epoch": 1.4714854279509562, "grad_norm": 0.04449846222996712, "learning_rate": 1.980523576009584e-06, "loss": 0.0004, "step": 87070 }, { "epoch": 1.4716544282298065, "grad_norm": 0.04958295449614525, "learning_rate": 1.9793481746216763e-06, "loss": 0.001, "step": 87080 }, { "epoch": 1.471823428508657, "grad_norm": 0.053153663873672485, "learning_rate": 1.9781730360451113e-06, "loss": 0.0006, "step": 87090 }, { "epoch": 1.4719924287875075, "grad_norm": 0.010244421660900116, "learning_rate": 1.9769981603821342e-06, "loss": 0.0012, "step": 87100 }, { "epoch": 1.4721614290663578, "grad_norm": 0.04046174883842468, "learning_rate": 1.9758235477349626e-06, "loss": 0.0005, "step": 87110 }, { "epoch": 1.4723304293452084, "grad_norm": 0.010600178502500057, "learning_rate": 1.9746491982057965e-06, "loss": 0.0009, "step": 87120 }, { "epoch": 1.472499429624059, "grad_norm": 0.212949737906456, "learning_rate": 1.9734751118968066e-06, "loss": 0.0023, "step": 87130 }, { "epoch": 1.4726684299029094, "grad_norm": 0.05141410976648331, "learning_rate": 1.9723012889101477e-06, "loss": 0.0005, "step": 87140 }, { "epoch": 1.4728374301817597, "grad_norm": 0.028595589101314545, "learning_rate": 1.9711277293479444e-06, "loss": 0.0004, "step": 87150 }, { "epoch": 1.4730064304606103, "grad_norm": 0.06812410801649094, "learning_rate": 1.9699544333123057e-06, "loss": 0.0008, "step": 87160 }, { "epoch": 1.4731754307394607, "grad_norm": 0.06886996328830719, "learning_rate": 1.9687814009053096e-06, "loss": 0.0021, "step": 87170 }, { "epoch": 1.4733444310183113, "grad_norm": 0.0342499203979969, "learning_rate": 1.967608632229021e-06, "loss": 0.0004, "step": 87180 }, { "epoch": 1.4735134312971616, "grad_norm": 0.055144939571619034, "learning_rate": 1.9664361273854714e-06, "loss": 0.0009, "step": 87190 }, { "epoch": 1.473682431576012, "grad_norm": 0.07175352424383163, "learning_rate": 1.9652638864766772e-06, "loss": 0.0006, "step": 87200 }, { "epoch": 1.4738514318548626, "grad_norm": 0.0303585696965456, "learning_rate": 1.9640919096046296e-06, "loss": 0.0009, "step": 87210 }, { "epoch": 1.4740204321337131, "grad_norm": 0.0030788208823651075, "learning_rate": 1.9629201968712935e-06, "loss": 0.0009, "step": 87220 }, { "epoch": 1.4741894324125635, "grad_norm": 0.04283014312386513, "learning_rate": 1.9617487483786157e-06, "loss": 0.001, "step": 87230 }, { "epoch": 1.4743584326914139, "grad_norm": 0.026105936616659164, "learning_rate": 1.960577564228519e-06, "loss": 0.0007, "step": 87240 }, { "epoch": 1.4745274329702645, "grad_norm": 0.04074949771165848, "learning_rate": 1.9594066445228986e-06, "loss": 0.0013, "step": 87250 }, { "epoch": 1.4746964332491148, "grad_norm": 0.02833901159465313, "learning_rate": 1.9582359893636344e-06, "loss": 0.0007, "step": 87260 }, { "epoch": 1.4748654335279654, "grad_norm": 0.06579770147800446, "learning_rate": 1.9570655988525745e-06, "loss": 0.0011, "step": 87270 }, { "epoch": 1.4750344338068158, "grad_norm": 0.00045693680294789374, "learning_rate": 1.955895473091553e-06, "loss": 0.001, "step": 87280 }, { "epoch": 1.4752034340856661, "grad_norm": 0.004687410779297352, "learning_rate": 1.954725612182372e-06, "loss": 0.0004, "step": 87290 }, { "epoch": 1.4753724343645167, "grad_norm": 0.016979416832327843, "learning_rate": 1.953556016226818e-06, "loss": 0.0006, "step": 87300 }, { "epoch": 1.4755414346433673, "grad_norm": 0.042390208691358566, "learning_rate": 1.9523866853266528e-06, "loss": 0.0009, "step": 87310 }, { "epoch": 1.4757104349222177, "grad_norm": 0.01581060327589512, "learning_rate": 1.95121761958361e-06, "loss": 0.0014, "step": 87320 }, { "epoch": 1.475879435201068, "grad_norm": 0.011384384706616402, "learning_rate": 1.9500488190994075e-06, "loss": 0.0011, "step": 87330 }, { "epoch": 1.4760484354799186, "grad_norm": 0.04996991902589798, "learning_rate": 1.9488802839757335e-06, "loss": 0.0007, "step": 87340 }, { "epoch": 1.476217435758769, "grad_norm": 0.026351723819971085, "learning_rate": 1.9477120143142604e-06, "loss": 0.001, "step": 87350 }, { "epoch": 1.4763864360376195, "grad_norm": 0.002755114808678627, "learning_rate": 1.946544010216628e-06, "loss": 0.0009, "step": 87360 }, { "epoch": 1.47655543631647, "grad_norm": 0.02086162567138672, "learning_rate": 1.945376271784463e-06, "loss": 0.0005, "step": 87370 }, { "epoch": 1.4767244365953203, "grad_norm": 0.02455678954720497, "learning_rate": 1.9442087991193607e-06, "loss": 0.0013, "step": 87380 }, { "epoch": 1.4768934368741709, "grad_norm": 0.04566321521997452, "learning_rate": 1.9430415923229e-06, "loss": 0.0017, "step": 87390 }, { "epoch": 1.4770624371530214, "grad_norm": 0.050864171236753464, "learning_rate": 1.9418746514966307e-06, "loss": 0.0014, "step": 87400 }, { "epoch": 1.4772314374318718, "grad_norm": 0.042852431535720825, "learning_rate": 1.940707976742084e-06, "loss": 0.0006, "step": 87410 }, { "epoch": 1.4774004377107222, "grad_norm": 0.044308166950941086, "learning_rate": 1.939541568160765e-06, "loss": 0.0012, "step": 87420 }, { "epoch": 1.4775694379895727, "grad_norm": 0.02402975969016552, "learning_rate": 1.9383754258541604e-06, "loss": 0.0005, "step": 87430 }, { "epoch": 1.477738438268423, "grad_norm": 0.06683152168989182, "learning_rate": 1.9372095499237255e-06, "loss": 0.0009, "step": 87440 }, { "epoch": 1.4779074385472737, "grad_norm": 0.030294183641672134, "learning_rate": 1.9360439404709007e-06, "loss": 0.0006, "step": 87450 }, { "epoch": 1.478076438826124, "grad_norm": 0.033787600696086884, "learning_rate": 1.9348785975970973e-06, "loss": 0.0005, "step": 87460 }, { "epoch": 1.4782454391049744, "grad_norm": 0.027894029393792152, "learning_rate": 1.933713521403708e-06, "loss": 0.0008, "step": 87470 }, { "epoch": 1.478414439383825, "grad_norm": 0.022178735584020615, "learning_rate": 1.932548711992097e-06, "loss": 0.0005, "step": 87480 }, { "epoch": 1.4785834396626756, "grad_norm": 0.08248352259397507, "learning_rate": 1.931384169463613e-06, "loss": 0.0009, "step": 87490 }, { "epoch": 1.478752439941526, "grad_norm": 0.0213492251932621, "learning_rate": 1.930219893919571e-06, "loss": 0.0009, "step": 87500 }, { "epoch": 1.4789214402203763, "grad_norm": 0.021869516000151634, "learning_rate": 1.929055885461274e-06, "loss": 0.0005, "step": 87510 }, { "epoch": 1.4790904404992269, "grad_norm": 0.00892605260014534, "learning_rate": 1.927892144189992e-06, "loss": 0.0002, "step": 87520 }, { "epoch": 1.4792594407780773, "grad_norm": 0.04024768993258476, "learning_rate": 1.9267286702069803e-06, "loss": 0.0006, "step": 87530 }, { "epoch": 1.4794284410569278, "grad_norm": 0.08087485283613205, "learning_rate": 1.9255654636134628e-06, "loss": 0.0014, "step": 87540 }, { "epoch": 1.4795974413357782, "grad_norm": 0.030795546248555183, "learning_rate": 1.9244025245106457e-06, "loss": 0.0004, "step": 87550 }, { "epoch": 1.4797664416146286, "grad_norm": 0.10120958834886551, "learning_rate": 1.9232398529997125e-06, "loss": 0.0009, "step": 87560 }, { "epoch": 1.4799354418934791, "grad_norm": 0.09195226430892944, "learning_rate": 1.9220774491818178e-06, "loss": 0.001, "step": 87570 }, { "epoch": 1.4801044421723295, "grad_norm": 0.03049328923225403, "learning_rate": 1.920915313158099e-06, "loss": 0.0005, "step": 87580 }, { "epoch": 1.48027344245118, "grad_norm": 0.03200387582182884, "learning_rate": 1.919753445029665e-06, "loss": 0.002, "step": 87590 }, { "epoch": 1.4804424427300305, "grad_norm": 0.09777234494686127, "learning_rate": 1.918591844897606e-06, "loss": 0.001, "step": 87600 }, { "epoch": 1.480611443008881, "grad_norm": 0.009541748091578484, "learning_rate": 1.917430512862987e-06, "loss": 0.0008, "step": 87610 }, { "epoch": 1.4807804432877314, "grad_norm": 0.019821155816316605, "learning_rate": 1.9162694490268478e-06, "loss": 0.0007, "step": 87620 }, { "epoch": 1.480949443566582, "grad_norm": 0.015410860069096088, "learning_rate": 1.9151086534902077e-06, "loss": 0.0008, "step": 87630 }, { "epoch": 1.4811184438454323, "grad_norm": 0.06320604681968689, "learning_rate": 1.9139481263540626e-06, "loss": 0.0005, "step": 87640 }, { "epoch": 1.4812874441242827, "grad_norm": 0.030254274606704712, "learning_rate": 1.912787867719381e-06, "loss": 0.0011, "step": 87650 }, { "epoch": 1.4814564444031333, "grad_norm": 0.06914281845092773, "learning_rate": 1.911627877687115e-06, "loss": 0.0008, "step": 87660 }, { "epoch": 1.4816254446819836, "grad_norm": 0.009485420770943165, "learning_rate": 1.910468156358185e-06, "loss": 0.001, "step": 87670 }, { "epoch": 1.4817944449608342, "grad_norm": 0.023814881220459938, "learning_rate": 1.909308703833496e-06, "loss": 0.0011, "step": 87680 }, { "epoch": 1.4819634452396846, "grad_norm": 0.0059132324531674385, "learning_rate": 1.9081495202139233e-06, "loss": 0.0006, "step": 87690 }, { "epoch": 1.4821324455185352, "grad_norm": 0.0694541186094284, "learning_rate": 1.9069906056003245e-06, "loss": 0.0008, "step": 87700 }, { "epoch": 1.4823014457973855, "grad_norm": 0.055805131793022156, "learning_rate": 1.9058319600935272e-06, "loss": 0.0005, "step": 87710 }, { "epoch": 1.4824704460762361, "grad_norm": 0.04586062207818031, "learning_rate": 1.9046735837943426e-06, "loss": 0.0007, "step": 87720 }, { "epoch": 1.4826394463550865, "grad_norm": 0.029063422232866287, "learning_rate": 1.9035154768035512e-06, "loss": 0.0006, "step": 87730 }, { "epoch": 1.4828084466339368, "grad_norm": 0.009174218401312828, "learning_rate": 1.9023576392219184e-06, "loss": 0.0012, "step": 87740 }, { "epoch": 1.4829774469127874, "grad_norm": 0.09952740371227264, "learning_rate": 1.9012000711501777e-06, "loss": 0.0017, "step": 87750 }, { "epoch": 1.4831464471916378, "grad_norm": 0.020948514342308044, "learning_rate": 1.9000427726890464e-06, "loss": 0.0011, "step": 87760 }, { "epoch": 1.4833154474704884, "grad_norm": 0.1276942640542984, "learning_rate": 1.8988857439392117e-06, "loss": 0.001, "step": 87770 }, { "epoch": 1.4834844477493387, "grad_norm": 0.05418195202946663, "learning_rate": 1.897728985001343e-06, "loss": 0.0009, "step": 87780 }, { "epoch": 1.4836534480281893, "grad_norm": 0.044336918741464615, "learning_rate": 1.896572495976083e-06, "loss": 0.0014, "step": 87790 }, { "epoch": 1.4838224483070397, "grad_norm": 0.08884186297655106, "learning_rate": 1.8954162769640544e-06, "loss": 0.001, "step": 87800 }, { "epoch": 1.4839914485858903, "grad_norm": 0.005170523654669523, "learning_rate": 1.8942603280658495e-06, "loss": 0.0007, "step": 87810 }, { "epoch": 1.4841604488647406, "grad_norm": 0.008166944608092308, "learning_rate": 1.8931046493820438e-06, "loss": 0.0005, "step": 87820 }, { "epoch": 1.484329449143591, "grad_norm": 0.12038251757621765, "learning_rate": 1.891949241013189e-06, "loss": 0.0013, "step": 87830 }, { "epoch": 1.4844984494224416, "grad_norm": 0.17388568818569183, "learning_rate": 1.890794103059807e-06, "loss": 0.0009, "step": 87840 }, { "epoch": 1.484667449701292, "grad_norm": 0.0022980275098234415, "learning_rate": 1.889639235622404e-06, "loss": 0.0004, "step": 87850 }, { "epoch": 1.4848364499801425, "grad_norm": 0.0297295693308115, "learning_rate": 1.8884846388014566e-06, "loss": 0.0007, "step": 87860 }, { "epoch": 1.4850054502589929, "grad_norm": 0.012024343013763428, "learning_rate": 1.8873303126974223e-06, "loss": 0.0005, "step": 87870 }, { "epoch": 1.4851744505378432, "grad_norm": 0.08143452554941177, "learning_rate": 1.8861762574107307e-06, "loss": 0.0007, "step": 87880 }, { "epoch": 1.4853434508166938, "grad_norm": 0.00044382974738255143, "learning_rate": 1.8850224730417933e-06, "loss": 0.0005, "step": 87890 }, { "epoch": 1.4855124510955444, "grad_norm": 0.0237219650298357, "learning_rate": 1.8838689596909909e-06, "loss": 0.0007, "step": 87900 }, { "epoch": 1.4856814513743948, "grad_norm": 0.021469522267580032, "learning_rate": 1.882715717458689e-06, "loss": 0.0005, "step": 87910 }, { "epoch": 1.4858504516532451, "grad_norm": 0.03975404426455498, "learning_rate": 1.8815627464452218e-06, "loss": 0.0008, "step": 87920 }, { "epoch": 1.4860194519320957, "grad_norm": 0.24657444655895233, "learning_rate": 1.8804100467509062e-06, "loss": 0.0007, "step": 87930 }, { "epoch": 1.486188452210946, "grad_norm": 0.02665461227297783, "learning_rate": 1.8792576184760297e-06, "loss": 0.0008, "step": 87940 }, { "epoch": 1.4863574524897967, "grad_norm": 0.033536460250616074, "learning_rate": 1.8781054617208628e-06, "loss": 0.0007, "step": 87950 }, { "epoch": 1.486526452768647, "grad_norm": 0.05046175792813301, "learning_rate": 1.8769535765856444e-06, "loss": 0.0011, "step": 87960 }, { "epoch": 1.4866954530474974, "grad_norm": 0.01105108205229044, "learning_rate": 1.8758019631705964e-06, "loss": 0.0005, "step": 87970 }, { "epoch": 1.486864453326348, "grad_norm": 0.022933317348361015, "learning_rate": 1.874650621575917e-06, "loss": 0.0008, "step": 87980 }, { "epoch": 1.4870334536051986, "grad_norm": 0.07909981161355972, "learning_rate": 1.873499551901774e-06, "loss": 0.0012, "step": 87990 }, { "epoch": 1.487202453884049, "grad_norm": 0.025490691885352135, "learning_rate": 1.8723487542483182e-06, "loss": 0.0003, "step": 88000 }, { "epoch": 1.4873714541628993, "grad_norm": 0.023210370913147926, "learning_rate": 1.8711982287156767e-06, "loss": 0.0003, "step": 88010 }, { "epoch": 1.4875404544417499, "grad_norm": 0.05981844663619995, "learning_rate": 1.8700479754039464e-06, "loss": 0.0008, "step": 88020 }, { "epoch": 1.4877094547206002, "grad_norm": 0.09941408038139343, "learning_rate": 1.868897994413209e-06, "loss": 0.0018, "step": 88030 }, { "epoch": 1.4878784549994508, "grad_norm": 0.085413359105587, "learning_rate": 1.8677482858435153e-06, "loss": 0.0012, "step": 88040 }, { "epoch": 1.4880474552783012, "grad_norm": 0.017946895211935043, "learning_rate": 1.8665988497948983e-06, "loss": 0.0009, "step": 88050 }, { "epoch": 1.4882164555571515, "grad_norm": 0.022523336112499237, "learning_rate": 1.8654496863673616e-06, "loss": 0.0015, "step": 88060 }, { "epoch": 1.4883854558360021, "grad_norm": 0.03276972100138664, "learning_rate": 1.8643007956608893e-06, "loss": 0.0009, "step": 88070 }, { "epoch": 1.4885544561148527, "grad_norm": 0.1450221985578537, "learning_rate": 1.8631521777754418e-06, "loss": 0.0016, "step": 88080 }, { "epoch": 1.488723456393703, "grad_norm": 0.042774394154548645, "learning_rate": 1.8620038328109524e-06, "loss": 0.0008, "step": 88090 }, { "epoch": 1.4888924566725534, "grad_norm": 0.009172528050839901, "learning_rate": 1.8608557608673345e-06, "loss": 0.0006, "step": 88100 }, { "epoch": 1.489061456951404, "grad_norm": 0.0414559468626976, "learning_rate": 1.8597079620444735e-06, "loss": 0.0006, "step": 88110 }, { "epoch": 1.4892304572302544, "grad_norm": 0.10334787517786026, "learning_rate": 1.8585604364422367e-06, "loss": 0.0006, "step": 88120 }, { "epoch": 1.489399457509105, "grad_norm": 0.058103326708078384, "learning_rate": 1.8574131841604604e-06, "loss": 0.0008, "step": 88130 }, { "epoch": 1.4895684577879553, "grad_norm": 0.04806658625602722, "learning_rate": 1.8562662052989656e-06, "loss": 0.0018, "step": 88140 }, { "epoch": 1.4897374580668057, "grad_norm": 0.02748355269432068, "learning_rate": 1.8551194999575406e-06, "loss": 0.0006, "step": 88150 }, { "epoch": 1.4899064583456563, "grad_norm": 0.08941474556922913, "learning_rate": 1.853973068235958e-06, "loss": 0.002, "step": 88160 }, { "epoch": 1.4900754586245069, "grad_norm": 0.10284332931041718, "learning_rate": 1.8528269102339597e-06, "loss": 0.0007, "step": 88170 }, { "epoch": 1.4902444589033572, "grad_norm": 0.05748404935002327, "learning_rate": 1.8516810260512686e-06, "loss": 0.0004, "step": 88180 }, { "epoch": 1.4904134591822076, "grad_norm": 0.006811431143432856, "learning_rate": 1.8505354157875822e-06, "loss": 0.001, "step": 88190 }, { "epoch": 1.4905824594610582, "grad_norm": 0.04848619922995567, "learning_rate": 1.8493900795425756e-06, "loss": 0.001, "step": 88200 }, { "epoch": 1.4907514597399085, "grad_norm": 0.021616630256175995, "learning_rate": 1.8482450174158956e-06, "loss": 0.0007, "step": 88210 }, { "epoch": 1.490920460018759, "grad_norm": 0.010953355580568314, "learning_rate": 1.8471002295071715e-06, "loss": 0.0007, "step": 88220 }, { "epoch": 1.4910894602976095, "grad_norm": 0.005469362251460552, "learning_rate": 1.845955715916002e-06, "loss": 0.0011, "step": 88230 }, { "epoch": 1.4912584605764598, "grad_norm": 0.09093499928712845, "learning_rate": 1.8448114767419683e-06, "loss": 0.0005, "step": 88240 }, { "epoch": 1.4914274608553104, "grad_norm": 0.06009407341480255, "learning_rate": 1.843667512084622e-06, "loss": 0.0008, "step": 88250 }, { "epoch": 1.491596461134161, "grad_norm": 0.01995786651968956, "learning_rate": 1.8425238220434966e-06, "loss": 0.001, "step": 88260 }, { "epoch": 1.4917654614130114, "grad_norm": 0.01962241530418396, "learning_rate": 1.8413804067180952e-06, "loss": 0.0006, "step": 88270 }, { "epoch": 1.4919344616918617, "grad_norm": 0.00016943324590101838, "learning_rate": 1.8402372662079039e-06, "loss": 0.0011, "step": 88280 }, { "epoch": 1.4921034619707123, "grad_norm": 0.27835261821746826, "learning_rate": 1.8390944006123785e-06, "loss": 0.0023, "step": 88290 }, { "epoch": 1.4922724622495627, "grad_norm": 0.020108027383685112, "learning_rate": 1.8379518100309562e-06, "loss": 0.0005, "step": 88300 }, { "epoch": 1.4924414625284133, "grad_norm": 0.0005495472578331828, "learning_rate": 1.8368094945630455e-06, "loss": 0.0005, "step": 88310 }, { "epoch": 1.4926104628072636, "grad_norm": 0.0009330344619229436, "learning_rate": 1.835667454308035e-06, "loss": 0.0022, "step": 88320 }, { "epoch": 1.492779463086114, "grad_norm": 0.006676590535789728, "learning_rate": 1.834525689365289e-06, "loss": 0.0005, "step": 88330 }, { "epoch": 1.4929484633649646, "grad_norm": 0.09464714676141739, "learning_rate": 1.8333841998341435e-06, "loss": 0.0007, "step": 88340 }, { "epoch": 1.4931174636438151, "grad_norm": 0.035130809992551804, "learning_rate": 1.832242985813917e-06, "loss": 0.0004, "step": 88350 }, { "epoch": 1.4932864639226655, "grad_norm": 0.023531250655651093, "learning_rate": 1.8311020474038971e-06, "loss": 0.0006, "step": 88360 }, { "epoch": 1.4934554642015159, "grad_norm": 0.0664057582616806, "learning_rate": 1.8299613847033526e-06, "loss": 0.0012, "step": 88370 }, { "epoch": 1.4936244644803665, "grad_norm": 0.050617605447769165, "learning_rate": 1.8288209978115268e-06, "loss": 0.0015, "step": 88380 }, { "epoch": 1.4937934647592168, "grad_norm": 0.05996793136000633, "learning_rate": 1.8276808868276408e-06, "loss": 0.0007, "step": 88390 }, { "epoch": 1.4939624650380674, "grad_norm": 0.13003170490264893, "learning_rate": 1.8265410518508865e-06, "loss": 0.0005, "step": 88400 }, { "epoch": 1.4941314653169178, "grad_norm": 0.028135791420936584, "learning_rate": 1.8254014929804375e-06, "loss": 0.0008, "step": 88410 }, { "epoch": 1.4943004655957681, "grad_norm": 0.14190877974033356, "learning_rate": 1.8242622103154384e-06, "loss": 0.0004, "step": 88420 }, { "epoch": 1.4944694658746187, "grad_norm": 0.029621532186865807, "learning_rate": 1.8231232039550156e-06, "loss": 0.0005, "step": 88430 }, { "epoch": 1.4946384661534693, "grad_norm": 0.0512235090136528, "learning_rate": 1.8219844739982652e-06, "loss": 0.0007, "step": 88440 }, { "epoch": 1.4948074664323197, "grad_norm": 0.08601278066635132, "learning_rate": 1.820846020544264e-06, "loss": 0.0008, "step": 88450 }, { "epoch": 1.49497646671117, "grad_norm": 0.03690945357084274, "learning_rate": 1.8197078436920612e-06, "loss": 0.0007, "step": 88460 }, { "epoch": 1.4951454669900206, "grad_norm": 0.09859257936477661, "learning_rate": 1.8185699435406867e-06, "loss": 0.001, "step": 88470 }, { "epoch": 1.495314467268871, "grad_norm": 0.09783705323934555, "learning_rate": 1.8174323201891398e-06, "loss": 0.001, "step": 88480 }, { "epoch": 1.4954834675477215, "grad_norm": 0.024148667231202126, "learning_rate": 1.8162949737364028e-06, "loss": 0.0006, "step": 88490 }, { "epoch": 1.495652467826572, "grad_norm": 0.04207601770758629, "learning_rate": 1.8151579042814267e-06, "loss": 0.0008, "step": 88500 }, { "epoch": 1.4958214681054223, "grad_norm": 0.07525702565908432, "learning_rate": 1.814021111923145e-06, "loss": 0.0008, "step": 88510 }, { "epoch": 1.4959904683842729, "grad_norm": 0.01988234929740429, "learning_rate": 1.812884596760462e-06, "loss": 0.0005, "step": 88520 }, { "epoch": 1.4961594686631232, "grad_norm": 0.009409152902662754, "learning_rate": 1.8117483588922618e-06, "loss": 0.0014, "step": 88530 }, { "epoch": 1.4963284689419738, "grad_norm": 0.017130011692643166, "learning_rate": 1.8106123984174006e-06, "loss": 0.0005, "step": 88540 }, { "epoch": 1.4964974692208242, "grad_norm": 0.02408965677022934, "learning_rate": 1.8094767154347142e-06, "loss": 0.0006, "step": 88550 }, { "epoch": 1.4966664694996747, "grad_norm": 0.005401544738560915, "learning_rate": 1.8083413100430113e-06, "loss": 0.0009, "step": 88560 }, { "epoch": 1.496835469778525, "grad_norm": 0.017198331654071808, "learning_rate": 1.8072061823410803e-06, "loss": 0.0011, "step": 88570 }, { "epoch": 1.4970044700573757, "grad_norm": 0.013952374458312988, "learning_rate": 1.8060713324276792e-06, "loss": 0.0009, "step": 88580 }, { "epoch": 1.497173470336226, "grad_norm": 0.03441685065627098, "learning_rate": 1.8049367604015472e-06, "loss": 0.0005, "step": 88590 }, { "epoch": 1.4973424706150764, "grad_norm": 0.00039839776582084596, "learning_rate": 1.803802466361399e-06, "loss": 0.0018, "step": 88600 }, { "epoch": 1.497511470893927, "grad_norm": 0.004353836644440889, "learning_rate": 1.8026684504059204e-06, "loss": 0.0007, "step": 88610 }, { "epoch": 1.4976804711727774, "grad_norm": 0.02817024476826191, "learning_rate": 1.80153471263378e-06, "loss": 0.0007, "step": 88620 }, { "epoch": 1.497849471451628, "grad_norm": 0.024944987148046494, "learning_rate": 1.800401253143615e-06, "loss": 0.0004, "step": 88630 }, { "epoch": 1.4980184717304783, "grad_norm": 0.04219798743724823, "learning_rate": 1.7992680720340449e-06, "loss": 0.0006, "step": 88640 }, { "epoch": 1.498187472009329, "grad_norm": 0.05956093594431877, "learning_rate": 1.7981351694036586e-06, "loss": 0.0012, "step": 88650 }, { "epoch": 1.4983564722881793, "grad_norm": 0.04532230645418167, "learning_rate": 1.7970025453510275e-06, "loss": 0.0007, "step": 88660 }, { "epoch": 1.4985254725670298, "grad_norm": 0.06564140319824219, "learning_rate": 1.7958701999746925e-06, "loss": 0.0009, "step": 88670 }, { "epoch": 1.4986944728458802, "grad_norm": 0.027097081765532494, "learning_rate": 1.7947381333731761e-06, "loss": 0.0005, "step": 88680 }, { "epoch": 1.4988634731247306, "grad_norm": 0.18646438419818878, "learning_rate": 1.7936063456449698e-06, "loss": 0.0021, "step": 88690 }, { "epoch": 1.4990324734035811, "grad_norm": 0.0645003616809845, "learning_rate": 1.7924748368885492e-06, "loss": 0.0009, "step": 88700 }, { "epoch": 1.4992014736824315, "grad_norm": 0.05661391094326973, "learning_rate": 1.7913436072023566e-06, "loss": 0.0004, "step": 88710 }, { "epoch": 1.499370473961282, "grad_norm": 0.027739612385630608, "learning_rate": 1.7902126566848177e-06, "loss": 0.0024, "step": 88720 }, { "epoch": 1.4995394742401325, "grad_norm": 0.03819940239191055, "learning_rate": 1.7890819854343284e-06, "loss": 0.0006, "step": 88730 }, { "epoch": 1.499708474518983, "grad_norm": 0.06162050738930702, "learning_rate": 1.787951593549263e-06, "loss": 0.0008, "step": 88740 }, { "epoch": 1.4998774747978334, "grad_norm": 0.021486075595021248, "learning_rate": 1.7868214811279738e-06, "loss": 0.0008, "step": 88750 }, { "epoch": 1.500046475076684, "grad_norm": 0.025850096717476845, "learning_rate": 1.7856916482687825e-06, "loss": 0.0005, "step": 88760 }, { "epoch": 1.5002154753555343, "grad_norm": 0.05704595521092415, "learning_rate": 1.784562095069991e-06, "loss": 0.0004, "step": 88770 }, { "epoch": 1.5003844756343847, "grad_norm": 0.00453208526596427, "learning_rate": 1.7834328216298786e-06, "loss": 0.0003, "step": 88780 }, { "epoch": 1.5005534759132353, "grad_norm": 0.012295718304812908, "learning_rate": 1.782303828046693e-06, "loss": 0.0008, "step": 88790 }, { "epoch": 1.5007224761920859, "grad_norm": 0.06527142971754074, "learning_rate": 1.7811751144186667e-06, "loss": 0.0011, "step": 88800 }, { "epoch": 1.5008914764709362, "grad_norm": 0.010464944876730442, "learning_rate": 1.7800466808439993e-06, "loss": 0.0003, "step": 88810 }, { "epoch": 1.5010604767497866, "grad_norm": 0.05920897051692009, "learning_rate": 1.778918527420873e-06, "loss": 0.0004, "step": 88820 }, { "epoch": 1.501229477028637, "grad_norm": 0.07279383391141891, "learning_rate": 1.7777906542474398e-06, "loss": 0.0005, "step": 88830 }, { "epoch": 1.5013984773074875, "grad_norm": 0.025966499000787735, "learning_rate": 1.7766630614218317e-06, "loss": 0.001, "step": 88840 }, { "epoch": 1.5015674775863381, "grad_norm": 0.0517917238175869, "learning_rate": 1.775535749042156e-06, "loss": 0.0007, "step": 88850 }, { "epoch": 1.5017364778651885, "grad_norm": 0.0029979932587593794, "learning_rate": 1.7744087172064917e-06, "loss": 0.0005, "step": 88860 }, { "epoch": 1.5019054781440389, "grad_norm": 0.02118530496954918, "learning_rate": 1.773281966012899e-06, "loss": 0.0006, "step": 88870 }, { "epoch": 1.5020744784228894, "grad_norm": 0.0917520821094513, "learning_rate": 1.7721554955594072e-06, "loss": 0.0015, "step": 88880 }, { "epoch": 1.50224347870174, "grad_norm": 0.024326149374246597, "learning_rate": 1.7710293059440286e-06, "loss": 0.0005, "step": 88890 }, { "epoch": 1.5024124789805904, "grad_norm": 0.006080344319343567, "learning_rate": 1.7699033972647435e-06, "loss": 0.0007, "step": 88900 }, { "epoch": 1.5025814792594407, "grad_norm": 0.00796883087605238, "learning_rate": 1.768777769619514e-06, "loss": 0.0006, "step": 88910 }, { "epoch": 1.502750479538291, "grad_norm": 0.060068417340517044, "learning_rate": 1.7676524231062731e-06, "loss": 0.0008, "step": 88920 }, { "epoch": 1.5029194798171417, "grad_norm": 0.04129534587264061, "learning_rate": 1.7665273578229342e-06, "loss": 0.0005, "step": 88930 }, { "epoch": 1.5030884800959923, "grad_norm": 0.027377134189009666, "learning_rate": 1.7654025738673797e-06, "loss": 0.0006, "step": 88940 }, { "epoch": 1.5032574803748426, "grad_norm": 0.01775474287569523, "learning_rate": 1.7642780713374736e-06, "loss": 0.001, "step": 88950 }, { "epoch": 1.503426480653693, "grad_norm": 0.03469599410891533, "learning_rate": 1.7631538503310524e-06, "loss": 0.0008, "step": 88960 }, { "epoch": 1.5035954809325436, "grad_norm": 0.001071075676009059, "learning_rate": 1.7620299109459305e-06, "loss": 0.0006, "step": 88970 }, { "epoch": 1.503764481211394, "grad_norm": 0.006017435807734728, "learning_rate": 1.7609062532798936e-06, "loss": 0.0015, "step": 88980 }, { "epoch": 1.5039334814902445, "grad_norm": 0.26868242025375366, "learning_rate": 1.759782877430708e-06, "loss": 0.0008, "step": 88990 }, { "epoch": 1.504102481769095, "grad_norm": 0.0289106834679842, "learning_rate": 1.7586597834961094e-06, "loss": 0.0012, "step": 89000 }, { "epoch": 1.5042714820479453, "grad_norm": 0.04162640869617462, "learning_rate": 1.757536971573816e-06, "loss": 0.001, "step": 89010 }, { "epoch": 1.5044404823267958, "grad_norm": 0.06132710352540016, "learning_rate": 1.7564144417615148e-06, "loss": 0.0024, "step": 89020 }, { "epoch": 1.5046094826056464, "grad_norm": 0.015400673262774944, "learning_rate": 1.7552921941568741e-06, "loss": 0.0008, "step": 89030 }, { "epoch": 1.5047784828844968, "grad_norm": 0.018372181802988052, "learning_rate": 1.7541702288575314e-06, "loss": 0.0012, "step": 89040 }, { "epoch": 1.5049474831633471, "grad_norm": 0.08104084432125092, "learning_rate": 1.7530485459611073e-06, "loss": 0.0008, "step": 89050 }, { "epoch": 1.5051164834421977, "grad_norm": 0.01839877851307392, "learning_rate": 1.7519271455651897e-06, "loss": 0.0002, "step": 89060 }, { "epoch": 1.505285483721048, "grad_norm": 0.025520803406834602, "learning_rate": 1.75080602776735e-06, "loss": 0.0005, "step": 89070 }, { "epoch": 1.5054544839998987, "grad_norm": 0.07787895202636719, "learning_rate": 1.7496851926651265e-06, "loss": 0.002, "step": 89080 }, { "epoch": 1.505623484278749, "grad_norm": 0.04697718843817711, "learning_rate": 1.7485646403560392e-06, "loss": 0.0009, "step": 89090 }, { "epoch": 1.5057924845575994, "grad_norm": 0.009050512686371803, "learning_rate": 1.7474443709375837e-06, "loss": 0.0005, "step": 89100 }, { "epoch": 1.50596148483645, "grad_norm": 0.03001599945127964, "learning_rate": 1.7463243845072248e-06, "loss": 0.0011, "step": 89110 }, { "epoch": 1.5061304851153006, "grad_norm": 0.1008617952466011, "learning_rate": 1.7452046811624113e-06, "loss": 0.001, "step": 89120 }, { "epoch": 1.506299485394151, "grad_norm": 0.036419469863176346, "learning_rate": 1.7440852610005582e-06, "loss": 0.0007, "step": 89130 }, { "epoch": 1.5064684856730013, "grad_norm": 0.03142918273806572, "learning_rate": 1.7429661241190626e-06, "loss": 0.0006, "step": 89140 }, { "epoch": 1.5066374859518519, "grad_norm": 0.030899101868271828, "learning_rate": 1.741847270615295e-06, "loss": 0.0005, "step": 89150 }, { "epoch": 1.5068064862307022, "grad_norm": 0.057271748781204224, "learning_rate": 1.7407287005866025e-06, "loss": 0.0007, "step": 89160 }, { "epoch": 1.5069754865095528, "grad_norm": 0.05494721606373787, "learning_rate": 1.7396104141303032e-06, "loss": 0.0007, "step": 89170 }, { "epoch": 1.5071444867884032, "grad_norm": 0.018767060711979866, "learning_rate": 1.7384924113436964e-06, "loss": 0.0006, "step": 89180 }, { "epoch": 1.5073134870672535, "grad_norm": 0.0328710712492466, "learning_rate": 1.7373746923240502e-06, "loss": 0.0005, "step": 89190 }, { "epoch": 1.5074824873461041, "grad_norm": 0.04069463163614273, "learning_rate": 1.736257257168616e-06, "loss": 0.0006, "step": 89200 }, { "epoch": 1.5076514876249547, "grad_norm": 0.07939145714044571, "learning_rate": 1.7351401059746114e-06, "loss": 0.0008, "step": 89210 }, { "epoch": 1.507820487903805, "grad_norm": 0.0318235382437706, "learning_rate": 1.7340232388392375e-06, "loss": 0.0004, "step": 89220 }, { "epoch": 1.5079894881826554, "grad_norm": 0.08584918081760406, "learning_rate": 1.7329066558596646e-06, "loss": 0.0008, "step": 89230 }, { "epoch": 1.5081584884615058, "grad_norm": 0.038147006183862686, "learning_rate": 1.731790357133044e-06, "loss": 0.0007, "step": 89240 }, { "epoch": 1.5083274887403564, "grad_norm": 0.016866449266672134, "learning_rate": 1.7306743427564954e-06, "loss": 0.0014, "step": 89250 }, { "epoch": 1.508496489019207, "grad_norm": 0.051926031708717346, "learning_rate": 1.7295586128271208e-06, "loss": 0.0005, "step": 89260 }, { "epoch": 1.5086654892980573, "grad_norm": 0.0010678662220016122, "learning_rate": 1.7284431674419905e-06, "loss": 0.0006, "step": 89270 }, { "epoch": 1.5088344895769077, "grad_norm": 0.12304358929395676, "learning_rate": 1.727328006698158e-06, "loss": 0.001, "step": 89280 }, { "epoch": 1.5090034898557583, "grad_norm": 0.06094713136553764, "learning_rate": 1.7262131306926433e-06, "loss": 0.0005, "step": 89290 }, { "epoch": 1.5091724901346089, "grad_norm": 0.05747349560260773, "learning_rate": 1.7250985395224496e-06, "loss": 0.0006, "step": 89300 }, { "epoch": 1.5093414904134592, "grad_norm": 0.051456887274980545, "learning_rate": 1.7239842332845487e-06, "loss": 0.0006, "step": 89310 }, { "epoch": 1.5095104906923096, "grad_norm": 0.013854267075657845, "learning_rate": 1.7228702120758922e-06, "loss": 0.0014, "step": 89320 }, { "epoch": 1.50967949097116, "grad_norm": 0.0752004012465477, "learning_rate": 1.7217564759934053e-06, "loss": 0.0005, "step": 89330 }, { "epoch": 1.5098484912500105, "grad_norm": 0.04393812641501427, "learning_rate": 1.7206430251339907e-06, "loss": 0.0006, "step": 89340 }, { "epoch": 1.5100174915288611, "grad_norm": 0.0351971797645092, "learning_rate": 1.7195298595945193e-06, "loss": 0.0009, "step": 89350 }, { "epoch": 1.5101864918077115, "grad_norm": 0.03731498122215271, "learning_rate": 1.7184169794718448e-06, "loss": 0.0006, "step": 89360 }, { "epoch": 1.5103554920865618, "grad_norm": 0.15052951872348785, "learning_rate": 1.717304384862794e-06, "loss": 0.0016, "step": 89370 }, { "epoch": 1.5105244923654124, "grad_norm": 0.008420226164162159, "learning_rate": 1.7161920758641654e-06, "loss": 0.0027, "step": 89380 }, { "epoch": 1.510693492644263, "grad_norm": 0.031745459884405136, "learning_rate": 1.715080052572738e-06, "loss": 0.0003, "step": 89390 }, { "epoch": 1.5108624929231134, "grad_norm": 0.02988767810165882, "learning_rate": 1.7139683150852604e-06, "loss": 0.0005, "step": 89400 }, { "epoch": 1.5110314932019637, "grad_norm": 0.07824013382196426, "learning_rate": 1.7128568634984622e-06, "loss": 0.0007, "step": 89410 }, { "epoch": 1.511200493480814, "grad_norm": 0.08616514503955841, "learning_rate": 1.7117456979090418e-06, "loss": 0.0008, "step": 89420 }, { "epoch": 1.5113694937596647, "grad_norm": 0.25163033604621887, "learning_rate": 1.7106348184136789e-06, "loss": 0.0006, "step": 89430 }, { "epoch": 1.5115384940385153, "grad_norm": 0.010954326018691063, "learning_rate": 1.7095242251090233e-06, "loss": 0.001, "step": 89440 }, { "epoch": 1.5117074943173656, "grad_norm": 0.007738580461591482, "learning_rate": 1.7084139180917037e-06, "loss": 0.0007, "step": 89450 }, { "epoch": 1.511876494596216, "grad_norm": 0.04122927039861679, "learning_rate": 1.70730389745832e-06, "loss": 0.0011, "step": 89460 }, { "epoch": 1.5120454948750666, "grad_norm": 0.09648521989583969, "learning_rate": 1.7061941633054523e-06, "loss": 0.0007, "step": 89470 }, { "epoch": 1.5122144951539171, "grad_norm": 0.05812167376279831, "learning_rate": 1.7050847157296496e-06, "loss": 0.0011, "step": 89480 }, { "epoch": 1.5123834954327675, "grad_norm": 0.06150609627366066, "learning_rate": 1.7039755548274427e-06, "loss": 0.0006, "step": 89490 }, { "epoch": 1.5125524957116179, "grad_norm": 0.013848185539245605, "learning_rate": 1.7028666806953303e-06, "loss": 0.0007, "step": 89500 }, { "epoch": 1.5127214959904682, "grad_norm": 0.012053185142576694, "learning_rate": 1.7017580934297918e-06, "loss": 0.0002, "step": 89510 }, { "epoch": 1.5128904962693188, "grad_norm": 0.054048582911491394, "learning_rate": 1.7006497931272798e-06, "loss": 0.001, "step": 89520 }, { "epoch": 1.5130594965481694, "grad_norm": 0.09194289892911911, "learning_rate": 1.6995417798842228e-06, "loss": 0.0008, "step": 89530 }, { "epoch": 1.5132284968270198, "grad_norm": 0.010368667542934418, "learning_rate": 1.6984340537970202e-06, "loss": 0.0003, "step": 89540 }, { "epoch": 1.5133974971058701, "grad_norm": 0.012516893446445465, "learning_rate": 1.6973266149620533e-06, "loss": 0.0007, "step": 89550 }, { "epoch": 1.5135664973847207, "grad_norm": 0.004726815968751907, "learning_rate": 1.6962194634756708e-06, "loss": 0.0008, "step": 89560 }, { "epoch": 1.5137354976635713, "grad_norm": 0.054614678025245667, "learning_rate": 1.6951125994342043e-06, "loss": 0.0012, "step": 89570 }, { "epoch": 1.5139044979424217, "grad_norm": 0.046499382704496384, "learning_rate": 1.6940060229339521e-06, "loss": 0.0007, "step": 89580 }, { "epoch": 1.514073498221272, "grad_norm": 0.035863835364580154, "learning_rate": 1.692899734071194e-06, "loss": 0.0008, "step": 89590 }, { "epoch": 1.5142424985001224, "grad_norm": 0.01402368862181902, "learning_rate": 1.6917937329421842e-06, "loss": 0.0009, "step": 89600 }, { "epoch": 1.514411498778973, "grad_norm": 0.0271760281175375, "learning_rate": 1.6906880196431458e-06, "loss": 0.001, "step": 89610 }, { "epoch": 1.5145804990578235, "grad_norm": 0.016171742230653763, "learning_rate": 1.689582594270286e-06, "loss": 0.001, "step": 89620 }, { "epoch": 1.514749499336674, "grad_norm": 0.2035505771636963, "learning_rate": 1.6884774569197782e-06, "loss": 0.0008, "step": 89630 }, { "epoch": 1.5149184996155243, "grad_norm": 0.06680934876203537, "learning_rate": 1.6873726076877778e-06, "loss": 0.0007, "step": 89640 }, { "epoch": 1.5150874998943749, "grad_norm": 0.02429303154349327, "learning_rate": 1.686268046670409e-06, "loss": 0.0012, "step": 89650 }, { "epoch": 1.5152565001732254, "grad_norm": 0.006075267214328051, "learning_rate": 1.6851637739637771e-06, "loss": 0.0011, "step": 89660 }, { "epoch": 1.5154255004520758, "grad_norm": 0.05170568823814392, "learning_rate": 1.684059789663956e-06, "loss": 0.0004, "step": 89670 }, { "epoch": 1.5155945007309262, "grad_norm": 0.06817694008350372, "learning_rate": 1.6829560938670008e-06, "loss": 0.001, "step": 89680 }, { "epoch": 1.5157635010097765, "grad_norm": 0.06747003644704819, "learning_rate": 1.6818526866689356e-06, "loss": 0.0006, "step": 89690 }, { "epoch": 1.515932501288627, "grad_norm": 0.007273124530911446, "learning_rate": 1.6807495681657632e-06, "loss": 0.001, "step": 89700 }, { "epoch": 1.5161015015674777, "grad_norm": 0.02065047062933445, "learning_rate": 1.679646738453462e-06, "loss": 0.0007, "step": 89710 }, { "epoch": 1.516270501846328, "grad_norm": 0.06045887991786003, "learning_rate": 1.678544197627981e-06, "loss": 0.0008, "step": 89720 }, { "epoch": 1.5164395021251784, "grad_norm": 0.040009673684835434, "learning_rate": 1.677441945785247e-06, "loss": 0.0012, "step": 89730 }, { "epoch": 1.516608502404029, "grad_norm": 0.8757259249687195, "learning_rate": 1.676339983021164e-06, "loss": 0.001, "step": 89740 }, { "epoch": 1.5167775026828796, "grad_norm": 0.03649216145277023, "learning_rate": 1.6752383094316038e-06, "loss": 0.001, "step": 89750 }, { "epoch": 1.51694650296173, "grad_norm": 0.03585437312722206, "learning_rate": 1.6741369251124218e-06, "loss": 0.0006, "step": 89760 }, { "epoch": 1.5171155032405803, "grad_norm": 0.18319280445575714, "learning_rate": 1.6730358301594396e-06, "loss": 0.0002, "step": 89770 }, { "epoch": 1.5172845035194307, "grad_norm": 0.08437007665634155, "learning_rate": 1.6719350246684613e-06, "loss": 0.0008, "step": 89780 }, { "epoch": 1.5174535037982813, "grad_norm": 0.021380001679062843, "learning_rate": 1.6708345087352589e-06, "loss": 0.0013, "step": 89790 }, { "epoch": 1.5176225040771318, "grad_norm": 0.074330635368824, "learning_rate": 1.6697342824555861e-06, "loss": 0.0008, "step": 89800 }, { "epoch": 1.5177915043559822, "grad_norm": 0.011674449779093266, "learning_rate": 1.6686343459251647e-06, "loss": 0.0007, "step": 89810 }, { "epoch": 1.5179605046348326, "grad_norm": 0.016544923186302185, "learning_rate": 1.6675346992396973e-06, "loss": 0.0003, "step": 89820 }, { "epoch": 1.5181295049136831, "grad_norm": 0.08490558713674545, "learning_rate": 1.6664353424948558e-06, "loss": 0.0013, "step": 89830 }, { "epoch": 1.5182985051925337, "grad_norm": 0.1269432008266449, "learning_rate": 1.6653362757862928e-06, "loss": 0.0011, "step": 89840 }, { "epoch": 1.518467505471384, "grad_norm": 0.07166081666946411, "learning_rate": 1.6642374992096295e-06, "loss": 0.0006, "step": 89850 }, { "epoch": 1.5186365057502345, "grad_norm": 0.10532987862825394, "learning_rate": 1.6631390128604653e-06, "loss": 0.0011, "step": 89860 }, { "epoch": 1.5188055060290848, "grad_norm": 0.05109262093901634, "learning_rate": 1.662040816834376e-06, "loss": 0.0006, "step": 89870 }, { "epoch": 1.5189745063079354, "grad_norm": 0.014516928233206272, "learning_rate": 1.6609429112269072e-06, "loss": 0.0008, "step": 89880 }, { "epoch": 1.519143506586786, "grad_norm": 0.017849545925855637, "learning_rate": 1.6598452961335843e-06, "loss": 0.0008, "step": 89890 }, { "epoch": 1.5193125068656363, "grad_norm": 0.11236193776130676, "learning_rate": 1.6587479716499027e-06, "loss": 0.0006, "step": 89900 }, { "epoch": 1.5194815071444867, "grad_norm": 0.07865837961435318, "learning_rate": 1.6576509378713363e-06, "loss": 0.0007, "step": 89910 }, { "epoch": 1.5196505074233373, "grad_norm": 0.07344283908605576, "learning_rate": 1.6565541948933322e-06, "loss": 0.001, "step": 89920 }, { "epoch": 1.5198195077021877, "grad_norm": 0.11414451152086258, "learning_rate": 1.6554577428113145e-06, "loss": 0.0005, "step": 89930 }, { "epoch": 1.5199885079810382, "grad_norm": 0.0004753537359647453, "learning_rate": 1.654361581720676e-06, "loss": 0.0003, "step": 89940 }, { "epoch": 1.5201575082598886, "grad_norm": 0.017375130206346512, "learning_rate": 1.6532657117167916e-06, "loss": 0.001, "step": 89950 }, { "epoch": 1.520326508538739, "grad_norm": 0.037377480417490005, "learning_rate": 1.6521701328950041e-06, "loss": 0.0007, "step": 89960 }, { "epoch": 1.5204955088175895, "grad_norm": 0.0739019438624382, "learning_rate": 1.651074845350637e-06, "loss": 0.0008, "step": 89970 }, { "epoch": 1.5206645090964401, "grad_norm": 0.0003539329918567091, "learning_rate": 1.6499798491789827e-06, "loss": 0.0007, "step": 89980 }, { "epoch": 1.5208335093752905, "grad_norm": 0.00020151161879766732, "learning_rate": 1.6488851444753146e-06, "loss": 0.0005, "step": 89990 }, { "epoch": 1.5210025096541409, "grad_norm": 0.022759245708584785, "learning_rate": 1.6477907313348735e-06, "loss": 0.0016, "step": 90000 }, { "epoch": 1.5211715099329914, "grad_norm": 0.006832859478890896, "learning_rate": 1.6466966098528825e-06, "loss": 0.0006, "step": 90010 }, { "epoch": 1.5213405102118418, "grad_norm": 0.03370920941233635, "learning_rate": 1.6456027801245317e-06, "loss": 0.0007, "step": 90020 }, { "epoch": 1.5215095104906924, "grad_norm": 0.009634205140173435, "learning_rate": 1.6445092422449932e-06, "loss": 0.0008, "step": 90030 }, { "epoch": 1.5216785107695427, "grad_norm": 0.03818223625421524, "learning_rate": 1.6434159963094065e-06, "loss": 0.0008, "step": 90040 }, { "epoch": 1.521847511048393, "grad_norm": 0.01654481515288353, "learning_rate": 1.6423230424128933e-06, "loss": 0.0008, "step": 90050 }, { "epoch": 1.5220165113272437, "grad_norm": 0.04384077340364456, "learning_rate": 1.6412303806505414e-06, "loss": 0.0008, "step": 90060 }, { "epoch": 1.5221855116060943, "grad_norm": 0.010644759051501751, "learning_rate": 1.640138011117422e-06, "loss": 0.0005, "step": 90070 }, { "epoch": 1.5223545118849446, "grad_norm": 0.07942457497119904, "learning_rate": 1.6390459339085728e-06, "loss": 0.0008, "step": 90080 }, { "epoch": 1.522523512163795, "grad_norm": 0.14543570578098297, "learning_rate": 1.637954149119012e-06, "loss": 0.0008, "step": 90090 }, { "epoch": 1.5226925124426456, "grad_norm": 0.0862889289855957, "learning_rate": 1.636862656843729e-06, "loss": 0.0008, "step": 90100 }, { "epoch": 1.522861512721496, "grad_norm": 0.0043488903902471066, "learning_rate": 1.6357714571776901e-06, "loss": 0.0016, "step": 90110 }, { "epoch": 1.5230305130003465, "grad_norm": 0.02066611312329769, "learning_rate": 1.6346805502158364e-06, "loss": 0.0007, "step": 90120 }, { "epoch": 1.523199513279197, "grad_norm": 0.0023197117261588573, "learning_rate": 1.6335899360530783e-06, "loss": 0.0008, "step": 90130 }, { "epoch": 1.5233685135580473, "grad_norm": 0.009627950377762318, "learning_rate": 1.6324996147843075e-06, "loss": 0.0005, "step": 90140 }, { "epoch": 1.5235375138368978, "grad_norm": 0.01624545268714428, "learning_rate": 1.631409586504385e-06, "loss": 0.0005, "step": 90150 }, { "epoch": 1.5237065141157484, "grad_norm": 0.004025209695100784, "learning_rate": 1.6303198513081514e-06, "loss": 0.0005, "step": 90160 }, { "epoch": 1.5238755143945988, "grad_norm": 0.034432973712682724, "learning_rate": 1.6292304092904155e-06, "loss": 0.0014, "step": 90170 }, { "epoch": 1.5240445146734491, "grad_norm": 0.0053732809610664845, "learning_rate": 1.6281412605459668e-06, "loss": 0.0005, "step": 90180 }, { "epoch": 1.5242135149522995, "grad_norm": 0.10048115253448486, "learning_rate": 1.6270524051695647e-06, "loss": 0.0008, "step": 90190 }, { "epoch": 1.52438251523115, "grad_norm": 0.07562459260225296, "learning_rate": 1.6259638432559467e-06, "loss": 0.0009, "step": 90200 }, { "epoch": 1.5245515155100007, "grad_norm": 0.03330732509493828, "learning_rate": 1.6248755748998203e-06, "loss": 0.0007, "step": 90210 }, { "epoch": 1.524720515788851, "grad_norm": 0.016535423696041107, "learning_rate": 1.6237876001958735e-06, "loss": 0.0003, "step": 90220 }, { "epoch": 1.5248895160677014, "grad_norm": 0.08232079446315765, "learning_rate": 1.622699919238762e-06, "loss": 0.0006, "step": 90230 }, { "epoch": 1.525058516346552, "grad_norm": 0.006675524637103081, "learning_rate": 1.621612532123123e-06, "loss": 0.0008, "step": 90240 }, { "epoch": 1.5252275166254026, "grad_norm": 0.009124464355409145, "learning_rate": 1.62052543894356e-06, "loss": 0.0005, "step": 90250 }, { "epoch": 1.525396516904253, "grad_norm": 0.0359950065612793, "learning_rate": 1.6194386397946598e-06, "loss": 0.0005, "step": 90260 }, { "epoch": 1.5255655171831033, "grad_norm": 0.03500552102923393, "learning_rate": 1.6183521347709752e-06, "loss": 0.0006, "step": 90270 }, { "epoch": 1.5257345174619537, "grad_norm": 0.0056722103618085384, "learning_rate": 1.6172659239670396e-06, "loss": 0.0002, "step": 90280 }, { "epoch": 1.5259035177408042, "grad_norm": 0.014110148884356022, "learning_rate": 1.6161800074773582e-06, "loss": 0.0003, "step": 90290 }, { "epoch": 1.5260725180196548, "grad_norm": 0.042396847158670425, "learning_rate": 1.615094385396413e-06, "loss": 0.0011, "step": 90300 }, { "epoch": 1.5262415182985052, "grad_norm": 0.0611516535282135, "learning_rate": 1.6140090578186546e-06, "loss": 0.0015, "step": 90310 }, { "epoch": 1.5264105185773555, "grad_norm": 0.059691231697797775, "learning_rate": 1.6129240248385153e-06, "loss": 0.0012, "step": 90320 }, { "epoch": 1.5265795188562061, "grad_norm": 0.03298520669341087, "learning_rate": 1.6118392865503952e-06, "loss": 0.0008, "step": 90330 }, { "epoch": 1.5267485191350567, "grad_norm": 0.09905547648668289, "learning_rate": 1.6107548430486746e-06, "loss": 0.0015, "step": 90340 }, { "epoch": 1.526917519413907, "grad_norm": 0.05093686282634735, "learning_rate": 1.6096706944277023e-06, "loss": 0.0008, "step": 90350 }, { "epoch": 1.5270865196927574, "grad_norm": 0.022593246772885323, "learning_rate": 1.6085868407818062e-06, "loss": 0.0006, "step": 90360 }, { "epoch": 1.5272555199716078, "grad_norm": 0.000559734005946666, "learning_rate": 1.6075032822052883e-06, "loss": 0.0015, "step": 90370 }, { "epoch": 1.5274245202504584, "grad_norm": 0.036225344985723495, "learning_rate": 1.6064200187924207e-06, "loss": 0.0011, "step": 90380 }, { "epoch": 1.527593520529309, "grad_norm": 0.022825142368674278, "learning_rate": 1.605337050637455e-06, "loss": 0.001, "step": 90390 }, { "epoch": 1.5277625208081593, "grad_norm": 0.06424136459827423, "learning_rate": 1.6042543778346115e-06, "loss": 0.0008, "step": 90400 }, { "epoch": 1.5279315210870097, "grad_norm": 0.005746909417212009, "learning_rate": 1.6031720004780921e-06, "loss": 0.0003, "step": 90410 }, { "epoch": 1.5281005213658603, "grad_norm": 0.017361126840114594, "learning_rate": 1.6020899186620642e-06, "loss": 0.0006, "step": 90420 }, { "epoch": 1.5282695216447109, "grad_norm": 0.09066200256347656, "learning_rate": 1.6010081324806792e-06, "loss": 0.0004, "step": 90430 }, { "epoch": 1.5284385219235612, "grad_norm": 0.048238012939691544, "learning_rate": 1.5999266420280529e-06, "loss": 0.001, "step": 90440 }, { "epoch": 1.5286075222024116, "grad_norm": 0.05118170753121376, "learning_rate": 1.5988454473982845e-06, "loss": 0.0009, "step": 90450 }, { "epoch": 1.528776522481262, "grad_norm": 0.017097176983952522, "learning_rate": 1.5977645486854393e-06, "loss": 0.0012, "step": 90460 }, { "epoch": 1.5289455227601125, "grad_norm": 0.1016089916229248, "learning_rate": 1.5966839459835626e-06, "loss": 0.0009, "step": 90470 }, { "epoch": 1.5291145230389631, "grad_norm": 0.0010697973193600774, "learning_rate": 1.5956036393866737e-06, "loss": 0.0008, "step": 90480 }, { "epoch": 1.5292835233178135, "grad_norm": 0.02048424817621708, "learning_rate": 1.5945236289887611e-06, "loss": 0.0006, "step": 90490 }, { "epoch": 1.5294525235966638, "grad_norm": 0.0900135487318039, "learning_rate": 1.5934439148837926e-06, "loss": 0.001, "step": 90500 }, { "epoch": 1.5296215238755144, "grad_norm": 0.03776565194129944, "learning_rate": 1.592364497165711e-06, "loss": 0.0009, "step": 90510 }, { "epoch": 1.529790524154365, "grad_norm": 0.1263919621706009, "learning_rate": 1.5912853759284264e-06, "loss": 0.0008, "step": 90520 }, { "epoch": 1.5299595244332154, "grad_norm": 0.009383470751345158, "learning_rate": 1.590206551265831e-06, "loss": 0.0004, "step": 90530 }, { "epoch": 1.5301285247120657, "grad_norm": 0.048516057431697845, "learning_rate": 1.5891280232717854e-06, "loss": 0.0006, "step": 90540 }, { "epoch": 1.530297524990916, "grad_norm": 0.174342080950737, "learning_rate": 1.588049792040129e-06, "loss": 0.0013, "step": 90550 }, { "epoch": 1.5304665252697667, "grad_norm": 0.012062018737196922, "learning_rate": 1.5869718576646702e-06, "loss": 0.0005, "step": 90560 }, { "epoch": 1.5306355255486173, "grad_norm": 0.0026453419122844934, "learning_rate": 1.5858942202391986e-06, "loss": 0.0008, "step": 90570 }, { "epoch": 1.5308045258274676, "grad_norm": 0.024594619870185852, "learning_rate": 1.5848168798574699e-06, "loss": 0.0009, "step": 90580 }, { "epoch": 1.530973526106318, "grad_norm": 0.07544858008623123, "learning_rate": 1.5837398366132206e-06, "loss": 0.0009, "step": 90590 }, { "epoch": 1.5311425263851686, "grad_norm": 0.08281031996011734, "learning_rate": 1.5826630906001561e-06, "loss": 0.0007, "step": 90600 }, { "epoch": 1.5313115266640192, "grad_norm": 0.008873536251485348, "learning_rate": 1.5815866419119624e-06, "loss": 0.0006, "step": 90610 }, { "epoch": 1.5314805269428695, "grad_norm": 0.04672101140022278, "learning_rate": 1.5805104906422914e-06, "loss": 0.0014, "step": 90620 }, { "epoch": 1.5316495272217199, "grad_norm": 0.023583827540278435, "learning_rate": 1.5794346368847752e-06, "loss": 0.0007, "step": 90630 }, { "epoch": 1.5318185275005702, "grad_norm": 0.09966600686311722, "learning_rate": 1.5783590807330206e-06, "loss": 0.0006, "step": 90640 }, { "epoch": 1.5319875277794208, "grad_norm": 0.05041343718767166, "learning_rate": 1.5772838222806024e-06, "loss": 0.0007, "step": 90650 }, { "epoch": 1.5321565280582714, "grad_norm": 0.006506835110485554, "learning_rate": 1.5762088616210769e-06, "loss": 0.0003, "step": 90660 }, { "epoch": 1.5323255283371218, "grad_norm": 0.06299679726362228, "learning_rate": 1.575134198847968e-06, "loss": 0.0007, "step": 90670 }, { "epoch": 1.5324945286159721, "grad_norm": 0.028970442712306976, "learning_rate": 1.5740598340547769e-06, "loss": 0.001, "step": 90680 }, { "epoch": 1.5326635288948227, "grad_norm": 0.07725638151168823, "learning_rate": 1.5729857673349797e-06, "loss": 0.0006, "step": 90690 }, { "epoch": 1.5328325291736733, "grad_norm": 0.008100302889943123, "learning_rate": 1.5719119987820264e-06, "loss": 0.0008, "step": 90700 }, { "epoch": 1.5330015294525237, "grad_norm": 0.00746137136593461, "learning_rate": 1.570838528489337e-06, "loss": 0.0004, "step": 90710 }, { "epoch": 1.533170529731374, "grad_norm": 0.06312989443540573, "learning_rate": 1.569765356550312e-06, "loss": 0.0008, "step": 90720 }, { "epoch": 1.5333395300102244, "grad_norm": 0.10903391987085342, "learning_rate": 1.568692483058319e-06, "loss": 0.0007, "step": 90730 }, { "epoch": 1.533508530289075, "grad_norm": 0.14214962720870972, "learning_rate": 1.5676199081067067e-06, "loss": 0.0017, "step": 90740 }, { "epoch": 1.5336775305679256, "grad_norm": 0.015006943605840206, "learning_rate": 1.5665476317887912e-06, "loss": 0.0004, "step": 90750 }, { "epoch": 1.533846530846776, "grad_norm": 0.005455281585454941, "learning_rate": 1.5654756541978689e-06, "loss": 0.0005, "step": 90760 }, { "epoch": 1.5340155311256263, "grad_norm": 0.003434483427554369, "learning_rate": 1.564403975427204e-06, "loss": 0.0004, "step": 90770 }, { "epoch": 1.5341845314044769, "grad_norm": 0.017020530998706818, "learning_rate": 1.5633325955700402e-06, "loss": 0.0003, "step": 90780 }, { "epoch": 1.5343535316833274, "grad_norm": 0.029370540753006935, "learning_rate": 1.5622615147195903e-06, "loss": 0.0006, "step": 90790 }, { "epoch": 1.5345225319621778, "grad_norm": 0.06792864948511124, "learning_rate": 1.5611907329690462e-06, "loss": 0.0007, "step": 90800 }, { "epoch": 1.5346915322410282, "grad_norm": 0.048491671681404114, "learning_rate": 1.560120250411568e-06, "loss": 0.0054, "step": 90810 }, { "epoch": 1.5348605325198785, "grad_norm": 0.019118905067443848, "learning_rate": 1.5590500671402969e-06, "loss": 0.0011, "step": 90820 }, { "epoch": 1.5350295327987291, "grad_norm": 0.03405429795384407, "learning_rate": 1.5579801832483394e-06, "loss": 0.0007, "step": 90830 }, { "epoch": 1.5351985330775797, "grad_norm": 0.0075188977643847466, "learning_rate": 1.5569105988287836e-06, "loss": 0.0004, "step": 90840 }, { "epoch": 1.53536753335643, "grad_norm": 0.05416296422481537, "learning_rate": 1.5558413139746887e-06, "loss": 0.0009, "step": 90850 }, { "epoch": 1.5355365336352804, "grad_norm": 0.028006546199321747, "learning_rate": 1.5547723287790856e-06, "loss": 0.0008, "step": 90860 }, { "epoch": 1.535705533914131, "grad_norm": 0.026694685220718384, "learning_rate": 1.5537036433349827e-06, "loss": 0.0006, "step": 90870 }, { "epoch": 1.5358745341929814, "grad_norm": 0.033826615661382675, "learning_rate": 1.5526352577353604e-06, "loss": 0.0005, "step": 90880 }, { "epoch": 1.536043534471832, "grad_norm": 0.015938522294163704, "learning_rate": 1.5515671720731746e-06, "loss": 0.0013, "step": 90890 }, { "epoch": 1.5362125347506823, "grad_norm": 0.011160106398165226, "learning_rate": 1.550499386441352e-06, "loss": 0.0008, "step": 90900 }, { "epoch": 1.5363815350295327, "grad_norm": 0.009316002018749714, "learning_rate": 1.5494319009327968e-06, "loss": 0.0005, "step": 90910 }, { "epoch": 1.5365505353083833, "grad_norm": 0.04132316634058952, "learning_rate": 1.5483647156403837e-06, "loss": 0.0003, "step": 90920 }, { "epoch": 1.5367195355872338, "grad_norm": 0.04599205404520035, "learning_rate": 1.5472978306569653e-06, "loss": 0.0008, "step": 90930 }, { "epoch": 1.5368885358660842, "grad_norm": 0.02022514119744301, "learning_rate": 1.5462312460753626e-06, "loss": 0.0005, "step": 90940 }, { "epoch": 1.5370575361449346, "grad_norm": 0.005118310451507568, "learning_rate": 1.5451649619883774e-06, "loss": 0.0009, "step": 90950 }, { "epoch": 1.5372265364237852, "grad_norm": 0.040655795484781265, "learning_rate": 1.5440989784887773e-06, "loss": 0.0007, "step": 90960 }, { "epoch": 1.5373955367026355, "grad_norm": 0.03651266545057297, "learning_rate": 1.5430332956693122e-06, "loss": 0.0006, "step": 90970 }, { "epoch": 1.537564536981486, "grad_norm": 0.003880225121974945, "learning_rate": 1.5419679136226984e-06, "loss": 0.0008, "step": 90980 }, { "epoch": 1.5377335372603365, "grad_norm": 0.060235753655433655, "learning_rate": 1.540902832441632e-06, "loss": 0.0005, "step": 90990 }, { "epoch": 1.5379025375391868, "grad_norm": 0.01511063426733017, "learning_rate": 1.5398380522187778e-06, "loss": 0.0008, "step": 91000 }, { "epoch": 1.5380715378180374, "grad_norm": 0.03903907164931297, "learning_rate": 1.538773573046779e-06, "loss": 0.0007, "step": 91010 }, { "epoch": 1.538240538096888, "grad_norm": 0.00315358629450202, "learning_rate": 1.5377093950182476e-06, "loss": 0.0006, "step": 91020 }, { "epoch": 1.5384095383757384, "grad_norm": 0.011011041700839996, "learning_rate": 1.5366455182257762e-06, "loss": 0.0006, "step": 91030 }, { "epoch": 1.5385785386545887, "grad_norm": 0.007849492132663727, "learning_rate": 1.5355819427619228e-06, "loss": 0.0007, "step": 91040 }, { "epoch": 1.538747538933439, "grad_norm": 0.03877522423863411, "learning_rate": 1.5345186687192265e-06, "loss": 0.0009, "step": 91050 }, { "epoch": 1.5389165392122897, "grad_norm": 0.023960836231708527, "learning_rate": 1.5334556961901964e-06, "loss": 0.002, "step": 91060 }, { "epoch": 1.5390855394911402, "grad_norm": 0.05260419473052025, "learning_rate": 1.532393025267318e-06, "loss": 0.0007, "step": 91070 }, { "epoch": 1.5392545397699906, "grad_norm": 0.0148914884775877, "learning_rate": 1.5313306560430457e-06, "loss": 0.0008, "step": 91080 }, { "epoch": 1.539423540048841, "grad_norm": 0.032751284539699554, "learning_rate": 1.5302685886098146e-06, "loss": 0.0007, "step": 91090 }, { "epoch": 1.5395925403276915, "grad_norm": 0.0445295013487339, "learning_rate": 1.5292068230600254e-06, "loss": 0.0005, "step": 91100 }, { "epoch": 1.5397615406065421, "grad_norm": 0.008731862530112267, "learning_rate": 1.5281453594860607e-06, "loss": 0.0005, "step": 91110 }, { "epoch": 1.5399305408853925, "grad_norm": 0.05220063775777817, "learning_rate": 1.5270841979802697e-06, "loss": 0.0008, "step": 91120 }, { "epoch": 1.5400995411642429, "grad_norm": 0.07239104062318802, "learning_rate": 1.5260233386349798e-06, "loss": 0.0003, "step": 91130 }, { "epoch": 1.5402685414430932, "grad_norm": 0.20314405858516693, "learning_rate": 1.5249627815424933e-06, "loss": 0.0007, "step": 91140 }, { "epoch": 1.5404375417219438, "grad_norm": 0.00026998284738510847, "learning_rate": 1.5239025267950802e-06, "loss": 0.0007, "step": 91150 }, { "epoch": 1.5406065420007944, "grad_norm": 0.0450599379837513, "learning_rate": 1.5228425744849906e-06, "loss": 0.0008, "step": 91160 }, { "epoch": 1.5407755422796447, "grad_norm": 0.013693476095795631, "learning_rate": 1.5217829247044424e-06, "loss": 0.0007, "step": 91170 }, { "epoch": 1.5409445425584951, "grad_norm": 0.0012772573390975595, "learning_rate": 1.5207235775456343e-06, "loss": 0.0006, "step": 91180 }, { "epoch": 1.5411135428373457, "grad_norm": 0.018909180536866188, "learning_rate": 1.5196645331007305e-06, "loss": 0.0006, "step": 91190 }, { "epoch": 1.5412825431161963, "grad_norm": 0.07891616970300674, "learning_rate": 1.5186057914618768e-06, "loss": 0.0006, "step": 91200 }, { "epoch": 1.5414515433950466, "grad_norm": 0.02020966075360775, "learning_rate": 1.5175473527211847e-06, "loss": 0.0009, "step": 91210 }, { "epoch": 1.541620543673897, "grad_norm": 0.06416349858045578, "learning_rate": 1.5164892169707485e-06, "loss": 0.0007, "step": 91220 }, { "epoch": 1.5417895439527474, "grad_norm": 0.02633839100599289, "learning_rate": 1.5154313843026259e-06, "loss": 0.0005, "step": 91230 }, { "epoch": 1.541958544231598, "grad_norm": 0.009072850458323956, "learning_rate": 1.5143738548088566e-06, "loss": 0.0008, "step": 91240 }, { "epoch": 1.5421275445104485, "grad_norm": 0.003974773455411196, "learning_rate": 1.513316628581451e-06, "loss": 0.0009, "step": 91250 }, { "epoch": 1.542296544789299, "grad_norm": 0.014070197939872742, "learning_rate": 1.512259705712391e-06, "loss": 0.0005, "step": 91260 }, { "epoch": 1.5424655450681493, "grad_norm": 0.02480059303343296, "learning_rate": 1.511203086293635e-06, "loss": 0.0006, "step": 91270 }, { "epoch": 1.5426345453469998, "grad_norm": 0.061281755566596985, "learning_rate": 1.5101467704171159e-06, "loss": 0.0005, "step": 91280 }, { "epoch": 1.5428035456258504, "grad_norm": 0.03887879103422165, "learning_rate": 1.5090907581747349e-06, "loss": 0.001, "step": 91290 }, { "epoch": 1.5429725459047008, "grad_norm": 0.03775886818766594, "learning_rate": 1.5080350496583729e-06, "loss": 0.0004, "step": 91300 }, { "epoch": 1.5431415461835511, "grad_norm": 0.04389318823814392, "learning_rate": 1.5069796449598795e-06, "loss": 0.0016, "step": 91310 }, { "epoch": 1.5433105464624015, "grad_norm": 0.01458847988396883, "learning_rate": 1.5059245441710824e-06, "loss": 0.0007, "step": 91320 }, { "epoch": 1.543479546741252, "grad_norm": 0.10211494565010071, "learning_rate": 1.5048697473837775e-06, "loss": 0.0011, "step": 91330 }, { "epoch": 1.5436485470201027, "grad_norm": 0.044371504336595535, "learning_rate": 1.5038152546897406e-06, "loss": 0.0008, "step": 91340 }, { "epoch": 1.543817547298953, "grad_norm": 0.05276376008987427, "learning_rate": 1.5027610661807145e-06, "loss": 0.0005, "step": 91350 }, { "epoch": 1.5439865475778034, "grad_norm": 0.022463826462626457, "learning_rate": 1.5017071819484214e-06, "loss": 0.0004, "step": 91360 }, { "epoch": 1.544155547856654, "grad_norm": 0.02301204949617386, "learning_rate": 1.5006536020845518e-06, "loss": 0.0009, "step": 91370 }, { "epoch": 1.5443245481355046, "grad_norm": 0.10036075860261917, "learning_rate": 1.4996003266807734e-06, "loss": 0.0005, "step": 91380 }, { "epoch": 1.544493548414355, "grad_norm": 0.002767896978184581, "learning_rate": 1.4985473558287278e-06, "loss": 0.0006, "step": 91390 }, { "epoch": 1.5446625486932053, "grad_norm": 0.03478080406785011, "learning_rate": 1.497494689620026e-06, "loss": 0.0006, "step": 91400 }, { "epoch": 1.5448315489720557, "grad_norm": 0.000982145662419498, "learning_rate": 1.4964423281462576e-06, "loss": 0.0005, "step": 91410 }, { "epoch": 1.5450005492509062, "grad_norm": 0.04009811207652092, "learning_rate": 1.4953902714989798e-06, "loss": 0.0006, "step": 91420 }, { "epoch": 1.5451695495297568, "grad_norm": 0.0612616166472435, "learning_rate": 1.494338519769729e-06, "loss": 0.0007, "step": 91430 }, { "epoch": 1.5453385498086072, "grad_norm": 0.028253573924303055, "learning_rate": 1.4932870730500143e-06, "loss": 0.0006, "step": 91440 }, { "epoch": 1.5455075500874575, "grad_norm": 0.028928278014063835, "learning_rate": 1.4922359314313122e-06, "loss": 0.0008, "step": 91450 }, { "epoch": 1.5456765503663081, "grad_norm": 0.10750661045312881, "learning_rate": 1.4911850950050806e-06, "loss": 0.0011, "step": 91460 }, { "epoch": 1.5458455506451587, "grad_norm": 0.029245685786008835, "learning_rate": 1.4901345638627467e-06, "loss": 0.0008, "step": 91470 }, { "epoch": 1.546014550924009, "grad_norm": 0.0031404420733451843, "learning_rate": 1.489084338095711e-06, "loss": 0.0005, "step": 91480 }, { "epoch": 1.5461835512028594, "grad_norm": 0.0078032235614955425, "learning_rate": 1.4880344177953493e-06, "loss": 0.0017, "step": 91490 }, { "epoch": 1.5463525514817098, "grad_norm": 0.007911061868071556, "learning_rate": 1.4869848030530081e-06, "loss": 0.0007, "step": 91500 }, { "epoch": 1.5465215517605604, "grad_norm": 0.12613831460475922, "learning_rate": 1.4859354939600118e-06, "loss": 0.0019, "step": 91510 }, { "epoch": 1.546690552039411, "grad_norm": 0.050074364989995956, "learning_rate": 1.4848864906076516e-06, "loss": 0.0011, "step": 91520 }, { "epoch": 1.5468595523182613, "grad_norm": 0.01013795007020235, "learning_rate": 1.4838377930871994e-06, "loss": 0.0011, "step": 91530 }, { "epoch": 1.5470285525971117, "grad_norm": 0.040607839822769165, "learning_rate": 1.4827894014898936e-06, "loss": 0.0007, "step": 91540 }, { "epoch": 1.5471975528759623, "grad_norm": 0.06654667854309082, "learning_rate": 1.4817413159069533e-06, "loss": 0.0007, "step": 91550 }, { "epoch": 1.5473665531548129, "grad_norm": 0.05213646590709686, "learning_rate": 1.4806935364295627e-06, "loss": 0.0022, "step": 91560 }, { "epoch": 1.5475355534336632, "grad_norm": 0.01856975443661213, "learning_rate": 1.479646063148888e-06, "loss": 0.0006, "step": 91570 }, { "epoch": 1.5477045537125136, "grad_norm": 0.0003089377423748374, "learning_rate": 1.4785988961560598e-06, "loss": 0.0007, "step": 91580 }, { "epoch": 1.547873553991364, "grad_norm": 0.05260462313890457, "learning_rate": 1.477552035542192e-06, "loss": 0.0007, "step": 91590 }, { "epoch": 1.5480425542702145, "grad_norm": 0.005849786102771759, "learning_rate": 1.4765054813983616e-06, "loss": 0.0004, "step": 91600 }, { "epoch": 1.5482115545490651, "grad_norm": 0.029813293367624283, "learning_rate": 1.475459233815626e-06, "loss": 0.0015, "step": 91610 }, { "epoch": 1.5483805548279155, "grad_norm": 0.025738809257745743, "learning_rate": 1.4744132928850151e-06, "loss": 0.0005, "step": 91620 }, { "epoch": 1.5485495551067658, "grad_norm": 0.07673606276512146, "learning_rate": 1.4733676586975288e-06, "loss": 0.0005, "step": 91630 }, { "epoch": 1.5487185553856164, "grad_norm": 0.019551604986190796, "learning_rate": 1.4723223313441426e-06, "loss": 0.001, "step": 91640 }, { "epoch": 1.548887555664467, "grad_norm": 0.018417326733469963, "learning_rate": 1.4712773109158057e-06, "loss": 0.0009, "step": 91650 }, { "epoch": 1.5490565559433174, "grad_norm": 0.009148292243480682, "learning_rate": 1.4702325975034416e-06, "loss": 0.0006, "step": 91660 }, { "epoch": 1.5492255562221677, "grad_norm": 0.3337763249874115, "learning_rate": 1.4691881911979417e-06, "loss": 0.0016, "step": 91670 }, { "epoch": 1.549394556501018, "grad_norm": 0.07064887881278992, "learning_rate": 1.468144092090178e-06, "loss": 0.0008, "step": 91680 }, { "epoch": 1.5495635567798687, "grad_norm": 0.010934130288660526, "learning_rate": 1.4671003002709888e-06, "loss": 0.0007, "step": 91690 }, { "epoch": 1.5497325570587193, "grad_norm": 0.004269908182322979, "learning_rate": 1.4660568158311928e-06, "loss": 0.0009, "step": 91700 }, { "epoch": 1.5499015573375696, "grad_norm": 0.09389406442642212, "learning_rate": 1.4650136388615743e-06, "loss": 0.0008, "step": 91710 }, { "epoch": 1.55007055761642, "grad_norm": 0.019758351147174835, "learning_rate": 1.4639707694528982e-06, "loss": 0.0004, "step": 91720 }, { "epoch": 1.5502395578952706, "grad_norm": 0.013832749798893929, "learning_rate": 1.4629282076958955e-06, "loss": 0.0005, "step": 91730 }, { "epoch": 1.550408558174121, "grad_norm": 0.19052620232105255, "learning_rate": 1.4618859536812785e-06, "loss": 0.0018, "step": 91740 }, { "epoch": 1.5505775584529715, "grad_norm": 0.007601190824061632, "learning_rate": 1.4608440074997244e-06, "loss": 0.002, "step": 91750 }, { "epoch": 1.5507465587318219, "grad_norm": 0.012108417227864265, "learning_rate": 1.459802369241891e-06, "loss": 0.0004, "step": 91760 }, { "epoch": 1.5509155590106722, "grad_norm": 0.026216894388198853, "learning_rate": 1.4587610389984024e-06, "loss": 0.001, "step": 91770 }, { "epoch": 1.5510845592895228, "grad_norm": 0.016562877222895622, "learning_rate": 1.4577200168598625e-06, "loss": 0.0006, "step": 91780 }, { "epoch": 1.5512535595683734, "grad_norm": 0.045029088854789734, "learning_rate": 1.4566793029168425e-06, "loss": 0.0011, "step": 91790 }, { "epoch": 1.5514225598472238, "grad_norm": 0.026214392855763435, "learning_rate": 1.4556388972598922e-06, "loss": 0.0007, "step": 91800 }, { "epoch": 1.5515915601260741, "grad_norm": 0.002082178369164467, "learning_rate": 1.4545987999795297e-06, "loss": 0.0006, "step": 91810 }, { "epoch": 1.5517605604049247, "grad_norm": 0.005932193715125322, "learning_rate": 1.453559011166249e-06, "loss": 0.0004, "step": 91820 }, { "epoch": 1.551929560683775, "grad_norm": 0.07331282645463943, "learning_rate": 1.4525195309105172e-06, "loss": 0.0004, "step": 91830 }, { "epoch": 1.5520985609626257, "grad_norm": 0.00024068939092103392, "learning_rate": 1.451480359302776e-06, "loss": 0.0006, "step": 91840 }, { "epoch": 1.552267561241476, "grad_norm": 0.01877421885728836, "learning_rate": 1.4504414964334351e-06, "loss": 0.0013, "step": 91850 }, { "epoch": 1.5524365615203264, "grad_norm": 0.03928346931934357, "learning_rate": 1.449402942392883e-06, "loss": 0.0006, "step": 91860 }, { "epoch": 1.552605561799177, "grad_norm": 0.05283835157752037, "learning_rate": 1.4483646972714765e-06, "loss": 0.0008, "step": 91870 }, { "epoch": 1.5527745620780276, "grad_norm": 0.036525264382362366, "learning_rate": 1.4473267611595514e-06, "loss": 0.0012, "step": 91880 }, { "epoch": 1.552943562356878, "grad_norm": 0.02714865282177925, "learning_rate": 1.4462891341474094e-06, "loss": 0.0006, "step": 91890 }, { "epoch": 1.5531125626357283, "grad_norm": 0.019561218097805977, "learning_rate": 1.445251816325331e-06, "loss": 0.0006, "step": 91900 }, { "epoch": 1.5532815629145789, "grad_norm": 0.0465192086994648, "learning_rate": 1.444214807783569e-06, "loss": 0.0006, "step": 91910 }, { "epoch": 1.5534505631934292, "grad_norm": 0.016598006710410118, "learning_rate": 1.4431781086123453e-06, "loss": 0.001, "step": 91920 }, { "epoch": 1.5536195634722798, "grad_norm": 0.0028368341736495495, "learning_rate": 1.442141718901861e-06, "loss": 0.0005, "step": 91930 }, { "epoch": 1.5537885637511302, "grad_norm": 0.1509881168603897, "learning_rate": 1.441105638742284e-06, "loss": 0.0012, "step": 91940 }, { "epoch": 1.5539575640299805, "grad_norm": 0.14502137899398804, "learning_rate": 1.4400698682237606e-06, "loss": 0.0016, "step": 91950 }, { "epoch": 1.5541265643088311, "grad_norm": 0.016168328002095222, "learning_rate": 1.439034407436406e-06, "loss": 0.002, "step": 91960 }, { "epoch": 1.5542955645876817, "grad_norm": 0.16724376380443573, "learning_rate": 1.4379992564703126e-06, "loss": 0.0006, "step": 91970 }, { "epoch": 1.554464564866532, "grad_norm": 0.005992779042571783, "learning_rate": 1.436964415415541e-06, "loss": 0.0007, "step": 91980 }, { "epoch": 1.5546335651453824, "grad_norm": 0.10158688575029373, "learning_rate": 1.43592988436213e-06, "loss": 0.0013, "step": 91990 }, { "epoch": 1.5548025654242328, "grad_norm": 0.05698493868112564, "learning_rate": 1.4348956634000855e-06, "loss": 0.0008, "step": 92000 }, { "epoch": 1.5549715657030834, "grad_norm": 0.0953177884221077, "learning_rate": 1.4338617526193915e-06, "loss": 0.0008, "step": 92010 }, { "epoch": 1.555140565981934, "grad_norm": 0.02836747094988823, "learning_rate": 1.4328281521100034e-06, "loss": 0.0014, "step": 92020 }, { "epoch": 1.5553095662607843, "grad_norm": 0.05428193509578705, "learning_rate": 1.4317948619618515e-06, "loss": 0.0007, "step": 92030 }, { "epoch": 1.5554785665396347, "grad_norm": 0.04886719584465027, "learning_rate": 1.4307618822648328e-06, "loss": 0.001, "step": 92040 }, { "epoch": 1.5556475668184853, "grad_norm": 0.029720531776547432, "learning_rate": 1.4297292131088258e-06, "loss": 0.0004, "step": 92050 }, { "epoch": 1.5558165670973358, "grad_norm": 0.010107689537107944, "learning_rate": 1.4286968545836739e-06, "loss": 0.0002, "step": 92060 }, { "epoch": 1.5559855673761862, "grad_norm": 0.044548213481903076, "learning_rate": 1.4276648067792004e-06, "loss": 0.0004, "step": 92070 }, { "epoch": 1.5561545676550366, "grad_norm": 0.031103257089853287, "learning_rate": 1.4266330697851955e-06, "loss": 0.0008, "step": 92080 }, { "epoch": 1.556323567933887, "grad_norm": 0.04000338166952133, "learning_rate": 1.4256016436914282e-06, "loss": 0.0005, "step": 92090 }, { "epoch": 1.5564925682127375, "grad_norm": 0.009134330786764622, "learning_rate": 1.424570528587635e-06, "loss": 0.0015, "step": 92100 }, { "epoch": 1.556661568491588, "grad_norm": 0.0020735403522849083, "learning_rate": 1.4235397245635307e-06, "loss": 0.0005, "step": 92110 }, { "epoch": 1.5568305687704385, "grad_norm": 0.022044990211725235, "learning_rate": 1.4225092317087974e-06, "loss": 0.0005, "step": 92120 }, { "epoch": 1.5569995690492888, "grad_norm": 0.04893451929092407, "learning_rate": 1.4214790501130953e-06, "loss": 0.0005, "step": 92130 }, { "epoch": 1.5571685693281394, "grad_norm": 0.06270928680896759, "learning_rate": 1.420449179866053e-06, "loss": 0.0015, "step": 92140 }, { "epoch": 1.55733756960699, "grad_norm": 0.0685892105102539, "learning_rate": 1.4194196210572754e-06, "loss": 0.0006, "step": 92150 }, { "epoch": 1.5575065698858404, "grad_norm": 0.006450988817960024, "learning_rate": 1.4183903737763404e-06, "loss": 0.0004, "step": 92160 }, { "epoch": 1.5576755701646907, "grad_norm": 0.020199205726385117, "learning_rate": 1.4173614381127948e-06, "loss": 0.0006, "step": 92170 }, { "epoch": 1.557844570443541, "grad_norm": 0.020659884437918663, "learning_rate": 1.4163328141561638e-06, "loss": 0.0008, "step": 92180 }, { "epoch": 1.5580135707223917, "grad_norm": 0.06154713034629822, "learning_rate": 1.41530450199594e-06, "loss": 0.001, "step": 92190 }, { "epoch": 1.5581825710012422, "grad_norm": 0.023259872570633888, "learning_rate": 1.4142765017215926e-06, "loss": 0.0011, "step": 92200 }, { "epoch": 1.5583515712800926, "grad_norm": 0.035250257700681686, "learning_rate": 1.4132488134225642e-06, "loss": 0.001, "step": 92210 }, { "epoch": 1.558520571558943, "grad_norm": 0.043679896742105484, "learning_rate": 1.412221437188266e-06, "loss": 0.0006, "step": 92220 }, { "epoch": 1.5586895718377936, "grad_norm": 0.06833620369434357, "learning_rate": 1.4111943731080858e-06, "loss": 0.0006, "step": 92230 }, { "epoch": 1.5588585721166441, "grad_norm": 0.01950804889202118, "learning_rate": 1.4101676212713843e-06, "loss": 0.0006, "step": 92240 }, { "epoch": 1.5590275723954945, "grad_norm": 0.06699059903621674, "learning_rate": 1.409141181767492e-06, "loss": 0.0007, "step": 92250 }, { "epoch": 1.5591965726743449, "grad_norm": 0.010098809376358986, "learning_rate": 1.4081150546857165e-06, "loss": 0.001, "step": 92260 }, { "epoch": 1.5593655729531952, "grad_norm": 0.010151817463338375, "learning_rate": 1.4070892401153324e-06, "loss": 0.0011, "step": 92270 }, { "epoch": 1.5595345732320458, "grad_norm": 0.0016320428112521768, "learning_rate": 1.4060637381455943e-06, "loss": 0.0009, "step": 92280 }, { "epoch": 1.5597035735108964, "grad_norm": 0.014087764546275139, "learning_rate": 1.4050385488657225e-06, "loss": 0.0006, "step": 92290 }, { "epoch": 1.5598725737897468, "grad_norm": 0.0018531668465584517, "learning_rate": 1.404013672364916e-06, "loss": 0.0009, "step": 92300 }, { "epoch": 1.5600415740685971, "grad_norm": 0.0181476641446352, "learning_rate": 1.4029891087323422e-06, "loss": 0.0004, "step": 92310 }, { "epoch": 1.5602105743474477, "grad_norm": 0.04024354740977287, "learning_rate": 1.401964858057145e-06, "loss": 0.0015, "step": 92320 }, { "epoch": 1.5603795746262983, "grad_norm": 0.024551119655370712, "learning_rate": 1.4009409204284363e-06, "loss": 0.0009, "step": 92330 }, { "epoch": 1.5605485749051486, "grad_norm": 0.028060588985681534, "learning_rate": 1.3999172959353074e-06, "loss": 0.0008, "step": 92340 }, { "epoch": 1.560717575183999, "grad_norm": 0.03398832306265831, "learning_rate": 1.3988939846668149e-06, "loss": 0.001, "step": 92350 }, { "epoch": 1.5608865754628494, "grad_norm": 0.08230680972337723, "learning_rate": 1.3978709867119945e-06, "loss": 0.0008, "step": 92360 }, { "epoch": 1.5610555757417, "grad_norm": 0.14842632412910461, "learning_rate": 1.39684830215985e-06, "loss": 0.0008, "step": 92370 }, { "epoch": 1.5612245760205505, "grad_norm": 0.010570487938821316, "learning_rate": 1.3958259310993605e-06, "loss": 0.0009, "step": 92380 }, { "epoch": 1.561393576299401, "grad_norm": 0.057076577097177505, "learning_rate": 1.3948038736194792e-06, "loss": 0.0009, "step": 92390 }, { "epoch": 1.5615625765782513, "grad_norm": 0.02614855021238327, "learning_rate": 1.393782129809127e-06, "loss": 0.0006, "step": 92400 }, { "epoch": 1.5617315768571018, "grad_norm": 0.015939511358737946, "learning_rate": 1.3927606997572024e-06, "loss": 0.0003, "step": 92410 }, { "epoch": 1.5619005771359524, "grad_norm": 0.007223693188279867, "learning_rate": 1.3917395835525739e-06, "loss": 0.0011, "step": 92420 }, { "epoch": 1.5620695774148028, "grad_norm": 0.023942166939377785, "learning_rate": 1.390718781284086e-06, "loss": 0.0008, "step": 92430 }, { "epoch": 1.5622385776936532, "grad_norm": 0.03485532104969025, "learning_rate": 1.3896982930405494e-06, "loss": 0.0005, "step": 92440 }, { "epoch": 1.5624075779725035, "grad_norm": 0.007194232195615768, "learning_rate": 1.3886781189107556e-06, "loss": 0.0005, "step": 92450 }, { "epoch": 1.562576578251354, "grad_norm": 0.014133160002529621, "learning_rate": 1.3876582589834614e-06, "loss": 0.001, "step": 92460 }, { "epoch": 1.5627455785302047, "grad_norm": 0.07405367493629456, "learning_rate": 1.3866387133474018e-06, "loss": 0.0011, "step": 92470 }, { "epoch": 1.562914578809055, "grad_norm": 0.06717579811811447, "learning_rate": 1.38561948209128e-06, "loss": 0.0005, "step": 92480 }, { "epoch": 1.5630835790879054, "grad_norm": 0.019617587327957153, "learning_rate": 1.384600565303777e-06, "loss": 0.0005, "step": 92490 }, { "epoch": 1.563252579366756, "grad_norm": 0.09758741408586502, "learning_rate": 1.3835819630735403e-06, "loss": 0.0006, "step": 92500 }, { "epoch": 1.5634215796456066, "grad_norm": 0.06455901265144348, "learning_rate": 1.3825636754891958e-06, "loss": 0.0006, "step": 92510 }, { "epoch": 1.563590579924457, "grad_norm": 0.021474866196513176, "learning_rate": 1.3815457026393375e-06, "loss": 0.0004, "step": 92520 }, { "epoch": 1.5637595802033073, "grad_norm": 0.05228448659181595, "learning_rate": 1.3805280446125364e-06, "loss": 0.0011, "step": 92530 }, { "epoch": 1.5639285804821577, "grad_norm": 0.00021487113554030657, "learning_rate": 1.3795107014973302e-06, "loss": 0.0004, "step": 92540 }, { "epoch": 1.5640975807610082, "grad_norm": 0.007745729759335518, "learning_rate": 1.3784936733822364e-06, "loss": 0.001, "step": 92550 }, { "epoch": 1.5642665810398588, "grad_norm": 0.048509631305933, "learning_rate": 1.3774769603557382e-06, "loss": 0.0006, "step": 92560 }, { "epoch": 1.5644355813187092, "grad_norm": 0.03314121067523956, "learning_rate": 1.3764605625062961e-06, "loss": 0.0009, "step": 92570 }, { "epoch": 1.5646045815975596, "grad_norm": 0.029128141701221466, "learning_rate": 1.3754444799223427e-06, "loss": 0.0014, "step": 92580 }, { "epoch": 1.5647735818764101, "grad_norm": 0.03834288939833641, "learning_rate": 1.3744287126922789e-06, "loss": 0.0005, "step": 92590 }, { "epoch": 1.5649425821552607, "grad_norm": 0.06999506801366806, "learning_rate": 1.3734132609044843e-06, "loss": 0.0023, "step": 92600 }, { "epoch": 1.565111582434111, "grad_norm": 0.04065316915512085, "learning_rate": 1.3723981246473085e-06, "loss": 0.0009, "step": 92610 }, { "epoch": 1.5652805827129614, "grad_norm": 0.029068050906062126, "learning_rate": 1.3713833040090701e-06, "loss": 0.0028, "step": 92620 }, { "epoch": 1.5654495829918118, "grad_norm": 0.0029551261104643345, "learning_rate": 1.3703687990780673e-06, "loss": 0.0012, "step": 92630 }, { "epoch": 1.5656185832706624, "grad_norm": 0.0025054332800209522, "learning_rate": 1.3693546099425632e-06, "loss": 0.0005, "step": 92640 }, { "epoch": 1.565787583549513, "grad_norm": 0.026966840028762817, "learning_rate": 1.3683407366908007e-06, "loss": 0.0003, "step": 92650 }, { "epoch": 1.5659565838283633, "grad_norm": 0.013984468765556812, "learning_rate": 1.3673271794109883e-06, "loss": 0.0008, "step": 92660 }, { "epoch": 1.5661255841072137, "grad_norm": 0.01775095798075199, "learning_rate": 1.3663139381913116e-06, "loss": 0.0004, "step": 92670 }, { "epoch": 1.5662945843860643, "grad_norm": 0.08793025463819504, "learning_rate": 1.3653010131199295e-06, "loss": 0.0007, "step": 92680 }, { "epoch": 1.5664635846649146, "grad_norm": 0.12743797898292542, "learning_rate": 1.3642884042849686e-06, "loss": 0.0008, "step": 92690 }, { "epoch": 1.5666325849437652, "grad_norm": 0.02725476399064064, "learning_rate": 1.3632761117745335e-06, "loss": 0.0017, "step": 92700 }, { "epoch": 1.5668015852226156, "grad_norm": 0.05294652655720711, "learning_rate": 1.3622641356766951e-06, "loss": 0.0013, "step": 92710 }, { "epoch": 1.566970585501466, "grad_norm": 0.02998647838830948, "learning_rate": 1.3612524760795038e-06, "loss": 0.0015, "step": 92720 }, { "epoch": 1.5671395857803165, "grad_norm": 0.009507248178124428, "learning_rate": 1.3602411330709758e-06, "loss": 0.0003, "step": 92730 }, { "epoch": 1.5673085860591671, "grad_norm": 0.0252500269562006, "learning_rate": 1.3592301067391057e-06, "loss": 0.0008, "step": 92740 }, { "epoch": 1.5674775863380175, "grad_norm": 0.0627959668636322, "learning_rate": 1.3582193971718549e-06, "loss": 0.0004, "step": 92750 }, { "epoch": 1.5676465866168678, "grad_norm": 0.04591644927859306, "learning_rate": 1.357209004457163e-06, "loss": 0.0008, "step": 92760 }, { "epoch": 1.5678155868957184, "grad_norm": 0.032508768141269684, "learning_rate": 1.3561989286829353e-06, "loss": 0.0004, "step": 92770 }, { "epoch": 1.5679845871745688, "grad_norm": 0.02691843919456005, "learning_rate": 1.3551891699370556e-06, "loss": 0.0004, "step": 92780 }, { "epoch": 1.5681535874534194, "grad_norm": 0.002264541108161211, "learning_rate": 1.3541797283073781e-06, "loss": 0.0006, "step": 92790 }, { "epoch": 1.5683225877322697, "grad_norm": 0.004401510581374168, "learning_rate": 1.35317060388173e-06, "loss": 0.0014, "step": 92800 }, { "epoch": 1.56849158801112, "grad_norm": 0.051362864673137665, "learning_rate": 1.3521617967479066e-06, "loss": 0.0005, "step": 92810 }, { "epoch": 1.5686605882899707, "grad_norm": 0.0028736076783388853, "learning_rate": 1.3511533069936833e-06, "loss": 0.0006, "step": 92820 }, { "epoch": 1.5688295885688213, "grad_norm": 0.018490580841898918, "learning_rate": 1.3501451347067995e-06, "loss": 0.0006, "step": 92830 }, { "epoch": 1.5689985888476716, "grad_norm": 0.024485796689987183, "learning_rate": 1.3491372799749747e-06, "loss": 0.0008, "step": 92840 }, { "epoch": 1.569167589126522, "grad_norm": 0.038191087543964386, "learning_rate": 1.3481297428858935e-06, "loss": 0.0005, "step": 92850 }, { "epoch": 1.5693365894053726, "grad_norm": 0.07634992152452469, "learning_rate": 1.34712252352722e-06, "loss": 0.0005, "step": 92860 }, { "epoch": 1.569505589684223, "grad_norm": 0.014637090265750885, "learning_rate": 1.3461156219865845e-06, "loss": 0.0005, "step": 92870 }, { "epoch": 1.5696745899630735, "grad_norm": 0.09911244362592697, "learning_rate": 1.345109038351594e-06, "loss": 0.0008, "step": 92880 }, { "epoch": 1.5698435902419239, "grad_norm": 0.03441356495022774, "learning_rate": 1.344102772709825e-06, "loss": 0.0011, "step": 92890 }, { "epoch": 1.5700125905207742, "grad_norm": 0.05653800442814827, "learning_rate": 1.3430968251488285e-06, "loss": 0.0005, "step": 92900 }, { "epoch": 1.5701815907996248, "grad_norm": 0.06830038875341415, "learning_rate": 1.3420911957561255e-06, "loss": 0.0006, "step": 92910 }, { "epoch": 1.5703505910784754, "grad_norm": 0.005915106739848852, "learning_rate": 1.3410858846192116e-06, "loss": 0.0012, "step": 92920 }, { "epoch": 1.5705195913573258, "grad_norm": 0.04270845279097557, "learning_rate": 1.3400808918255549e-06, "loss": 0.0008, "step": 92930 }, { "epoch": 1.5706885916361761, "grad_norm": 0.04755331203341484, "learning_rate": 1.3390762174625916e-06, "loss": 0.0005, "step": 92940 }, { "epoch": 1.5708575919150265, "grad_norm": 0.02603033185005188, "learning_rate": 1.3380718616177363e-06, "loss": 0.0005, "step": 92950 }, { "epoch": 1.571026592193877, "grad_norm": 0.008106399327516556, "learning_rate": 1.3370678243783703e-06, "loss": 0.0004, "step": 92960 }, { "epoch": 1.5711955924727277, "grad_norm": 0.01038984302431345, "learning_rate": 1.3360641058318507e-06, "loss": 0.0006, "step": 92970 }, { "epoch": 1.571364592751578, "grad_norm": 0.03053467348217964, "learning_rate": 1.335060706065508e-06, "loss": 0.0011, "step": 92980 }, { "epoch": 1.5715335930304284, "grad_norm": 0.03615260496735573, "learning_rate": 1.3340576251666387e-06, "loss": 0.0006, "step": 92990 }, { "epoch": 1.571702593309279, "grad_norm": 0.021119754761457443, "learning_rate": 1.3330548632225188e-06, "loss": 0.0008, "step": 93000 }, { "epoch": 1.5718715935881296, "grad_norm": 0.09246402978897095, "learning_rate": 1.3320524203203933e-06, "loss": 0.0012, "step": 93010 }, { "epoch": 1.57204059386698, "grad_norm": 0.013367106206715107, "learning_rate": 1.3310502965474776e-06, "loss": 0.0006, "step": 93020 }, { "epoch": 1.5722095941458303, "grad_norm": 0.0342743843793869, "learning_rate": 1.330048491990964e-06, "loss": 0.001, "step": 93030 }, { "epoch": 1.5723785944246806, "grad_norm": 0.05452071875333786, "learning_rate": 1.3290470067380117e-06, "loss": 0.0008, "step": 93040 }, { "epoch": 1.5725475947035312, "grad_norm": 0.015118611045181751, "learning_rate": 1.3280458408757574e-06, "loss": 0.0009, "step": 93050 }, { "epoch": 1.5727165949823818, "grad_norm": 0.08342970162630081, "learning_rate": 1.3270449944913044e-06, "loss": 0.0004, "step": 93060 }, { "epoch": 1.5728855952612322, "grad_norm": 0.024492163211107254, "learning_rate": 1.3260444676717343e-06, "loss": 0.0005, "step": 93070 }, { "epoch": 1.5730545955400825, "grad_norm": 0.01578182354569435, "learning_rate": 1.3250442605040941e-06, "loss": 0.0006, "step": 93080 }, { "epoch": 1.5732235958189331, "grad_norm": 0.11322375386953354, "learning_rate": 1.324044373075411e-06, "loss": 0.0015, "step": 93090 }, { "epoch": 1.5733925960977837, "grad_norm": 0.0352368988096714, "learning_rate": 1.3230448054726762e-06, "loss": 0.001, "step": 93100 }, { "epoch": 1.573561596376634, "grad_norm": 0.007330413442105055, "learning_rate": 1.3220455577828607e-06, "loss": 0.0005, "step": 93110 }, { "epoch": 1.5737305966554844, "grad_norm": 0.044726476073265076, "learning_rate": 1.3210466300928993e-06, "loss": 0.0006, "step": 93120 }, { "epoch": 1.5738995969343348, "grad_norm": 0.048842594027519226, "learning_rate": 1.3200480224897079e-06, "loss": 0.0006, "step": 93130 }, { "epoch": 1.5740685972131854, "grad_norm": 0.006548158824443817, "learning_rate": 1.3190497350601667e-06, "loss": 0.0005, "step": 93140 }, { "epoch": 1.574237597492036, "grad_norm": 0.027237258851528168, "learning_rate": 1.3180517678911337e-06, "loss": 0.0017, "step": 93150 }, { "epoch": 1.5744065977708863, "grad_norm": 0.016414618119597435, "learning_rate": 1.317054121069436e-06, "loss": 0.0005, "step": 93160 }, { "epoch": 1.5745755980497367, "grad_norm": 0.031219804659485817, "learning_rate": 1.3160567946818752e-06, "loss": 0.0007, "step": 93170 }, { "epoch": 1.5747445983285873, "grad_norm": 0.032161157578229904, "learning_rate": 1.315059788815221e-06, "loss": 0.0006, "step": 93180 }, { "epoch": 1.5749135986074378, "grad_norm": 0.03028140589594841, "learning_rate": 1.3140631035562196e-06, "loss": 0.0006, "step": 93190 }, { "epoch": 1.5750825988862882, "grad_norm": 0.03312603756785393, "learning_rate": 1.313066738991588e-06, "loss": 0.0006, "step": 93200 }, { "epoch": 1.5752515991651386, "grad_norm": 0.0011683525517582893, "learning_rate": 1.3120706952080127e-06, "loss": 0.0004, "step": 93210 }, { "epoch": 1.575420599443989, "grad_norm": 0.01985262706875801, "learning_rate": 1.3110749722921562e-06, "loss": 0.0008, "step": 93220 }, { "epoch": 1.5755895997228395, "grad_norm": 0.010746241547167301, "learning_rate": 1.310079570330649e-06, "loss": 0.0008, "step": 93230 }, { "epoch": 1.57575860000169, "grad_norm": 0.009657425805926323, "learning_rate": 1.309084489410099e-06, "loss": 0.0006, "step": 93240 }, { "epoch": 1.5759276002805405, "grad_norm": 0.006918651517480612, "learning_rate": 1.3080897296170797e-06, "loss": 0.0006, "step": 93250 }, { "epoch": 1.5760966005593908, "grad_norm": 0.017401212826371193, "learning_rate": 1.3070952910381435e-06, "loss": 0.0005, "step": 93260 }, { "epoch": 1.5762656008382414, "grad_norm": 0.012027682736515999, "learning_rate": 1.3061011737598078e-06, "loss": 0.001, "step": 93270 }, { "epoch": 1.576434601117092, "grad_norm": 0.05377326160669327, "learning_rate": 1.3051073778685685e-06, "loss": 0.0006, "step": 93280 }, { "epoch": 1.5766036013959424, "grad_norm": 0.0717761442065239, "learning_rate": 1.3041139034508882e-06, "loss": 0.0008, "step": 93290 }, { "epoch": 1.5767726016747927, "grad_norm": 0.006123771890997887, "learning_rate": 1.303120750593207e-06, "loss": 0.0015, "step": 93300 }, { "epoch": 1.576941601953643, "grad_norm": 0.01824995130300522, "learning_rate": 1.3021279193819303e-06, "loss": 0.001, "step": 93310 }, { "epoch": 1.5771106022324937, "grad_norm": 0.051778171211481094, "learning_rate": 1.301135409903443e-06, "loss": 0.0003, "step": 93320 }, { "epoch": 1.5772796025113442, "grad_norm": 0.02047240547835827, "learning_rate": 1.3001432222440951e-06, "loss": 0.0007, "step": 93330 }, { "epoch": 1.5774486027901946, "grad_norm": 0.15653613209724426, "learning_rate": 1.299151356490213e-06, "loss": 0.0004, "step": 93340 }, { "epoch": 1.577617603069045, "grad_norm": 0.049618061631917953, "learning_rate": 1.2981598127280954e-06, "loss": 0.0011, "step": 93350 }, { "epoch": 1.5777866033478956, "grad_norm": 0.0032934839837253094, "learning_rate": 1.2971685910440086e-06, "loss": 0.0007, "step": 93360 }, { "epoch": 1.5779556036267461, "grad_norm": 0.031259384006261826, "learning_rate": 1.2961776915241946e-06, "loss": 0.0005, "step": 93370 }, { "epoch": 1.5781246039055965, "grad_norm": 0.0658779963850975, "learning_rate": 1.2951871142548684e-06, "loss": 0.0003, "step": 93380 }, { "epoch": 1.5782936041844469, "grad_norm": 0.03829159960150719, "learning_rate": 1.2941968593222122e-06, "loss": 0.0007, "step": 93390 }, { "epoch": 1.5784626044632972, "grad_norm": 0.021253319457173347, "learning_rate": 1.293206926812386e-06, "loss": 0.001, "step": 93400 }, { "epoch": 1.5786316047421478, "grad_norm": 0.00683000311255455, "learning_rate": 1.2922173168115148e-06, "loss": 0.0004, "step": 93410 }, { "epoch": 1.5788006050209984, "grad_norm": 0.08008040487766266, "learning_rate": 1.2912280294057023e-06, "loss": 0.0009, "step": 93420 }, { "epoch": 1.5789696052998488, "grad_norm": 0.03998623043298721, "learning_rate": 1.290239064681022e-06, "loss": 0.001, "step": 93430 }, { "epoch": 1.5791386055786991, "grad_norm": 0.10981569439172745, "learning_rate": 1.2892504227235152e-06, "loss": 0.0011, "step": 93440 }, { "epoch": 1.5793076058575497, "grad_norm": 0.03829672187566757, "learning_rate": 1.2882621036192028e-06, "loss": 0.0008, "step": 93450 }, { "epoch": 1.5794766061364003, "grad_norm": 0.004690766800194979, "learning_rate": 1.2872741074540695e-06, "loss": 0.002, "step": 93460 }, { "epoch": 1.5796456064152506, "grad_norm": 0.006633737590163946, "learning_rate": 1.286286434314079e-06, "loss": 0.0007, "step": 93470 }, { "epoch": 1.579814606694101, "grad_norm": 0.012032300233840942, "learning_rate": 1.2852990842851603e-06, "loss": 0.0005, "step": 93480 }, { "epoch": 1.5799836069729514, "grad_norm": 0.013286178931593895, "learning_rate": 1.2843120574532214e-06, "loss": 0.0008, "step": 93490 }, { "epoch": 1.580152607251802, "grad_norm": 0.026893189176917076, "learning_rate": 1.2833253539041346e-06, "loss": 0.0006, "step": 93500 }, { "epoch": 1.5803216075306525, "grad_norm": 0.06069996580481529, "learning_rate": 1.2823389737237513e-06, "loss": 0.0006, "step": 93510 }, { "epoch": 1.580490607809503, "grad_norm": 0.01458305586129427, "learning_rate": 1.2813529169978883e-06, "loss": 0.0006, "step": 93520 }, { "epoch": 1.5806596080883533, "grad_norm": 0.110781729221344, "learning_rate": 1.2803671838123399e-06, "loss": 0.0006, "step": 93530 }, { "epoch": 1.5808286083672038, "grad_norm": 0.01895264908671379, "learning_rate": 1.2793817742528674e-06, "loss": 0.001, "step": 93540 }, { "epoch": 1.5809976086460544, "grad_norm": 0.1498795449733734, "learning_rate": 1.2783966884052074e-06, "loss": 0.0007, "step": 93550 }, { "epoch": 1.5811666089249048, "grad_norm": 0.05143171176314354, "learning_rate": 1.2774119263550666e-06, "loss": 0.0005, "step": 93560 }, { "epoch": 1.5813356092037552, "grad_norm": 0.009230494499206543, "learning_rate": 1.2764274881881266e-06, "loss": 0.001, "step": 93570 }, { "epoch": 1.5815046094826055, "grad_norm": 0.021424710750579834, "learning_rate": 1.2754433739900346e-06, "loss": 0.0007, "step": 93580 }, { "epoch": 1.581673609761456, "grad_norm": 0.10135804116725922, "learning_rate": 1.2744595838464164e-06, "loss": 0.0012, "step": 93590 }, { "epoch": 1.5818426100403067, "grad_norm": 0.03953375294804573, "learning_rate": 1.2734761178428639e-06, "loss": 0.0008, "step": 93600 }, { "epoch": 1.582011610319157, "grad_norm": 0.015265276655554771, "learning_rate": 1.2724929760649456e-06, "loss": 0.0004, "step": 93610 }, { "epoch": 1.5821806105980074, "grad_norm": 0.03104473650455475, "learning_rate": 1.2715101585981971e-06, "loss": 0.0019, "step": 93620 }, { "epoch": 1.582349610876858, "grad_norm": 0.003955503460019827, "learning_rate": 1.2705276655281312e-06, "loss": 0.0005, "step": 93630 }, { "epoch": 1.5825186111557084, "grad_norm": 0.09750876575708389, "learning_rate": 1.269545496940227e-06, "loss": 0.0006, "step": 93640 }, { "epoch": 1.582687611434559, "grad_norm": 0.028847867622971535, "learning_rate": 1.2685636529199401e-06, "loss": 0.0008, "step": 93650 }, { "epoch": 1.5828566117134093, "grad_norm": 0.08246053010225296, "learning_rate": 1.267582133552694e-06, "loss": 0.0007, "step": 93660 }, { "epoch": 1.5830256119922597, "grad_norm": 0.02705477736890316, "learning_rate": 1.266600938923887e-06, "loss": 0.0013, "step": 93670 }, { "epoch": 1.5831946122711102, "grad_norm": 0.07032588869333267, "learning_rate": 1.2656200691188852e-06, "loss": 0.0004, "step": 93680 }, { "epoch": 1.5833636125499608, "grad_norm": 0.00811270996928215, "learning_rate": 1.2646395242230313e-06, "loss": 0.0005, "step": 93690 }, { "epoch": 1.5835326128288112, "grad_norm": 0.010227940045297146, "learning_rate": 1.2636593043216388e-06, "loss": 0.0006, "step": 93700 }, { "epoch": 1.5837016131076616, "grad_norm": 0.007332955952733755, "learning_rate": 1.2626794094999883e-06, "loss": 0.0003, "step": 93710 }, { "epoch": 1.5838706133865121, "grad_norm": 0.058238618075847626, "learning_rate": 1.2616998398433378e-06, "loss": 0.0008, "step": 93720 }, { "epoch": 1.5840396136653625, "grad_norm": 0.00826265849173069, "learning_rate": 1.2607205954369128e-06, "loss": 0.0004, "step": 93730 }, { "epoch": 1.584208613944213, "grad_norm": 0.012059211730957031, "learning_rate": 1.2597416763659131e-06, "loss": 0.0006, "step": 93740 }, { "epoch": 1.5843776142230634, "grad_norm": 0.016500458121299744, "learning_rate": 1.2587630827155095e-06, "loss": 0.0006, "step": 93750 }, { "epoch": 1.5845466145019138, "grad_norm": 0.11306676268577576, "learning_rate": 1.257784814570846e-06, "loss": 0.0011, "step": 93760 }, { "epoch": 1.5847156147807644, "grad_norm": 0.11525103449821472, "learning_rate": 1.2568068720170335e-06, "loss": 0.0003, "step": 93770 }, { "epoch": 1.584884615059615, "grad_norm": 0.007584443315863609, "learning_rate": 1.2558292551391604e-06, "loss": 0.0009, "step": 93780 }, { "epoch": 1.5850536153384653, "grad_norm": 0.04393519461154938, "learning_rate": 1.2548519640222817e-06, "loss": 0.0009, "step": 93790 }, { "epoch": 1.5852226156173157, "grad_norm": 0.005927392281591892, "learning_rate": 1.2538749987514297e-06, "loss": 0.001, "step": 93800 }, { "epoch": 1.5853916158961663, "grad_norm": 0.04695868119597435, "learning_rate": 1.2528983594116017e-06, "loss": 0.0005, "step": 93810 }, { "epoch": 1.5855606161750166, "grad_norm": 0.11101292073726654, "learning_rate": 1.251922046087773e-06, "loss": 0.0003, "step": 93820 }, { "epoch": 1.5857296164538672, "grad_norm": 0.049037858843803406, "learning_rate": 1.2509460588648841e-06, "loss": 0.001, "step": 93830 }, { "epoch": 1.5858986167327176, "grad_norm": 0.039560019969940186, "learning_rate": 1.2499703978278544e-06, "loss": 0.0004, "step": 93840 }, { "epoch": 1.586067617011568, "grad_norm": 0.052576590329408646, "learning_rate": 1.2489950630615683e-06, "loss": 0.0006, "step": 93850 }, { "epoch": 1.5862366172904185, "grad_norm": 0.0020860591903328896, "learning_rate": 1.2480200546508869e-06, "loss": 0.001, "step": 93860 }, { "epoch": 1.5864056175692691, "grad_norm": 0.022277476266026497, "learning_rate": 1.2470453726806381e-06, "loss": 0.0003, "step": 93870 }, { "epoch": 1.5865746178481195, "grad_norm": 0.0447673462331295, "learning_rate": 1.2460710172356266e-06, "loss": 0.001, "step": 93880 }, { "epoch": 1.5867436181269698, "grad_norm": 0.01280398853123188, "learning_rate": 1.2450969884006237e-06, "loss": 0.0006, "step": 93890 }, { "epoch": 1.5869126184058202, "grad_norm": 0.02612018957734108, "learning_rate": 1.2441232862603776e-06, "loss": 0.0009, "step": 93900 }, { "epoch": 1.5870816186846708, "grad_norm": 0.051986292004585266, "learning_rate": 1.2431499108996009e-06, "loss": 0.0018, "step": 93910 }, { "epoch": 1.5872506189635214, "grad_norm": 0.04087941721081734, "learning_rate": 1.2421768624029845e-06, "loss": 0.0005, "step": 93920 }, { "epoch": 1.5874196192423717, "grad_norm": 0.02893008291721344, "learning_rate": 1.2412041408551885e-06, "loss": 0.0009, "step": 93930 }, { "epoch": 1.587588619521222, "grad_norm": 0.024935085326433182, "learning_rate": 1.2402317463408454e-06, "loss": 0.0003, "step": 93940 }, { "epoch": 1.5877576198000727, "grad_norm": 0.024023279547691345, "learning_rate": 1.2392596789445554e-06, "loss": 0.0006, "step": 93950 }, { "epoch": 1.5879266200789233, "grad_norm": 0.07324106246232986, "learning_rate": 1.2382879387508944e-06, "loss": 0.0007, "step": 93960 }, { "epoch": 1.5880956203577736, "grad_norm": 0.04412218555808067, "learning_rate": 1.2373165258444098e-06, "loss": 0.0005, "step": 93970 }, { "epoch": 1.588264620636624, "grad_norm": 0.021460387855768204, "learning_rate": 1.236345440309617e-06, "loss": 0.0006, "step": 93980 }, { "epoch": 1.5884336209154744, "grad_norm": 0.04855572059750557, "learning_rate": 1.2353746822310076e-06, "loss": 0.0006, "step": 93990 }, { "epoch": 1.588602621194325, "grad_norm": 0.3658362030982971, "learning_rate": 1.2344042516930399e-06, "loss": 0.001, "step": 94000 }, { "epoch": 1.5887716214731755, "grad_norm": 0.009330356493592262, "learning_rate": 1.2334341487801477e-06, "loss": 0.0012, "step": 94010 }, { "epoch": 1.5889406217520259, "grad_norm": 0.004394082818180323, "learning_rate": 1.2324643735767328e-06, "loss": 0.0005, "step": 94020 }, { "epoch": 1.5891096220308762, "grad_norm": 0.052747923880815506, "learning_rate": 1.2314949261671732e-06, "loss": 0.0004, "step": 94030 }, { "epoch": 1.5892786223097268, "grad_norm": 0.01922859065234661, "learning_rate": 1.2305258066358123e-06, "loss": 0.0007, "step": 94040 }, { "epoch": 1.5894476225885774, "grad_norm": 0.06810221076011658, "learning_rate": 1.2295570150669712e-06, "loss": 0.0014, "step": 94050 }, { "epoch": 1.5896166228674278, "grad_norm": 0.09049668163061142, "learning_rate": 1.2285885515449364e-06, "loss": 0.0006, "step": 94060 }, { "epoch": 1.5897856231462781, "grad_norm": 0.02533421292901039, "learning_rate": 1.2276204161539722e-06, "loss": 0.0006, "step": 94070 }, { "epoch": 1.5899546234251285, "grad_norm": 0.02883775718510151, "learning_rate": 1.2266526089783077e-06, "loss": 0.0023, "step": 94080 }, { "epoch": 1.590123623703979, "grad_norm": 0.08370274305343628, "learning_rate": 1.2256851301021499e-06, "loss": 0.0007, "step": 94090 }, { "epoch": 1.5902926239828297, "grad_norm": 0.03740803152322769, "learning_rate": 1.224717979609672e-06, "loss": 0.0005, "step": 94100 }, { "epoch": 1.59046162426168, "grad_norm": 0.005330103915184736, "learning_rate": 1.2237511575850209e-06, "loss": 0.0001, "step": 94110 }, { "epoch": 1.5906306245405304, "grad_norm": 0.054041408002376556, "learning_rate": 1.222784664112317e-06, "loss": 0.0019, "step": 94120 }, { "epoch": 1.590799624819381, "grad_norm": 0.04788007214665413, "learning_rate": 1.2218184992756472e-06, "loss": 0.0008, "step": 94130 }, { "epoch": 1.5909686250982316, "grad_norm": 0.10085374116897583, "learning_rate": 1.2208526631590727e-06, "loss": 0.0011, "step": 94140 }, { "epoch": 1.591137625377082, "grad_norm": 0.011281809769570827, "learning_rate": 1.2198871558466296e-06, "loss": 0.0015, "step": 94150 }, { "epoch": 1.5913066256559323, "grad_norm": 0.01467775460332632, "learning_rate": 1.2189219774223165e-06, "loss": 0.0004, "step": 94160 }, { "epoch": 1.5914756259347826, "grad_norm": 0.06958004832267761, "learning_rate": 1.2179571279701137e-06, "loss": 0.0006, "step": 94170 }, { "epoch": 1.5916446262136332, "grad_norm": 0.07026882469654083, "learning_rate": 1.216992607573963e-06, "loss": 0.0008, "step": 94180 }, { "epoch": 1.5918136264924838, "grad_norm": 0.0071510751731693745, "learning_rate": 1.2160284163177844e-06, "loss": 0.0009, "step": 94190 }, { "epoch": 1.5919826267713342, "grad_norm": 0.007311527617275715, "learning_rate": 1.215064554285469e-06, "loss": 0.001, "step": 94200 }, { "epoch": 1.5921516270501845, "grad_norm": 0.025926044210791588, "learning_rate": 1.2141010215608745e-06, "loss": 0.0006, "step": 94210 }, { "epoch": 1.5923206273290351, "grad_norm": 0.04072462022304535, "learning_rate": 1.2131378182278352e-06, "loss": 0.0005, "step": 94220 }, { "epoch": 1.5924896276078857, "grad_norm": 0.04394914582371712, "learning_rate": 1.2121749443701525e-06, "loss": 0.0003, "step": 94230 }, { "epoch": 1.592658627886736, "grad_norm": 0.02323491871356964, "learning_rate": 1.2112124000716035e-06, "loss": 0.0008, "step": 94240 }, { "epoch": 1.5928276281655864, "grad_norm": 0.04634846746921539, "learning_rate": 1.2102501854159316e-06, "loss": 0.0007, "step": 94250 }, { "epoch": 1.5929966284444368, "grad_norm": 0.016460781916975975, "learning_rate": 1.209288300486856e-06, "loss": 0.0005, "step": 94260 }, { "epoch": 1.5931656287232874, "grad_norm": 0.028184879571199417, "learning_rate": 1.2083267453680636e-06, "loss": 0.0006, "step": 94270 }, { "epoch": 1.593334629002138, "grad_norm": 0.004588103853166103, "learning_rate": 1.2073655201432171e-06, "loss": 0.0005, "step": 94280 }, { "epoch": 1.5935036292809883, "grad_norm": 0.0014949159231036901, "learning_rate": 1.2064046248959443e-06, "loss": 0.0005, "step": 94290 }, { "epoch": 1.5936726295598387, "grad_norm": 0.03530004993081093, "learning_rate": 1.2054440597098515e-06, "loss": 0.0007, "step": 94300 }, { "epoch": 1.5938416298386893, "grad_norm": 0.007246180437505245, "learning_rate": 1.2044838246685087e-06, "loss": 0.0005, "step": 94310 }, { "epoch": 1.5940106301175399, "grad_norm": 0.07312962412834167, "learning_rate": 1.2035239198554627e-06, "loss": 0.0011, "step": 94320 }, { "epoch": 1.5941796303963902, "grad_norm": 0.00831496249884367, "learning_rate": 1.2025643453542307e-06, "loss": 0.0007, "step": 94330 }, { "epoch": 1.5943486306752406, "grad_norm": 0.0062678721733391285, "learning_rate": 1.2016051012483003e-06, "loss": 0.0008, "step": 94340 }, { "epoch": 1.594517630954091, "grad_norm": 0.04183892160654068, "learning_rate": 1.200646187621129e-06, "loss": 0.0011, "step": 94350 }, { "epoch": 1.5946866312329415, "grad_norm": 0.04647721350193024, "learning_rate": 1.1996876045561483e-06, "loss": 0.0006, "step": 94360 }, { "epoch": 1.594855631511792, "grad_norm": 0.03425600752234459, "learning_rate": 1.1987293521367581e-06, "loss": 0.0007, "step": 94370 }, { "epoch": 1.5950246317906425, "grad_norm": 0.0020859267096966505, "learning_rate": 1.1977714304463333e-06, "loss": 0.0011, "step": 94380 }, { "epoch": 1.5951936320694928, "grad_norm": 0.0032163539435714483, "learning_rate": 1.1968138395682143e-06, "loss": 0.0007, "step": 94390 }, { "epoch": 1.5953626323483434, "grad_norm": 0.028047826141119003, "learning_rate": 1.1958565795857203e-06, "loss": 0.0005, "step": 94400 }, { "epoch": 1.595531632627194, "grad_norm": 0.04284824803471565, "learning_rate": 1.1948996505821335e-06, "loss": 0.0008, "step": 94410 }, { "epoch": 1.5957006329060444, "grad_norm": 0.012573568150401115, "learning_rate": 1.1939430526407148e-06, "loss": 0.0005, "step": 94420 }, { "epoch": 1.5958696331848947, "grad_norm": 0.049135226756334305, "learning_rate": 1.1929867858446897e-06, "loss": 0.0008, "step": 94430 }, { "epoch": 1.596038633463745, "grad_norm": 0.02598082646727562, "learning_rate": 1.1920308502772609e-06, "loss": 0.0021, "step": 94440 }, { "epoch": 1.5962076337425957, "grad_norm": 0.046017665416002274, "learning_rate": 1.1910752460215973e-06, "loss": 0.0004, "step": 94450 }, { "epoch": 1.5963766340214463, "grad_norm": 0.02556859329342842, "learning_rate": 1.1901199731608416e-06, "loss": 0.0008, "step": 94460 }, { "epoch": 1.5965456343002966, "grad_norm": 0.018384618684649467, "learning_rate": 1.189165031778109e-06, "loss": 0.0011, "step": 94470 }, { "epoch": 1.596714634579147, "grad_norm": 0.094480961561203, "learning_rate": 1.1882104219564812e-06, "loss": 0.0018, "step": 94480 }, { "epoch": 1.5968836348579976, "grad_norm": 0.02965855970978737, "learning_rate": 1.1872561437790165e-06, "loss": 0.0006, "step": 94490 }, { "epoch": 1.5970526351368481, "grad_norm": 0.09040623903274536, "learning_rate": 1.1863021973287392e-06, "loss": 0.0006, "step": 94500 }, { "epoch": 1.5972216354156985, "grad_norm": 0.044634588062763214, "learning_rate": 1.1853485826886485e-06, "loss": 0.0009, "step": 94510 }, { "epoch": 1.5973906356945489, "grad_norm": 0.00631709024310112, "learning_rate": 1.184395299941713e-06, "loss": 0.0005, "step": 94520 }, { "epoch": 1.5975596359733992, "grad_norm": 0.012562483549118042, "learning_rate": 1.1834423491708757e-06, "loss": 0.0007, "step": 94530 }, { "epoch": 1.5977286362522498, "grad_norm": 0.08589354902505875, "learning_rate": 1.1824897304590437e-06, "loss": 0.0008, "step": 94540 }, { "epoch": 1.5978976365311004, "grad_norm": 0.014393548481166363, "learning_rate": 1.181537443889103e-06, "loss": 0.0006, "step": 94550 }, { "epoch": 1.5980666368099508, "grad_norm": 0.0159346554428339, "learning_rate": 1.180585489543904e-06, "loss": 0.0006, "step": 94560 }, { "epoch": 1.5982356370888011, "grad_norm": 0.02267078123986721, "learning_rate": 1.1796338675062746e-06, "loss": 0.0012, "step": 94570 }, { "epoch": 1.5984046373676517, "grad_norm": 0.04347150772809982, "learning_rate": 1.1786825778590073e-06, "loss": 0.0006, "step": 94580 }, { "epoch": 1.598573637646502, "grad_norm": 0.014009674079716206, "learning_rate": 1.1777316206848722e-06, "loss": 0.0007, "step": 94590 }, { "epoch": 1.5987426379253526, "grad_norm": 0.04441831260919571, "learning_rate": 1.1767809960666033e-06, "loss": 0.0002, "step": 94600 }, { "epoch": 1.598911638204203, "grad_norm": 0.018436765298247337, "learning_rate": 1.1758307040869132e-06, "loss": 0.0009, "step": 94610 }, { "epoch": 1.5990806384830534, "grad_norm": 0.14143379032611847, "learning_rate": 1.1748807448284793e-06, "loss": 0.0008, "step": 94620 }, { "epoch": 1.599249638761904, "grad_norm": 0.056121762841939926, "learning_rate": 1.173931118373955e-06, "loss": 0.0004, "step": 94630 }, { "epoch": 1.5994186390407545, "grad_norm": 0.028397779911756516, "learning_rate": 1.1729818248059599e-06, "loss": 0.0002, "step": 94640 }, { "epoch": 1.599587639319605, "grad_norm": 0.06756635010242462, "learning_rate": 1.1720328642070893e-06, "loss": 0.0005, "step": 94650 }, { "epoch": 1.5997566395984553, "grad_norm": 0.002616355661302805, "learning_rate": 1.1710842366599057e-06, "loss": 0.0007, "step": 94660 }, { "epoch": 1.5999256398773058, "grad_norm": 0.037892259657382965, "learning_rate": 1.170135942246946e-06, "loss": 0.0004, "step": 94670 }, { "epoch": 1.6000946401561562, "grad_norm": 0.004484300035983324, "learning_rate": 1.1691879810507139e-06, "loss": 0.0009, "step": 94680 }, { "epoch": 1.6002636404350068, "grad_norm": 0.05947664752602577, "learning_rate": 1.1682403531536885e-06, "loss": 0.0005, "step": 94690 }, { "epoch": 1.6004326407138572, "grad_norm": 0.10121885687112808, "learning_rate": 1.1672930586383175e-06, "loss": 0.0006, "step": 94700 }, { "epoch": 1.6006016409927075, "grad_norm": 0.0022841356694698334, "learning_rate": 1.1663460975870216e-06, "loss": 0.001, "step": 94710 }, { "epoch": 1.600770641271558, "grad_norm": 0.00949120707809925, "learning_rate": 1.1653994700821885e-06, "loss": 0.0004, "step": 94720 }, { "epoch": 1.6009396415504087, "grad_norm": 0.01416967436671257, "learning_rate": 1.1644531762061805e-06, "loss": 0.0009, "step": 94730 }, { "epoch": 1.601108641829259, "grad_norm": 0.025021463632583618, "learning_rate": 1.1635072160413313e-06, "loss": 0.0006, "step": 94740 }, { "epoch": 1.6012776421081094, "grad_norm": 0.10143998265266418, "learning_rate": 1.162561589669941e-06, "loss": 0.001, "step": 94750 }, { "epoch": 1.6014466423869598, "grad_norm": 0.04918771609663963, "learning_rate": 1.1616162971742867e-06, "loss": 0.0005, "step": 94760 }, { "epoch": 1.6016156426658104, "grad_norm": 0.026630748063325882, "learning_rate": 1.1606713386366109e-06, "loss": 0.0005, "step": 94770 }, { "epoch": 1.601784642944661, "grad_norm": 0.010483386926352978, "learning_rate": 1.1597267141391315e-06, "loss": 0.0006, "step": 94780 }, { "epoch": 1.6019536432235113, "grad_norm": 0.10334274172782898, "learning_rate": 1.1587824237640334e-06, "loss": 0.0011, "step": 94790 }, { "epoch": 1.6021226435023617, "grad_norm": 0.03335344418883324, "learning_rate": 1.1578384675934766e-06, "loss": 0.001, "step": 94800 }, { "epoch": 1.6022916437812122, "grad_norm": 0.01131820771843195, "learning_rate": 1.1568948457095874e-06, "loss": 0.0006, "step": 94810 }, { "epoch": 1.6024606440600628, "grad_norm": 0.020446304231882095, "learning_rate": 1.1559515581944687e-06, "loss": 0.001, "step": 94820 }, { "epoch": 1.6026296443389132, "grad_norm": 0.06710178405046463, "learning_rate": 1.1550086051301878e-06, "loss": 0.0006, "step": 94830 }, { "epoch": 1.6027986446177636, "grad_norm": 0.029595445841550827, "learning_rate": 1.1540659865987891e-06, "loss": 0.0007, "step": 94840 }, { "epoch": 1.602967644896614, "grad_norm": 0.0384245291352272, "learning_rate": 1.1531237026822817e-06, "loss": 0.0014, "step": 94850 }, { "epoch": 1.6031366451754645, "grad_norm": 0.00988872442394495, "learning_rate": 1.1521817534626527e-06, "loss": 0.0013, "step": 94860 }, { "epoch": 1.603305645454315, "grad_norm": 0.011559010483324528, "learning_rate": 1.1512401390218525e-06, "loss": 0.0008, "step": 94870 }, { "epoch": 1.6034746457331654, "grad_norm": 0.09325367212295532, "learning_rate": 1.150298859441808e-06, "loss": 0.0007, "step": 94880 }, { "epoch": 1.6036436460120158, "grad_norm": 0.019055569544434547, "learning_rate": 1.1493579148044149e-06, "loss": 0.0004, "step": 94890 }, { "epoch": 1.6038126462908664, "grad_norm": 0.058484043926000595, "learning_rate": 1.1484173051915415e-06, "loss": 0.001, "step": 94900 }, { "epoch": 1.603981646569717, "grad_norm": 0.05305975303053856, "learning_rate": 1.1474770306850226e-06, "loss": 0.0005, "step": 94910 }, { "epoch": 1.6041506468485673, "grad_norm": 0.015423264354467392, "learning_rate": 1.146537091366669e-06, "loss": 0.0004, "step": 94920 }, { "epoch": 1.6043196471274177, "grad_norm": 0.012684252113103867, "learning_rate": 1.1455974873182574e-06, "loss": 0.0018, "step": 94930 }, { "epoch": 1.604488647406268, "grad_norm": 0.0017607345944270492, "learning_rate": 1.144658218621541e-06, "loss": 0.0004, "step": 94940 }, { "epoch": 1.6046576476851186, "grad_norm": 0.0076165455393493176, "learning_rate": 1.143719285358238e-06, "loss": 0.0017, "step": 94950 }, { "epoch": 1.6048266479639692, "grad_norm": 0.009829283691942692, "learning_rate": 1.1427806876100412e-06, "loss": 0.0003, "step": 94960 }, { "epoch": 1.6049956482428196, "grad_norm": 0.008983315899968147, "learning_rate": 1.1418424254586146e-06, "loss": 0.0004, "step": 94970 }, { "epoch": 1.60516464852167, "grad_norm": 0.002080840291455388, "learning_rate": 1.1409044989855889e-06, "loss": 0.0006, "step": 94980 }, { "epoch": 1.6053336488005205, "grad_norm": 0.021074660122394562, "learning_rate": 1.1399669082725707e-06, "loss": 0.0009, "step": 94990 }, { "epoch": 1.6055026490793711, "grad_norm": 0.12969990074634552, "learning_rate": 1.1390296534011325e-06, "loss": 0.0013, "step": 95000 }, { "epoch": 1.6056716493582215, "grad_norm": 0.08277317136526108, "learning_rate": 1.1380927344528226e-06, "loss": 0.0008, "step": 95010 }, { "epoch": 1.6058406496370718, "grad_norm": 0.010615160688757896, "learning_rate": 1.1371561515091544e-06, "loss": 0.0004, "step": 95020 }, { "epoch": 1.6060096499159222, "grad_norm": 0.06074380502104759, "learning_rate": 1.1362199046516193e-06, "loss": 0.0011, "step": 95030 }, { "epoch": 1.6061786501947728, "grad_norm": 0.07141372561454773, "learning_rate": 1.1352839939616706e-06, "loss": 0.0005, "step": 95040 }, { "epoch": 1.6063476504736234, "grad_norm": 0.02140755206346512, "learning_rate": 1.1343484195207411e-06, "loss": 0.0006, "step": 95050 }, { "epoch": 1.6065166507524737, "grad_norm": 0.1394871026277542, "learning_rate": 1.1334131814102272e-06, "loss": 0.001, "step": 95060 }, { "epoch": 1.606685651031324, "grad_norm": 0.05843876674771309, "learning_rate": 1.1324782797115007e-06, "loss": 0.0005, "step": 95070 }, { "epoch": 1.6068546513101747, "grad_norm": 0.07482954859733582, "learning_rate": 1.1315437145059038e-06, "loss": 0.0007, "step": 95080 }, { "epoch": 1.6070236515890253, "grad_norm": 0.0020129596814513206, "learning_rate": 1.1306094858747458e-06, "loss": 0.0004, "step": 95090 }, { "epoch": 1.6071926518678756, "grad_norm": 0.013063608668744564, "learning_rate": 1.1296755938993097e-06, "loss": 0.001, "step": 95100 }, { "epoch": 1.607361652146726, "grad_norm": 0.040359411388635635, "learning_rate": 1.1287420386608506e-06, "loss": 0.0004, "step": 95110 }, { "epoch": 1.6075306524255764, "grad_norm": 0.0003978550957981497, "learning_rate": 1.127808820240589e-06, "loss": 0.0004, "step": 95120 }, { "epoch": 1.607699652704427, "grad_norm": 0.016882719472050667, "learning_rate": 1.1268759387197232e-06, "loss": 0.0003, "step": 95130 }, { "epoch": 1.6078686529832775, "grad_norm": 0.051557507365942, "learning_rate": 1.125943394179415e-06, "loss": 0.0002, "step": 95140 }, { "epoch": 1.6080376532621279, "grad_norm": 0.004775538574904203, "learning_rate": 1.1250111867008028e-06, "loss": 0.0007, "step": 95150 }, { "epoch": 1.6082066535409782, "grad_norm": 0.03052687458693981, "learning_rate": 1.124079316364991e-06, "loss": 0.0004, "step": 95160 }, { "epoch": 1.6083756538198288, "grad_norm": 0.006776569876819849, "learning_rate": 1.1231477832530597e-06, "loss": 0.0004, "step": 95170 }, { "epoch": 1.6085446540986794, "grad_norm": 0.01142202503979206, "learning_rate": 1.1222165874460528e-06, "loss": 0.0002, "step": 95180 }, { "epoch": 1.6087136543775298, "grad_norm": 0.21770192682743073, "learning_rate": 1.121285729024993e-06, "loss": 0.0007, "step": 95190 }, { "epoch": 1.6088826546563801, "grad_norm": 0.014671850949525833, "learning_rate": 1.1203552080708662e-06, "loss": 0.0007, "step": 95200 }, { "epoch": 1.6090516549352305, "grad_norm": 0.09682852029800415, "learning_rate": 1.1194250246646333e-06, "loss": 0.0007, "step": 95210 }, { "epoch": 1.609220655214081, "grad_norm": 0.014746912755072117, "learning_rate": 1.1184951788872266e-06, "loss": 0.0016, "step": 95220 }, { "epoch": 1.6093896554929317, "grad_norm": 0.035342611372470856, "learning_rate": 1.1175656708195443e-06, "loss": 0.0004, "step": 95230 }, { "epoch": 1.609558655771782, "grad_norm": 0.02012399584054947, "learning_rate": 1.1166365005424612e-06, "loss": 0.0004, "step": 95240 }, { "epoch": 1.6097276560506324, "grad_norm": 0.030961062759160995, "learning_rate": 1.1157076681368162e-06, "loss": 0.001, "step": 95250 }, { "epoch": 1.609896656329483, "grad_norm": 0.05708079785108566, "learning_rate": 1.1147791736834252e-06, "loss": 0.0007, "step": 95260 }, { "epoch": 1.6100656566083336, "grad_norm": 0.02694096788764, "learning_rate": 1.113851017263069e-06, "loss": 0.0005, "step": 95270 }, { "epoch": 1.610234656887184, "grad_norm": 0.09392502158880234, "learning_rate": 1.1129231989565031e-06, "loss": 0.0007, "step": 95280 }, { "epoch": 1.6104036571660343, "grad_norm": 0.007937021553516388, "learning_rate": 1.1119957188444525e-06, "loss": 0.0005, "step": 95290 }, { "epoch": 1.6105726574448846, "grad_norm": 0.0383593924343586, "learning_rate": 1.111068577007613e-06, "loss": 0.0004, "step": 95300 }, { "epoch": 1.6107416577237352, "grad_norm": 0.014574541710317135, "learning_rate": 1.110141773526649e-06, "loss": 0.0006, "step": 95310 }, { "epoch": 1.6109106580025858, "grad_norm": 0.0773870125412941, "learning_rate": 1.1092153084821982e-06, "loss": 0.0005, "step": 95320 }, { "epoch": 1.6110796582814362, "grad_norm": 0.013775880448520184, "learning_rate": 1.1082891819548658e-06, "loss": 0.0004, "step": 95330 }, { "epoch": 1.6112486585602865, "grad_norm": 0.00031712432974018157, "learning_rate": 1.1073633940252316e-06, "loss": 0.0003, "step": 95340 }, { "epoch": 1.6114176588391371, "grad_norm": 0.03490206226706505, "learning_rate": 1.106437944773841e-06, "loss": 0.0014, "step": 95350 }, { "epoch": 1.6115866591179877, "grad_norm": 0.001340964576229453, "learning_rate": 1.1055128342812154e-06, "loss": 0.0005, "step": 95360 }, { "epoch": 1.611755659396838, "grad_norm": 0.1313982903957367, "learning_rate": 1.104588062627841e-06, "loss": 0.0006, "step": 95370 }, { "epoch": 1.6119246596756884, "grad_norm": 0.08295604586601257, "learning_rate": 1.1036636298941805e-06, "loss": 0.0004, "step": 95380 }, { "epoch": 1.6120936599545388, "grad_norm": 0.04703642800450325, "learning_rate": 1.1027395361606609e-06, "loss": 0.0003, "step": 95390 }, { "epoch": 1.6122626602333894, "grad_norm": 0.030354078859090805, "learning_rate": 1.1018157815076857e-06, "loss": 0.0007, "step": 95400 }, { "epoch": 1.61243166051224, "grad_norm": 0.002051990944892168, "learning_rate": 1.1008923660156228e-06, "loss": 0.0009, "step": 95410 }, { "epoch": 1.6126006607910903, "grad_norm": 0.015003432519733906, "learning_rate": 1.0999692897648173e-06, "loss": 0.0004, "step": 95420 }, { "epoch": 1.6127696610699407, "grad_norm": 0.0902337059378624, "learning_rate": 1.0990465528355788e-06, "loss": 0.0009, "step": 95430 }, { "epoch": 1.6129386613487913, "grad_norm": 0.2827770709991455, "learning_rate": 1.0981241553081916e-06, "loss": 0.0011, "step": 95440 }, { "epoch": 1.6131076616276416, "grad_norm": 0.04656357318162918, "learning_rate": 1.0972020972629067e-06, "loss": 0.0007, "step": 95450 }, { "epoch": 1.6132766619064922, "grad_norm": 0.009094469249248505, "learning_rate": 1.0962803787799485e-06, "loss": 0.0006, "step": 95460 }, { "epoch": 1.6134456621853426, "grad_norm": 0.005886971019208431, "learning_rate": 1.095358999939512e-06, "loss": 0.0003, "step": 95470 }, { "epoch": 1.613614662464193, "grad_norm": 0.036646418273448944, "learning_rate": 1.0944379608217604e-06, "loss": 0.0009, "step": 95480 }, { "epoch": 1.6137836627430435, "grad_norm": 0.010496441274881363, "learning_rate": 1.0935172615068307e-06, "loss": 0.0006, "step": 95490 }, { "epoch": 1.613952663021894, "grad_norm": 0.047105688601732254, "learning_rate": 1.092596902074825e-06, "loss": 0.001, "step": 95500 }, { "epoch": 1.6141216633007445, "grad_norm": 0.09071959555149078, "learning_rate": 1.0916768826058222e-06, "loss": 0.0006, "step": 95510 }, { "epoch": 1.6142906635795948, "grad_norm": 0.019688261672854424, "learning_rate": 1.0907572031798652e-06, "loss": 0.0011, "step": 95520 }, { "epoch": 1.6144596638584454, "grad_norm": 0.029169324785470963, "learning_rate": 1.089837863876974e-06, "loss": 0.0005, "step": 95530 }, { "epoch": 1.6146286641372958, "grad_norm": 0.035156168043613434, "learning_rate": 1.088918864777132e-06, "loss": 0.0007, "step": 95540 }, { "epoch": 1.6147976644161464, "grad_norm": 0.0048329150304198265, "learning_rate": 1.0880002059603e-06, "loss": 0.0003, "step": 95550 }, { "epoch": 1.6149666646949967, "grad_norm": 0.028820117935538292, "learning_rate": 1.0870818875064033e-06, "loss": 0.0006, "step": 95560 }, { "epoch": 1.615135664973847, "grad_norm": 0.04869238659739494, "learning_rate": 1.0861639094953418e-06, "loss": 0.0006, "step": 95570 }, { "epoch": 1.6153046652526977, "grad_norm": 0.13940592110157013, "learning_rate": 1.0852462720069818e-06, "loss": 0.0023, "step": 95580 }, { "epoch": 1.6154736655315483, "grad_norm": 0.027209477499127388, "learning_rate": 1.084328975121165e-06, "loss": 0.0005, "step": 95590 }, { "epoch": 1.6156426658103986, "grad_norm": 0.011152585968375206, "learning_rate": 1.0834120189176978e-06, "loss": 0.0014, "step": 95600 }, { "epoch": 1.615811666089249, "grad_norm": 0.03328166902065277, "learning_rate": 1.0824954034763624e-06, "loss": 0.0009, "step": 95610 }, { "epoch": 1.6159806663680996, "grad_norm": 0.034088313579559326, "learning_rate": 1.0815791288769061e-06, "loss": 0.0005, "step": 95620 }, { "epoch": 1.61614966664695, "grad_norm": 0.025707952678203583, "learning_rate": 1.0806631951990526e-06, "loss": 0.0006, "step": 95630 }, { "epoch": 1.6163186669258005, "grad_norm": 0.003974339924752712, "learning_rate": 1.0797476025224896e-06, "loss": 0.0003, "step": 95640 }, { "epoch": 1.6164876672046509, "grad_norm": 0.09770644456148148, "learning_rate": 1.078832350926879e-06, "loss": 0.0006, "step": 95650 }, { "epoch": 1.6166566674835012, "grad_norm": 0.06613285094499588, "learning_rate": 1.077917440491852e-06, "loss": 0.0016, "step": 95660 }, { "epoch": 1.6168256677623518, "grad_norm": 0.05171317607164383, "learning_rate": 1.077002871297012e-06, "loss": 0.0006, "step": 95670 }, { "epoch": 1.6169946680412024, "grad_norm": 0.004788296762853861, "learning_rate": 1.0760886434219287e-06, "loss": 0.0004, "step": 95680 }, { "epoch": 1.6171636683200528, "grad_norm": 0.01328999549150467, "learning_rate": 1.0751747569461462e-06, "loss": 0.0006, "step": 95690 }, { "epoch": 1.6173326685989031, "grad_norm": 0.047808896750211716, "learning_rate": 1.0742612119491752e-06, "loss": 0.0009, "step": 95700 }, { "epoch": 1.6175016688777535, "grad_norm": 0.02984941564500332, "learning_rate": 1.0733480085105002e-06, "loss": 0.0004, "step": 95710 }, { "epoch": 1.617670669156604, "grad_norm": 0.05902513116598129, "learning_rate": 1.0724351467095723e-06, "loss": 0.0011, "step": 95720 }, { "epoch": 1.6178396694354547, "grad_norm": 0.05337316915392876, "learning_rate": 1.071522626625816e-06, "loss": 0.0006, "step": 95730 }, { "epoch": 1.618008669714305, "grad_norm": 0.04958257079124451, "learning_rate": 1.070610448338627e-06, "loss": 0.0003, "step": 95740 }, { "epoch": 1.6181776699931554, "grad_norm": 0.06459430605173111, "learning_rate": 1.0696986119273656e-06, "loss": 0.0004, "step": 95750 }, { "epoch": 1.618346670272006, "grad_norm": 0.0003064874035771936, "learning_rate": 1.0687871174713688e-06, "loss": 0.0008, "step": 95760 }, { "epoch": 1.6185156705508565, "grad_norm": 0.04987462982535362, "learning_rate": 1.0678759650499387e-06, "loss": 0.0009, "step": 95770 }, { "epoch": 1.618684670829707, "grad_norm": 0.03283727169036865, "learning_rate": 1.0669651547423527e-06, "loss": 0.0008, "step": 95780 }, { "epoch": 1.6188536711085573, "grad_norm": 0.01741410419344902, "learning_rate": 1.0660546866278532e-06, "loss": 0.0009, "step": 95790 }, { "epoch": 1.6190226713874076, "grad_norm": 0.02572944574058056, "learning_rate": 1.0651445607856576e-06, "loss": 0.0006, "step": 95800 }, { "epoch": 1.6191916716662582, "grad_norm": 0.006492604501545429, "learning_rate": 1.064234777294948e-06, "loss": 0.0007, "step": 95810 }, { "epoch": 1.6193606719451088, "grad_norm": 0.013690764084458351, "learning_rate": 1.0633253362348838e-06, "loss": 0.0003, "step": 95820 }, { "epoch": 1.6195296722239592, "grad_norm": 0.06615430116653442, "learning_rate": 1.062416237684588e-06, "loss": 0.0006, "step": 95830 }, { "epoch": 1.6196986725028095, "grad_norm": 0.025486761704087257, "learning_rate": 1.061507481723157e-06, "loss": 0.0004, "step": 95840 }, { "epoch": 1.61986767278166, "grad_norm": 0.0018564617494121194, "learning_rate": 1.060599068429659e-06, "loss": 0.001, "step": 95850 }, { "epoch": 1.6200366730605107, "grad_norm": 0.12936896085739136, "learning_rate": 1.0596909978831276e-06, "loss": 0.0007, "step": 95860 }, { "epoch": 1.620205673339361, "grad_norm": 0.03564248979091644, "learning_rate": 1.058783270162571e-06, "loss": 0.0007, "step": 95870 }, { "epoch": 1.6203746736182114, "grad_norm": 0.010401569306850433, "learning_rate": 1.0578758853469662e-06, "loss": 0.0008, "step": 95880 }, { "epoch": 1.6205436738970618, "grad_norm": 0.01914243958890438, "learning_rate": 1.0569688435152586e-06, "loss": 0.0006, "step": 95890 }, { "epoch": 1.6207126741759124, "grad_norm": 0.023274172097444534, "learning_rate": 1.056062144746367e-06, "loss": 0.0006, "step": 95900 }, { "epoch": 1.620881674454763, "grad_norm": 0.010225529782474041, "learning_rate": 1.0551557891191766e-06, "loss": 0.0012, "step": 95910 }, { "epoch": 1.6210506747336133, "grad_norm": 0.016484683379530907, "learning_rate": 1.054249776712547e-06, "loss": 0.0005, "step": 95920 }, { "epoch": 1.6212196750124637, "grad_norm": 0.1197252869606018, "learning_rate": 1.053344107605303e-06, "loss": 0.0014, "step": 95930 }, { "epoch": 1.6213886752913143, "grad_norm": 0.09648619592189789, "learning_rate": 1.0524387818762448e-06, "loss": 0.0008, "step": 95940 }, { "epoch": 1.6215576755701648, "grad_norm": 0.0543203242123127, "learning_rate": 1.0515337996041381e-06, "loss": 0.0004, "step": 95950 }, { "epoch": 1.6217266758490152, "grad_norm": 0.014883480966091156, "learning_rate": 1.0506291608677226e-06, "loss": 0.0007, "step": 95960 }, { "epoch": 1.6218956761278656, "grad_norm": 0.01520876307040453, "learning_rate": 1.0497248657457038e-06, "loss": 0.0003, "step": 95970 }, { "epoch": 1.622064676406716, "grad_norm": 0.017585331574082375, "learning_rate": 1.0488209143167616e-06, "loss": 0.0005, "step": 95980 }, { "epoch": 1.6222336766855665, "grad_norm": 0.007562616840004921, "learning_rate": 1.047917306659545e-06, "loss": 0.001, "step": 95990 }, { "epoch": 1.622402676964417, "grad_norm": 0.0027630627155303955, "learning_rate": 1.0470140428526693e-06, "loss": 0.0006, "step": 96000 }, { "epoch": 1.6225716772432675, "grad_norm": 0.016282545402646065, "learning_rate": 1.0461111229747263e-06, "loss": 0.0016, "step": 96010 }, { "epoch": 1.6227406775221178, "grad_norm": 0.0015029292553663254, "learning_rate": 1.0452085471042705e-06, "loss": 0.0005, "step": 96020 }, { "epoch": 1.6229096778009684, "grad_norm": 0.05951765552163124, "learning_rate": 1.0443063153198346e-06, "loss": 0.0018, "step": 96030 }, { "epoch": 1.623078678079819, "grad_norm": 0.019307712092995644, "learning_rate": 1.0434044276999138e-06, "loss": 0.0005, "step": 96040 }, { "epoch": 1.6232476783586693, "grad_norm": 0.0006605935632251203, "learning_rate": 1.0425028843229773e-06, "loss": 0.0018, "step": 96050 }, { "epoch": 1.6234166786375197, "grad_norm": 0.02443947084248066, "learning_rate": 1.0416016852674648e-06, "loss": 0.0005, "step": 96060 }, { "epoch": 1.62358567891637, "grad_norm": 0.08372944593429565, "learning_rate": 1.0407008306117862e-06, "loss": 0.0011, "step": 96070 }, { "epoch": 1.6237546791952207, "grad_norm": 0.06334172189235687, "learning_rate": 1.0398003204343166e-06, "loss": 0.0006, "step": 96080 }, { "epoch": 1.6239236794740712, "grad_norm": 0.0012170334812253714, "learning_rate": 1.0389001548134088e-06, "loss": 0.0003, "step": 96090 }, { "epoch": 1.6240926797529216, "grad_norm": 0.028422201052308083, "learning_rate": 1.0380003338273774e-06, "loss": 0.0004, "step": 96100 }, { "epoch": 1.624261680031772, "grad_norm": 0.015722034499049187, "learning_rate": 1.037100857554515e-06, "loss": 0.0004, "step": 96110 }, { "epoch": 1.6244306803106225, "grad_norm": 0.024624217301607132, "learning_rate": 1.0362017260730772e-06, "loss": 0.0007, "step": 96120 }, { "epoch": 1.6245996805894731, "grad_norm": 0.0006813265499658883, "learning_rate": 1.0353029394612957e-06, "loss": 0.0002, "step": 96130 }, { "epoch": 1.6247686808683235, "grad_norm": 0.04793710634112358, "learning_rate": 1.0344044977973666e-06, "loss": 0.0004, "step": 96140 }, { "epoch": 1.6249376811471739, "grad_norm": 0.07139737904071808, "learning_rate": 1.0335064011594615e-06, "loss": 0.0008, "step": 96150 }, { "epoch": 1.6251066814260242, "grad_norm": 0.000769991718698293, "learning_rate": 1.032608649625716e-06, "loss": 0.0007, "step": 96160 }, { "epoch": 1.6252756817048748, "grad_norm": 0.026528295129537582, "learning_rate": 1.0317112432742416e-06, "loss": 0.0005, "step": 96170 }, { "epoch": 1.6254446819837254, "grad_norm": 0.002808311488479376, "learning_rate": 1.0308141821831147e-06, "loss": 0.0006, "step": 96180 }, { "epoch": 1.6256136822625757, "grad_norm": 0.037516504526138306, "learning_rate": 1.0299174664303862e-06, "loss": 0.0004, "step": 96190 }, { "epoch": 1.625782682541426, "grad_norm": 0.08719465881586075, "learning_rate": 1.0290210960940722e-06, "loss": 0.0013, "step": 96200 }, { "epoch": 1.6259516828202767, "grad_norm": 0.030474737286567688, "learning_rate": 1.0281250712521633e-06, "loss": 0.0004, "step": 96210 }, { "epoch": 1.6261206830991273, "grad_norm": 0.012552831321954727, "learning_rate": 1.0272293919826182e-06, "loss": 0.0008, "step": 96220 }, { "epoch": 1.6262896833779776, "grad_norm": 0.010344977490603924, "learning_rate": 1.0263340583633635e-06, "loss": 0.0006, "step": 96230 }, { "epoch": 1.626458683656828, "grad_norm": 0.006808555219322443, "learning_rate": 1.0254390704722984e-06, "loss": 0.0009, "step": 96240 }, { "epoch": 1.6266276839356784, "grad_norm": 0.03128369525074959, "learning_rate": 1.0245444283872913e-06, "loss": 0.0004, "step": 96250 }, { "epoch": 1.626796684214529, "grad_norm": 0.08118956536054611, "learning_rate": 1.0236501321861819e-06, "loss": 0.0008, "step": 96260 }, { "epoch": 1.6269656844933795, "grad_norm": 0.03353813290596008, "learning_rate": 1.0227561819467753e-06, "loss": 0.0006, "step": 96270 }, { "epoch": 1.6271346847722299, "grad_norm": 0.09277798235416412, "learning_rate": 1.0218625777468527e-06, "loss": 0.0008, "step": 96280 }, { "epoch": 1.6273036850510803, "grad_norm": 0.03057316318154335, "learning_rate": 1.020969319664159e-06, "loss": 0.0008, "step": 96290 }, { "epoch": 1.6274726853299308, "grad_norm": 0.01385189313441515, "learning_rate": 1.0200764077764146e-06, "loss": 0.0009, "step": 96300 }, { "epoch": 1.6276416856087814, "grad_norm": 0.03868904337286949, "learning_rate": 1.0191838421613043e-06, "loss": 0.0018, "step": 96310 }, { "epoch": 1.6278106858876318, "grad_norm": 0.058415502309799194, "learning_rate": 1.018291622896489e-06, "loss": 0.0017, "step": 96320 }, { "epoch": 1.6279796861664821, "grad_norm": 0.03958326205611229, "learning_rate": 1.0173997500595927e-06, "loss": 0.0006, "step": 96330 }, { "epoch": 1.6281486864453325, "grad_norm": 0.047710344195365906, "learning_rate": 1.0165082237282158e-06, "loss": 0.0004, "step": 96340 }, { "epoch": 1.628317686724183, "grad_norm": 0.04047977924346924, "learning_rate": 1.0156170439799224e-06, "loss": 0.0005, "step": 96350 }, { "epoch": 1.6284866870030337, "grad_norm": 0.03744013234972954, "learning_rate": 1.0147262108922522e-06, "loss": 0.0005, "step": 96360 }, { "epoch": 1.628655687281884, "grad_norm": 0.025144357234239578, "learning_rate": 1.01383572454271e-06, "loss": 0.0006, "step": 96370 }, { "epoch": 1.6288246875607344, "grad_norm": 0.0007659460534341633, "learning_rate": 1.0129455850087737e-06, "loss": 0.0004, "step": 96380 }, { "epoch": 1.628993687839585, "grad_norm": 0.06955332309007645, "learning_rate": 1.0120557923678886e-06, "loss": 0.0005, "step": 96390 }, { "epoch": 1.6291626881184353, "grad_norm": 0.011934510432183743, "learning_rate": 1.0111663466974724e-06, "loss": 0.0015, "step": 96400 }, { "epoch": 1.629331688397286, "grad_norm": 0.03708849102258682, "learning_rate": 1.010277248074909e-06, "loss": 0.0011, "step": 96410 }, { "epoch": 1.6295006886761363, "grad_norm": 0.02652420848608017, "learning_rate": 1.0093884965775564e-06, "loss": 0.0005, "step": 96420 }, { "epoch": 1.6296696889549867, "grad_norm": 0.18634644150733948, "learning_rate": 1.0085000922827386e-06, "loss": 0.001, "step": 96430 }, { "epoch": 1.6298386892338372, "grad_norm": 0.014714261516928673, "learning_rate": 1.0076120352677543e-06, "loss": 0.001, "step": 96440 }, { "epoch": 1.6300076895126878, "grad_norm": 0.07127939164638519, "learning_rate": 1.0067243256098646e-06, "loss": 0.0007, "step": 96450 }, { "epoch": 1.6301766897915382, "grad_norm": 0.027194982394576073, "learning_rate": 1.005836963386308e-06, "loss": 0.0007, "step": 96460 }, { "epoch": 1.6303456900703885, "grad_norm": 0.04858466610312462, "learning_rate": 1.004949948674286e-06, "loss": 0.0007, "step": 96470 }, { "epoch": 1.6305146903492391, "grad_norm": 0.019683321937918663, "learning_rate": 1.0040632815509765e-06, "loss": 0.0005, "step": 96480 }, { "epoch": 1.6306836906280895, "grad_norm": 0.09658891707658768, "learning_rate": 1.0031769620935212e-06, "loss": 0.0005, "step": 96490 }, { "epoch": 1.63085269090694, "grad_norm": 0.0899178758263588, "learning_rate": 1.002290990379035e-06, "loss": 0.0013, "step": 96500 }, { "epoch": 1.6310216911857904, "grad_norm": 0.06441479176282883, "learning_rate": 1.0014053664846035e-06, "loss": 0.0009, "step": 96510 }, { "epoch": 1.6311906914646408, "grad_norm": 0.043808892369270325, "learning_rate": 1.000520090487277e-06, "loss": 0.0007, "step": 96520 }, { "epoch": 1.6313596917434914, "grad_norm": 0.035942934453487396, "learning_rate": 9.996351624640821e-07, "loss": 0.0003, "step": 96530 }, { "epoch": 1.631528692022342, "grad_norm": 0.03058365173637867, "learning_rate": 9.98750582492009e-07, "loss": 0.0008, "step": 96540 }, { "epoch": 1.6316976923011923, "grad_norm": 0.06809443980455399, "learning_rate": 9.978663506480235e-07, "loss": 0.0011, "step": 96550 }, { "epoch": 1.6318666925800427, "grad_norm": 0.008974218741059303, "learning_rate": 9.969824670090543e-07, "loss": 0.0008, "step": 96560 }, { "epoch": 1.6320356928588933, "grad_norm": 0.05109541118144989, "learning_rate": 9.96098931652007e-07, "loss": 0.0007, "step": 96570 }, { "epoch": 1.6322046931377436, "grad_norm": 0.028603311628103256, "learning_rate": 9.952157446537508e-07, "loss": 0.0008, "step": 96580 }, { "epoch": 1.6323736934165942, "grad_norm": 0.013319021090865135, "learning_rate": 9.9433290609113e-07, "loss": 0.0008, "step": 96590 }, { "epoch": 1.6325426936954446, "grad_norm": 0.0273862574249506, "learning_rate": 9.934504160409526e-07, "loss": 0.0009, "step": 96600 }, { "epoch": 1.632711693974295, "grad_norm": 0.057017650455236435, "learning_rate": 9.925682745800009e-07, "loss": 0.0008, "step": 96610 }, { "epoch": 1.6328806942531455, "grad_norm": 0.011289657093584538, "learning_rate": 9.916864817850275e-07, "loss": 0.0005, "step": 96620 }, { "epoch": 1.633049694531996, "grad_norm": 0.014769106172025204, "learning_rate": 9.908050377327494e-07, "loss": 0.0015, "step": 96630 }, { "epoch": 1.6332186948108465, "grad_norm": 0.19689464569091797, "learning_rate": 9.899239424998586e-07, "loss": 0.0004, "step": 96640 }, { "epoch": 1.6333876950896968, "grad_norm": 0.0896286591887474, "learning_rate": 9.890431961630148e-07, "loss": 0.0015, "step": 96650 }, { "epoch": 1.6335566953685472, "grad_norm": 0.017027780413627625, "learning_rate": 9.88162798798845e-07, "loss": 0.0004, "step": 96660 }, { "epoch": 1.6337256956473978, "grad_norm": 0.016162235289812088, "learning_rate": 9.87282750483951e-07, "loss": 0.0002, "step": 96670 }, { "epoch": 1.6338946959262484, "grad_norm": 0.07593441009521484, "learning_rate": 9.864030512948991e-07, "loss": 0.0007, "step": 96680 }, { "epoch": 1.6340636962050987, "grad_norm": 0.033916737884283066, "learning_rate": 9.855237013082285e-07, "loss": 0.0008, "step": 96690 }, { "epoch": 1.634232696483949, "grad_norm": 0.022884488105773926, "learning_rate": 9.846447006004456e-07, "loss": 0.0003, "step": 96700 }, { "epoch": 1.6344016967627997, "grad_norm": 0.08436908572912216, "learning_rate": 9.8376604924803e-07, "loss": 0.0004, "step": 96710 }, { "epoch": 1.6345706970416503, "grad_norm": 0.03133808448910713, "learning_rate": 9.828877473274257e-07, "loss": 0.0004, "step": 96720 }, { "epoch": 1.6347396973205006, "grad_norm": 0.031930964440107346, "learning_rate": 9.82009794915052e-07, "loss": 0.001, "step": 96730 }, { "epoch": 1.634908697599351, "grad_norm": 0.06534985452890396, "learning_rate": 9.811321920872924e-07, "loss": 0.001, "step": 96740 }, { "epoch": 1.6350776978782013, "grad_norm": 0.044219955801963806, "learning_rate": 9.802549389205041e-07, "loss": 0.0006, "step": 96750 }, { "epoch": 1.635246698157052, "grad_norm": 0.039992161095142365, "learning_rate": 9.79378035491013e-07, "loss": 0.0005, "step": 96760 }, { "epoch": 1.6354156984359025, "grad_norm": 0.07529482990503311, "learning_rate": 9.785014818751125e-07, "loss": 0.0005, "step": 96770 }, { "epoch": 1.6355846987147529, "grad_norm": 0.01657024398446083, "learning_rate": 9.77625278149069e-07, "loss": 0.0004, "step": 96780 }, { "epoch": 1.6357536989936032, "grad_norm": 0.06686761975288391, "learning_rate": 9.767494243891134e-07, "loss": 0.0012, "step": 96790 }, { "epoch": 1.6359226992724538, "grad_norm": 0.03590511530637741, "learning_rate": 9.758739206714512e-07, "loss": 0.0009, "step": 96800 }, { "epoch": 1.6360916995513044, "grad_norm": 0.00774614792317152, "learning_rate": 9.74998767072257e-07, "loss": 0.0006, "step": 96810 }, { "epoch": 1.6362606998301548, "grad_norm": 0.06778514385223389, "learning_rate": 9.741239636676696e-07, "loss": 0.0008, "step": 96820 }, { "epoch": 1.6364297001090051, "grad_norm": 0.09307301044464111, "learning_rate": 9.732495105338036e-07, "loss": 0.0009, "step": 96830 }, { "epoch": 1.6365987003878555, "grad_norm": 0.027598995715379715, "learning_rate": 9.723754077467417e-07, "loss": 0.0006, "step": 96840 }, { "epoch": 1.636767700666706, "grad_norm": 0.06962253898382187, "learning_rate": 9.715016553825318e-07, "loss": 0.0007, "step": 96850 }, { "epoch": 1.6369367009455567, "grad_norm": 0.02600184455513954, "learning_rate": 9.706282535171979e-07, "loss": 0.0002, "step": 96860 }, { "epoch": 1.637105701224407, "grad_norm": 0.01340588927268982, "learning_rate": 9.697552022267276e-07, "loss": 0.0008, "step": 96870 }, { "epoch": 1.6372747015032574, "grad_norm": 0.03624692186713219, "learning_rate": 9.688825015870829e-07, "loss": 0.0013, "step": 96880 }, { "epoch": 1.637443701782108, "grad_norm": 0.14636138081550598, "learning_rate": 9.680101516741908e-07, "loss": 0.0007, "step": 96890 }, { "epoch": 1.6376127020609585, "grad_norm": 0.03972083702683449, "learning_rate": 9.671381525639517e-07, "loss": 0.0008, "step": 96900 }, { "epoch": 1.637781702339809, "grad_norm": 0.04988046735525131, "learning_rate": 9.662665043322317e-07, "loss": 0.0007, "step": 96910 }, { "epoch": 1.6379507026186593, "grad_norm": 0.016621902585029602, "learning_rate": 9.653952070548712e-07, "loss": 0.0008, "step": 96920 }, { "epoch": 1.6381197028975096, "grad_norm": 0.04055316373705864, "learning_rate": 9.645242608076749e-07, "loss": 0.0012, "step": 96930 }, { "epoch": 1.6382887031763602, "grad_norm": 0.026886506006121635, "learning_rate": 9.636536656664209e-07, "loss": 0.0005, "step": 96940 }, { "epoch": 1.6384577034552108, "grad_norm": 0.03368248790502548, "learning_rate": 9.627834217068532e-07, "loss": 0.0008, "step": 96950 }, { "epoch": 1.6386267037340612, "grad_norm": 0.004224222619086504, "learning_rate": 9.619135290046906e-07, "loss": 0.0006, "step": 96960 }, { "epoch": 1.6387957040129115, "grad_norm": 0.004865554627031088, "learning_rate": 9.610439876356142e-07, "loss": 0.0003, "step": 96970 }, { "epoch": 1.638964704291762, "grad_norm": 0.04468980059027672, "learning_rate": 9.601747976752802e-07, "loss": 0.0026, "step": 96980 }, { "epoch": 1.6391337045706127, "grad_norm": 0.06228204444050789, "learning_rate": 9.593059591993132e-07, "loss": 0.0012, "step": 96990 }, { "epoch": 1.639302704849463, "grad_norm": 0.09721628576517105, "learning_rate": 9.58437472283304e-07, "loss": 0.0005, "step": 97000 }, { "epoch": 1.6394717051283134, "grad_norm": 0.04007471725344658, "learning_rate": 9.575693370028166e-07, "loss": 0.0005, "step": 97010 }, { "epoch": 1.6396407054071638, "grad_norm": 0.022833475843071938, "learning_rate": 9.567015534333835e-07, "loss": 0.0007, "step": 97020 }, { "epoch": 1.6398097056860144, "grad_norm": 0.035724788904190063, "learning_rate": 9.558341216505063e-07, "loss": 0.0011, "step": 97030 }, { "epoch": 1.639978705964865, "grad_norm": 0.0324830487370491, "learning_rate": 9.549670417296536e-07, "loss": 0.0007, "step": 97040 }, { "epoch": 1.6401477062437153, "grad_norm": 0.025162674486637115, "learning_rate": 9.541003137462685e-07, "loss": 0.0009, "step": 97050 }, { "epoch": 1.6403167065225657, "grad_norm": 0.025426583364605904, "learning_rate": 9.532339377757571e-07, "loss": 0.0005, "step": 97060 }, { "epoch": 1.6404857068014163, "grad_norm": 0.017605816945433617, "learning_rate": 9.523679138935022e-07, "loss": 0.0006, "step": 97070 }, { "epoch": 1.6406547070802668, "grad_norm": 0.04608120024204254, "learning_rate": 9.515022421748482e-07, "loss": 0.0011, "step": 97080 }, { "epoch": 1.6408237073591172, "grad_norm": 0.03623725846409798, "learning_rate": 9.506369226951156e-07, "loss": 0.0003, "step": 97090 }, { "epoch": 1.6409927076379676, "grad_norm": 0.04509522765874863, "learning_rate": 9.497719555295898e-07, "loss": 0.0007, "step": 97100 }, { "epoch": 1.641161707916818, "grad_norm": 0.009332438930869102, "learning_rate": 9.489073407535287e-07, "loss": 0.0005, "step": 97110 }, { "epoch": 1.6413307081956685, "grad_norm": 0.020322060212492943, "learning_rate": 9.480430784421552e-07, "loss": 0.0005, "step": 97120 }, { "epoch": 1.641499708474519, "grad_norm": 0.11147165298461914, "learning_rate": 9.471791686706677e-07, "loss": 0.001, "step": 97130 }, { "epoch": 1.6416687087533695, "grad_norm": 0.027426326647400856, "learning_rate": 9.463156115142275e-07, "loss": 0.0005, "step": 97140 }, { "epoch": 1.6418377090322198, "grad_norm": 0.008892586454749107, "learning_rate": 9.454524070479714e-07, "loss": 0.0009, "step": 97150 }, { "epoch": 1.6420067093110704, "grad_norm": 0.06547423452138901, "learning_rate": 9.445895553469986e-07, "loss": 0.0007, "step": 97160 }, { "epoch": 1.642175709589921, "grad_norm": 0.04201348125934601, "learning_rate": 9.437270564863854e-07, "loss": 0.0011, "step": 97170 }, { "epoch": 1.6423447098687713, "grad_norm": 0.0022331583313643932, "learning_rate": 9.428649105411697e-07, "loss": 0.0005, "step": 97180 }, { "epoch": 1.6425137101476217, "grad_norm": 0.13671652972698212, "learning_rate": 9.420031175863642e-07, "loss": 0.0013, "step": 97190 }, { "epoch": 1.642682710426472, "grad_norm": 0.050300270318984985, "learning_rate": 9.411416776969484e-07, "loss": 0.0006, "step": 97200 }, { "epoch": 1.6428517107053227, "grad_norm": 0.03513916954398155, "learning_rate": 9.40280590947874e-07, "loss": 0.0015, "step": 97210 }, { "epoch": 1.6430207109841732, "grad_norm": 0.016461919993162155, "learning_rate": 9.394198574140567e-07, "loss": 0.0004, "step": 97220 }, { "epoch": 1.6431897112630236, "grad_norm": 0.15564028918743134, "learning_rate": 9.385594771703871e-07, "loss": 0.0013, "step": 97230 }, { "epoch": 1.643358711541874, "grad_norm": 0.08642155677080154, "learning_rate": 9.376994502917197e-07, "loss": 0.001, "step": 97240 }, { "epoch": 1.6435277118207245, "grad_norm": 0.006080037914216518, "learning_rate": 9.368397768528825e-07, "loss": 0.0008, "step": 97250 }, { "epoch": 1.6436967120995751, "grad_norm": 0.14761480689048767, "learning_rate": 9.359804569286729e-07, "loss": 0.0009, "step": 97260 }, { "epoch": 1.6438657123784255, "grad_norm": 0.0032137467060238123, "learning_rate": 9.351214905938521e-07, "loss": 0.0009, "step": 97270 }, { "epoch": 1.6440347126572759, "grad_norm": 0.04017636552453041, "learning_rate": 9.342628779231582e-07, "loss": 0.0008, "step": 97280 }, { "epoch": 1.6442037129361262, "grad_norm": 0.0005415910272859037, "learning_rate": 9.334046189912915e-07, "loss": 0.0004, "step": 97290 }, { "epoch": 1.6443727132149768, "grad_norm": 0.08849450200796127, "learning_rate": 9.325467138729272e-07, "loss": 0.001, "step": 97300 }, { "epoch": 1.6445417134938274, "grad_norm": 0.0690179243683815, "learning_rate": 9.31689162642705e-07, "loss": 0.0008, "step": 97310 }, { "epoch": 1.6447107137726777, "grad_norm": 0.03570980206131935, "learning_rate": 9.308319653752379e-07, "loss": 0.0007, "step": 97320 }, { "epoch": 1.644879714051528, "grad_norm": 0.010233347304165363, "learning_rate": 9.299751221451042e-07, "loss": 0.0003, "step": 97330 }, { "epoch": 1.6450487143303787, "grad_norm": 0.055424634367227554, "learning_rate": 9.291186330268564e-07, "loss": 0.0007, "step": 97340 }, { "epoch": 1.645217714609229, "grad_norm": 0.028542617335915565, "learning_rate": 9.282624980950094e-07, "loss": 0.0005, "step": 97350 }, { "epoch": 1.6453867148880796, "grad_norm": 0.009859766811132431, "learning_rate": 9.274067174240548e-07, "loss": 0.0003, "step": 97360 }, { "epoch": 1.64555571516693, "grad_norm": 0.011371731758117676, "learning_rate": 9.265512910884466e-07, "loss": 0.0004, "step": 97370 }, { "epoch": 1.6457247154457804, "grad_norm": 0.0602949783205986, "learning_rate": 9.256962191626124e-07, "loss": 0.0008, "step": 97380 }, { "epoch": 1.645893715724631, "grad_norm": 0.03658745437860489, "learning_rate": 9.248415017209478e-07, "loss": 0.0007, "step": 97390 }, { "epoch": 1.6460627160034815, "grad_norm": 0.08593559265136719, "learning_rate": 9.23987138837818e-07, "loss": 0.0007, "step": 97400 }, { "epoch": 1.646231716282332, "grad_norm": 0.062391120940446854, "learning_rate": 9.231331305875552e-07, "loss": 0.0009, "step": 97410 }, { "epoch": 1.6464007165611823, "grad_norm": 0.02832197956740856, "learning_rate": 9.222794770444638e-07, "loss": 0.0006, "step": 97420 }, { "epoch": 1.6465697168400328, "grad_norm": 0.025280700996518135, "learning_rate": 9.214261782828133e-07, "loss": 0.0008, "step": 97430 }, { "epoch": 1.6467387171188832, "grad_norm": 0.22469237446784973, "learning_rate": 9.20573234376848e-07, "loss": 0.0012, "step": 97440 }, { "epoch": 1.6469077173977338, "grad_norm": 0.02634529396891594, "learning_rate": 9.197206454007756e-07, "loss": 0.0006, "step": 97450 }, { "epoch": 1.6470767176765841, "grad_norm": 0.018179452046751976, "learning_rate": 9.188684114287772e-07, "loss": 0.0006, "step": 97460 }, { "epoch": 1.6472457179554345, "grad_norm": 0.00996420904994011, "learning_rate": 9.180165325349999e-07, "loss": 0.0007, "step": 97470 }, { "epoch": 1.647414718234285, "grad_norm": 0.04182242974638939, "learning_rate": 9.171650087935624e-07, "loss": 0.0008, "step": 97480 }, { "epoch": 1.6475837185131357, "grad_norm": 0.014716528356075287, "learning_rate": 9.163138402785504e-07, "loss": 0.0008, "step": 97490 }, { "epoch": 1.647752718791986, "grad_norm": 0.013761723414063454, "learning_rate": 9.154630270640214e-07, "loss": 0.0007, "step": 97500 }, { "epoch": 1.6479217190708364, "grad_norm": 0.036597806960344315, "learning_rate": 9.146125692239976e-07, "loss": 0.0005, "step": 97510 }, { "epoch": 1.6480907193496868, "grad_norm": 0.0009444573661312461, "learning_rate": 9.137624668324741e-07, "loss": 0.0009, "step": 97520 }, { "epoch": 1.6482597196285373, "grad_norm": 0.061494581401348114, "learning_rate": 9.129127199634158e-07, "loss": 0.0012, "step": 97530 }, { "epoch": 1.648428719907388, "grad_norm": 0.07248084247112274, "learning_rate": 9.120633286907521e-07, "loss": 0.0006, "step": 97540 }, { "epoch": 1.6485977201862383, "grad_norm": 0.030622024089097977, "learning_rate": 9.112142930883866e-07, "loss": 0.0016, "step": 97550 }, { "epoch": 1.6487667204650887, "grad_norm": 0.055071569979190826, "learning_rate": 9.103656132301869e-07, "loss": 0.0005, "step": 97560 }, { "epoch": 1.6489357207439392, "grad_norm": 0.02567562833428383, "learning_rate": 9.095172891899939e-07, "loss": 0.0005, "step": 97570 }, { "epoch": 1.6491047210227898, "grad_norm": 0.05259372666478157, "learning_rate": 9.086693210416164e-07, "loss": 0.0007, "step": 97580 }, { "epoch": 1.6492737213016402, "grad_norm": 0.0008730893023312092, "learning_rate": 9.078217088588298e-07, "loss": 0.0005, "step": 97590 }, { "epoch": 1.6494427215804905, "grad_norm": 0.013485025614500046, "learning_rate": 9.069744527153812e-07, "loss": 0.0004, "step": 97600 }, { "epoch": 1.649611721859341, "grad_norm": 0.006705063860863447, "learning_rate": 9.061275526849883e-07, "loss": 0.0009, "step": 97610 }, { "epoch": 1.6497807221381915, "grad_norm": 0.07051364332437515, "learning_rate": 9.052810088413322e-07, "loss": 0.0008, "step": 97620 }, { "epoch": 1.649949722417042, "grad_norm": 0.029289480298757553, "learning_rate": 9.044348212580689e-07, "loss": 0.0008, "step": 97630 }, { "epoch": 1.6501187226958924, "grad_norm": 0.016281338408589363, "learning_rate": 9.035889900088179e-07, "loss": 0.0007, "step": 97640 }, { "epoch": 1.6502877229747428, "grad_norm": 0.00940323993563652, "learning_rate": 9.027435151671743e-07, "loss": 0.0021, "step": 97650 }, { "epoch": 1.6504567232535934, "grad_norm": 0.01000247523188591, "learning_rate": 9.018983968066947e-07, "loss": 0.0007, "step": 97660 }, { "epoch": 1.650625723532444, "grad_norm": 0.05016298592090607, "learning_rate": 9.010536350009119e-07, "loss": 0.0004, "step": 97670 }, { "epoch": 1.6507947238112943, "grad_norm": 0.04229207709431648, "learning_rate": 9.002092298233211e-07, "loss": 0.0005, "step": 97680 }, { "epoch": 1.6509637240901447, "grad_norm": 0.03415260091423988, "learning_rate": 8.99365181347393e-07, "loss": 0.0005, "step": 97690 }, { "epoch": 1.651132724368995, "grad_norm": 0.07657230645418167, "learning_rate": 8.985214896465604e-07, "loss": 0.0008, "step": 97700 }, { "epoch": 1.6513017246478456, "grad_norm": 0.0001833623682614416, "learning_rate": 8.97678154794232e-07, "loss": 0.0002, "step": 97710 }, { "epoch": 1.6514707249266962, "grad_norm": 0.03562748432159424, "learning_rate": 8.96835176863779e-07, "loss": 0.0005, "step": 97720 }, { "epoch": 1.6516397252055466, "grad_norm": 0.02505319006741047, "learning_rate": 8.95992555928547e-07, "loss": 0.0004, "step": 97730 }, { "epoch": 1.651808725484397, "grad_norm": 0.11562114953994751, "learning_rate": 8.95150292061846e-07, "loss": 0.0012, "step": 97740 }, { "epoch": 1.6519777257632475, "grad_norm": 0.008282345719635487, "learning_rate": 8.943083853369583e-07, "loss": 0.0004, "step": 97750 }, { "epoch": 1.6521467260420981, "grad_norm": 0.012088058516383171, "learning_rate": 8.934668358271342e-07, "loss": 0.0008, "step": 97760 }, { "epoch": 1.6523157263209485, "grad_norm": 0.01571849174797535, "learning_rate": 8.926256436055914e-07, "loss": 0.0009, "step": 97770 }, { "epoch": 1.6524847265997988, "grad_norm": 0.020298385992646217, "learning_rate": 8.917848087455178e-07, "loss": 0.0005, "step": 97780 }, { "epoch": 1.6526537268786492, "grad_norm": 0.024168308824300766, "learning_rate": 8.909443313200706e-07, "loss": 0.0005, "step": 97790 }, { "epoch": 1.6528227271574998, "grad_norm": 0.010439195670187473, "learning_rate": 8.901042114023766e-07, "loss": 0.0007, "step": 97800 }, { "epoch": 1.6529917274363504, "grad_norm": 0.06465132534503937, "learning_rate": 8.892644490655284e-07, "loss": 0.001, "step": 97810 }, { "epoch": 1.6531607277152007, "grad_norm": 0.015410232357680798, "learning_rate": 8.88425044382591e-07, "loss": 0.0006, "step": 97820 }, { "epoch": 1.653329727994051, "grad_norm": 0.08747339993715286, "learning_rate": 8.875859974265944e-07, "loss": 0.0011, "step": 97830 }, { "epoch": 1.6534987282729017, "grad_norm": 0.04376240819692612, "learning_rate": 8.867473082705424e-07, "loss": 0.0007, "step": 97840 }, { "epoch": 1.6536677285517523, "grad_norm": 0.0737852230668068, "learning_rate": 8.859089769874024e-07, "loss": 0.0008, "step": 97850 }, { "epoch": 1.6538367288306026, "grad_norm": 0.05410739779472351, "learning_rate": 8.850710036501165e-07, "loss": 0.0006, "step": 97860 }, { "epoch": 1.654005729109453, "grad_norm": 0.110353983938694, "learning_rate": 8.842333883315884e-07, "loss": 0.0008, "step": 97870 }, { "epoch": 1.6541747293883033, "grad_norm": 0.00010571058373898268, "learning_rate": 8.83396131104699e-07, "loss": 0.0005, "step": 97880 }, { "epoch": 1.654343729667154, "grad_norm": 0.03712800517678261, "learning_rate": 8.825592320422899e-07, "loss": 0.0012, "step": 97890 }, { "epoch": 1.6545127299460045, "grad_norm": 0.05840837582945824, "learning_rate": 8.817226912171783e-07, "loss": 0.0006, "step": 97900 }, { "epoch": 1.6546817302248549, "grad_norm": 0.024938484653830528, "learning_rate": 8.808865087021451e-07, "loss": 0.0005, "step": 97910 }, { "epoch": 1.6548507305037052, "grad_norm": 0.0135054225102067, "learning_rate": 8.800506845699441e-07, "loss": 0.0003, "step": 97920 }, { "epoch": 1.6550197307825558, "grad_norm": 0.003684627590700984, "learning_rate": 8.79215218893294e-07, "loss": 0.0003, "step": 97930 }, { "epoch": 1.6551887310614064, "grad_norm": 0.05478254705667496, "learning_rate": 8.783801117448853e-07, "loss": 0.0006, "step": 97940 }, { "epoch": 1.6553577313402568, "grad_norm": 0.03860902413725853, "learning_rate": 8.775453631973785e-07, "loss": 0.0003, "step": 97950 }, { "epoch": 1.6555267316191071, "grad_norm": 0.02583891712129116, "learning_rate": 8.76710973323397e-07, "loss": 0.0011, "step": 97960 }, { "epoch": 1.6556957318979575, "grad_norm": 0.0985838994383812, "learning_rate": 8.758769421955388e-07, "loss": 0.0007, "step": 97970 }, { "epoch": 1.655864732176808, "grad_norm": 0.008611098863184452, "learning_rate": 8.750432698863698e-07, "loss": 0.001, "step": 97980 }, { "epoch": 1.6560337324556587, "grad_norm": 0.06516892462968826, "learning_rate": 8.742099564684209e-07, "loss": 0.001, "step": 97990 }, { "epoch": 1.656202732734509, "grad_norm": 0.02409842424094677, "learning_rate": 8.733770020141969e-07, "loss": 0.0014, "step": 98000 }, { "epoch": 1.6563717330133594, "grad_norm": 0.049286551773548126, "learning_rate": 8.725444065961663e-07, "loss": 0.0009, "step": 98010 }, { "epoch": 1.65654073329221, "grad_norm": 0.023762457072734833, "learning_rate": 8.717121702867709e-07, "loss": 0.0008, "step": 98020 }, { "epoch": 1.6567097335710605, "grad_norm": 0.04187716171145439, "learning_rate": 8.708802931584193e-07, "loss": 0.001, "step": 98030 }, { "epoch": 1.656878733849911, "grad_norm": 0.044227421283721924, "learning_rate": 8.700487752834875e-07, "loss": 0.0003, "step": 98040 }, { "epoch": 1.6570477341287613, "grad_norm": 0.049865856766700745, "learning_rate": 8.692176167343231e-07, "loss": 0.0008, "step": 98050 }, { "epoch": 1.6572167344076116, "grad_norm": 0.047756098210811615, "learning_rate": 8.683868175832394e-07, "loss": 0.0006, "step": 98060 }, { "epoch": 1.6573857346864622, "grad_norm": 0.02121659927070141, "learning_rate": 8.675563779025214e-07, "loss": 0.0006, "step": 98070 }, { "epoch": 1.6575547349653128, "grad_norm": 0.024621332064270973, "learning_rate": 8.667262977644197e-07, "loss": 0.0005, "step": 98080 }, { "epoch": 1.6577237352441632, "grad_norm": 0.0006263728719204664, "learning_rate": 8.658965772411576e-07, "loss": 0.0009, "step": 98090 }, { "epoch": 1.6578927355230135, "grad_norm": 0.01369393803179264, "learning_rate": 8.650672164049217e-07, "loss": 0.0007, "step": 98100 }, { "epoch": 1.6580617358018641, "grad_norm": 0.001999722560867667, "learning_rate": 8.642382153278739e-07, "loss": 0.0004, "step": 98110 }, { "epoch": 1.6582307360807147, "grad_norm": 0.002143674297258258, "learning_rate": 8.634095740821385e-07, "loss": 0.0007, "step": 98120 }, { "epoch": 1.658399736359565, "grad_norm": 0.04328879714012146, "learning_rate": 8.625812927398136e-07, "loss": 0.0005, "step": 98130 }, { "epoch": 1.6585687366384154, "grad_norm": 0.022024834528565407, "learning_rate": 8.617533713729609e-07, "loss": 0.001, "step": 98140 }, { "epoch": 1.6587377369172658, "grad_norm": 0.0001287844788748771, "learning_rate": 8.609258100536155e-07, "loss": 0.0008, "step": 98150 }, { "epoch": 1.6589067371961164, "grad_norm": 0.02500726468861103, "learning_rate": 8.600986088537783e-07, "loss": 0.002, "step": 98160 }, { "epoch": 1.659075737474967, "grad_norm": 0.12904953956604004, "learning_rate": 8.592717678454222e-07, "loss": 0.0006, "step": 98170 }, { "epoch": 1.6592447377538173, "grad_norm": 0.04279755800962448, "learning_rate": 8.584452871004834e-07, "loss": 0.0005, "step": 98180 }, { "epoch": 1.6594137380326677, "grad_norm": 0.007707957644015551, "learning_rate": 8.576191666908717e-07, "loss": 0.0007, "step": 98190 }, { "epoch": 1.6595827383115183, "grad_norm": 0.12773577868938446, "learning_rate": 8.56793406688462e-07, "loss": 0.0009, "step": 98200 }, { "epoch": 1.6597517385903686, "grad_norm": 0.02980293706059456, "learning_rate": 8.55968007165101e-07, "loss": 0.0008, "step": 98210 }, { "epoch": 1.6599207388692192, "grad_norm": 0.06709340214729309, "learning_rate": 8.55142968192601e-07, "loss": 0.0008, "step": 98220 }, { "epoch": 1.6600897391480696, "grad_norm": 0.023071642965078354, "learning_rate": 8.54318289842746e-07, "loss": 0.0015, "step": 98230 }, { "epoch": 1.66025873942692, "grad_norm": 0.003490469651296735, "learning_rate": 8.534939721872848e-07, "loss": 0.0013, "step": 98240 }, { "epoch": 1.6604277397057705, "grad_norm": 0.034906335175037384, "learning_rate": 8.526700152979395e-07, "loss": 0.0006, "step": 98250 }, { "epoch": 1.660596739984621, "grad_norm": 0.025669587776064873, "learning_rate": 8.51846419246396e-07, "loss": 0.0029, "step": 98260 }, { "epoch": 1.6607657402634715, "grad_norm": 0.05409591645002365, "learning_rate": 8.510231841043137e-07, "loss": 0.0005, "step": 98270 }, { "epoch": 1.6609347405423218, "grad_norm": 0.01553164143115282, "learning_rate": 8.502003099433154e-07, "loss": 0.0011, "step": 98280 }, { "epoch": 1.6611037408211724, "grad_norm": 0.05354379862546921, "learning_rate": 8.493777968349959e-07, "loss": 0.0006, "step": 98290 }, { "epoch": 1.6612727411000228, "grad_norm": 0.06067023053765297, "learning_rate": 8.485556448509202e-07, "loss": 0.0005, "step": 98300 }, { "epoch": 1.6614417413788733, "grad_norm": 0.029005998745560646, "learning_rate": 8.477338540626157e-07, "loss": 0.0007, "step": 98310 }, { "epoch": 1.6616107416577237, "grad_norm": 0.0032901307567954063, "learning_rate": 8.46912424541586e-07, "loss": 0.0005, "step": 98320 }, { "epoch": 1.661779741936574, "grad_norm": 0.011192691512405872, "learning_rate": 8.460913563592959e-07, "loss": 0.0006, "step": 98330 }, { "epoch": 1.6619487422154247, "grad_norm": 0.02068910375237465, "learning_rate": 8.452706495871837e-07, "loss": 0.0012, "step": 98340 }, { "epoch": 1.6621177424942752, "grad_norm": 0.054953016340732574, "learning_rate": 8.444503042966567e-07, "loss": 0.0005, "step": 98350 }, { "epoch": 1.6622867427731256, "grad_norm": 0.05563584342598915, "learning_rate": 8.436303205590863e-07, "loss": 0.0004, "step": 98360 }, { "epoch": 1.662455743051976, "grad_norm": 0.08429501205682755, "learning_rate": 8.428106984458156e-07, "loss": 0.0006, "step": 98370 }, { "epoch": 1.6626247433308265, "grad_norm": 0.12233336269855499, "learning_rate": 8.419914380281579e-07, "loss": 0.0007, "step": 98380 }, { "epoch": 1.662793743609677, "grad_norm": 0.1364852637052536, "learning_rate": 8.411725393773895e-07, "loss": 0.0006, "step": 98390 }, { "epoch": 1.6629627438885275, "grad_norm": 0.05216464400291443, "learning_rate": 8.403540025647616e-07, "loss": 0.001, "step": 98400 }, { "epoch": 1.6631317441673779, "grad_norm": 0.04671711102128029, "learning_rate": 8.395358276614879e-07, "loss": 0.0006, "step": 98410 }, { "epoch": 1.6633007444462282, "grad_norm": 0.027912594377994537, "learning_rate": 8.387180147387569e-07, "loss": 0.0007, "step": 98420 }, { "epoch": 1.6634697447250788, "grad_norm": 0.0016561595257371664, "learning_rate": 8.379005638677184e-07, "loss": 0.001, "step": 98430 }, { "epoch": 1.6636387450039294, "grad_norm": 0.040859147906303406, "learning_rate": 8.37083475119499e-07, "loss": 0.0004, "step": 98440 }, { "epoch": 1.6638077452827797, "grad_norm": 0.005387297365814447, "learning_rate": 8.362667485651849e-07, "loss": 0.0004, "step": 98450 }, { "epoch": 1.66397674556163, "grad_norm": 0.02775135263800621, "learning_rate": 8.354503842758388e-07, "loss": 0.0002, "step": 98460 }, { "epoch": 1.6641457458404805, "grad_norm": 0.0674198791384697, "learning_rate": 8.346343823224862e-07, "loss": 0.0004, "step": 98470 }, { "epoch": 1.664314746119331, "grad_norm": 0.032847706228494644, "learning_rate": 8.338187427761251e-07, "loss": 0.0008, "step": 98480 }, { "epoch": 1.6644837463981816, "grad_norm": 0.05390644818544388, "learning_rate": 8.330034657077174e-07, "loss": 0.0007, "step": 98490 }, { "epoch": 1.664652746677032, "grad_norm": 0.020645366981625557, "learning_rate": 8.321885511881994e-07, "loss": 0.0015, "step": 98500 }, { "epoch": 1.6648217469558824, "grad_norm": 0.026290608569979668, "learning_rate": 8.313739992884695e-07, "loss": 0.0005, "step": 98510 }, { "epoch": 1.664990747234733, "grad_norm": 0.023746129125356674, "learning_rate": 8.305598100793993e-07, "loss": 0.0004, "step": 98520 }, { "epoch": 1.6651597475135835, "grad_norm": 0.02805907279253006, "learning_rate": 8.297459836318267e-07, "loss": 0.0019, "step": 98530 }, { "epoch": 1.665328747792434, "grad_norm": 0.01339644007384777, "learning_rate": 8.289325200165605e-07, "loss": 0.0005, "step": 98540 }, { "epoch": 1.6654977480712843, "grad_norm": 0.05567881464958191, "learning_rate": 8.281194193043729e-07, "loss": 0.0007, "step": 98550 }, { "epoch": 1.6656667483501346, "grad_norm": 0.00011985190212726593, "learning_rate": 8.273066815660086e-07, "loss": 0.0011, "step": 98560 }, { "epoch": 1.6658357486289852, "grad_norm": 0.06024892255663872, "learning_rate": 8.26494306872182e-07, "loss": 0.0005, "step": 98570 }, { "epoch": 1.6660047489078358, "grad_norm": 0.008531689643859863, "learning_rate": 8.256822952935705e-07, "loss": 0.0003, "step": 98580 }, { "epoch": 1.6661737491866861, "grad_norm": 0.03165486827492714, "learning_rate": 8.248706469008255e-07, "loss": 0.0007, "step": 98590 }, { "epoch": 1.6663427494655365, "grad_norm": 0.07225144654512405, "learning_rate": 8.240593617645621e-07, "loss": 0.0007, "step": 98600 }, { "epoch": 1.666511749744387, "grad_norm": 0.015458598732948303, "learning_rate": 8.232484399553681e-07, "loss": 0.0001, "step": 98610 }, { "epoch": 1.6666807500232377, "grad_norm": 0.00764080137014389, "learning_rate": 8.224378815437955e-07, "loss": 0.0022, "step": 98620 }, { "epoch": 1.666849750302088, "grad_norm": 0.03568202629685402, "learning_rate": 8.216276866003692e-07, "loss": 0.0007, "step": 98630 }, { "epoch": 1.6670187505809384, "grad_norm": 0.07748332619667053, "learning_rate": 8.208178551955776e-07, "loss": 0.0006, "step": 98640 }, { "epoch": 1.6671877508597888, "grad_norm": 0.05307856202125549, "learning_rate": 8.200083873998827e-07, "loss": 0.0006, "step": 98650 }, { "epoch": 1.6673567511386393, "grad_norm": 0.015796871855854988, "learning_rate": 8.191992832837087e-07, "loss": 0.0007, "step": 98660 }, { "epoch": 1.66752575141749, "grad_norm": 0.00500758970156312, "learning_rate": 8.183905429174554e-07, "loss": 0.0006, "step": 98670 }, { "epoch": 1.6676947516963403, "grad_norm": 0.00411376915872097, "learning_rate": 8.175821663714839e-07, "loss": 0.0009, "step": 98680 }, { "epoch": 1.6678637519751907, "grad_norm": 0.018853604793548584, "learning_rate": 8.167741537161289e-07, "loss": 0.0012, "step": 98690 }, { "epoch": 1.6680327522540412, "grad_norm": 0.10769926756620407, "learning_rate": 8.159665050216902e-07, "loss": 0.0007, "step": 98700 }, { "epoch": 1.6682017525328918, "grad_norm": 0.3210642337799072, "learning_rate": 8.151592203584374e-07, "loss": 0.0004, "step": 98710 }, { "epoch": 1.6683707528117422, "grad_norm": 0.0003552972339093685, "learning_rate": 8.143522997966097e-07, "loss": 0.001, "step": 98720 }, { "epoch": 1.6685397530905925, "grad_norm": 0.041418470442295074, "learning_rate": 8.135457434064104e-07, "loss": 0.0012, "step": 98730 }, { "epoch": 1.668708753369443, "grad_norm": 0.04489567130804062, "learning_rate": 8.127395512580155e-07, "loss": 0.0006, "step": 98740 }, { "epoch": 1.6688777536482935, "grad_norm": 0.04523347318172455, "learning_rate": 8.119337234215686e-07, "loss": 0.0015, "step": 98750 }, { "epoch": 1.669046753927144, "grad_norm": 0.02380324713885784, "learning_rate": 8.111282599671783e-07, "loss": 0.0005, "step": 98760 }, { "epoch": 1.6692157542059944, "grad_norm": 0.04562634229660034, "learning_rate": 8.103231609649259e-07, "loss": 0.0013, "step": 98770 }, { "epoch": 1.6693847544848448, "grad_norm": 0.06537719070911407, "learning_rate": 8.095184264848566e-07, "loss": 0.0012, "step": 98780 }, { "epoch": 1.6695537547636954, "grad_norm": 0.019319357350468636, "learning_rate": 8.087140565969881e-07, "loss": 0.0007, "step": 98790 }, { "epoch": 1.669722755042546, "grad_norm": 0.19342520833015442, "learning_rate": 8.079100513713045e-07, "loss": 0.0005, "step": 98800 }, { "epoch": 1.6698917553213963, "grad_norm": 0.012815208174288273, "learning_rate": 8.071064108777566e-07, "loss": 0.0006, "step": 98810 }, { "epoch": 1.6700607556002467, "grad_norm": 0.010864531621336937, "learning_rate": 8.063031351862672e-07, "loss": 0.0004, "step": 98820 }, { "epoch": 1.670229755879097, "grad_norm": 0.013417099602520466, "learning_rate": 8.055002243667232e-07, "loss": 0.0014, "step": 98830 }, { "epoch": 1.6703987561579476, "grad_norm": 0.06046375259757042, "learning_rate": 8.046976784889832e-07, "loss": 0.0005, "step": 98840 }, { "epoch": 1.6705677564367982, "grad_norm": 0.027439545840024948, "learning_rate": 8.038954976228708e-07, "loss": 0.0007, "step": 98850 }, { "epoch": 1.6707367567156486, "grad_norm": 0.007659797091037035, "learning_rate": 8.030936818381818e-07, "loss": 0.0004, "step": 98860 }, { "epoch": 1.670905756994499, "grad_norm": 0.04878820851445198, "learning_rate": 8.02292231204676e-07, "loss": 0.0002, "step": 98870 }, { "epoch": 1.6710747572733495, "grad_norm": 0.026526227593421936, "learning_rate": 8.014911457920854e-07, "loss": 0.0004, "step": 98880 }, { "epoch": 1.6712437575522001, "grad_norm": 0.11598348617553711, "learning_rate": 8.006904256701064e-07, "loss": 0.0005, "step": 98890 }, { "epoch": 1.6714127578310505, "grad_norm": 0.0019160934025421739, "learning_rate": 7.998900709084073e-07, "loss": 0.0006, "step": 98900 }, { "epoch": 1.6715817581099008, "grad_norm": 0.004642430692911148, "learning_rate": 7.990900815766212e-07, "loss": 0.0005, "step": 98910 }, { "epoch": 1.6717507583887512, "grad_norm": 0.06269663572311401, "learning_rate": 7.982904577443517e-07, "loss": 0.0007, "step": 98920 }, { "epoch": 1.6719197586676018, "grad_norm": 0.03629099577665329, "learning_rate": 7.974911994811701e-07, "loss": 0.0003, "step": 98930 }, { "epoch": 1.6720887589464524, "grad_norm": 0.04994253069162369, "learning_rate": 7.966923068566173e-07, "loss": 0.001, "step": 98940 }, { "epoch": 1.6722577592253027, "grad_norm": 0.08595866709947586, "learning_rate": 7.958937799401977e-07, "loss": 0.0007, "step": 98950 }, { "epoch": 1.672426759504153, "grad_norm": 0.04729030281305313, "learning_rate": 7.950956188013903e-07, "loss": 0.0006, "step": 98960 }, { "epoch": 1.6725957597830037, "grad_norm": 0.06552130728960037, "learning_rate": 7.942978235096355e-07, "loss": 0.0011, "step": 98970 }, { "epoch": 1.6727647600618543, "grad_norm": 0.013469538651406765, "learning_rate": 7.935003941343489e-07, "loss": 0.0005, "step": 98980 }, { "epoch": 1.6729337603407046, "grad_norm": 0.028116554021835327, "learning_rate": 7.927033307449072e-07, "loss": 0.0007, "step": 98990 }, { "epoch": 1.673102760619555, "grad_norm": 0.0238230898976326, "learning_rate": 7.919066334106623e-07, "loss": 0.0003, "step": 99000 }, { "epoch": 1.6732717608984053, "grad_norm": 0.02308010868728161, "learning_rate": 7.911103022009276e-07, "loss": 0.0009, "step": 99010 }, { "epoch": 1.673440761177256, "grad_norm": 0.008989348076283932, "learning_rate": 7.903143371849903e-07, "loss": 0.0004, "step": 99020 }, { "epoch": 1.6736097614561065, "grad_norm": 0.0486636646091938, "learning_rate": 7.895187384321006e-07, "loss": 0.0003, "step": 99030 }, { "epoch": 1.6737787617349569, "grad_norm": 0.014975378289818764, "learning_rate": 7.887235060114812e-07, "loss": 0.0012, "step": 99040 }, { "epoch": 1.6739477620138072, "grad_norm": 0.04476180300116539, "learning_rate": 7.879286399923219e-07, "loss": 0.0007, "step": 99050 }, { "epoch": 1.6741167622926578, "grad_norm": 0.03560128062963486, "learning_rate": 7.871341404437776e-07, "loss": 0.0004, "step": 99060 }, { "epoch": 1.6742857625715084, "grad_norm": 0.024541029706597328, "learning_rate": 7.863400074349764e-07, "loss": 0.0003, "step": 99070 }, { "epoch": 1.6744547628503588, "grad_norm": 0.03204498067498207, "learning_rate": 7.85546241035009e-07, "loss": 0.0003, "step": 99080 }, { "epoch": 1.6746237631292091, "grad_norm": 0.14887754619121552, "learning_rate": 7.847528413129391e-07, "loss": 0.0007, "step": 99090 }, { "epoch": 1.6747927634080595, "grad_norm": 0.08677040785551071, "learning_rate": 7.839598083377941e-07, "loss": 0.0005, "step": 99100 }, { "epoch": 1.67496176368691, "grad_norm": 0.05007180571556091, "learning_rate": 7.831671421785736e-07, "loss": 0.0008, "step": 99110 }, { "epoch": 1.6751307639657607, "grad_norm": 0.037230510264635086, "learning_rate": 7.823748429042421e-07, "loss": 0.0006, "step": 99120 }, { "epoch": 1.675299764244611, "grad_norm": 0.036416828632354736, "learning_rate": 7.815829105837353e-07, "loss": 0.0006, "step": 99130 }, { "epoch": 1.6754687645234614, "grad_norm": 0.06408748030662537, "learning_rate": 7.807913452859534e-07, "loss": 0.0004, "step": 99140 }, { "epoch": 1.675637764802312, "grad_norm": 0.004515242297202349, "learning_rate": 7.800001470797675e-07, "loss": 0.0006, "step": 99150 }, { "epoch": 1.6758067650811623, "grad_norm": 0.0739276334643364, "learning_rate": 7.792093160340142e-07, "loss": 0.0008, "step": 99160 }, { "epoch": 1.675975765360013, "grad_norm": 0.03279067948460579, "learning_rate": 7.784188522175018e-07, "loss": 0.0008, "step": 99170 }, { "epoch": 1.6761447656388633, "grad_norm": 0.13736632466316223, "learning_rate": 7.776287556990025e-07, "loss": 0.0006, "step": 99180 }, { "epoch": 1.6763137659177136, "grad_norm": 0.042497534304857254, "learning_rate": 7.768390265472598e-07, "loss": 0.0007, "step": 99190 }, { "epoch": 1.6764827661965642, "grad_norm": 0.015636952593922615, "learning_rate": 7.760496648309829e-07, "loss": 0.0007, "step": 99200 }, { "epoch": 1.6766517664754148, "grad_norm": 0.016732599586248398, "learning_rate": 7.752606706188514e-07, "loss": 0.0008, "step": 99210 }, { "epoch": 1.6768207667542652, "grad_norm": 0.0025693783536553383, "learning_rate": 7.7447204397951e-07, "loss": 0.0002, "step": 99220 }, { "epoch": 1.6769897670331155, "grad_norm": 0.014126485213637352, "learning_rate": 7.736837849815749e-07, "loss": 0.0006, "step": 99230 }, { "epoch": 1.6771587673119661, "grad_norm": 0.007326044607907534, "learning_rate": 7.728958936936265e-07, "loss": 0.001, "step": 99240 }, { "epoch": 1.6773277675908165, "grad_norm": 0.015904217958450317, "learning_rate": 7.721083701842175e-07, "loss": 0.0006, "step": 99250 }, { "epoch": 1.677496767869667, "grad_norm": 0.04559899866580963, "learning_rate": 7.713212145218629e-07, "loss": 0.0007, "step": 99260 }, { "epoch": 1.6776657681485174, "grad_norm": 0.01228527631610632, "learning_rate": 7.705344267750531e-07, "loss": 0.0006, "step": 99270 }, { "epoch": 1.6778347684273678, "grad_norm": 0.08058196306228638, "learning_rate": 7.697480070122388e-07, "loss": 0.0006, "step": 99280 }, { "epoch": 1.6780037687062184, "grad_norm": 0.0017960042459890246, "learning_rate": 7.68961955301844e-07, "loss": 0.0007, "step": 99290 }, { "epoch": 1.678172768985069, "grad_norm": 0.055955979973077774, "learning_rate": 7.681762717122593e-07, "loss": 0.0007, "step": 99300 }, { "epoch": 1.6783417692639193, "grad_norm": 0.03739108517765999, "learning_rate": 7.673909563118431e-07, "loss": 0.0004, "step": 99310 }, { "epoch": 1.6785107695427697, "grad_norm": 0.009558171033859253, "learning_rate": 7.666060091689204e-07, "loss": 0.0004, "step": 99320 }, { "epoch": 1.6786797698216203, "grad_norm": 0.03387034311890602, "learning_rate": 7.658214303517864e-07, "loss": 0.0011, "step": 99330 }, { "epoch": 1.6788487701004706, "grad_norm": 0.009359865449368954, "learning_rate": 7.650372199287038e-07, "loss": 0.0005, "step": 99340 }, { "epoch": 1.6790177703793212, "grad_norm": 0.01976497657597065, "learning_rate": 7.642533779679006e-07, "loss": 0.0007, "step": 99350 }, { "epoch": 1.6791867706581716, "grad_norm": 0.04325069859623909, "learning_rate": 7.634699045375782e-07, "loss": 0.0008, "step": 99360 }, { "epoch": 1.679355770937022, "grad_norm": 0.004810777027159929, "learning_rate": 7.626867997058984e-07, "loss": 0.0005, "step": 99370 }, { "epoch": 1.6795247712158725, "grad_norm": 0.03099067695438862, "learning_rate": 7.619040635409991e-07, "loss": 0.0009, "step": 99380 }, { "epoch": 1.679693771494723, "grad_norm": 0.09227609634399414, "learning_rate": 7.61121696110979e-07, "loss": 0.0005, "step": 99390 }, { "epoch": 1.6798627717735735, "grad_norm": 0.016670966520905495, "learning_rate": 7.6033969748391e-07, "loss": 0.0005, "step": 99400 }, { "epoch": 1.6800317720524238, "grad_norm": 0.6341423392295837, "learning_rate": 7.595580677278286e-07, "loss": 0.0013, "step": 99410 }, { "epoch": 1.6802007723312742, "grad_norm": 0.020962310954928398, "learning_rate": 7.587768069107409e-07, "loss": 0.0008, "step": 99420 }, { "epoch": 1.6803697726101248, "grad_norm": 0.08138630539178848, "learning_rate": 7.579959151006195e-07, "loss": 0.0006, "step": 99430 }, { "epoch": 1.6805387728889754, "grad_norm": 0.03248428553342819, "learning_rate": 7.57215392365408e-07, "loss": 0.0004, "step": 99440 }, { "epoch": 1.6807077731678257, "grad_norm": 8.540959970559925e-05, "learning_rate": 7.564352387730123e-07, "loss": 0.0005, "step": 99450 }, { "epoch": 1.680876773446676, "grad_norm": 0.14268356561660767, "learning_rate": 7.55655454391313e-07, "loss": 0.0023, "step": 99460 }, { "epoch": 1.6810457737255267, "grad_norm": 0.11119076609611511, "learning_rate": 7.548760392881521e-07, "loss": 0.0005, "step": 99470 }, { "epoch": 1.6812147740043772, "grad_norm": 0.07005003094673157, "learning_rate": 7.540969935313441e-07, "loss": 0.0005, "step": 99480 }, { "epoch": 1.6813837742832276, "grad_norm": 0.00016238611715380102, "learning_rate": 7.533183171886705e-07, "loss": 0.0005, "step": 99490 }, { "epoch": 1.681552774562078, "grad_norm": 0.023415926843881607, "learning_rate": 7.52540010327878e-07, "loss": 0.0007, "step": 99500 }, { "epoch": 1.6817217748409283, "grad_norm": 0.05968974903225899, "learning_rate": 7.517620730166836e-07, "loss": 0.0021, "step": 99510 }, { "epoch": 1.681890775119779, "grad_norm": 0.005587894469499588, "learning_rate": 7.509845053227732e-07, "loss": 0.0012, "step": 99520 }, { "epoch": 1.6820597753986295, "grad_norm": 0.10403554141521454, "learning_rate": 7.502073073137972e-07, "loss": 0.0011, "step": 99530 }, { "epoch": 1.6822287756774799, "grad_norm": 0.0056676738895475864, "learning_rate": 7.49430479057377e-07, "loss": 0.0004, "step": 99540 }, { "epoch": 1.6823977759563302, "grad_norm": 0.039203256368637085, "learning_rate": 7.486540206210984e-07, "loss": 0.0008, "step": 99550 }, { "epoch": 1.6825667762351808, "grad_norm": 0.02632479928433895, "learning_rate": 7.478779320725183e-07, "loss": 0.0005, "step": 99560 }, { "epoch": 1.6827357765140314, "grad_norm": 0.007668390870094299, "learning_rate": 7.471022134791611e-07, "loss": 0.0014, "step": 99570 }, { "epoch": 1.6829047767928818, "grad_norm": 0.009151371195912361, "learning_rate": 7.463268649085159e-07, "loss": 0.0006, "step": 99580 }, { "epoch": 1.6830737770717321, "grad_norm": 0.01876378431916237, "learning_rate": 7.455518864280448e-07, "loss": 0.0005, "step": 99590 }, { "epoch": 1.6832427773505825, "grad_norm": 0.03298467770218849, "learning_rate": 7.44777278105171e-07, "loss": 0.0005, "step": 99600 }, { "epoch": 1.683411777629433, "grad_norm": 0.019236354157328606, "learning_rate": 7.440030400072923e-07, "loss": 0.0003, "step": 99610 }, { "epoch": 1.6835807779082836, "grad_norm": 0.03573700040578842, "learning_rate": 7.432291722017693e-07, "loss": 0.0004, "step": 99620 }, { "epoch": 1.683749778187134, "grad_norm": 0.0062813530676066875, "learning_rate": 7.424556747559331e-07, "loss": 0.0008, "step": 99630 }, { "epoch": 1.6839187784659844, "grad_norm": 0.06845460832118988, "learning_rate": 7.416825477370809e-07, "loss": 0.0009, "step": 99640 }, { "epoch": 1.684087778744835, "grad_norm": 0.06802421808242798, "learning_rate": 7.409097912124801e-07, "loss": 0.0005, "step": 99650 }, { "epoch": 1.6842567790236855, "grad_norm": 0.02949458360671997, "learning_rate": 7.40137405249362e-07, "loss": 0.0003, "step": 99660 }, { "epoch": 1.684425779302536, "grad_norm": 0.03888123482465744, "learning_rate": 7.393653899149295e-07, "loss": 0.0008, "step": 99670 }, { "epoch": 1.6845947795813863, "grad_norm": 0.11102794855833054, "learning_rate": 7.38593745276352e-07, "loss": 0.0004, "step": 99680 }, { "epoch": 1.6847637798602366, "grad_norm": 0.0057936361990869045, "learning_rate": 7.378224714007648e-07, "loss": 0.0004, "step": 99690 }, { "epoch": 1.6849327801390872, "grad_norm": 0.002656053751707077, "learning_rate": 7.370515683552731e-07, "loss": 0.0007, "step": 99700 }, { "epoch": 1.6851017804179378, "grad_norm": 0.000269414100330323, "learning_rate": 7.362810362069511e-07, "loss": 0.0003, "step": 99710 }, { "epoch": 1.6852707806967882, "grad_norm": 0.0326961912214756, "learning_rate": 7.355108750228357e-07, "loss": 0.0005, "step": 99720 }, { "epoch": 1.6854397809756385, "grad_norm": 0.07516507059335709, "learning_rate": 7.347410848699371e-07, "loss": 0.0004, "step": 99730 }, { "epoch": 1.685608781254489, "grad_norm": 0.10635815560817719, "learning_rate": 7.339716658152285e-07, "loss": 0.0009, "step": 99740 }, { "epoch": 1.6857777815333397, "grad_norm": 0.09443478286266327, "learning_rate": 7.332026179256557e-07, "loss": 0.0006, "step": 99750 }, { "epoch": 1.68594678181219, "grad_norm": 0.04211077466607094, "learning_rate": 7.32433941268127e-07, "loss": 0.0004, "step": 99760 }, { "epoch": 1.6861157820910404, "grad_norm": 0.01785372570157051, "learning_rate": 7.316656359095236e-07, "loss": 0.0004, "step": 99770 }, { "epoch": 1.6862847823698908, "grad_norm": 0.003708686213940382, "learning_rate": 7.308977019166891e-07, "loss": 0.0003, "step": 99780 }, { "epoch": 1.6864537826487414, "grad_norm": 0.014424859546124935, "learning_rate": 7.301301393564397e-07, "loss": 0.0006, "step": 99790 }, { "epoch": 1.686622782927592, "grad_norm": 0.056485239416360855, "learning_rate": 7.293629482955555e-07, "loss": 0.0007, "step": 99800 }, { "epoch": 1.6867917832064423, "grad_norm": 0.004605097230523825, "learning_rate": 7.285961288007865e-07, "loss": 0.0006, "step": 99810 }, { "epoch": 1.6869607834852927, "grad_norm": 0.012538746930658817, "learning_rate": 7.278296809388507e-07, "loss": 0.0004, "step": 99820 }, { "epoch": 1.6871297837641432, "grad_norm": 0.05235278978943825, "learning_rate": 7.270636047764306e-07, "loss": 0.001, "step": 99830 }, { "epoch": 1.6872987840429938, "grad_norm": 0.03077354095876217, "learning_rate": 7.262979003801806e-07, "loss": 0.0008, "step": 99840 }, { "epoch": 1.6874677843218442, "grad_norm": 0.04643261432647705, "learning_rate": 7.255325678167191e-07, "loss": 0.0009, "step": 99850 }, { "epoch": 1.6876367846006946, "grad_norm": 0.06332317739725113, "learning_rate": 7.24767607152636e-07, "loss": 0.0006, "step": 99860 }, { "epoch": 1.687805784879545, "grad_norm": 0.03759251907467842, "learning_rate": 7.240030184544833e-07, "loss": 0.0007, "step": 99870 }, { "epoch": 1.6879747851583955, "grad_norm": 0.04582984745502472, "learning_rate": 7.232388017887859e-07, "loss": 0.0013, "step": 99880 }, { "epoch": 1.688143785437246, "grad_norm": 0.047386761754751205, "learning_rate": 7.224749572220341e-07, "loss": 0.0002, "step": 99890 }, { "epoch": 1.6883127857160964, "grad_norm": 0.052096910774707794, "learning_rate": 7.217114848206869e-07, "loss": 0.001, "step": 99900 }, { "epoch": 1.6884817859949468, "grad_norm": 0.01506057009100914, "learning_rate": 7.209483846511689e-07, "loss": 0.0044, "step": 99910 }, { "epoch": 1.6886507862737974, "grad_norm": 0.0026974184438586235, "learning_rate": 7.201856567798743e-07, "loss": 0.0008, "step": 99920 }, { "epoch": 1.688819786552648, "grad_norm": 0.019957927986979485, "learning_rate": 7.194233012731633e-07, "loss": 0.0013, "step": 99930 }, { "epoch": 1.6889887868314983, "grad_norm": 0.011490083299577236, "learning_rate": 7.186613181973656e-07, "loss": 0.0005, "step": 99940 }, { "epoch": 1.6891577871103487, "grad_norm": 0.05209843069314957, "learning_rate": 7.178997076187755e-07, "loss": 0.0005, "step": 99950 }, { "epoch": 1.689326787389199, "grad_norm": 0.03485189750790596, "learning_rate": 7.171384696036598e-07, "loss": 0.0006, "step": 99960 }, { "epoch": 1.6894957876680496, "grad_norm": 0.034865107387304306, "learning_rate": 7.163776042182463e-07, "loss": 0.0009, "step": 99970 }, { "epoch": 1.6896647879469002, "grad_norm": 0.0097969900816679, "learning_rate": 7.156171115287374e-07, "loss": 0.0008, "step": 99980 }, { "epoch": 1.6898337882257506, "grad_norm": 0.046122949570417404, "learning_rate": 7.148569916012977e-07, "loss": 0.0006, "step": 99990 }, { "epoch": 1.690002788504601, "grad_norm": 0.002712082350626588, "learning_rate": 7.140972445020617e-07, "loss": 0.0006, "step": 100000 }, { "epoch": 1.6901717887834515, "grad_norm": 0.021689051762223244, "learning_rate": 7.133378702971305e-07, "loss": 0.0006, "step": 100010 }, { "epoch": 1.6903407890623021, "grad_norm": 0.00048819032963365316, "learning_rate": 7.125788690525753e-07, "loss": 0.0005, "step": 100020 }, { "epoch": 1.6905097893411525, "grad_norm": 0.07447975873947144, "learning_rate": 7.118202408344299e-07, "loss": 0.0011, "step": 100030 }, { "epoch": 1.6906787896200028, "grad_norm": 0.06627485901117325, "learning_rate": 7.110619857087015e-07, "loss": 0.0004, "step": 100040 }, { "epoch": 1.6908477898988532, "grad_norm": 0.05524977296590805, "learning_rate": 7.103041037413599e-07, "loss": 0.0014, "step": 100050 }, { "epoch": 1.6910167901777038, "grad_norm": 0.028608134016394615, "learning_rate": 7.095465949983449e-07, "loss": 0.0011, "step": 100060 }, { "epoch": 1.6911857904565544, "grad_norm": 0.055802833288908005, "learning_rate": 7.087894595455642e-07, "loss": 0.0004, "step": 100070 }, { "epoch": 1.6913547907354047, "grad_norm": 0.018083810806274414, "learning_rate": 7.080326974488932e-07, "loss": 0.0009, "step": 100080 }, { "epoch": 1.691523791014255, "grad_norm": 0.016348091885447502, "learning_rate": 7.072763087741713e-07, "loss": 0.0005, "step": 100090 }, { "epoch": 1.6916927912931057, "grad_norm": 0.010338032618165016, "learning_rate": 7.065202935872089e-07, "loss": 0.0008, "step": 100100 }, { "epoch": 1.691861791571956, "grad_norm": 0.15109117329120636, "learning_rate": 7.057646519537847e-07, "loss": 0.0005, "step": 100110 }, { "epoch": 1.6920307918508066, "grad_norm": 0.009320233948528767, "learning_rate": 7.050093839396405e-07, "loss": 0.001, "step": 100120 }, { "epoch": 1.692199792129657, "grad_norm": 0.02333456091582775, "learning_rate": 7.042544896104913e-07, "loss": 0.0006, "step": 100130 }, { "epoch": 1.6923687924085073, "grad_norm": 0.05184491351246834, "learning_rate": 7.034999690320132e-07, "loss": 0.0012, "step": 100140 }, { "epoch": 1.692537792687358, "grad_norm": 0.008726450614631176, "learning_rate": 7.027458222698563e-07, "loss": 0.0007, "step": 100150 }, { "epoch": 1.6927067929662085, "grad_norm": 0.04120657965540886, "learning_rate": 7.019920493896321e-07, "loss": 0.001, "step": 100160 }, { "epoch": 1.6928757932450589, "grad_norm": 0.018746282905340195, "learning_rate": 7.012386504569252e-07, "loss": 0.0005, "step": 100170 }, { "epoch": 1.6930447935239092, "grad_norm": 0.02525946870446205, "learning_rate": 7.004856255372827e-07, "loss": 0.0018, "step": 100180 }, { "epoch": 1.6932137938027598, "grad_norm": 0.036460552364587784, "learning_rate": 6.997329746962234e-07, "loss": 0.0008, "step": 100190 }, { "epoch": 1.6933827940816102, "grad_norm": 0.010049732401967049, "learning_rate": 6.989806979992297e-07, "loss": 0.0003, "step": 100200 }, { "epoch": 1.6935517943604608, "grad_norm": 0.0019592733588069677, "learning_rate": 6.982287955117551e-07, "loss": 0.0003, "step": 100210 }, { "epoch": 1.6937207946393111, "grad_norm": 0.054332565516233444, "learning_rate": 6.974772672992164e-07, "loss": 0.0003, "step": 100220 }, { "epoch": 1.6938897949181615, "grad_norm": 0.07857688516378403, "learning_rate": 6.967261134270032e-07, "loss": 0.0006, "step": 100230 }, { "epoch": 1.694058795197012, "grad_norm": 0.05319392308592796, "learning_rate": 6.959753339604669e-07, "loss": 0.0006, "step": 100240 }, { "epoch": 1.6942277954758627, "grad_norm": 0.05939016863703728, "learning_rate": 6.952249289649293e-07, "loss": 0.0004, "step": 100250 }, { "epoch": 1.694396795754713, "grad_norm": 0.049175795167684555, "learning_rate": 6.94474898505681e-07, "loss": 0.0003, "step": 100260 }, { "epoch": 1.6945657960335634, "grad_norm": 0.028834955766797066, "learning_rate": 6.937252426479779e-07, "loss": 0.0003, "step": 100270 }, { "epoch": 1.6947347963124137, "grad_norm": 0.003831252222880721, "learning_rate": 6.929759614570419e-07, "loss": 0.0007, "step": 100280 }, { "epoch": 1.6949037965912643, "grad_norm": 0.049640364944934845, "learning_rate": 6.922270549980664e-07, "loss": 0.0009, "step": 100290 }, { "epoch": 1.695072796870115, "grad_norm": 0.03364095836877823, "learning_rate": 6.914785233362076e-07, "loss": 0.0015, "step": 100300 }, { "epoch": 1.6952417971489653, "grad_norm": 0.015599294565618038, "learning_rate": 6.907303665365938e-07, "loss": 0.001, "step": 100310 }, { "epoch": 1.6954107974278156, "grad_norm": 0.009604704566299915, "learning_rate": 6.89982584664316e-07, "loss": 0.0003, "step": 100320 }, { "epoch": 1.6955797977066662, "grad_norm": 0.04983891546726227, "learning_rate": 6.892351777844359e-07, "loss": 0.0004, "step": 100330 }, { "epoch": 1.6957487979855168, "grad_norm": 0.10597651451826096, "learning_rate": 6.884881459619825e-07, "loss": 0.0007, "step": 100340 }, { "epoch": 1.6959177982643672, "grad_norm": 0.03459309786558151, "learning_rate": 6.877414892619488e-07, "loss": 0.0003, "step": 100350 }, { "epoch": 1.6960867985432175, "grad_norm": 0.025150327011942863, "learning_rate": 6.869952077492998e-07, "loss": 0.0007, "step": 100360 }, { "epoch": 1.696255798822068, "grad_norm": 0.0011559352278709412, "learning_rate": 6.862493014889643e-07, "loss": 0.0009, "step": 100370 }, { "epoch": 1.6964247991009185, "grad_norm": 0.08916772156953812, "learning_rate": 6.855037705458406e-07, "loss": 0.0007, "step": 100380 }, { "epoch": 1.696593799379769, "grad_norm": 0.05255361273884773, "learning_rate": 6.847586149847924e-07, "loss": 0.0009, "step": 100390 }, { "epoch": 1.6967627996586194, "grad_norm": 0.019203945994377136, "learning_rate": 6.840138348706538e-07, "loss": 0.0005, "step": 100400 }, { "epoch": 1.6969317999374698, "grad_norm": 0.00024325685808435082, "learning_rate": 6.832694302682213e-07, "loss": 0.0008, "step": 100410 }, { "epoch": 1.6971008002163204, "grad_norm": 0.004759735893458128, "learning_rate": 6.825254012422649e-07, "loss": 0.0009, "step": 100420 }, { "epoch": 1.697269800495171, "grad_norm": 0.006870803888887167, "learning_rate": 6.817817478575162e-07, "loss": 0.0004, "step": 100430 }, { "epoch": 1.6974388007740213, "grad_norm": 0.023119403049349785, "learning_rate": 6.810384701786776e-07, "loss": 0.0015, "step": 100440 }, { "epoch": 1.6976078010528717, "grad_norm": 0.03685407713055611, "learning_rate": 6.802955682704187e-07, "loss": 0.0011, "step": 100450 }, { "epoch": 1.697776801331722, "grad_norm": 0.031681958585977554, "learning_rate": 6.795530421973745e-07, "loss": 0.0007, "step": 100460 }, { "epoch": 1.6979458016105726, "grad_norm": 0.07114414125680923, "learning_rate": 6.788108920241481e-07, "loss": 0.001, "step": 100470 }, { "epoch": 1.6981148018894232, "grad_norm": 0.04379222169518471, "learning_rate": 6.78069117815312e-07, "loss": 0.0006, "step": 100480 }, { "epoch": 1.6982838021682736, "grad_norm": 0.010722734965384007, "learning_rate": 6.773277196354017e-07, "loss": 0.0011, "step": 100490 }, { "epoch": 1.698452802447124, "grad_norm": 0.025447919964790344, "learning_rate": 6.765866975489244e-07, "loss": 0.0005, "step": 100500 }, { "epoch": 1.6986218027259745, "grad_norm": 0.0570237897336483, "learning_rate": 6.758460516203513e-07, "loss": 0.0008, "step": 100510 }, { "epoch": 1.698790803004825, "grad_norm": 0.01165765430778265, "learning_rate": 6.751057819141233e-07, "loss": 0.0003, "step": 100520 }, { "epoch": 1.6989598032836755, "grad_norm": 0.033633843064308167, "learning_rate": 6.743658884946464e-07, "loss": 0.0007, "step": 100530 }, { "epoch": 1.6991288035625258, "grad_norm": 0.17939716577529907, "learning_rate": 6.736263714262958e-07, "loss": 0.0009, "step": 100540 }, { "epoch": 1.6992978038413762, "grad_norm": 0.032884031534194946, "learning_rate": 6.728872307734119e-07, "loss": 0.0006, "step": 100550 }, { "epoch": 1.6994668041202268, "grad_norm": 0.051502492278814316, "learning_rate": 6.721484666003053e-07, "loss": 0.0009, "step": 100560 }, { "epoch": 1.6996358043990774, "grad_norm": 0.07070668786764145, "learning_rate": 6.714100789712502e-07, "loss": 0.0012, "step": 100570 }, { "epoch": 1.6998048046779277, "grad_norm": 0.1215224340558052, "learning_rate": 6.706720679504908e-07, "loss": 0.0004, "step": 100580 }, { "epoch": 1.699973804956778, "grad_norm": 0.05705857276916504, "learning_rate": 6.699344336022379e-07, "loss": 0.0004, "step": 100590 }, { "epoch": 1.7001428052356287, "grad_norm": 0.02882068045437336, "learning_rate": 6.691971759906685e-07, "loss": 0.0005, "step": 100600 }, { "epoch": 1.7003118055144792, "grad_norm": 0.08917008340358734, "learning_rate": 6.684602951799291e-07, "loss": 0.0007, "step": 100610 }, { "epoch": 1.7004808057933296, "grad_norm": 0.01354215107858181, "learning_rate": 6.677237912341294e-07, "loss": 0.0006, "step": 100620 }, { "epoch": 1.70064980607218, "grad_norm": 0.05526946112513542, "learning_rate": 6.669876642173512e-07, "loss": 0.0008, "step": 100630 }, { "epoch": 1.7008188063510303, "grad_norm": 0.027818327769637108, "learning_rate": 6.662519141936397e-07, "loss": 0.0006, "step": 100640 }, { "epoch": 1.700987806629881, "grad_norm": 0.03925297036767006, "learning_rate": 6.655165412270087e-07, "loss": 0.0004, "step": 100650 }, { "epoch": 1.7011568069087315, "grad_norm": 0.009000157006084919, "learning_rate": 6.647815453814393e-07, "loss": 0.0007, "step": 100660 }, { "epoch": 1.7013258071875819, "grad_norm": 0.04803379997611046, "learning_rate": 6.640469267208821e-07, "loss": 0.0006, "step": 100670 }, { "epoch": 1.7014948074664322, "grad_norm": 0.0002350498689338565, "learning_rate": 6.633126853092487e-07, "loss": 0.0004, "step": 100680 }, { "epoch": 1.7016638077452828, "grad_norm": 0.042979419231414795, "learning_rate": 6.625788212104245e-07, "loss": 0.0002, "step": 100690 }, { "epoch": 1.7018328080241334, "grad_norm": 0.010143440216779709, "learning_rate": 6.618453344882575e-07, "loss": 0.0003, "step": 100700 }, { "epoch": 1.7020018083029838, "grad_norm": 0.003690978977829218, "learning_rate": 6.611122252065655e-07, "loss": 0.0002, "step": 100710 }, { "epoch": 1.7021708085818341, "grad_norm": 0.05425015464425087, "learning_rate": 6.603794934291319e-07, "loss": 0.0009, "step": 100720 }, { "epoch": 1.7023398088606845, "grad_norm": 0.0006868285126984119, "learning_rate": 6.596471392197096e-07, "loss": 0.001, "step": 100730 }, { "epoch": 1.702508809139535, "grad_norm": 0.0235914159566164, "learning_rate": 6.589151626420137e-07, "loss": 0.0003, "step": 100740 }, { "epoch": 1.7026778094183856, "grad_norm": 0.055669207125902176, "learning_rate": 6.581835637597334e-07, "loss": 0.0013, "step": 100750 }, { "epoch": 1.702846809697236, "grad_norm": 0.0040846276096999645, "learning_rate": 6.574523426365187e-07, "loss": 0.0006, "step": 100760 }, { "epoch": 1.7030158099760864, "grad_norm": 0.001606640755198896, "learning_rate": 6.56721499335991e-07, "loss": 0.0006, "step": 100770 }, { "epoch": 1.703184810254937, "grad_norm": 0.018381789326667786, "learning_rate": 6.559910339217357e-07, "loss": 0.0008, "step": 100780 }, { "epoch": 1.7033538105337875, "grad_norm": 0.03388458117842674, "learning_rate": 6.55260946457309e-07, "loss": 0.0007, "step": 100790 }, { "epoch": 1.703522810812638, "grad_norm": 0.03354884311556816, "learning_rate": 6.54531237006229e-07, "loss": 0.0006, "step": 100800 }, { "epoch": 1.7036918110914883, "grad_norm": 0.0004501251969486475, "learning_rate": 6.538019056319872e-07, "loss": 0.0008, "step": 100810 }, { "epoch": 1.7038608113703386, "grad_norm": 0.007855558767914772, "learning_rate": 6.530729523980361e-07, "loss": 0.0005, "step": 100820 }, { "epoch": 1.7040298116491892, "grad_norm": 0.01765887252986431, "learning_rate": 6.523443773677995e-07, "loss": 0.0004, "step": 100830 }, { "epoch": 1.7041988119280398, "grad_norm": 0.0445299856364727, "learning_rate": 6.516161806046667e-07, "loss": 0.0009, "step": 100840 }, { "epoch": 1.7043678122068902, "grad_norm": 0.010799041017889977, "learning_rate": 6.508883621719952e-07, "loss": 0.0024, "step": 100850 }, { "epoch": 1.7045368124857405, "grad_norm": 0.08953937143087387, "learning_rate": 6.50160922133109e-07, "loss": 0.0003, "step": 100860 }, { "epoch": 1.704705812764591, "grad_norm": 0.061672527343034744, "learning_rate": 6.494338605512968e-07, "loss": 0.0008, "step": 100870 }, { "epoch": 1.7048748130434417, "grad_norm": 0.009565744549036026, "learning_rate": 6.487071774898185e-07, "loss": 0.0007, "step": 100880 }, { "epoch": 1.705043813322292, "grad_norm": 0.010980235412716866, "learning_rate": 6.479808730118975e-07, "loss": 0.0004, "step": 100890 }, { "epoch": 1.7052128136011424, "grad_norm": 0.024364009499549866, "learning_rate": 6.472549471807277e-07, "loss": 0.0007, "step": 100900 }, { "epoch": 1.7053818138799928, "grad_norm": 0.03408200293779373, "learning_rate": 6.465294000594663e-07, "loss": 0.0004, "step": 100910 }, { "epoch": 1.7055508141588434, "grad_norm": 0.07476289570331573, "learning_rate": 6.458042317112407e-07, "loss": 0.0005, "step": 100920 }, { "epoch": 1.705719814437694, "grad_norm": 0.03566889092326164, "learning_rate": 6.450794421991425e-07, "loss": 0.0005, "step": 100930 }, { "epoch": 1.7058888147165443, "grad_norm": 0.004053400829434395, "learning_rate": 6.44355031586234e-07, "loss": 0.0005, "step": 100940 }, { "epoch": 1.7060578149953947, "grad_norm": 0.008549575693905354, "learning_rate": 6.4363099993554e-07, "loss": 0.0007, "step": 100950 }, { "epoch": 1.7062268152742452, "grad_norm": 0.020716724917292595, "learning_rate": 6.429073473100578e-07, "loss": 0.0003, "step": 100960 }, { "epoch": 1.7063958155530956, "grad_norm": 0.0033090277574956417, "learning_rate": 6.421840737727453e-07, "loss": 0.0004, "step": 100970 }, { "epoch": 1.7065648158319462, "grad_norm": 0.08368370682001114, "learning_rate": 6.414611793865339e-07, "loss": 0.0006, "step": 100980 }, { "epoch": 1.7067338161107966, "grad_norm": 0.019599411636590958, "learning_rate": 6.407386642143165e-07, "loss": 0.0006, "step": 100990 }, { "epoch": 1.706902816389647, "grad_norm": 0.06469276547431946, "learning_rate": 6.400165283189574e-07, "loss": 0.0004, "step": 101000 }, { "epoch": 1.7070718166684975, "grad_norm": 0.014812301844358444, "learning_rate": 6.392947717632836e-07, "loss": 0.0003, "step": 101010 }, { "epoch": 1.707240816947348, "grad_norm": 0.01817789115011692, "learning_rate": 6.38573394610093e-07, "loss": 0.0005, "step": 101020 }, { "epoch": 1.7074098172261984, "grad_norm": 0.02831515297293663, "learning_rate": 6.37852396922149e-07, "loss": 0.0007, "step": 101030 }, { "epoch": 1.7075788175050488, "grad_norm": 0.010831360705196857, "learning_rate": 6.371317787621822e-07, "loss": 0.0011, "step": 101040 }, { "epoch": 1.7077478177838994, "grad_norm": 0.06960634887218475, "learning_rate": 6.364115401928883e-07, "loss": 0.001, "step": 101050 }, { "epoch": 1.7079168180627498, "grad_norm": 0.0023254172410815954, "learning_rate": 6.356916812769332e-07, "loss": 0.0003, "step": 101060 }, { "epoch": 1.7080858183416003, "grad_norm": 0.10252980887889862, "learning_rate": 6.349722020769466e-07, "loss": 0.0008, "step": 101070 }, { "epoch": 1.7082548186204507, "grad_norm": 0.04966580495238304, "learning_rate": 6.342531026555288e-07, "loss": 0.0006, "step": 101080 }, { "epoch": 1.708423818899301, "grad_norm": 0.026467200368642807, "learning_rate": 6.335343830752422e-07, "loss": 0.0008, "step": 101090 }, { "epoch": 1.7085928191781516, "grad_norm": 0.04453878104686737, "learning_rate": 6.328160433986202e-07, "loss": 0.0007, "step": 101100 }, { "epoch": 1.7087618194570022, "grad_norm": 0.005278876982629299, "learning_rate": 6.320980836881635e-07, "loss": 0.0005, "step": 101110 }, { "epoch": 1.7089308197358526, "grad_norm": 0.03430505841970444, "learning_rate": 6.313805040063348e-07, "loss": 0.0003, "step": 101120 }, { "epoch": 1.709099820014703, "grad_norm": 0.09736597537994385, "learning_rate": 6.306633044155702e-07, "loss": 0.0006, "step": 101130 }, { "epoch": 1.7092688202935535, "grad_norm": 0.011779002845287323, "learning_rate": 6.299464849782666e-07, "loss": 0.0005, "step": 101140 }, { "epoch": 1.709437820572404, "grad_norm": 0.0022404491901397705, "learning_rate": 6.292300457567934e-07, "loss": 0.0006, "step": 101150 }, { "epoch": 1.7096068208512545, "grad_norm": 0.4547003209590912, "learning_rate": 6.285139868134821e-07, "loss": 0.0015, "step": 101160 }, { "epoch": 1.7097758211301048, "grad_norm": 0.027612561360001564, "learning_rate": 6.277983082106353e-07, "loss": 0.0006, "step": 101170 }, { "epoch": 1.7099448214089552, "grad_norm": 0.026960104703903198, "learning_rate": 6.270830100105185e-07, "loss": 0.0001, "step": 101180 }, { "epoch": 1.7101138216878058, "grad_norm": 0.14393088221549988, "learning_rate": 6.263680922753684e-07, "loss": 0.0003, "step": 101190 }, { "epoch": 1.7102828219666564, "grad_norm": 0.0568784661591053, "learning_rate": 6.256535550673837e-07, "loss": 0.0005, "step": 101200 }, { "epoch": 1.7104518222455067, "grad_norm": 0.01398561056703329, "learning_rate": 6.249393984487339e-07, "loss": 0.0018, "step": 101210 }, { "epoch": 1.710620822524357, "grad_norm": 0.10412480682134628, "learning_rate": 6.242256224815551e-07, "loss": 0.0009, "step": 101220 }, { "epoch": 1.7107898228032075, "grad_norm": 0.04285868629813194, "learning_rate": 6.23512227227947e-07, "loss": 0.0012, "step": 101230 }, { "epoch": 1.710958823082058, "grad_norm": 0.002318931743502617, "learning_rate": 6.227992127499799e-07, "loss": 0.0002, "step": 101240 }, { "epoch": 1.7111278233609086, "grad_norm": 0.030637163668870926, "learning_rate": 6.220865791096908e-07, "loss": 0.0006, "step": 101250 }, { "epoch": 1.711296823639759, "grad_norm": 0.041567422449588776, "learning_rate": 6.213743263690791e-07, "loss": 0.0006, "step": 101260 }, { "epoch": 1.7114658239186094, "grad_norm": 0.0006766514852643013, "learning_rate": 6.206624545901174e-07, "loss": 0.0006, "step": 101270 }, { "epoch": 1.71163482419746, "grad_norm": 0.09084150940179825, "learning_rate": 6.199509638347395e-07, "loss": 0.0006, "step": 101280 }, { "epoch": 1.7118038244763105, "grad_norm": 0.028928222134709358, "learning_rate": 6.192398541648504e-07, "loss": 0.0015, "step": 101290 }, { "epoch": 1.7119728247551609, "grad_norm": 0.10853814333677292, "learning_rate": 6.185291256423182e-07, "loss": 0.0006, "step": 101300 }, { "epoch": 1.7121418250340112, "grad_norm": 0.03964819014072418, "learning_rate": 6.178187783289818e-07, "loss": 0.0003, "step": 101310 }, { "epoch": 1.7123108253128616, "grad_norm": 0.05229168385267258, "learning_rate": 6.171088122866431e-07, "loss": 0.0003, "step": 101320 }, { "epoch": 1.7124798255917122, "grad_norm": 0.025795824825763702, "learning_rate": 6.163992275770741e-07, "loss": 0.0015, "step": 101330 }, { "epoch": 1.7126488258705628, "grad_norm": 0.05593524128198624, "learning_rate": 6.156900242620101e-07, "loss": 0.0006, "step": 101340 }, { "epoch": 1.7128178261494131, "grad_norm": 0.06450933963060379, "learning_rate": 6.14981202403157e-07, "loss": 0.0008, "step": 101350 }, { "epoch": 1.7129868264282635, "grad_norm": 0.021435856819152832, "learning_rate": 6.142727620621857e-07, "loss": 0.0005, "step": 101360 }, { "epoch": 1.713155826707114, "grad_norm": 0.004841253627091646, "learning_rate": 6.135647033007325e-07, "loss": 0.0004, "step": 101370 }, { "epoch": 1.7133248269859647, "grad_norm": 0.03038441203534603, "learning_rate": 6.12857026180404e-07, "loss": 0.0005, "step": 101380 }, { "epoch": 1.713493827264815, "grad_norm": 0.0590740442276001, "learning_rate": 6.121497307627699e-07, "loss": 0.0004, "step": 101390 }, { "epoch": 1.7136628275436654, "grad_norm": 0.0016183584230020642, "learning_rate": 6.114428171093695e-07, "loss": 0.0006, "step": 101400 }, { "epoch": 1.7138318278225158, "grad_norm": 0.029670948162674904, "learning_rate": 6.107362852817056e-07, "loss": 0.0005, "step": 101410 }, { "epoch": 1.7140008281013663, "grad_norm": 0.03551269322633743, "learning_rate": 6.100301353412519e-07, "loss": 0.0005, "step": 101420 }, { "epoch": 1.714169828380217, "grad_norm": 0.0018679883796721697, "learning_rate": 6.093243673494459e-07, "loss": 0.0007, "step": 101430 }, { "epoch": 1.7143388286590673, "grad_norm": 0.010143287479877472, "learning_rate": 6.086189813676946e-07, "loss": 0.0003, "step": 101440 }, { "epoch": 1.7145078289379176, "grad_norm": 0.04105092212557793, "learning_rate": 6.079139774573672e-07, "loss": 0.0004, "step": 101450 }, { "epoch": 1.7146768292167682, "grad_norm": 0.004793565254658461, "learning_rate": 6.072093556798053e-07, "loss": 0.0008, "step": 101460 }, { "epoch": 1.7148458294956188, "grad_norm": 0.060135677456855774, "learning_rate": 6.065051160963121e-07, "loss": 0.0008, "step": 101470 }, { "epoch": 1.7150148297744692, "grad_norm": 0.10621100664138794, "learning_rate": 6.058012587681617e-07, "loss": 0.0006, "step": 101480 }, { "epoch": 1.7151838300533195, "grad_norm": 0.03052833490073681, "learning_rate": 6.050977837565914e-07, "loss": 0.0005, "step": 101490 }, { "epoch": 1.71535283033217, "grad_norm": 0.023036569356918335, "learning_rate": 6.043946911228082e-07, "loss": 0.0021, "step": 101500 }, { "epoch": 1.7155218306110205, "grad_norm": 0.009780052118003368, "learning_rate": 6.036919809279834e-07, "loss": 0.0006, "step": 101510 }, { "epoch": 1.715690830889871, "grad_norm": 0.03306550905108452, "learning_rate": 6.029896532332575e-07, "loss": 0.0008, "step": 101520 }, { "epoch": 1.7158598311687214, "grad_norm": 0.0446292869746685, "learning_rate": 6.022877080997353e-07, "loss": 0.0008, "step": 101530 }, { "epoch": 1.7160288314475718, "grad_norm": 0.03361291065812111, "learning_rate": 6.015861455884902e-07, "loss": 0.0009, "step": 101540 }, { "epoch": 1.7161978317264224, "grad_norm": 0.0014617099659517407, "learning_rate": 6.008849657605609e-07, "loss": 0.0005, "step": 101550 }, { "epoch": 1.716366832005273, "grad_norm": 0.08785160630941391, "learning_rate": 6.001841686769544e-07, "loss": 0.0006, "step": 101560 }, { "epoch": 1.7165358322841233, "grad_norm": 0.09249861538410187, "learning_rate": 5.994837543986421e-07, "loss": 0.0012, "step": 101570 }, { "epoch": 1.7167048325629737, "grad_norm": 0.025042526423931122, "learning_rate": 5.98783722986564e-07, "loss": 0.0005, "step": 101580 }, { "epoch": 1.716873832841824, "grad_norm": 0.005246470682322979, "learning_rate": 5.980840745016275e-07, "loss": 0.0007, "step": 101590 }, { "epoch": 1.7170428331206746, "grad_norm": 0.08312425017356873, "learning_rate": 5.97384809004703e-07, "loss": 0.0006, "step": 101600 }, { "epoch": 1.7172118333995252, "grad_norm": 0.06092371046543121, "learning_rate": 5.966859265566316e-07, "loss": 0.0007, "step": 101610 }, { "epoch": 1.7173808336783756, "grad_norm": 0.03240653872489929, "learning_rate": 5.959874272182192e-07, "loss": 0.0009, "step": 101620 }, { "epoch": 1.717549833957226, "grad_norm": 0.024100584909319878, "learning_rate": 5.952893110502395e-07, "loss": 0.0006, "step": 101630 }, { "epoch": 1.7177188342360765, "grad_norm": 0.05034208297729492, "learning_rate": 5.945915781134298e-07, "loss": 0.0008, "step": 101640 }, { "epoch": 1.717887834514927, "grad_norm": 0.07382944971323013, "learning_rate": 5.938942284684984e-07, "loss": 0.0008, "step": 101650 }, { "epoch": 1.7180568347937775, "grad_norm": 0.005421789828687906, "learning_rate": 5.931972621761167e-07, "loss": 0.0008, "step": 101660 }, { "epoch": 1.7182258350726278, "grad_norm": 0.14722028374671936, "learning_rate": 5.925006792969251e-07, "loss": 0.0005, "step": 101670 }, { "epoch": 1.7183948353514782, "grad_norm": 0.0022031988482922316, "learning_rate": 5.918044798915285e-07, "loss": 0.0006, "step": 101680 }, { "epoch": 1.7185638356303288, "grad_norm": 0.043418727815151215, "learning_rate": 5.911086640205016e-07, "loss": 0.001, "step": 101690 }, { "epoch": 1.7187328359091794, "grad_norm": 0.10383982211351395, "learning_rate": 5.904132317443812e-07, "loss": 0.001, "step": 101700 }, { "epoch": 1.7189018361880297, "grad_norm": 0.046413641422986984, "learning_rate": 5.897181831236753e-07, "loss": 0.0007, "step": 101710 }, { "epoch": 1.71907083646688, "grad_norm": 0.02331531047821045, "learning_rate": 5.890235182188553e-07, "loss": 0.0006, "step": 101720 }, { "epoch": 1.7192398367457307, "grad_norm": 0.03394658863544464, "learning_rate": 5.883292370903615e-07, "loss": 0.0003, "step": 101730 }, { "epoch": 1.7194088370245812, "grad_norm": 0.032232072204351425, "learning_rate": 5.876353397985984e-07, "loss": 0.0005, "step": 101740 }, { "epoch": 1.7195778373034316, "grad_norm": 0.07648573070764542, "learning_rate": 5.8694182640394e-07, "loss": 0.0004, "step": 101750 }, { "epoch": 1.719746837582282, "grad_norm": 0.07018204033374786, "learning_rate": 5.862486969667236e-07, "loss": 0.0004, "step": 101760 }, { "epoch": 1.7199158378611323, "grad_norm": 0.002022471046075225, "learning_rate": 5.855559515472564e-07, "loss": 0.0012, "step": 101770 }, { "epoch": 1.720084838139983, "grad_norm": 0.004247077275067568, "learning_rate": 5.848635902058092e-07, "loss": 0.0005, "step": 101780 }, { "epoch": 1.7202538384188335, "grad_norm": 0.0623776949942112, "learning_rate": 5.841716130026215e-07, "loss": 0.0007, "step": 101790 }, { "epoch": 1.7204228386976839, "grad_norm": 0.01923542656004429, "learning_rate": 5.834800199978985e-07, "loss": 0.0003, "step": 101800 }, { "epoch": 1.7205918389765342, "grad_norm": 0.00485391728579998, "learning_rate": 5.827888112518132e-07, "loss": 0.0006, "step": 101810 }, { "epoch": 1.7207608392553848, "grad_norm": 0.019686343148350716, "learning_rate": 5.820979868245025e-07, "loss": 0.0006, "step": 101820 }, { "epoch": 1.7209298395342354, "grad_norm": 0.013536707498133183, "learning_rate": 5.814075467760727e-07, "loss": 0.0003, "step": 101830 }, { "epoch": 1.7210988398130858, "grad_norm": 0.0900881439447403, "learning_rate": 5.807174911665942e-07, "loss": 0.0009, "step": 101840 }, { "epoch": 1.7212678400919361, "grad_norm": 0.013546904549002647, "learning_rate": 5.80027820056106e-07, "loss": 0.0003, "step": 101850 }, { "epoch": 1.7214368403707865, "grad_norm": 0.06801117211580276, "learning_rate": 5.793385335046137e-07, "loss": 0.0006, "step": 101860 }, { "epoch": 1.721605840649637, "grad_norm": 0.0018033263040706515, "learning_rate": 5.786496315720864e-07, "loss": 0.0004, "step": 101870 }, { "epoch": 1.7217748409284876, "grad_norm": 0.039153531193733215, "learning_rate": 5.779611143184638e-07, "loss": 0.0006, "step": 101880 }, { "epoch": 1.721943841207338, "grad_norm": 0.04686809703707695, "learning_rate": 5.772729818036488e-07, "loss": 0.0005, "step": 101890 }, { "epoch": 1.7221128414861884, "grad_norm": 0.048774782568216324, "learning_rate": 5.765852340875144e-07, "loss": 0.0003, "step": 101900 }, { "epoch": 1.722281841765039, "grad_norm": 0.1338806450366974, "learning_rate": 5.758978712298952e-07, "loss": 0.0007, "step": 101910 }, { "epoch": 1.7224508420438893, "grad_norm": 0.06597351282835007, "learning_rate": 5.752108932905976e-07, "loss": 0.0008, "step": 101920 }, { "epoch": 1.72261984232274, "grad_norm": 0.02449699118733406, "learning_rate": 5.745243003293893e-07, "loss": 0.0005, "step": 101930 }, { "epoch": 1.7227888426015903, "grad_norm": 0.029172487556934357, "learning_rate": 5.738380924060105e-07, "loss": 0.0008, "step": 101940 }, { "epoch": 1.7229578428804406, "grad_norm": 0.04729709029197693, "learning_rate": 5.731522695801617e-07, "loss": 0.0004, "step": 101950 }, { "epoch": 1.7231268431592912, "grad_norm": 0.010576958768069744, "learning_rate": 5.724668319115151e-07, "loss": 0.0005, "step": 101960 }, { "epoch": 1.7232958434381418, "grad_norm": 0.03171493485569954, "learning_rate": 5.717817794597047e-07, "loss": 0.0003, "step": 101970 }, { "epoch": 1.7234648437169922, "grad_norm": 0.01054026186466217, "learning_rate": 5.71097112284335e-07, "loss": 0.0007, "step": 101980 }, { "epoch": 1.7236338439958425, "grad_norm": 0.018904397264122963, "learning_rate": 5.704128304449758e-07, "loss": 0.0004, "step": 101990 }, { "epoch": 1.723802844274693, "grad_norm": 0.02453923597931862, "learning_rate": 5.697289340011613e-07, "loss": 0.0005, "step": 102000 }, { "epoch": 1.7239718445535435, "grad_norm": 0.004252988379448652, "learning_rate": 5.690454230123948e-07, "loss": 0.0009, "step": 102010 }, { "epoch": 1.724140844832394, "grad_norm": 0.027058754116296768, "learning_rate": 5.683622975381458e-07, "loss": 0.0007, "step": 102020 }, { "epoch": 1.7243098451112444, "grad_norm": 0.03157680481672287, "learning_rate": 5.676795576378479e-07, "loss": 0.0005, "step": 102030 }, { "epoch": 1.7244788453900948, "grad_norm": 0.044976986944675446, "learning_rate": 5.669972033709048e-07, "loss": 0.0007, "step": 102040 }, { "epoch": 1.7246478456689454, "grad_norm": 0.045679911971092224, "learning_rate": 5.663152347966821e-07, "loss": 0.0015, "step": 102050 }, { "epoch": 1.724816845947796, "grad_norm": 0.00455823726952076, "learning_rate": 5.656336519745165e-07, "loss": 0.0004, "step": 102060 }, { "epoch": 1.7249858462266463, "grad_norm": 0.019595401361584663, "learning_rate": 5.649524549637075e-07, "loss": 0.001, "step": 102070 }, { "epoch": 1.7251548465054967, "grad_norm": 0.07024908810853958, "learning_rate": 5.642716438235241e-07, "loss": 0.0005, "step": 102080 }, { "epoch": 1.7253238467843472, "grad_norm": 0.03039000928401947, "learning_rate": 5.635912186131981e-07, "loss": 0.0006, "step": 102090 }, { "epoch": 1.7254928470631976, "grad_norm": 0.04661163315176964, "learning_rate": 5.629111793919322e-07, "loss": 0.0005, "step": 102100 }, { "epoch": 1.7256618473420482, "grad_norm": 0.02858460694551468, "learning_rate": 5.622315262188905e-07, "loss": 0.0006, "step": 102110 }, { "epoch": 1.7258308476208986, "grad_norm": 0.06292527168989182, "learning_rate": 5.61552259153208e-07, "loss": 0.0006, "step": 102120 }, { "epoch": 1.725999847899749, "grad_norm": 0.1382041871547699, "learning_rate": 5.608733782539844e-07, "loss": 0.0009, "step": 102130 }, { "epoch": 1.7261688481785995, "grad_norm": 0.03715429827570915, "learning_rate": 5.60194883580284e-07, "loss": 0.0006, "step": 102140 }, { "epoch": 1.72633784845745, "grad_norm": 0.008654680103063583, "learning_rate": 5.595167751911412e-07, "loss": 0.0006, "step": 102150 }, { "epoch": 1.7265068487363004, "grad_norm": 0.004395073279738426, "learning_rate": 5.588390531455528e-07, "loss": 0.0004, "step": 102160 }, { "epoch": 1.7266758490151508, "grad_norm": 0.019985094666481018, "learning_rate": 5.581617175024845e-07, "loss": 0.0003, "step": 102170 }, { "epoch": 1.7268448492940012, "grad_norm": 0.06431503593921661, "learning_rate": 5.57484768320869e-07, "loss": 0.0007, "step": 102180 }, { "epoch": 1.7270138495728518, "grad_norm": 0.06975045055150986, "learning_rate": 5.568082056596019e-07, "loss": 0.0005, "step": 102190 }, { "epoch": 1.7271828498517023, "grad_norm": 0.027523482218384743, "learning_rate": 5.561320295775486e-07, "loss": 0.0009, "step": 102200 }, { "epoch": 1.7273518501305527, "grad_norm": 0.014709588140249252, "learning_rate": 5.554562401335412e-07, "loss": 0.0008, "step": 102210 }, { "epoch": 1.727520850409403, "grad_norm": 0.08284450322389603, "learning_rate": 5.547808373863739e-07, "loss": 0.0006, "step": 102220 }, { "epoch": 1.7276898506882536, "grad_norm": 0.09165903925895691, "learning_rate": 5.541058213948125e-07, "loss": 0.0008, "step": 102230 }, { "epoch": 1.7278588509671042, "grad_norm": 0.017165465280413628, "learning_rate": 5.534311922175845e-07, "loss": 0.0007, "step": 102240 }, { "epoch": 1.7280278512459546, "grad_norm": 0.002477036789059639, "learning_rate": 5.527569499133878e-07, "loss": 0.0002, "step": 102250 }, { "epoch": 1.728196851524805, "grad_norm": 0.021751822903752327, "learning_rate": 5.520830945408828e-07, "loss": 0.0003, "step": 102260 }, { "epoch": 1.7283658518036553, "grad_norm": 0.021902253851294518, "learning_rate": 5.514096261587004e-07, "loss": 0.0008, "step": 102270 }, { "epoch": 1.728534852082506, "grad_norm": 0.052564337849617004, "learning_rate": 5.507365448254332e-07, "loss": 0.0014, "step": 102280 }, { "epoch": 1.7287038523613565, "grad_norm": 0.02939448691904545, "learning_rate": 5.500638505996453e-07, "loss": 0.0008, "step": 102290 }, { "epoch": 1.7288728526402068, "grad_norm": 0.06470446288585663, "learning_rate": 5.493915435398611e-07, "loss": 0.0008, "step": 102300 }, { "epoch": 1.7290418529190572, "grad_norm": 0.02330961637198925, "learning_rate": 5.487196237045777e-07, "loss": 0.0031, "step": 102310 }, { "epoch": 1.7292108531979078, "grad_norm": 0.01982557401061058, "learning_rate": 5.480480911522528e-07, "loss": 0.0008, "step": 102320 }, { "epoch": 1.7293798534767584, "grad_norm": 0.012928773649036884, "learning_rate": 5.473769459413158e-07, "loss": 0.0009, "step": 102330 }, { "epoch": 1.7295488537556087, "grad_norm": 0.0316120982170105, "learning_rate": 5.467061881301567e-07, "loss": 0.0002, "step": 102340 }, { "epoch": 1.729717854034459, "grad_norm": 0.04284735396504402, "learning_rate": 5.460358177771352e-07, "loss": 0.0004, "step": 102350 }, { "epoch": 1.7298868543133095, "grad_norm": 0.04509047791361809, "learning_rate": 5.453658349405794e-07, "loss": 0.0009, "step": 102360 }, { "epoch": 1.73005585459216, "grad_norm": 0.004043929278850555, "learning_rate": 5.446962396787775e-07, "loss": 0.0005, "step": 102370 }, { "epoch": 1.7302248548710106, "grad_norm": 0.061174239963293076, "learning_rate": 5.440270320499897e-07, "loss": 0.0005, "step": 102380 }, { "epoch": 1.730393855149861, "grad_norm": 0.005769838113337755, "learning_rate": 5.433582121124393e-07, "loss": 0.0004, "step": 102390 }, { "epoch": 1.7305628554287114, "grad_norm": 0.09978872537612915, "learning_rate": 5.426897799243186e-07, "loss": 0.0013, "step": 102400 }, { "epoch": 1.730731855707562, "grad_norm": 0.042144156992435455, "learning_rate": 5.420217355437824e-07, "loss": 0.0004, "step": 102410 }, { "epoch": 1.7309008559864125, "grad_norm": 0.012190748006105423, "learning_rate": 5.413540790289556e-07, "loss": 0.0008, "step": 102420 }, { "epoch": 1.7310698562652629, "grad_norm": 0.11674518883228302, "learning_rate": 5.406868104379253e-07, "loss": 0.0012, "step": 102430 }, { "epoch": 1.7312388565441132, "grad_norm": 0.0697534829378128, "learning_rate": 5.400199298287495e-07, "loss": 0.0011, "step": 102440 }, { "epoch": 1.7314078568229636, "grad_norm": 0.05596902593970299, "learning_rate": 5.393534372594478e-07, "loss": 0.0012, "step": 102450 }, { "epoch": 1.7315768571018142, "grad_norm": 0.0021226536482572556, "learning_rate": 5.38687332788011e-07, "loss": 0.0005, "step": 102460 }, { "epoch": 1.7317458573806648, "grad_norm": 0.058090031147003174, "learning_rate": 5.3802161647239e-07, "loss": 0.0008, "step": 102470 }, { "epoch": 1.7319148576595151, "grad_norm": 0.0030619546305388212, "learning_rate": 5.373562883705086e-07, "loss": 0.0005, "step": 102480 }, { "epoch": 1.7320838579383655, "grad_norm": 0.02905389480292797, "learning_rate": 5.366913485402509e-07, "loss": 0.0007, "step": 102490 }, { "epoch": 1.732252858217216, "grad_norm": 0.0476466603577137, "learning_rate": 5.360267970394722e-07, "loss": 0.0006, "step": 102500 }, { "epoch": 1.7324218584960667, "grad_norm": 0.02050813101232052, "learning_rate": 5.353626339259893e-07, "loss": 0.0006, "step": 102510 }, { "epoch": 1.732590858774917, "grad_norm": 0.11159972101449966, "learning_rate": 5.346988592575903e-07, "loss": 0.001, "step": 102520 }, { "epoch": 1.7327598590537674, "grad_norm": 0.007609162945300341, "learning_rate": 5.340354730920244e-07, "loss": 0.0005, "step": 102530 }, { "epoch": 1.7329288593326178, "grad_norm": 0.04392042011022568, "learning_rate": 5.333724754870112e-07, "loss": 0.0009, "step": 102540 }, { "epoch": 1.7330978596114683, "grad_norm": 0.04385608434677124, "learning_rate": 5.327098665002334e-07, "loss": 0.0019, "step": 102550 }, { "epoch": 1.733266859890319, "grad_norm": 0.032793279737234116, "learning_rate": 5.320476461893415e-07, "loss": 0.0008, "step": 102560 }, { "epoch": 1.7334358601691693, "grad_norm": 0.18722468614578247, "learning_rate": 5.313858146119516e-07, "loss": 0.0011, "step": 102570 }, { "epoch": 1.7336048604480196, "grad_norm": 0.10890011489391327, "learning_rate": 5.307243718256483e-07, "loss": 0.0015, "step": 102580 }, { "epoch": 1.7337738607268702, "grad_norm": 0.0011250500101596117, "learning_rate": 5.300633178879777e-07, "loss": 0.0003, "step": 102590 }, { "epoch": 1.7339428610057208, "grad_norm": 0.0004868998075835407, "learning_rate": 5.294026528564567e-07, "loss": 0.0008, "step": 102600 }, { "epoch": 1.7341118612845712, "grad_norm": 0.017471345141530037, "learning_rate": 5.287423767885647e-07, "loss": 0.0007, "step": 102610 }, { "epoch": 1.7342808615634215, "grad_norm": 0.01358699705451727, "learning_rate": 5.280824897417497e-07, "loss": 0.0005, "step": 102620 }, { "epoch": 1.734449861842272, "grad_norm": 0.009207590483129025, "learning_rate": 5.274229917734258e-07, "loss": 0.0017, "step": 102630 }, { "epoch": 1.7346188621211225, "grad_norm": 0.01364581286907196, "learning_rate": 5.267638829409704e-07, "loss": 0.0004, "step": 102640 }, { "epoch": 1.734787862399973, "grad_norm": 0.0013375000562518835, "learning_rate": 5.261051633017322e-07, "loss": 0.0004, "step": 102650 }, { "epoch": 1.7349568626788234, "grad_norm": 0.043894656002521515, "learning_rate": 5.254468329130197e-07, "loss": 0.0006, "step": 102660 }, { "epoch": 1.7351258629576738, "grad_norm": 0.015651630237698555, "learning_rate": 5.247888918321137e-07, "loss": 0.0007, "step": 102670 }, { "epoch": 1.7352948632365244, "grad_norm": 0.0647960677742958, "learning_rate": 5.241313401162562e-07, "loss": 0.0008, "step": 102680 }, { "epoch": 1.735463863515375, "grad_norm": 0.014756627380847931, "learning_rate": 5.234741778226593e-07, "loss": 0.0007, "step": 102690 }, { "epoch": 1.7356328637942253, "grad_norm": 0.06088197976350784, "learning_rate": 5.228174050084966e-07, "loss": 0.0006, "step": 102700 }, { "epoch": 1.7358018640730757, "grad_norm": 0.05784652754664421, "learning_rate": 5.221610217309136e-07, "loss": 0.0007, "step": 102710 }, { "epoch": 1.735970864351926, "grad_norm": 0.02115379087626934, "learning_rate": 5.215050280470163e-07, "loss": 0.0001, "step": 102720 }, { "epoch": 1.7361398646307766, "grad_norm": 0.16476072371006012, "learning_rate": 5.208494240138812e-07, "loss": 0.0007, "step": 102730 }, { "epoch": 1.7363088649096272, "grad_norm": 0.008767795749008656, "learning_rate": 5.20194209688547e-07, "loss": 0.0006, "step": 102740 }, { "epoch": 1.7364778651884776, "grad_norm": 0.020127803087234497, "learning_rate": 5.195393851280223e-07, "loss": 0.0005, "step": 102750 }, { "epoch": 1.736646865467328, "grad_norm": 0.011776608414947987, "learning_rate": 5.18884950389279e-07, "loss": 0.0002, "step": 102760 }, { "epoch": 1.7368158657461785, "grad_norm": 0.012264423072338104, "learning_rate": 5.182309055292573e-07, "loss": 0.0004, "step": 102770 }, { "epoch": 1.736984866025029, "grad_norm": 0.026176534593105316, "learning_rate": 5.17577250604861e-07, "loss": 0.0002, "step": 102780 }, { "epoch": 1.7371538663038795, "grad_norm": 0.008904360234737396, "learning_rate": 5.169239856729624e-07, "loss": 0.0007, "step": 102790 }, { "epoch": 1.7373228665827298, "grad_norm": 0.03166089206933975, "learning_rate": 5.162711107903973e-07, "loss": 0.001, "step": 102800 }, { "epoch": 1.7374918668615802, "grad_norm": 0.04781964793801308, "learning_rate": 5.156186260139706e-07, "loss": 0.0009, "step": 102810 }, { "epoch": 1.7376608671404308, "grad_norm": 0.007664358243346214, "learning_rate": 5.149665314004498e-07, "loss": 0.0004, "step": 102820 }, { "epoch": 1.7378298674192814, "grad_norm": 0.0008122368599288166, "learning_rate": 5.143148270065723e-07, "loss": 0.0002, "step": 102830 }, { "epoch": 1.7379988676981317, "grad_norm": 0.17051158845424652, "learning_rate": 5.136635128890371e-07, "loss": 0.001, "step": 102840 }, { "epoch": 1.738167867976982, "grad_norm": 0.015453736297786236, "learning_rate": 5.130125891045146e-07, "loss": 0.0007, "step": 102850 }, { "epoch": 1.7383368682558327, "grad_norm": 0.06761077046394348, "learning_rate": 5.123620557096354e-07, "loss": 0.0013, "step": 102860 }, { "epoch": 1.738505868534683, "grad_norm": 0.011229383759200573, "learning_rate": 5.11711912761001e-07, "loss": 0.0004, "step": 102870 }, { "epoch": 1.7386748688135336, "grad_norm": 0.10608507692813873, "learning_rate": 5.110621603151766e-07, "loss": 0.0039, "step": 102880 }, { "epoch": 1.738843869092384, "grad_norm": 0.02043667435646057, "learning_rate": 5.104127984286933e-07, "loss": 0.0005, "step": 102890 }, { "epoch": 1.7390128693712343, "grad_norm": 0.017715172842144966, "learning_rate": 5.097638271580501e-07, "loss": 0.0004, "step": 102900 }, { "epoch": 1.739181869650085, "grad_norm": 0.03839319571852684, "learning_rate": 5.091152465597082e-07, "loss": 0.0003, "step": 102910 }, { "epoch": 1.7393508699289355, "grad_norm": 0.04127620533108711, "learning_rate": 5.084670566901001e-07, "loss": 0.0008, "step": 102920 }, { "epoch": 1.7395198702077859, "grad_norm": 0.034130506217479706, "learning_rate": 5.07819257605619e-07, "loss": 0.0005, "step": 102930 }, { "epoch": 1.7396888704866362, "grad_norm": 0.027376290410757065, "learning_rate": 5.071718493626277e-07, "loss": 0.0008, "step": 102940 }, { "epoch": 1.7398578707654868, "grad_norm": 0.07222705334424973, "learning_rate": 5.065248320174543e-07, "loss": 0.0004, "step": 102950 }, { "epoch": 1.7400268710443372, "grad_norm": 0.006656379904597998, "learning_rate": 5.058782056263912e-07, "loss": 0.0007, "step": 102960 }, { "epoch": 1.7401958713231878, "grad_norm": 0.03969889506697655, "learning_rate": 5.052319702456987e-07, "loss": 0.0006, "step": 102970 }, { "epoch": 1.7403648716020381, "grad_norm": 0.02160169929265976, "learning_rate": 5.045861259316038e-07, "loss": 0.0004, "step": 102980 }, { "epoch": 1.7405338718808885, "grad_norm": 0.0028784871101379395, "learning_rate": 5.039406727402951e-07, "loss": 0.0003, "step": 102990 }, { "epoch": 1.740702872159739, "grad_norm": 0.0740542784333229, "learning_rate": 5.03295610727933e-07, "loss": 0.0005, "step": 103000 }, { "epoch": 1.7408718724385897, "grad_norm": 0.09806045144796371, "learning_rate": 5.02650939950639e-07, "loss": 0.0007, "step": 103010 }, { "epoch": 1.74104087271744, "grad_norm": 0.015504520386457443, "learning_rate": 5.020066604645041e-07, "loss": 0.0005, "step": 103020 }, { "epoch": 1.7412098729962904, "grad_norm": 0.011831060983240604, "learning_rate": 5.01362772325582e-07, "loss": 0.0004, "step": 103030 }, { "epoch": 1.741378873275141, "grad_norm": 0.0006339370738714933, "learning_rate": 5.007192755898965e-07, "loss": 0.0015, "step": 103040 }, { "epoch": 1.7415478735539913, "grad_norm": 0.0036010006442666054, "learning_rate": 5.000761703134321e-07, "loss": 0.0007, "step": 103050 }, { "epoch": 1.741716873832842, "grad_norm": 0.010716896504163742, "learning_rate": 4.994334565521447e-07, "loss": 0.0004, "step": 103060 }, { "epoch": 1.7418858741116923, "grad_norm": 0.030913732945919037, "learning_rate": 4.987911343619517e-07, "loss": 0.001, "step": 103070 }, { "epoch": 1.7420548743905426, "grad_norm": 0.02269650623202324, "learning_rate": 4.981492037987396e-07, "loss": 0.0016, "step": 103080 }, { "epoch": 1.7422238746693932, "grad_norm": 0.07766445726156235, "learning_rate": 4.975076649183574e-07, "loss": 0.0047, "step": 103090 }, { "epoch": 1.7423928749482438, "grad_norm": 0.13688509166240692, "learning_rate": 4.968665177766247e-07, "loss": 0.0011, "step": 103100 }, { "epoch": 1.7425618752270942, "grad_norm": 0.06837814301252365, "learning_rate": 4.962257624293221e-07, "loss": 0.0005, "step": 103110 }, { "epoch": 1.7427308755059445, "grad_norm": 0.015745289623737335, "learning_rate": 4.955853989321996e-07, "loss": 0.0005, "step": 103120 }, { "epoch": 1.7428998757847949, "grad_norm": 0.07937926054000854, "learning_rate": 4.949454273409726e-07, "loss": 0.0005, "step": 103130 }, { "epoch": 1.7430688760636455, "grad_norm": 0.021244054660201073, "learning_rate": 4.9430584771132e-07, "loss": 0.0003, "step": 103140 }, { "epoch": 1.743237876342496, "grad_norm": 0.0003001017321366817, "learning_rate": 4.936666600988893e-07, "loss": 0.0006, "step": 103150 }, { "epoch": 1.7434068766213464, "grad_norm": 0.054227881133556366, "learning_rate": 4.930278645592928e-07, "loss": 0.0009, "step": 103160 }, { "epoch": 1.7435758769001968, "grad_norm": 0.039106860756874084, "learning_rate": 4.923894611481095e-07, "loss": 0.0004, "step": 103170 }, { "epoch": 1.7437448771790474, "grad_norm": 0.028639808297157288, "learning_rate": 4.917514499208825e-07, "loss": 0.0007, "step": 103180 }, { "epoch": 1.743913877457898, "grad_norm": 0.005487741436809301, "learning_rate": 4.911138309331232e-07, "loss": 0.0021, "step": 103190 }, { "epoch": 1.7440828777367483, "grad_norm": 0.07214861363172531, "learning_rate": 4.904766042403053e-07, "loss": 0.0004, "step": 103200 }, { "epoch": 1.7442518780155987, "grad_norm": 0.09120302647352219, "learning_rate": 4.898397698978736e-07, "loss": 0.0007, "step": 103210 }, { "epoch": 1.744420878294449, "grad_norm": 0.010890722274780273, "learning_rate": 4.892033279612329e-07, "loss": 0.0004, "step": 103220 }, { "epoch": 1.7445898785732996, "grad_norm": 0.06599367409944534, "learning_rate": 4.885672784857586e-07, "loss": 0.0013, "step": 103230 }, { "epoch": 1.7447588788521502, "grad_norm": 0.02376548759639263, "learning_rate": 4.879316215267887e-07, "loss": 0.0007, "step": 103240 }, { "epoch": 1.7449278791310006, "grad_norm": 0.0024631957057863474, "learning_rate": 4.872963571396305e-07, "loss": 0.0007, "step": 103250 }, { "epoch": 1.745096879409851, "grad_norm": 0.02037734165787697, "learning_rate": 4.866614853795526e-07, "loss": 0.0003, "step": 103260 }, { "epoch": 1.7452658796887015, "grad_norm": 0.08212215453386307, "learning_rate": 4.860270063017935e-07, "loss": 0.0013, "step": 103270 }, { "epoch": 1.745434879967552, "grad_norm": 0.05925295501947403, "learning_rate": 4.853929199615554e-07, "loss": 0.0008, "step": 103280 }, { "epoch": 1.7456038802464025, "grad_norm": 0.0582982636988163, "learning_rate": 4.847592264140078e-07, "loss": 0.0011, "step": 103290 }, { "epoch": 1.7457728805252528, "grad_norm": 0.0348796546459198, "learning_rate": 4.841259257142833e-07, "loss": 0.0009, "step": 103300 }, { "epoch": 1.7459418808041032, "grad_norm": 0.006674604490399361, "learning_rate": 4.834930179174829e-07, "loss": 0.0003, "step": 103310 }, { "epoch": 1.7461108810829538, "grad_norm": 0.006219710223376751, "learning_rate": 4.828605030786743e-07, "loss": 0.0008, "step": 103320 }, { "epoch": 1.7462798813618043, "grad_norm": 0.11030562967061996, "learning_rate": 4.822283812528871e-07, "loss": 0.0011, "step": 103330 }, { "epoch": 1.7464488816406547, "grad_norm": 0.1262752115726471, "learning_rate": 4.815966524951193e-07, "loss": 0.0007, "step": 103340 }, { "epoch": 1.746617881919505, "grad_norm": 0.03812302276492119, "learning_rate": 4.809653168603362e-07, "loss": 0.0006, "step": 103350 }, { "epoch": 1.7467868821983557, "grad_norm": 0.018701814115047455, "learning_rate": 4.803343744034649e-07, "loss": 0.0004, "step": 103360 }, { "epoch": 1.7469558824772062, "grad_norm": 0.021062275394797325, "learning_rate": 4.797038251794023e-07, "loss": 0.0004, "step": 103370 }, { "epoch": 1.7471248827560566, "grad_norm": 0.020336691290140152, "learning_rate": 4.790736692430071e-07, "loss": 0.001, "step": 103380 }, { "epoch": 1.747293883034907, "grad_norm": 0.05079956725239754, "learning_rate": 4.78443906649107e-07, "loss": 0.0015, "step": 103390 }, { "epoch": 1.7474628833137573, "grad_norm": 0.010214217007160187, "learning_rate": 4.778145374524956e-07, "loss": 0.0002, "step": 103400 }, { "epoch": 1.747631883592608, "grad_norm": 0.10870637744665146, "learning_rate": 4.771855617079285e-07, "loss": 0.0006, "step": 103410 }, { "epoch": 1.7478008838714585, "grad_norm": 0.04904106631875038, "learning_rate": 4.765569794701325e-07, "loss": 0.0014, "step": 103420 }, { "epoch": 1.7479698841503089, "grad_norm": 0.009648908860981464, "learning_rate": 4.759287907937948e-07, "loss": 0.0003, "step": 103430 }, { "epoch": 1.7481388844291592, "grad_norm": 0.00835387408733368, "learning_rate": 4.7530099573357303e-07, "loss": 0.0007, "step": 103440 }, { "epoch": 1.7483078847080098, "grad_norm": 0.1508789360523224, "learning_rate": 4.7467359434408613e-07, "loss": 0.0012, "step": 103450 }, { "epoch": 1.7484768849868604, "grad_norm": 0.03186517953872681, "learning_rate": 4.740465866799232e-07, "loss": 0.0007, "step": 103460 }, { "epoch": 1.7486458852657107, "grad_norm": 0.01163002010434866, "learning_rate": 4.734199727956357e-07, "loss": 0.0002, "step": 103470 }, { "epoch": 1.748814885544561, "grad_norm": 0.016560060903429985, "learning_rate": 4.727937527457427e-07, "loss": 0.0004, "step": 103480 }, { "epoch": 1.7489838858234115, "grad_norm": 0.0025057720486074686, "learning_rate": 4.7216792658472677e-07, "loss": 0.0011, "step": 103490 }, { "epoch": 1.749152886102262, "grad_norm": 0.004498408176004887, "learning_rate": 4.715424943670405e-07, "loss": 0.0005, "step": 103500 }, { "epoch": 1.7493218863811126, "grad_norm": 0.014925013296306133, "learning_rate": 4.7091745614709693e-07, "loss": 0.0007, "step": 103510 }, { "epoch": 1.749490886659963, "grad_norm": 0.09344600141048431, "learning_rate": 4.7029281197927876e-07, "loss": 0.0011, "step": 103520 }, { "epoch": 1.7496598869388134, "grad_norm": 0.08888610452413559, "learning_rate": 4.6966856191793253e-07, "loss": 0.0005, "step": 103530 }, { "epoch": 1.749828887217664, "grad_norm": 0.015614978969097137, "learning_rate": 4.6904470601737253e-07, "loss": 0.0005, "step": 103540 }, { "epoch": 1.7499978874965145, "grad_norm": 0.024644412100315094, "learning_rate": 4.684212443318753e-07, "loss": 0.0008, "step": 103550 }, { "epoch": 1.7501668877753649, "grad_norm": 0.03356558084487915, "learning_rate": 4.67798176915687e-07, "loss": 0.0005, "step": 103560 }, { "epoch": 1.7503358880542152, "grad_norm": 0.042815741151571274, "learning_rate": 4.6717550382301466e-07, "loss": 0.0009, "step": 103570 }, { "epoch": 1.7505048883330656, "grad_norm": 0.0005397217464633286, "learning_rate": 4.665532251080368e-07, "loss": 0.0002, "step": 103580 }, { "epoch": 1.7506738886119162, "grad_norm": 0.029403207823634148, "learning_rate": 4.6593134082489276e-07, "loss": 0.0004, "step": 103590 }, { "epoch": 1.7508428888907668, "grad_norm": 0.038796812295913696, "learning_rate": 4.65309851027691e-07, "loss": 0.0004, "step": 103600 }, { "epoch": 1.7510118891696171, "grad_norm": 0.0008636804413981736, "learning_rate": 4.646887557705021e-07, "loss": 0.0004, "step": 103610 }, { "epoch": 1.7511808894484675, "grad_norm": 0.05733103305101395, "learning_rate": 4.6406805510736676e-07, "loss": 0.0006, "step": 103620 }, { "epoch": 1.751349889727318, "grad_norm": 0.03309517353773117, "learning_rate": 4.6344774909228617e-07, "loss": 0.0005, "step": 103630 }, { "epoch": 1.7515188900061687, "grad_norm": 0.0010934839956462383, "learning_rate": 4.6282783777923226e-07, "loss": 0.0004, "step": 103640 }, { "epoch": 1.751687890285019, "grad_norm": 0.004879363812506199, "learning_rate": 4.622083212221401e-07, "loss": 0.0004, "step": 103650 }, { "epoch": 1.7518568905638694, "grad_norm": 0.009129153564572334, "learning_rate": 4.6158919947490887e-07, "loss": 0.0004, "step": 103660 }, { "epoch": 1.7520258908427198, "grad_norm": 0.02794717065989971, "learning_rate": 4.609704725914077e-07, "loss": 0.0007, "step": 103670 }, { "epoch": 1.7521948911215703, "grad_norm": 0.0331835113465786, "learning_rate": 4.6035214062546686e-07, "loss": 0.0011, "step": 103680 }, { "epoch": 1.752363891400421, "grad_norm": 0.022332506254315376, "learning_rate": 4.597342036308855e-07, "loss": 0.0005, "step": 103690 }, { "epoch": 1.7525328916792713, "grad_norm": 0.12581118941307068, "learning_rate": 4.5911666166142564e-07, "loss": 0.001, "step": 103700 }, { "epoch": 1.7527018919581216, "grad_norm": 0.004481620155274868, "learning_rate": 4.584995147708171e-07, "loss": 0.0004, "step": 103710 }, { "epoch": 1.7528708922369722, "grad_norm": 0.009676892310380936, "learning_rate": 4.5788276301275635e-07, "loss": 0.0008, "step": 103720 }, { "epoch": 1.7530398925158228, "grad_norm": 0.0342324897646904, "learning_rate": 4.572664064409005e-07, "loss": 0.0005, "step": 103730 }, { "epoch": 1.7532088927946732, "grad_norm": 0.0059454115107655525, "learning_rate": 4.566504451088782e-07, "loss": 0.0007, "step": 103740 }, { "epoch": 1.7533778930735235, "grad_norm": 0.11812199652194977, "learning_rate": 4.5603487907028066e-07, "loss": 0.0006, "step": 103750 }, { "epoch": 1.753546893352374, "grad_norm": 0.0014133210061118007, "learning_rate": 4.554197083786638e-07, "loss": 0.0004, "step": 103760 }, { "epoch": 1.7537158936312245, "grad_norm": 0.004159275442361832, "learning_rate": 4.5480493308755205e-07, "loss": 0.0003, "step": 103770 }, { "epoch": 1.753884893910075, "grad_norm": 0.030234072357416153, "learning_rate": 4.541905532504326e-07, "loss": 0.0002, "step": 103780 }, { "epoch": 1.7540538941889254, "grad_norm": 0.025236107409000397, "learning_rate": 4.535765689207605e-07, "loss": 0.0003, "step": 103790 }, { "epoch": 1.7542228944677758, "grad_norm": 0.04440271481871605, "learning_rate": 4.529629801519542e-07, "loss": 0.0006, "step": 103800 }, { "epoch": 1.7543918947466264, "grad_norm": 0.2087211161851883, "learning_rate": 4.5234978699740026e-07, "loss": 0.0027, "step": 103810 }, { "epoch": 1.7545608950254767, "grad_norm": 0.04368753731250763, "learning_rate": 4.5173698951044787e-07, "loss": 0.0007, "step": 103820 }, { "epoch": 1.7547298953043273, "grad_norm": 0.04231741651892662, "learning_rate": 4.511245877444154e-07, "loss": 0.0004, "step": 103830 }, { "epoch": 1.7548988955831777, "grad_norm": 0.019522041082382202, "learning_rate": 4.505125817525824e-07, "loss": 0.0015, "step": 103840 }, { "epoch": 1.755067895862028, "grad_norm": 0.017136752605438232, "learning_rate": 4.499009715881986e-07, "loss": 0.0002, "step": 103850 }, { "epoch": 1.7552368961408786, "grad_norm": 0.0033868923783302307, "learning_rate": 4.4928975730447466e-07, "loss": 0.0005, "step": 103860 }, { "epoch": 1.7554058964197292, "grad_norm": 0.030208753421902657, "learning_rate": 4.4867893895459203e-07, "loss": 0.0005, "step": 103870 }, { "epoch": 1.7555748966985796, "grad_norm": 0.0030300342477858067, "learning_rate": 4.4806851659169204e-07, "loss": 0.0011, "step": 103880 }, { "epoch": 1.75574389697743, "grad_norm": 0.06566812098026276, "learning_rate": 4.474584902688861e-07, "loss": 0.0014, "step": 103890 }, { "epoch": 1.7559128972562805, "grad_norm": 0.04958728328347206, "learning_rate": 4.4684886003924897e-07, "loss": 0.0005, "step": 103900 }, { "epoch": 1.7560818975351309, "grad_norm": 0.01267607044428587, "learning_rate": 4.462396259558222e-07, "loss": 0.0009, "step": 103910 }, { "epoch": 1.7562508978139815, "grad_norm": 0.06525146216154099, "learning_rate": 4.4563078807161044e-07, "loss": 0.0005, "step": 103920 }, { "epoch": 1.7564198980928318, "grad_norm": 0.006330079864710569, "learning_rate": 4.4502234643958695e-07, "loss": 0.0008, "step": 103930 }, { "epoch": 1.7565888983716822, "grad_norm": 0.054142214357852936, "learning_rate": 4.4441430111268945e-07, "loss": 0.0003, "step": 103940 }, { "epoch": 1.7567578986505328, "grad_norm": 0.018102606758475304, "learning_rate": 4.4380665214381945e-07, "loss": 0.0008, "step": 103950 }, { "epoch": 1.7569268989293834, "grad_norm": 0.04914461821317673, "learning_rate": 4.4319939958584636e-07, "loss": 0.0006, "step": 103960 }, { "epoch": 1.7570958992082337, "grad_norm": 0.07141398638486862, "learning_rate": 4.425925434916034e-07, "loss": 0.001, "step": 103970 }, { "epoch": 1.757264899487084, "grad_norm": 0.08057130128145218, "learning_rate": 4.419860839138906e-07, "loss": 0.0005, "step": 103980 }, { "epoch": 1.7574338997659344, "grad_norm": 0.02311614342033863, "learning_rate": 4.4138002090547194e-07, "loss": 0.0003, "step": 103990 }, { "epoch": 1.757602900044785, "grad_norm": 0.059669192880392075, "learning_rate": 4.407743545190796e-07, "loss": 0.0011, "step": 104000 }, { "epoch": 1.7577719003236356, "grad_norm": 0.03703858703374863, "learning_rate": 4.4016908480740696e-07, "loss": 0.0008, "step": 104010 }, { "epoch": 1.757940900602486, "grad_norm": 0.11064209789037704, "learning_rate": 4.3956421182311806e-07, "loss": 0.001, "step": 104020 }, { "epoch": 1.7581099008813363, "grad_norm": 0.002106801373884082, "learning_rate": 4.38959735618838e-07, "loss": 0.0004, "step": 104030 }, { "epoch": 1.758278901160187, "grad_norm": 0.016959697008132935, "learning_rate": 4.383556562471603e-07, "loss": 0.0005, "step": 104040 }, { "epoch": 1.7584479014390375, "grad_norm": 0.023819249123334885, "learning_rate": 4.3775197376064117e-07, "loss": 0.0006, "step": 104050 }, { "epoch": 1.7586169017178879, "grad_norm": 0.07967918366193771, "learning_rate": 4.371486882118053e-07, "loss": 0.0005, "step": 104060 }, { "epoch": 1.7587859019967382, "grad_norm": 0.014442991465330124, "learning_rate": 4.365457996531408e-07, "loss": 0.0006, "step": 104070 }, { "epoch": 1.7589549022755886, "grad_norm": 0.022880321368575096, "learning_rate": 4.359433081371023e-07, "loss": 0.0007, "step": 104080 }, { "epoch": 1.7591239025544392, "grad_norm": 0.050609149038791656, "learning_rate": 4.3534121371611007e-07, "loss": 0.0007, "step": 104090 }, { "epoch": 1.7592929028332898, "grad_norm": 0.03655462712049484, "learning_rate": 4.3473951644254775e-07, "loss": 0.0005, "step": 104100 }, { "epoch": 1.7594619031121401, "grad_norm": 0.004376859869807959, "learning_rate": 4.341382163687663e-07, "loss": 0.0005, "step": 104110 }, { "epoch": 1.7596309033909905, "grad_norm": 0.01908200979232788, "learning_rate": 4.335373135470833e-07, "loss": 0.0005, "step": 104120 }, { "epoch": 1.759799903669841, "grad_norm": 0.05158529058098793, "learning_rate": 4.329368080297786e-07, "loss": 0.0007, "step": 104130 }, { "epoch": 1.7599689039486917, "grad_norm": 0.17117096483707428, "learning_rate": 4.3233669986909975e-07, "loss": 0.0012, "step": 104140 }, { "epoch": 1.760137904227542, "grad_norm": 0.009141642600297928, "learning_rate": 4.317369891172579e-07, "loss": 0.0005, "step": 104150 }, { "epoch": 1.7603069045063924, "grad_norm": 0.0012519872980192304, "learning_rate": 4.311376758264324e-07, "loss": 0.0006, "step": 104160 }, { "epoch": 1.7604759047852427, "grad_norm": 0.019536923617124557, "learning_rate": 4.3053876004876595e-07, "loss": 0.0006, "step": 104170 }, { "epoch": 1.7606449050640933, "grad_norm": 0.0017009720904752612, "learning_rate": 4.299402418363663e-07, "loss": 0.0006, "step": 104180 }, { "epoch": 1.760813905342944, "grad_norm": 0.07516901195049286, "learning_rate": 4.29342121241309e-07, "loss": 0.0005, "step": 104190 }, { "epoch": 1.7609829056217943, "grad_norm": 0.08406435698270798, "learning_rate": 4.28744398315632e-07, "loss": 0.0006, "step": 104200 }, { "epoch": 1.7611519059006446, "grad_norm": 0.002859867876395583, "learning_rate": 4.2814707311134074e-07, "loss": 0.0007, "step": 104210 }, { "epoch": 1.7613209061794952, "grad_norm": 0.0836159959435463, "learning_rate": 4.275501456804049e-07, "loss": 0.0003, "step": 104220 }, { "epoch": 1.7614899064583458, "grad_norm": 0.07688683271408081, "learning_rate": 4.269536160747612e-07, "loss": 0.0004, "step": 104230 }, { "epoch": 1.7616589067371962, "grad_norm": 0.01429643202573061, "learning_rate": 4.263574843463086e-07, "loss": 0.0007, "step": 104240 }, { "epoch": 1.7618279070160465, "grad_norm": 0.08189475536346436, "learning_rate": 4.257617505469158e-07, "loss": 0.0006, "step": 104250 }, { "epoch": 1.7619969072948969, "grad_norm": 0.03205665573477745, "learning_rate": 4.251664147284129e-07, "loss": 0.0005, "step": 104260 }, { "epoch": 1.7621659075737475, "grad_norm": 0.06763836741447449, "learning_rate": 4.245714769425979e-07, "loss": 0.0005, "step": 104270 }, { "epoch": 1.762334907852598, "grad_norm": 0.05251099169254303, "learning_rate": 4.2397693724123167e-07, "loss": 0.0004, "step": 104280 }, { "epoch": 1.7625039081314484, "grad_norm": 0.07325220108032227, "learning_rate": 4.233827956760439e-07, "loss": 0.0007, "step": 104290 }, { "epoch": 1.7626729084102988, "grad_norm": 0.05792950093746185, "learning_rate": 4.227890522987271e-07, "loss": 0.0011, "step": 104300 }, { "epoch": 1.7628419086891494, "grad_norm": 0.04769614338874817, "learning_rate": 4.2219570716094007e-07, "loss": 0.0016, "step": 104310 }, { "epoch": 1.763010908968, "grad_norm": 0.0192970372736454, "learning_rate": 4.2160276031430637e-07, "loss": 0.0003, "step": 104320 }, { "epoch": 1.7631799092468503, "grad_norm": 0.06704641878604889, "learning_rate": 4.210102118104159e-07, "loss": 0.0007, "step": 104330 }, { "epoch": 1.7633489095257007, "grad_norm": 0.018758367747068405, "learning_rate": 4.204180617008219e-07, "loss": 0.0007, "step": 104340 }, { "epoch": 1.763517909804551, "grad_norm": 0.03824960067868233, "learning_rate": 4.1982631003704586e-07, "loss": 0.0006, "step": 104350 }, { "epoch": 1.7636869100834016, "grad_norm": 0.020262107253074646, "learning_rate": 4.192349568705717e-07, "loss": 0.0006, "step": 104360 }, { "epoch": 1.7638559103622522, "grad_norm": 0.07570063322782516, "learning_rate": 4.1864400225285094e-07, "loss": 0.0007, "step": 104370 }, { "epoch": 1.7640249106411026, "grad_norm": 0.04012012109160423, "learning_rate": 4.1805344623529863e-07, "loss": 0.0006, "step": 104380 }, { "epoch": 1.764193910919953, "grad_norm": 0.06360950320959091, "learning_rate": 4.174632888692975e-07, "loss": 0.0007, "step": 104390 }, { "epoch": 1.7643629111988035, "grad_norm": 0.043980613350868225, "learning_rate": 4.1687353020619217e-07, "loss": 0.0007, "step": 104400 }, { "epoch": 1.764531911477654, "grad_norm": 0.0020944224670529366, "learning_rate": 4.1628417029729586e-07, "loss": 0.0007, "step": 104410 }, { "epoch": 1.7647009117565045, "grad_norm": 0.03159353509545326, "learning_rate": 4.1569520919388593e-07, "loss": 0.0007, "step": 104420 }, { "epoch": 1.7648699120353548, "grad_norm": 0.06862014532089233, "learning_rate": 4.1510664694720314e-07, "loss": 0.0009, "step": 104430 }, { "epoch": 1.7650389123142052, "grad_norm": 0.020865771919488907, "learning_rate": 4.145184836084576e-07, "loss": 0.0006, "step": 104440 }, { "epoch": 1.7652079125930558, "grad_norm": 0.03221869096159935, "learning_rate": 4.1393071922882e-07, "loss": 0.0004, "step": 104450 }, { "epoch": 1.7653769128719063, "grad_norm": 0.04893992096185684, "learning_rate": 4.133433538594306e-07, "loss": 0.0005, "step": 104460 }, { "epoch": 1.7655459131507567, "grad_norm": 0.08351850509643555, "learning_rate": 4.127563875513918e-07, "loss": 0.0004, "step": 104470 }, { "epoch": 1.765714913429607, "grad_norm": 0.03808083385229111, "learning_rate": 4.121698203557728e-07, "loss": 0.0006, "step": 104480 }, { "epoch": 1.7658839137084577, "grad_norm": 0.01868014596402645, "learning_rate": 4.115836523236083e-07, "loss": 0.0006, "step": 104490 }, { "epoch": 1.7660529139873082, "grad_norm": 0.0008707979577593505, "learning_rate": 4.1099788350589806e-07, "loss": 0.0004, "step": 104500 }, { "epoch": 1.7662219142661586, "grad_norm": 0.07478159666061401, "learning_rate": 4.1041251395360525e-07, "loss": 0.0005, "step": 104510 }, { "epoch": 1.766390914545009, "grad_norm": 0.009744617156684399, "learning_rate": 4.098275437176613e-07, "loss": 0.0006, "step": 104520 }, { "epoch": 1.7665599148238593, "grad_norm": 0.04124493896961212, "learning_rate": 4.0924297284896054e-07, "loss": 0.0011, "step": 104530 }, { "epoch": 1.76672891510271, "grad_norm": 0.007389638107270002, "learning_rate": 4.08658801398365e-07, "loss": 0.0003, "step": 104540 }, { "epoch": 1.7668979153815605, "grad_norm": 0.00717326020821929, "learning_rate": 4.0807502941669797e-07, "loss": 0.0004, "step": 104550 }, { "epoch": 1.7670669156604109, "grad_norm": 0.09308916330337524, "learning_rate": 4.074916569547527e-07, "loss": 0.0003, "step": 104560 }, { "epoch": 1.7672359159392612, "grad_norm": 0.06012408062815666, "learning_rate": 4.0690868406328355e-07, "loss": 0.0011, "step": 104570 }, { "epoch": 1.7674049162181118, "grad_norm": 0.00715381745249033, "learning_rate": 4.063261107930139e-07, "loss": 0.0004, "step": 104580 }, { "epoch": 1.7675739164969624, "grad_norm": 0.018232326954603195, "learning_rate": 4.0574393719462866e-07, "loss": 0.0006, "step": 104590 }, { "epoch": 1.7677429167758127, "grad_norm": 0.007518917787820101, "learning_rate": 4.0516216331878135e-07, "loss": 0.0005, "step": 104600 }, { "epoch": 1.767911917054663, "grad_norm": 0.02275843173265457, "learning_rate": 4.04580789216088e-07, "loss": 0.0005, "step": 104610 }, { "epoch": 1.7680809173335135, "grad_norm": 0.02745683863759041, "learning_rate": 4.039998149371316e-07, "loss": 0.0007, "step": 104620 }, { "epoch": 1.768249917612364, "grad_norm": 0.025723101571202278, "learning_rate": 4.034192405324583e-07, "loss": 0.0003, "step": 104630 }, { "epoch": 1.7684189178912146, "grad_norm": 0.020526999607682228, "learning_rate": 4.0283906605258325e-07, "loss": 0.0007, "step": 104640 }, { "epoch": 1.768587918170065, "grad_norm": 0.002178657567128539, "learning_rate": 4.022592915479828e-07, "loss": 0.0015, "step": 104650 }, { "epoch": 1.7687569184489154, "grad_norm": 0.06424304097890854, "learning_rate": 4.0167991706909994e-07, "loss": 0.0009, "step": 104660 }, { "epoch": 1.768925918727766, "grad_norm": 0.04882480204105377, "learning_rate": 4.011009426663437e-07, "loss": 0.0011, "step": 104670 }, { "epoch": 1.7690949190066163, "grad_norm": 0.05100816860795021, "learning_rate": 4.0052236839008885e-07, "loss": 0.0012, "step": 104680 }, { "epoch": 1.769263919285467, "grad_norm": 0.028061311691999435, "learning_rate": 3.999441942906718e-07, "loss": 0.0003, "step": 104690 }, { "epoch": 1.7694329195643173, "grad_norm": 0.05454196035861969, "learning_rate": 3.9936642041839726e-07, "loss": 0.0016, "step": 104700 }, { "epoch": 1.7696019198431676, "grad_norm": 0.02162751741707325, "learning_rate": 3.9878904682353617e-07, "loss": 0.0007, "step": 104710 }, { "epoch": 1.7697709201220182, "grad_norm": 0.04085154831409454, "learning_rate": 3.982120735563205e-07, "loss": 0.0006, "step": 104720 }, { "epoch": 1.7699399204008688, "grad_norm": 0.05033764988183975, "learning_rate": 3.9763550066695135e-07, "loss": 0.0006, "step": 104730 }, { "epoch": 1.7701089206797191, "grad_norm": 0.0951123908162117, "learning_rate": 3.9705932820559176e-07, "loss": 0.004, "step": 104740 }, { "epoch": 1.7702779209585695, "grad_norm": 0.06430260092020035, "learning_rate": 3.964835562223729e-07, "loss": 0.0003, "step": 104750 }, { "epoch": 1.77044692123742, "grad_norm": 0.060577888041734695, "learning_rate": 3.95908184767389e-07, "loss": 0.0006, "step": 104760 }, { "epoch": 1.7706159215162705, "grad_norm": 0.012775205075740814, "learning_rate": 3.9533321389070066e-07, "loss": 0.0003, "step": 104770 }, { "epoch": 1.770784921795121, "grad_norm": 0.029996542260050774, "learning_rate": 3.947586436423323e-07, "loss": 0.0004, "step": 104780 }, { "epoch": 1.7709539220739714, "grad_norm": 0.02309521473944187, "learning_rate": 3.9418447407227555e-07, "loss": 0.0006, "step": 104790 }, { "epoch": 1.7711229223528218, "grad_norm": 0.015215246006846428, "learning_rate": 3.936107052304844e-07, "loss": 0.0007, "step": 104800 }, { "epoch": 1.7712919226316723, "grad_norm": 0.007620797958225012, "learning_rate": 3.9303733716688106e-07, "loss": 0.0013, "step": 104810 }, { "epoch": 1.771460922910523, "grad_norm": 0.0433606281876564, "learning_rate": 3.924643699313502e-07, "loss": 0.0007, "step": 104820 }, { "epoch": 1.7716299231893733, "grad_norm": 0.04617132619023323, "learning_rate": 3.9189180357374346e-07, "loss": 0.0005, "step": 104830 }, { "epoch": 1.7717989234682237, "grad_norm": 0.014687767252326012, "learning_rate": 3.9131963814387606e-07, "loss": 0.0007, "step": 104840 }, { "epoch": 1.7719679237470742, "grad_norm": 0.04283357411623001, "learning_rate": 3.9074787369152986e-07, "loss": 0.0005, "step": 104850 }, { "epoch": 1.7721369240259246, "grad_norm": 0.03274627402424812, "learning_rate": 3.9017651026645175e-07, "loss": 0.0007, "step": 104860 }, { "epoch": 1.7723059243047752, "grad_norm": 0.0861838236451149, "learning_rate": 3.896055479183514e-07, "loss": 0.0022, "step": 104870 }, { "epoch": 1.7724749245836255, "grad_norm": 0.014093455858528614, "learning_rate": 3.890349866969062e-07, "loss": 0.0007, "step": 104880 }, { "epoch": 1.772643924862476, "grad_norm": 0.02075990103185177, "learning_rate": 3.884648266517588e-07, "loss": 0.0006, "step": 104890 }, { "epoch": 1.7728129251413265, "grad_norm": 0.013891511596739292, "learning_rate": 3.878950678325144e-07, "loss": 0.0004, "step": 104900 }, { "epoch": 1.772981925420177, "grad_norm": 0.10854381322860718, "learning_rate": 3.8732571028874566e-07, "loss": 0.0008, "step": 104910 }, { "epoch": 1.7731509256990274, "grad_norm": 0.06576929986476898, "learning_rate": 3.8675675406998793e-07, "loss": 0.0009, "step": 104920 }, { "epoch": 1.7733199259778778, "grad_norm": 0.03649206459522247, "learning_rate": 3.861881992257449e-07, "loss": 0.0004, "step": 104930 }, { "epoch": 1.7734889262567282, "grad_norm": 0.029594246298074722, "learning_rate": 3.8562004580548374e-07, "loss": 0.0018, "step": 104940 }, { "epoch": 1.7736579265355787, "grad_norm": 0.018652699887752533, "learning_rate": 3.850522938586343e-07, "loss": 0.0003, "step": 104950 }, { "epoch": 1.7738269268144293, "grad_norm": 0.03187130391597748, "learning_rate": 3.844849434345965e-07, "loss": 0.0007, "step": 104960 }, { "epoch": 1.7739959270932797, "grad_norm": 0.04595885053277016, "learning_rate": 3.839179945827304e-07, "loss": 0.0006, "step": 104970 }, { "epoch": 1.77416492737213, "grad_norm": 0.0030543392058461905, "learning_rate": 3.833514473523653e-07, "loss": 0.0008, "step": 104980 }, { "epoch": 1.7743339276509806, "grad_norm": 0.18576397001743317, "learning_rate": 3.827853017927913e-07, "loss": 0.0011, "step": 104990 }, { "epoch": 1.7745029279298312, "grad_norm": 0.03337743878364563, "learning_rate": 3.822195579532678e-07, "loss": 0.0012, "step": 105000 }, { "epoch": 1.7746719282086816, "grad_norm": 0.09377391636371613, "learning_rate": 3.816542158830161e-07, "loss": 0.0006, "step": 105010 }, { "epoch": 1.774840928487532, "grad_norm": 0.0215314794331789, "learning_rate": 3.81089275631224e-07, "loss": 0.0004, "step": 105020 }, { "epoch": 1.7750099287663823, "grad_norm": 0.13656103610992432, "learning_rate": 3.805247372470439e-07, "loss": 0.0006, "step": 105030 }, { "epoch": 1.775178929045233, "grad_norm": 0.012975458987057209, "learning_rate": 3.7996060077959306e-07, "loss": 0.0013, "step": 105040 }, { "epoch": 1.7753479293240835, "grad_norm": 0.028509151190519333, "learning_rate": 3.793968662779557e-07, "loss": 0.0004, "step": 105050 }, { "epoch": 1.7755169296029338, "grad_norm": 0.16324865818023682, "learning_rate": 3.788335337911775e-07, "loss": 0.0009, "step": 105060 }, { "epoch": 1.7756859298817842, "grad_norm": 0.002793596126139164, "learning_rate": 3.7827060336827206e-07, "loss": 0.0007, "step": 105070 }, { "epoch": 1.7758549301606348, "grad_norm": 0.02703750506043434, "learning_rate": 3.777080750582174e-07, "loss": 0.0007, "step": 105080 }, { "epoch": 1.7760239304394854, "grad_norm": 0.009869939647614956, "learning_rate": 3.771459489099549e-07, "loss": 0.0005, "step": 105090 }, { "epoch": 1.7761929307183357, "grad_norm": 0.02329307049512863, "learning_rate": 3.7658422497239443e-07, "loss": 0.001, "step": 105100 }, { "epoch": 1.776361930997186, "grad_norm": 0.023844977840781212, "learning_rate": 3.7602290329440573e-07, "loss": 0.0002, "step": 105110 }, { "epoch": 1.7765309312760365, "grad_norm": 0.011266546323895454, "learning_rate": 3.754619839248297e-07, "loss": 0.0009, "step": 105120 }, { "epoch": 1.776699931554887, "grad_norm": 0.025293419137597084, "learning_rate": 3.749014669124662e-07, "loss": 0.0008, "step": 105130 }, { "epoch": 1.7768689318337376, "grad_norm": 0.029278069734573364, "learning_rate": 3.7434135230608513e-07, "loss": 0.0004, "step": 105140 }, { "epoch": 1.777037932112588, "grad_norm": 0.16719502210617065, "learning_rate": 3.737816401544175e-07, "loss": 0.0003, "step": 105150 }, { "epoch": 1.7772069323914383, "grad_norm": 9.560196485836059e-05, "learning_rate": 3.732223305061622e-07, "loss": 0.0002, "step": 105160 }, { "epoch": 1.777375932670289, "grad_norm": 0.0026868858840316534, "learning_rate": 3.726634234099813e-07, "loss": 0.0004, "step": 105170 }, { "epoch": 1.7775449329491395, "grad_norm": 0.013223793357610703, "learning_rate": 3.721049189145021e-07, "loss": 0.001, "step": 105180 }, { "epoch": 1.7777139332279899, "grad_norm": 0.0006191139691509306, "learning_rate": 3.715468170683184e-07, "loss": 0.0006, "step": 105190 }, { "epoch": 1.7778829335068402, "grad_norm": 0.2048901915550232, "learning_rate": 3.709891179199859e-07, "loss": 0.0006, "step": 105200 }, { "epoch": 1.7780519337856906, "grad_norm": 0.03425382822751999, "learning_rate": 3.7043182151802915e-07, "loss": 0.0005, "step": 105210 }, { "epoch": 1.7782209340645412, "grad_norm": 0.05309786647558212, "learning_rate": 3.6987492791093427e-07, "loss": 0.0007, "step": 105220 }, { "epoch": 1.7783899343433918, "grad_norm": 0.0020808845292776823, "learning_rate": 3.6931843714715474e-07, "loss": 0.0011, "step": 105230 }, { "epoch": 1.7785589346222421, "grad_norm": 0.02424847148358822, "learning_rate": 3.687623492751063e-07, "loss": 0.0008, "step": 105240 }, { "epoch": 1.7787279349010925, "grad_norm": 0.0008779786294326186, "learning_rate": 3.6820666434317255e-07, "loss": 0.0005, "step": 105250 }, { "epoch": 1.778896935179943, "grad_norm": 0.008909628726541996, "learning_rate": 3.676513823997008e-07, "loss": 0.0008, "step": 105260 }, { "epoch": 1.7790659354587937, "grad_norm": 0.011672201566398144, "learning_rate": 3.6709650349300353e-07, "loss": 0.0001, "step": 105270 }, { "epoch": 1.779234935737644, "grad_norm": 0.014921033754944801, "learning_rate": 3.665420276713566e-07, "loss": 0.0003, "step": 105280 }, { "epoch": 1.7794039360164944, "grad_norm": 0.005454959347844124, "learning_rate": 3.6598795498300365e-07, "loss": 0.0013, "step": 105290 }, { "epoch": 1.7795729362953447, "grad_norm": 0.03779270127415657, "learning_rate": 3.6543428547615056e-07, "loss": 0.0031, "step": 105300 }, { "epoch": 1.7797419365741953, "grad_norm": 0.011112458072602749, "learning_rate": 3.648810191989699e-07, "loss": 0.0004, "step": 105310 }, { "epoch": 1.779910936853046, "grad_norm": 0.0141755947843194, "learning_rate": 3.643281561995976e-07, "loss": 0.0017, "step": 105320 }, { "epoch": 1.7800799371318963, "grad_norm": 0.009296818636357784, "learning_rate": 3.6377569652613684e-07, "loss": 0.0006, "step": 105330 }, { "epoch": 1.7802489374107466, "grad_norm": 0.11248309910297394, "learning_rate": 3.632236402266526e-07, "loss": 0.0015, "step": 105340 }, { "epoch": 1.7804179376895972, "grad_norm": 0.06976965814828873, "learning_rate": 3.6267198734917855e-07, "loss": 0.0007, "step": 105350 }, { "epoch": 1.7805869379684478, "grad_norm": 0.020244039595127106, "learning_rate": 3.621207379417091e-07, "loss": 0.0005, "step": 105360 }, { "epoch": 1.7807559382472982, "grad_norm": 0.019706254824995995, "learning_rate": 3.6156989205220704e-07, "loss": 0.0004, "step": 105370 }, { "epoch": 1.7809249385261485, "grad_norm": 0.013626663014292717, "learning_rate": 3.6101944972859736e-07, "loss": 0.0003, "step": 105380 }, { "epoch": 1.7810939388049989, "grad_norm": 0.02177486941218376, "learning_rate": 3.604694110187723e-07, "loss": 0.0004, "step": 105390 }, { "epoch": 1.7812629390838495, "grad_norm": 0.004014752805233002, "learning_rate": 3.5991977597058746e-07, "loss": 0.0004, "step": 105400 }, { "epoch": 1.7814319393627, "grad_norm": 0.0028710479382425547, "learning_rate": 3.5937054463186406e-07, "loss": 0.001, "step": 105410 }, { "epoch": 1.7816009396415504, "grad_norm": 0.0014161871513351798, "learning_rate": 3.5882171705038715e-07, "loss": 0.0005, "step": 105420 }, { "epoch": 1.7817699399204008, "grad_norm": 0.0008943057619035244, "learning_rate": 3.5827329327390803e-07, "loss": 0.0003, "step": 105430 }, { "epoch": 1.7819389401992514, "grad_norm": 0.022660084068775177, "learning_rate": 3.5772527335014184e-07, "loss": 0.0009, "step": 105440 }, { "epoch": 1.782107940478102, "grad_norm": 0.020562347024679184, "learning_rate": 3.571776573267699e-07, "loss": 0.0006, "step": 105450 }, { "epoch": 1.7822769407569523, "grad_norm": 0.007296978496015072, "learning_rate": 3.566304452514363e-07, "loss": 0.0008, "step": 105460 }, { "epoch": 1.7824459410358027, "grad_norm": 0.019784986972808838, "learning_rate": 3.5608363717175134e-07, "loss": 0.0006, "step": 105470 }, { "epoch": 1.782614941314653, "grad_norm": 0.10019789636135101, "learning_rate": 3.5553723313529074e-07, "loss": 0.0007, "step": 105480 }, { "epoch": 1.7827839415935036, "grad_norm": 0.021866897121071815, "learning_rate": 3.549912331895938e-07, "loss": 0.0007, "step": 105490 }, { "epoch": 1.7829529418723542, "grad_norm": 0.00043900907621718943, "learning_rate": 3.5444563738216533e-07, "loss": 0.0007, "step": 105500 }, { "epoch": 1.7831219421512046, "grad_norm": 0.0011269195238128304, "learning_rate": 3.539004457604739e-07, "loss": 0.0011, "step": 105510 }, { "epoch": 1.783290942430055, "grad_norm": 0.25038066506385803, "learning_rate": 3.533556583719555e-07, "loss": 0.0013, "step": 105520 }, { "epoch": 1.7834599427089055, "grad_norm": 0.011651636101305485, "learning_rate": 3.5281127526400785e-07, "loss": 0.0009, "step": 105530 }, { "epoch": 1.783628942987756, "grad_norm": 0.03919496014714241, "learning_rate": 3.522672964839957e-07, "loss": 0.0004, "step": 105540 }, { "epoch": 1.7837979432666065, "grad_norm": 0.012245673686265945, "learning_rate": 3.517237220792469e-07, "loss": 0.0005, "step": 105550 }, { "epoch": 1.7839669435454568, "grad_norm": 0.018710102885961533, "learning_rate": 3.5118055209705625e-07, "loss": 0.0002, "step": 105560 }, { "epoch": 1.7841359438243072, "grad_norm": 0.1540943682193756, "learning_rate": 3.5063778658468105e-07, "loss": 0.001, "step": 105570 }, { "epoch": 1.7843049441031578, "grad_norm": 0.018390536308288574, "learning_rate": 3.500954255893463e-07, "loss": 0.0007, "step": 105580 }, { "epoch": 1.7844739443820083, "grad_norm": 0.14161762595176697, "learning_rate": 3.495534691582375e-07, "loss": 0.0007, "step": 105590 }, { "epoch": 1.7846429446608587, "grad_norm": 0.010210787877440453, "learning_rate": 3.4901191733850983e-07, "loss": 0.0005, "step": 105600 }, { "epoch": 1.784811944939709, "grad_norm": 0.008424220606684685, "learning_rate": 3.484707701772788e-07, "loss": 0.0005, "step": 105610 }, { "epoch": 1.7849809452185597, "grad_norm": 0.08034904301166534, "learning_rate": 3.479300277216285e-07, "loss": 0.0011, "step": 105620 }, { "epoch": 1.78514994549741, "grad_norm": 0.02755795232951641, "learning_rate": 3.473896900186052e-07, "loss": 0.0004, "step": 105630 }, { "epoch": 1.7853189457762606, "grad_norm": 0.017700070515275, "learning_rate": 3.468497571152218e-07, "loss": 0.0007, "step": 105640 }, { "epoch": 1.785487946055111, "grad_norm": 0.00878935493528843, "learning_rate": 3.4631022905845413e-07, "loss": 0.0013, "step": 105650 }, { "epoch": 1.7856569463339613, "grad_norm": 0.05933452025055885, "learning_rate": 3.457711058952451e-07, "loss": 0.0007, "step": 105660 }, { "epoch": 1.785825946612812, "grad_norm": 0.00013369330554269254, "learning_rate": 3.4523238767249943e-07, "loss": 0.0008, "step": 105670 }, { "epoch": 1.7859949468916625, "grad_norm": 0.0019009606912732124, "learning_rate": 3.446940744370886e-07, "loss": 0.0007, "step": 105680 }, { "epoch": 1.7861639471705129, "grad_norm": 0.007505466230213642, "learning_rate": 3.441561662358495e-07, "loss": 0.0004, "step": 105690 }, { "epoch": 1.7863329474493632, "grad_norm": 0.02765263058245182, "learning_rate": 3.436186631155813e-07, "loss": 0.0004, "step": 105700 }, { "epoch": 1.7865019477282138, "grad_norm": 0.049307625740766525, "learning_rate": 3.430815651230507e-07, "loss": 0.0003, "step": 105710 }, { "epoch": 1.7866709480070642, "grad_norm": 0.021655231714248657, "learning_rate": 3.425448723049868e-07, "loss": 0.0004, "step": 105720 }, { "epoch": 1.7868399482859147, "grad_norm": 0.0027275453321635723, "learning_rate": 3.4200858470808564e-07, "loss": 0.0004, "step": 105730 }, { "epoch": 1.787008948564765, "grad_norm": 0.0030428189784288406, "learning_rate": 3.4147270237900543e-07, "loss": 0.0007, "step": 105740 }, { "epoch": 1.7871779488436155, "grad_norm": 0.013712714426219463, "learning_rate": 3.409372253643717e-07, "loss": 0.0005, "step": 105750 }, { "epoch": 1.787346949122466, "grad_norm": 0.012625922448933125, "learning_rate": 3.404021537107721e-07, "loss": 0.0007, "step": 105760 }, { "epoch": 1.7875159494013166, "grad_norm": 0.08924346417188644, "learning_rate": 3.398674874647623e-07, "loss": 0.0006, "step": 105770 }, { "epoch": 1.787684949680167, "grad_norm": 0.030749188736081123, "learning_rate": 3.3933322667285937e-07, "loss": 0.0003, "step": 105780 }, { "epoch": 1.7878539499590174, "grad_norm": 0.04751649126410484, "learning_rate": 3.3879937138154793e-07, "loss": 0.0015, "step": 105790 }, { "epoch": 1.788022950237868, "grad_norm": 0.020827028900384903, "learning_rate": 3.3826592163727413e-07, "loss": 0.0007, "step": 105800 }, { "epoch": 1.7881919505167183, "grad_norm": 0.039099644869565964, "learning_rate": 3.3773287748645245e-07, "loss": 0.0025, "step": 105810 }, { "epoch": 1.788360950795569, "grad_norm": 0.050598226487636566, "learning_rate": 3.372002389754597e-07, "loss": 0.0006, "step": 105820 }, { "epoch": 1.7885299510744193, "grad_norm": 0.02448190376162529, "learning_rate": 3.366680061506372e-07, "loss": 0.0012, "step": 105830 }, { "epoch": 1.7886989513532696, "grad_norm": 0.05488467589020729, "learning_rate": 3.3613617905829286e-07, "loss": 0.0003, "step": 105840 }, { "epoch": 1.7888679516321202, "grad_norm": 0.015062323771417141, "learning_rate": 3.3560475774469857e-07, "loss": 0.0002, "step": 105850 }, { "epoch": 1.7890369519109708, "grad_norm": 0.03238752484321594, "learning_rate": 3.35073742256089e-07, "loss": 0.0005, "step": 105860 }, { "epoch": 1.7892059521898211, "grad_norm": 0.021235447376966476, "learning_rate": 3.3454313263866725e-07, "loss": 0.0004, "step": 105870 }, { "epoch": 1.7893749524686715, "grad_norm": 0.016415433958172798, "learning_rate": 3.3401292893859625e-07, "loss": 0.0006, "step": 105880 }, { "epoch": 1.7895439527475219, "grad_norm": 0.0194083284586668, "learning_rate": 3.334831312020087e-07, "loss": 0.0004, "step": 105890 }, { "epoch": 1.7897129530263725, "grad_norm": 0.04980658367276192, "learning_rate": 3.3295373947499764e-07, "loss": 0.0007, "step": 105900 }, { "epoch": 1.789881953305223, "grad_norm": 0.04084394499659538, "learning_rate": 3.3242475380362404e-07, "loss": 0.0004, "step": 105910 }, { "epoch": 1.7900509535840734, "grad_norm": 0.041083622723817825, "learning_rate": 3.3189617423391163e-07, "loss": 0.0011, "step": 105920 }, { "epoch": 1.7902199538629238, "grad_norm": 0.1475612074136734, "learning_rate": 3.313680008118497e-07, "loss": 0.0007, "step": 105930 }, { "epoch": 1.7903889541417743, "grad_norm": 0.12210751324892044, "learning_rate": 3.308402335833916e-07, "loss": 0.0006, "step": 105940 }, { "epoch": 1.790557954420625, "grad_norm": 0.02229374274611473, "learning_rate": 3.3031287259445544e-07, "loss": 0.0008, "step": 105950 }, { "epoch": 1.7907269546994753, "grad_norm": 0.014257742092013359, "learning_rate": 3.297859178909252e-07, "loss": 0.0004, "step": 105960 }, { "epoch": 1.7908959549783257, "grad_norm": 0.012109160423278809, "learning_rate": 3.292593695186469e-07, "loss": 0.0003, "step": 105970 }, { "epoch": 1.791064955257176, "grad_norm": 0.0008053976343944669, "learning_rate": 3.287332275234351e-07, "loss": 0.0004, "step": 105980 }, { "epoch": 1.7912339555360266, "grad_norm": 0.06195811927318573, "learning_rate": 3.2820749195106416e-07, "loss": 0.0011, "step": 105990 }, { "epoch": 1.7914029558148772, "grad_norm": 0.024041438475251198, "learning_rate": 3.276821628472776e-07, "loss": 0.0004, "step": 106000 }, { "epoch": 1.7915719560937275, "grad_norm": 0.0122147835791111, "learning_rate": 3.2715724025777994e-07, "loss": 0.0002, "step": 106010 }, { "epoch": 1.791740956372578, "grad_norm": 0.06569260358810425, "learning_rate": 3.2663272422824297e-07, "loss": 0.0009, "step": 106020 }, { "epoch": 1.7919099566514285, "grad_norm": 0.11315417289733887, "learning_rate": 3.261086148043019e-07, "loss": 0.0006, "step": 106030 }, { "epoch": 1.792078956930279, "grad_norm": 0.0007930688443593681, "learning_rate": 3.255849120315574e-07, "loss": 0.0009, "step": 106040 }, { "epoch": 1.7922479572091294, "grad_norm": 0.05178851634263992, "learning_rate": 3.250616159555731e-07, "loss": 0.0005, "step": 106050 }, { "epoch": 1.7924169574879798, "grad_norm": 0.03907632455229759, "learning_rate": 3.245387266218797e-07, "loss": 0.0005, "step": 106060 }, { "epoch": 1.7925859577668302, "grad_norm": 0.007023259066045284, "learning_rate": 3.240162440759692e-07, "loss": 0.0003, "step": 106070 }, { "epoch": 1.7927549580456807, "grad_norm": 0.09764275699853897, "learning_rate": 3.2349416836330193e-07, "loss": 0.0005, "step": 106080 }, { "epoch": 1.7929239583245313, "grad_norm": 0.040002115070819855, "learning_rate": 3.2297249952929935e-07, "loss": 0.0005, "step": 106090 }, { "epoch": 1.7930929586033817, "grad_norm": 0.05797765776515007, "learning_rate": 3.2245123761935115e-07, "loss": 0.0005, "step": 106100 }, { "epoch": 1.793261958882232, "grad_norm": 0.02930748462677002, "learning_rate": 3.2193038267880726e-07, "loss": 0.0008, "step": 106110 }, { "epoch": 1.7934309591610826, "grad_norm": 0.02905907668173313, "learning_rate": 3.21409934752987e-07, "loss": 0.0006, "step": 106120 }, { "epoch": 1.7935999594399332, "grad_norm": 0.021418744698166847, "learning_rate": 3.2088989388716965e-07, "loss": 0.0003, "step": 106130 }, { "epoch": 1.7937689597187836, "grad_norm": 0.0026306742802262306, "learning_rate": 3.203702601266034e-07, "loss": 0.0006, "step": 106140 }, { "epoch": 1.793937959997634, "grad_norm": 0.2730619013309479, "learning_rate": 3.198510335164967e-07, "loss": 0.0005, "step": 106150 }, { "epoch": 1.7941069602764843, "grad_norm": 0.07091565430164337, "learning_rate": 3.193322141020272e-07, "loss": 0.0005, "step": 106160 }, { "epoch": 1.794275960555335, "grad_norm": 0.00048270318075083196, "learning_rate": 3.1881380192833266e-07, "loss": 0.0005, "step": 106170 }, { "epoch": 1.7944449608341855, "grad_norm": 0.010864262469112873, "learning_rate": 3.182957970405187e-07, "loss": 0.0005, "step": 106180 }, { "epoch": 1.7946139611130358, "grad_norm": 0.008652624674141407, "learning_rate": 3.1777819948365265e-07, "loss": 0.0004, "step": 106190 }, { "epoch": 1.7947829613918862, "grad_norm": 0.07487659901380539, "learning_rate": 3.1726100930277005e-07, "loss": 0.0005, "step": 106200 }, { "epoch": 1.7949519616707368, "grad_norm": 0.01952086202800274, "learning_rate": 3.1674422654286717e-07, "loss": 0.0003, "step": 106210 }, { "epoch": 1.7951209619495874, "grad_norm": 0.08285342156887054, "learning_rate": 3.162278512489081e-07, "loss": 0.0009, "step": 106220 }, { "epoch": 1.7952899622284377, "grad_norm": 0.08504673093557358, "learning_rate": 3.157118834658196e-07, "loss": 0.0003, "step": 106230 }, { "epoch": 1.795458962507288, "grad_norm": 0.01815224252641201, "learning_rate": 3.1519632323849247e-07, "loss": 0.0002, "step": 106240 }, { "epoch": 1.7956279627861385, "grad_norm": 0.01604323461651802, "learning_rate": 3.146811706117847e-07, "loss": 0.0007, "step": 106250 }, { "epoch": 1.795796963064989, "grad_norm": 0.008513304404914379, "learning_rate": 3.1416642563051493e-07, "loss": 0.0005, "step": 106260 }, { "epoch": 1.7959659633438396, "grad_norm": 0.07370144128799438, "learning_rate": 3.136520883394706e-07, "loss": 0.001, "step": 106270 }, { "epoch": 1.79613496362269, "grad_norm": 0.0054110633209347725, "learning_rate": 3.131381587833998e-07, "loss": 0.0005, "step": 106280 }, { "epoch": 1.7963039639015403, "grad_norm": 0.024059509858489037, "learning_rate": 3.12624637007018e-07, "loss": 0.0002, "step": 106290 }, { "epoch": 1.796472964180391, "grad_norm": 0.066287562251091, "learning_rate": 3.121115230550026e-07, "loss": 0.002, "step": 106300 }, { "epoch": 1.7966419644592415, "grad_norm": 0.02924017608165741, "learning_rate": 3.115988169719991e-07, "loss": 0.0006, "step": 106310 }, { "epoch": 1.7968109647380919, "grad_norm": 0.001341874129138887, "learning_rate": 3.110865188026135e-07, "loss": 0.0003, "step": 106320 }, { "epoch": 1.7969799650169422, "grad_norm": 0.1358218640089035, "learning_rate": 3.105746285914202e-07, "loss": 0.0009, "step": 106330 }, { "epoch": 1.7971489652957926, "grad_norm": 0.0023471498861908913, "learning_rate": 3.1006314638295396e-07, "loss": 0.0003, "step": 106340 }, { "epoch": 1.7973179655746432, "grad_norm": 0.012569617480039597, "learning_rate": 3.0955207222171766e-07, "loss": 0.0036, "step": 106350 }, { "epoch": 1.7974869658534938, "grad_norm": 0.01816113479435444, "learning_rate": 3.090414061521768e-07, "loss": 0.001, "step": 106360 }, { "epoch": 1.7976559661323441, "grad_norm": 0.027073049917817116, "learning_rate": 3.0853114821876193e-07, "loss": 0.0015, "step": 106370 }, { "epoch": 1.7978249664111945, "grad_norm": 0.019245292991399765, "learning_rate": 3.080212984658676e-07, "loss": 0.0004, "step": 106380 }, { "epoch": 1.797993966690045, "grad_norm": 0.02849625051021576, "learning_rate": 3.0751185693785324e-07, "loss": 0.0009, "step": 106390 }, { "epoch": 1.7981629669688957, "grad_norm": 0.013218315318226814, "learning_rate": 3.0700282367904287e-07, "loss": 0.0012, "step": 106400 }, { "epoch": 1.798331967247746, "grad_norm": 0.01673104241490364, "learning_rate": 3.0649419873372553e-07, "loss": 0.0003, "step": 106410 }, { "epoch": 1.7985009675265964, "grad_norm": 0.012420453131198883, "learning_rate": 3.0598598214615303e-07, "loss": 0.0005, "step": 106420 }, { "epoch": 1.7986699678054467, "grad_norm": 0.004321121610701084, "learning_rate": 3.0547817396054393e-07, "loss": 0.0003, "step": 106430 }, { "epoch": 1.7988389680842973, "grad_norm": 0.02657952904701233, "learning_rate": 3.0497077422107836e-07, "loss": 0.0009, "step": 106440 }, { "epoch": 1.799007968363148, "grad_norm": 0.038907743990421295, "learning_rate": 3.0446378297190325e-07, "loss": 0.0002, "step": 106450 }, { "epoch": 1.7991769686419983, "grad_norm": 0.0133865587413311, "learning_rate": 3.0395720025713007e-07, "loss": 0.0005, "step": 106460 }, { "epoch": 1.7993459689208486, "grad_norm": 0.00025637217913754284, "learning_rate": 3.034510261208329e-07, "loss": 0.0004, "step": 106470 }, { "epoch": 1.7995149691996992, "grad_norm": 0.04737827554345131, "learning_rate": 3.029452606070521e-07, "loss": 0.0009, "step": 106480 }, { "epoch": 1.7996839694785498, "grad_norm": 0.002496320754289627, "learning_rate": 3.024399037597908e-07, "loss": 0.0003, "step": 106490 }, { "epoch": 1.7998529697574002, "grad_norm": 0.052021387964487076, "learning_rate": 3.0193495562301886e-07, "loss": 0.0008, "step": 106500 }, { "epoch": 1.8000219700362505, "grad_norm": 0.03761506825685501, "learning_rate": 3.0143041624066784e-07, "loss": 0.0005, "step": 106510 }, { "epoch": 1.800190970315101, "grad_norm": 0.03732273355126381, "learning_rate": 3.0092628565663583e-07, "loss": 0.0003, "step": 106520 }, { "epoch": 1.8003599705939515, "grad_norm": 0.019498972222208977, "learning_rate": 3.0042256391478454e-07, "loss": 0.0006, "step": 106530 }, { "epoch": 1.800528970872802, "grad_norm": 0.013651588931679726, "learning_rate": 2.999192510589405e-07, "loss": 0.0003, "step": 106540 }, { "epoch": 1.8006979711516524, "grad_norm": 0.06348342448472977, "learning_rate": 2.994163471328931e-07, "loss": 0.0004, "step": 106550 }, { "epoch": 1.8008669714305028, "grad_norm": 0.0472821369767189, "learning_rate": 2.989138521803997e-07, "loss": 0.0008, "step": 106560 }, { "epoch": 1.8010359717093534, "grad_norm": 0.006114867981523275, "learning_rate": 2.9841176624517743e-07, "loss": 0.0005, "step": 106570 }, { "epoch": 1.8012049719882037, "grad_norm": 0.06382883340120316, "learning_rate": 2.979100893709108e-07, "loss": 0.0004, "step": 106580 }, { "epoch": 1.8013739722670543, "grad_norm": 0.0012848296901211143, "learning_rate": 2.9740882160124995e-07, "loss": 0.0005, "step": 106590 }, { "epoch": 1.8015429725459047, "grad_norm": 0.0248939897865057, "learning_rate": 2.9690796297980493e-07, "loss": 0.0006, "step": 106600 }, { "epoch": 1.801711972824755, "grad_norm": 0.031102487817406654, "learning_rate": 2.964075135501543e-07, "loss": 0.0006, "step": 106610 }, { "epoch": 1.8018809731036056, "grad_norm": 0.026568105444312096, "learning_rate": 2.9590747335584034e-07, "loss": 0.0002, "step": 106620 }, { "epoch": 1.8020499733824562, "grad_norm": 0.026500539854168892, "learning_rate": 2.954078424403672e-07, "loss": 0.0009, "step": 106630 }, { "epoch": 1.8022189736613066, "grad_norm": 0.0744604840874672, "learning_rate": 2.9490862084720686e-07, "loss": 0.0009, "step": 106640 }, { "epoch": 1.802387973940157, "grad_norm": 0.005288366694003344, "learning_rate": 2.9440980861979216e-07, "loss": 0.0006, "step": 106650 }, { "epoch": 1.8025569742190075, "grad_norm": 0.072980135679245, "learning_rate": 2.939114058015241e-07, "loss": 0.0004, "step": 106660 }, { "epoch": 1.8027259744978579, "grad_norm": 0.03152094781398773, "learning_rate": 2.934134124357646e-07, "loss": 0.0007, "step": 106670 }, { "epoch": 1.8028949747767085, "grad_norm": 0.04416177421808243, "learning_rate": 2.929158285658429e-07, "loss": 0.0006, "step": 106680 }, { "epoch": 1.8030639750555588, "grad_norm": 0.00499266292899847, "learning_rate": 2.924186542350499e-07, "loss": 0.0005, "step": 106690 }, { "epoch": 1.8032329753344092, "grad_norm": 0.07045271247625351, "learning_rate": 2.9192188948664267e-07, "loss": 0.0011, "step": 106700 }, { "epoch": 1.8034019756132598, "grad_norm": 0.01703091524541378, "learning_rate": 2.914255343638428e-07, "loss": 0.0006, "step": 106710 }, { "epoch": 1.8035709758921104, "grad_norm": 0.05974980443716049, "learning_rate": 2.9092958890983405e-07, "loss": 0.0007, "step": 106720 }, { "epoch": 1.8037399761709607, "grad_norm": 0.011746728792786598, "learning_rate": 2.90434053167768e-07, "loss": 0.0007, "step": 106730 }, { "epoch": 1.803908976449811, "grad_norm": 0.022092394530773163, "learning_rate": 2.8993892718075735e-07, "loss": 0.0003, "step": 106740 }, { "epoch": 1.8040779767286614, "grad_norm": 0.04053737595677376, "learning_rate": 2.8944421099188104e-07, "loss": 0.0006, "step": 106750 }, { "epoch": 1.804246977007512, "grad_norm": 0.0704391822218895, "learning_rate": 2.889499046441807e-07, "loss": 0.002, "step": 106760 }, { "epoch": 1.8044159772863626, "grad_norm": 0.06557278335094452, "learning_rate": 2.884560081806653e-07, "loss": 0.0006, "step": 106770 }, { "epoch": 1.804584977565213, "grad_norm": 0.05407819151878357, "learning_rate": 2.879625216443044e-07, "loss": 0.0004, "step": 106780 }, { "epoch": 1.8047539778440633, "grad_norm": 0.05492717772722244, "learning_rate": 2.874694450780341e-07, "loss": 0.0009, "step": 106790 }, { "epoch": 1.804922978122914, "grad_norm": 0.028640110045671463, "learning_rate": 2.8697677852475513e-07, "loss": 0.0005, "step": 106800 }, { "epoch": 1.8050919784017645, "grad_norm": 0.0022897396702319384, "learning_rate": 2.864845220273327e-07, "loss": 0.0008, "step": 106810 }, { "epoch": 1.8052609786806149, "grad_norm": 0.0251806378364563, "learning_rate": 2.85992675628593e-07, "loss": 0.001, "step": 106820 }, { "epoch": 1.8054299789594652, "grad_norm": 7.517525227740407e-05, "learning_rate": 2.8550123937133136e-07, "loss": 0.0002, "step": 106830 }, { "epoch": 1.8055989792383156, "grad_norm": 0.08418340981006622, "learning_rate": 2.8501021329830347e-07, "loss": 0.0006, "step": 106840 }, { "epoch": 1.8057679795171662, "grad_norm": 0.003870246931910515, "learning_rate": 2.8451959745223187e-07, "loss": 0.0009, "step": 106850 }, { "epoch": 1.8059369797960168, "grad_norm": 0.008406764827668667, "learning_rate": 2.8402939187580247e-07, "loss": 0.0002, "step": 106860 }, { "epoch": 1.8061059800748671, "grad_norm": 0.04729627072811127, "learning_rate": 2.8353959661166555e-07, "loss": 0.0004, "step": 106870 }, { "epoch": 1.8062749803537175, "grad_norm": 6.623937952099368e-05, "learning_rate": 2.8305021170243474e-07, "loss": 0.0007, "step": 106880 }, { "epoch": 1.806443980632568, "grad_norm": 0.04600764438509941, "learning_rate": 2.825612371906905e-07, "loss": 0.0004, "step": 106890 }, { "epoch": 1.8066129809114186, "grad_norm": 0.09864575415849686, "learning_rate": 2.8207267311897435e-07, "loss": 0.001, "step": 106900 }, { "epoch": 1.806781981190269, "grad_norm": 0.04171300306916237, "learning_rate": 2.815845195297956e-07, "loss": 0.0008, "step": 106910 }, { "epoch": 1.8069509814691194, "grad_norm": 0.010658209212124348, "learning_rate": 2.810967764656242e-07, "loss": 0.0006, "step": 106920 }, { "epoch": 1.8071199817479697, "grad_norm": 0.037350140511989594, "learning_rate": 2.8060944396889723e-07, "loss": 0.0013, "step": 106930 }, { "epoch": 1.8072889820268203, "grad_norm": 0.03345376253128052, "learning_rate": 2.8012252208201363e-07, "loss": 0.0007, "step": 106940 }, { "epoch": 1.807457982305671, "grad_norm": 0.045432355254888535, "learning_rate": 2.796360108473395e-07, "loss": 0.001, "step": 106950 }, { "epoch": 1.8076269825845213, "grad_norm": 0.17052718997001648, "learning_rate": 2.791499103072037e-07, "loss": 0.0007, "step": 106960 }, { "epoch": 1.8077959828633716, "grad_norm": 0.02307099848985672, "learning_rate": 2.786642205038981e-07, "loss": 0.0014, "step": 106970 }, { "epoch": 1.8079649831422222, "grad_norm": 0.038824472576379776, "learning_rate": 2.781789414796804e-07, "loss": 0.0008, "step": 106980 }, { "epoch": 1.8081339834210728, "grad_norm": 0.2283657193183899, "learning_rate": 2.7769407327677246e-07, "loss": 0.0006, "step": 106990 }, { "epoch": 1.8083029836999231, "grad_norm": 0.006276116240769625, "learning_rate": 2.772096159373616e-07, "loss": 0.0004, "step": 107000 }, { "epoch": 1.8084719839787735, "grad_norm": 0.04557156190276146, "learning_rate": 2.767255695035953e-07, "loss": 0.0005, "step": 107010 }, { "epoch": 1.8086409842576239, "grad_norm": 0.006434998009353876, "learning_rate": 2.7624193401759035e-07, "loss": 0.0004, "step": 107020 }, { "epoch": 1.8088099845364745, "grad_norm": 0.03410731256008148, "learning_rate": 2.7575870952142316e-07, "loss": 0.0003, "step": 107030 }, { "epoch": 1.808978984815325, "grad_norm": 0.047531042248010635, "learning_rate": 2.7527589605713836e-07, "loss": 0.0005, "step": 107040 }, { "epoch": 1.8091479850941754, "grad_norm": 0.026933960616588593, "learning_rate": 2.7479349366674236e-07, "loss": 0.0008, "step": 107050 }, { "epoch": 1.8093169853730258, "grad_norm": 0.05856289342045784, "learning_rate": 2.743115023922066e-07, "loss": 0.0006, "step": 107060 }, { "epoch": 1.8094859856518763, "grad_norm": 0.06930646300315857, "learning_rate": 2.7382992227546644e-07, "loss": 0.0009, "step": 107070 }, { "epoch": 1.809654985930727, "grad_norm": 0.037896398454904556, "learning_rate": 2.733487533584223e-07, "loss": 0.0007, "step": 107080 }, { "epoch": 1.8098239862095773, "grad_norm": 0.04459800943732262, "learning_rate": 2.728679956829372e-07, "loss": 0.0004, "step": 107090 }, { "epoch": 1.8099929864884277, "grad_norm": 0.032940663397312164, "learning_rate": 2.723876492908406e-07, "loss": 0.0016, "step": 107100 }, { "epoch": 1.810161986767278, "grad_norm": 0.0005108294426463544, "learning_rate": 2.719077142239235e-07, "loss": 0.0004, "step": 107110 }, { "epoch": 1.8103309870461286, "grad_norm": 0.07016304135322571, "learning_rate": 2.714281905239441e-07, "loss": 0.0005, "step": 107120 }, { "epoch": 1.8104999873249792, "grad_norm": 0.022397033870220184, "learning_rate": 2.7094907823262184e-07, "loss": 0.0009, "step": 107130 }, { "epoch": 1.8106689876038295, "grad_norm": 0.01936495676636696, "learning_rate": 2.7047037739164337e-07, "loss": 0.0004, "step": 107140 }, { "epoch": 1.81083798788268, "grad_norm": 0.03775428980588913, "learning_rate": 2.6999208804265653e-07, "loss": 0.0007, "step": 107150 }, { "epoch": 1.8110069881615305, "grad_norm": 0.0064462157897651196, "learning_rate": 2.695142102272752e-07, "loss": 0.0009, "step": 107160 }, { "epoch": 1.811175988440381, "grad_norm": 0.0017412303714081645, "learning_rate": 2.690367439870778e-07, "loss": 0.0004, "step": 107170 }, { "epoch": 1.8113449887192314, "grad_norm": 0.036754146218299866, "learning_rate": 2.685596893636061e-07, "loss": 0.0003, "step": 107180 }, { "epoch": 1.8115139889980818, "grad_norm": 0.0605829693377018, "learning_rate": 2.6808304639836466e-07, "loss": 0.0007, "step": 107190 }, { "epoch": 1.8116829892769322, "grad_norm": 0.0008872547186911106, "learning_rate": 2.676068151328265e-07, "loss": 0.0004, "step": 107200 }, { "epoch": 1.8118519895557827, "grad_norm": 0.015495926141738892, "learning_rate": 2.671309956084228e-07, "loss": 0.0004, "step": 107210 }, { "epoch": 1.8120209898346333, "grad_norm": 0.0967402532696724, "learning_rate": 2.666555878665544e-07, "loss": 0.0004, "step": 107220 }, { "epoch": 1.8121899901134837, "grad_norm": 0.0003819911216851324, "learning_rate": 2.661805919485838e-07, "loss": 0.0003, "step": 107230 }, { "epoch": 1.812358990392334, "grad_norm": 0.011226209811866283, "learning_rate": 2.657060078958368e-07, "loss": 0.0006, "step": 107240 }, { "epoch": 1.8125279906711846, "grad_norm": 0.11130131781101227, "learning_rate": 2.652318357496064e-07, "loss": 0.0007, "step": 107250 }, { "epoch": 1.8126969909500352, "grad_norm": 0.11554577201604843, "learning_rate": 2.6475807555114586e-07, "loss": 0.0007, "step": 107260 }, { "epoch": 1.8128659912288856, "grad_norm": 0.028465701267123222, "learning_rate": 2.6428472734167597e-07, "loss": 0.0003, "step": 107270 }, { "epoch": 1.813034991507736, "grad_norm": 0.014900845475494862, "learning_rate": 2.638117911623794e-07, "loss": 0.0003, "step": 107280 }, { "epoch": 1.8132039917865863, "grad_norm": 0.12082474678754807, "learning_rate": 2.6333926705440494e-07, "loss": 0.0008, "step": 107290 }, { "epoch": 1.813372992065437, "grad_norm": 0.10809744894504547, "learning_rate": 2.628671550588635e-07, "loss": 0.0007, "step": 107300 }, { "epoch": 1.8135419923442875, "grad_norm": 0.016735605895519257, "learning_rate": 2.6239545521683175e-07, "loss": 0.0011, "step": 107310 }, { "epoch": 1.8137109926231378, "grad_norm": 0.004057494457811117, "learning_rate": 2.619241675693496e-07, "loss": 0.0004, "step": 107320 }, { "epoch": 1.8138799929019882, "grad_norm": 0.2128661423921585, "learning_rate": 2.614532921574214e-07, "loss": 0.0014, "step": 107330 }, { "epoch": 1.8140489931808388, "grad_norm": 0.004425691440701485, "learning_rate": 2.609828290220151e-07, "loss": 0.0011, "step": 107340 }, { "epoch": 1.8142179934596894, "grad_norm": 0.0008100151899270713, "learning_rate": 2.6051277820406396e-07, "loss": 0.0008, "step": 107350 }, { "epoch": 1.8143869937385397, "grad_norm": 0.012658101506531239, "learning_rate": 2.600431397444647e-07, "loss": 0.0007, "step": 107360 }, { "epoch": 1.81455599401739, "grad_norm": 0.023131320253014565, "learning_rate": 2.5957391368407746e-07, "loss": 0.0003, "step": 107370 }, { "epoch": 1.8147249942962405, "grad_norm": 0.10059446096420288, "learning_rate": 2.591051000637279e-07, "loss": 0.0007, "step": 107380 }, { "epoch": 1.814893994575091, "grad_norm": 0.02371380105614662, "learning_rate": 2.586366989242051e-07, "loss": 0.0008, "step": 107390 }, { "epoch": 1.8150629948539416, "grad_norm": 0.005978689529001713, "learning_rate": 2.5816871030626135e-07, "loss": 0.0016, "step": 107400 }, { "epoch": 1.815231995132792, "grad_norm": 0.030084148049354553, "learning_rate": 2.5770113425061526e-07, "loss": 0.0007, "step": 107410 }, { "epoch": 1.8154009954116423, "grad_norm": 0.030426248908042908, "learning_rate": 2.572339707979465e-07, "loss": 0.0006, "step": 107420 }, { "epoch": 1.815569995690493, "grad_norm": 0.016602622345089912, "learning_rate": 2.567672199889026e-07, "loss": 0.0005, "step": 107430 }, { "epoch": 1.8157389959693433, "grad_norm": 0.009840616956353188, "learning_rate": 2.563008818640911e-07, "loss": 0.0003, "step": 107440 }, { "epoch": 1.8159079962481939, "grad_norm": 0.04440809041261673, "learning_rate": 2.558349564640872e-07, "loss": 0.0006, "step": 107450 }, { "epoch": 1.8160769965270442, "grad_norm": 0.026042316108942032, "learning_rate": 2.55369443829428e-07, "loss": 0.0006, "step": 107460 }, { "epoch": 1.8162459968058946, "grad_norm": 0.017470931634306908, "learning_rate": 2.549043440006149e-07, "loss": 0.0004, "step": 107470 }, { "epoch": 1.8164149970847452, "grad_norm": 0.014459982514381409, "learning_rate": 2.5443965701811514e-07, "loss": 0.0006, "step": 107480 }, { "epoch": 1.8165839973635958, "grad_norm": 0.018677975982427597, "learning_rate": 2.5397538292235734e-07, "loss": 0.0004, "step": 107490 }, { "epoch": 1.8167529976424461, "grad_norm": 0.007446676958352327, "learning_rate": 2.5351152175373704e-07, "loss": 0.0016, "step": 107500 }, { "epoch": 1.8169219979212965, "grad_norm": 0.025096848607063293, "learning_rate": 2.5304807355261086e-07, "loss": 0.0006, "step": 107510 }, { "epoch": 1.817090998200147, "grad_norm": 0.009793787263333797, "learning_rate": 2.525850383593026e-07, "loss": 0.0008, "step": 107520 }, { "epoch": 1.8172599984789974, "grad_norm": 0.01451643742620945, "learning_rate": 2.521224162140967e-07, "loss": 0.001, "step": 107530 }, { "epoch": 1.817428998757848, "grad_norm": 0.09454888105392456, "learning_rate": 2.516602071572449e-07, "loss": 0.0009, "step": 107540 }, { "epoch": 1.8175979990366984, "grad_norm": 0.021753238514065742, "learning_rate": 2.511984112289617e-07, "loss": 0.0011, "step": 107550 }, { "epoch": 1.8177669993155487, "grad_norm": 0.0140855573117733, "learning_rate": 2.507370284694249e-07, "loss": 0.0006, "step": 107560 }, { "epoch": 1.8179359995943993, "grad_norm": 0.0358390286564827, "learning_rate": 2.5027605891877747e-07, "loss": 0.0006, "step": 107570 }, { "epoch": 1.81810499987325, "grad_norm": 0.07012967020273209, "learning_rate": 2.4981550261712617e-07, "loss": 0.0007, "step": 107580 }, { "epoch": 1.8182740001521003, "grad_norm": 0.02137385495007038, "learning_rate": 2.493553596045406e-07, "loss": 0.0006, "step": 107590 }, { "epoch": 1.8184430004309506, "grad_norm": 0.009358204901218414, "learning_rate": 2.488956299210571e-07, "loss": 0.0005, "step": 107600 }, { "epoch": 1.8186120007098012, "grad_norm": 0.03795533999800682, "learning_rate": 2.484363136066725e-07, "loss": 0.0011, "step": 107610 }, { "epoch": 1.8187810009886516, "grad_norm": 0.023416364565491676, "learning_rate": 2.4797741070135107e-07, "loss": 0.0004, "step": 107620 }, { "epoch": 1.8189500012675022, "grad_norm": 0.011213819496333599, "learning_rate": 2.475189212450185e-07, "loss": 0.0006, "step": 107630 }, { "epoch": 1.8191190015463525, "grad_norm": 0.08420540392398834, "learning_rate": 2.470608452775669e-07, "loss": 0.0024, "step": 107640 }, { "epoch": 1.819288001825203, "grad_norm": 0.0453086718916893, "learning_rate": 2.4660318283884934e-07, "loss": 0.0008, "step": 107650 }, { "epoch": 1.8194570021040535, "grad_norm": 0.006805508863180876, "learning_rate": 2.4614593396868613e-07, "loss": 0.0003, "step": 107660 }, { "epoch": 1.819626002382904, "grad_norm": 0.10280157625675201, "learning_rate": 2.456890987068594e-07, "loss": 0.0008, "step": 107670 }, { "epoch": 1.8197950026617544, "grad_norm": 0.004269697703421116, "learning_rate": 2.452326770931168e-07, "loss": 0.0004, "step": 107680 }, { "epoch": 1.8199640029406048, "grad_norm": 0.01963418908417225, "learning_rate": 2.447766691671677e-07, "loss": 0.0008, "step": 107690 }, { "epoch": 1.8201330032194551, "grad_norm": 0.04317108541727066, "learning_rate": 2.4432107496868915e-07, "loss": 0.0005, "step": 107700 }, { "epoch": 1.8203020034983057, "grad_norm": 0.0014225431950762868, "learning_rate": 2.438658945373179e-07, "loss": 0.0008, "step": 107710 }, { "epoch": 1.8204710037771563, "grad_norm": 0.006587664596736431, "learning_rate": 2.4341112791265777e-07, "loss": 0.0008, "step": 107720 }, { "epoch": 1.8206400040560067, "grad_norm": 0.03664252161979675, "learning_rate": 2.4295677513427653e-07, "loss": 0.0004, "step": 107730 }, { "epoch": 1.820809004334857, "grad_norm": 0.003876063507050276, "learning_rate": 2.425028362417031e-07, "loss": 0.0005, "step": 107740 }, { "epoch": 1.8209780046137076, "grad_norm": 0.07400530576705933, "learning_rate": 2.420493112744343e-07, "loss": 0.001, "step": 107750 }, { "epoch": 1.8211470048925582, "grad_norm": 0.019484998658299446, "learning_rate": 2.41596200271928e-07, "loss": 0.0008, "step": 107760 }, { "epoch": 1.8213160051714086, "grad_norm": 0.0583035871386528, "learning_rate": 2.411435032736076e-07, "loss": 0.0006, "step": 107770 }, { "epoch": 1.821485005450259, "grad_norm": 0.030501442030072212, "learning_rate": 2.4069122031885883e-07, "loss": 0.0005, "step": 107780 }, { "epoch": 1.8216540057291093, "grad_norm": 0.016799982637166977, "learning_rate": 2.402393514470347e-07, "loss": 0.0006, "step": 107790 }, { "epoch": 1.8218230060079599, "grad_norm": 0.028180431574583054, "learning_rate": 2.397878966974471e-07, "loss": 0.0002, "step": 107800 }, { "epoch": 1.8219920062868105, "grad_norm": 0.03444041684269905, "learning_rate": 2.393368561093773e-07, "loss": 0.0006, "step": 107810 }, { "epoch": 1.8221610065656608, "grad_norm": 0.009577547200024128, "learning_rate": 2.3888622972206623e-07, "loss": 0.0004, "step": 107820 }, { "epoch": 1.8223300068445112, "grad_norm": 0.007195120677351952, "learning_rate": 2.3843601757472193e-07, "loss": 0.0002, "step": 107830 }, { "epoch": 1.8224990071233618, "grad_norm": 0.0045670741237699986, "learning_rate": 2.3798621970651415e-07, "loss": 0.0005, "step": 107840 }, { "epoch": 1.8226680074022124, "grad_norm": 0.07623469084501266, "learning_rate": 2.3753683615657775e-07, "loss": 0.0007, "step": 107850 }, { "epoch": 1.8228370076810627, "grad_norm": 0.0028391906525939703, "learning_rate": 2.3708786696401087e-07, "loss": 0.0006, "step": 107860 }, { "epoch": 1.823006007959913, "grad_norm": 0.04393308609724045, "learning_rate": 2.3663931216787727e-07, "loss": 0.0009, "step": 107870 }, { "epoch": 1.8231750082387634, "grad_norm": 0.003135798731818795, "learning_rate": 2.3619117180720187e-07, "loss": 0.0014, "step": 107880 }, { "epoch": 1.823344008517614, "grad_norm": 0.015100251883268356, "learning_rate": 2.357434459209762e-07, "loss": 0.0007, "step": 107890 }, { "epoch": 1.8235130087964646, "grad_norm": 0.07303640991449356, "learning_rate": 2.3529613454815303e-07, "loss": 0.0006, "step": 107900 }, { "epoch": 1.823682009075315, "grad_norm": 0.0011348397238180041, "learning_rate": 2.348492377276529e-07, "loss": 0.0006, "step": 107910 }, { "epoch": 1.8238510093541653, "grad_norm": 0.01658669300377369, "learning_rate": 2.3440275549835633e-07, "loss": 0.0004, "step": 107920 }, { "epoch": 1.824020009633016, "grad_norm": 0.01174141000956297, "learning_rate": 2.3395668789910897e-07, "loss": 0.0013, "step": 107930 }, { "epoch": 1.8241890099118665, "grad_norm": 0.054488569498062134, "learning_rate": 2.3351103496872251e-07, "loss": 0.0005, "step": 107940 }, { "epoch": 1.8243580101907169, "grad_norm": 0.007784908637404442, "learning_rate": 2.3306579674597096e-07, "loss": 0.0005, "step": 107950 }, { "epoch": 1.8245270104695672, "grad_norm": 0.09560495615005493, "learning_rate": 2.3262097326958998e-07, "loss": 0.001, "step": 107960 }, { "epoch": 1.8246960107484176, "grad_norm": 0.06250361353158951, "learning_rate": 2.321765645782842e-07, "loss": 0.0008, "step": 107970 }, { "epoch": 1.8248650110272682, "grad_norm": 0.0241071954369545, "learning_rate": 2.3173257071071708e-07, "loss": 0.0008, "step": 107980 }, { "epoch": 1.8250340113061188, "grad_norm": 0.09130355715751648, "learning_rate": 2.3128899170551888e-07, "loss": 0.0006, "step": 107990 }, { "epoch": 1.8252030115849691, "grad_norm": 0.01994970068335533, "learning_rate": 2.3084582760128427e-07, "loss": 0.0005, "step": 108000 }, { "epoch": 1.8253720118638195, "grad_norm": 0.02133479341864586, "learning_rate": 2.3040307843656906e-07, "loss": 0.0006, "step": 108010 }, { "epoch": 1.82554101214267, "grad_norm": 0.014401676133275032, "learning_rate": 2.299607442498958e-07, "loss": 0.0006, "step": 108020 }, { "epoch": 1.8257100124215206, "grad_norm": 0.01538936235010624, "learning_rate": 2.295188250797481e-07, "loss": 0.0006, "step": 108030 }, { "epoch": 1.825879012700371, "grad_norm": 0.01944626122713089, "learning_rate": 2.2907732096457746e-07, "loss": 0.0007, "step": 108040 }, { "epoch": 1.8260480129792214, "grad_norm": 0.020264191552996635, "learning_rate": 2.2863623194279428e-07, "loss": 0.0005, "step": 108050 }, { "epoch": 1.8262170132580717, "grad_norm": 0.07285454869270325, "learning_rate": 2.2819555805277783e-07, "loss": 0.0008, "step": 108060 }, { "epoch": 1.8263860135369223, "grad_norm": 0.04085834324359894, "learning_rate": 2.277552993328669e-07, "loss": 0.0005, "step": 108070 }, { "epoch": 1.826555013815773, "grad_norm": 0.010012646205723286, "learning_rate": 2.2731545582136804e-07, "loss": 0.0004, "step": 108080 }, { "epoch": 1.8267240140946233, "grad_norm": 0.013109447434544563, "learning_rate": 2.2687602755654736e-07, "loss": 0.0006, "step": 108090 }, { "epoch": 1.8268930143734736, "grad_norm": 0.03321567177772522, "learning_rate": 2.264370145766398e-07, "loss": 0.0015, "step": 108100 }, { "epoch": 1.8270620146523242, "grad_norm": 0.0005492489435710013, "learning_rate": 2.259984169198398e-07, "loss": 0.0004, "step": 108110 }, { "epoch": 1.8272310149311748, "grad_norm": 0.005728592164814472, "learning_rate": 2.2556023462430798e-07, "loss": 0.0005, "step": 108120 }, { "epoch": 1.8274000152100252, "grad_norm": 0.08615639805793762, "learning_rate": 2.2512246772816825e-07, "loss": 0.0009, "step": 108130 }, { "epoch": 1.8275690154888755, "grad_norm": 0.03961627185344696, "learning_rate": 2.2468511626950905e-07, "loss": 0.0003, "step": 108140 }, { "epoch": 1.8277380157677259, "grad_norm": 0.027792375534772873, "learning_rate": 2.2424818028638163e-07, "loss": 0.0005, "step": 108150 }, { "epoch": 1.8279070160465765, "grad_norm": 0.02066863141953945, "learning_rate": 2.238116598168022e-07, "loss": 0.0004, "step": 108160 }, { "epoch": 1.828076016325427, "grad_norm": 0.024340318515896797, "learning_rate": 2.2337555489874817e-07, "loss": 0.0006, "step": 108170 }, { "epoch": 1.8282450166042774, "grad_norm": 0.018854424357414246, "learning_rate": 2.2293986557016535e-07, "loss": 0.0007, "step": 108180 }, { "epoch": 1.8284140168831278, "grad_norm": 0.04398643970489502, "learning_rate": 2.225045918689589e-07, "loss": 0.0003, "step": 108190 }, { "epoch": 1.8285830171619784, "grad_norm": 0.012175377458333969, "learning_rate": 2.2206973383300136e-07, "loss": 0.0005, "step": 108200 }, { "epoch": 1.828752017440829, "grad_norm": 0.04109586402773857, "learning_rate": 2.2163529150012574e-07, "loss": 0.0007, "step": 108210 }, { "epoch": 1.8289210177196793, "grad_norm": 0.07205361127853394, "learning_rate": 2.2120126490813187e-07, "loss": 0.0008, "step": 108220 }, { "epoch": 1.8290900179985297, "grad_norm": 0.06628002971410751, "learning_rate": 2.2076765409478118e-07, "loss": 0.0008, "step": 108230 }, { "epoch": 1.82925901827738, "grad_norm": 0.00014799812925048172, "learning_rate": 2.2033445909780072e-07, "loss": 0.0005, "step": 108240 }, { "epoch": 1.8294280185562306, "grad_norm": 0.00020395268802531064, "learning_rate": 2.1990167995488087e-07, "loss": 0.0011, "step": 108250 }, { "epoch": 1.8295970188350812, "grad_norm": 0.0024706334806978703, "learning_rate": 2.1946931670367434e-07, "loss": 0.0004, "step": 108260 }, { "epoch": 1.8297660191139316, "grad_norm": 0.00913759134709835, "learning_rate": 2.1903736938179988e-07, "loss": 0.0003, "step": 108270 }, { "epoch": 1.829935019392782, "grad_norm": 0.034086182713508606, "learning_rate": 2.18605838026838e-07, "loss": 0.0006, "step": 108280 }, { "epoch": 1.8301040196716325, "grad_norm": 0.1580514907836914, "learning_rate": 2.1817472267633588e-07, "loss": 0.0012, "step": 108290 }, { "epoch": 1.830273019950483, "grad_norm": 0.0010112915188074112, "learning_rate": 2.177440233678002e-07, "loss": 0.0003, "step": 108300 }, { "epoch": 1.8304420202293334, "grad_norm": 0.0032198044937103987, "learning_rate": 2.1731374013870487e-07, "loss": 0.0005, "step": 108310 }, { "epoch": 1.8306110205081838, "grad_norm": 0.005323966033756733, "learning_rate": 2.1688387302648771e-07, "loss": 0.0006, "step": 108320 }, { "epoch": 1.8307800207870342, "grad_norm": 0.5507721304893494, "learning_rate": 2.164544220685477e-07, "loss": 0.0006, "step": 108330 }, { "epoch": 1.8309490210658848, "grad_norm": 0.009454223327338696, "learning_rate": 2.1602538730224942e-07, "loss": 0.0007, "step": 108340 }, { "epoch": 1.8311180213447353, "grad_norm": 0.05927232652902603, "learning_rate": 2.1559676876492242e-07, "loss": 0.0006, "step": 108350 }, { "epoch": 1.8312870216235857, "grad_norm": 0.04558410122990608, "learning_rate": 2.1516856649385687e-07, "loss": 0.0006, "step": 108360 }, { "epoch": 1.831456021902436, "grad_norm": 0.012260254472494125, "learning_rate": 2.1474078052630908e-07, "loss": 0.0003, "step": 108370 }, { "epoch": 1.8316250221812866, "grad_norm": 0.019253376871347427, "learning_rate": 2.1431341089949875e-07, "loss": 0.0004, "step": 108380 }, { "epoch": 1.831794022460137, "grad_norm": 0.032780036330223083, "learning_rate": 2.138864576506089e-07, "loss": 0.0005, "step": 108390 }, { "epoch": 1.8319630227389876, "grad_norm": 0.06949431449174881, "learning_rate": 2.134599208167859e-07, "loss": 0.0006, "step": 108400 }, { "epoch": 1.832132023017838, "grad_norm": 0.10431086272001266, "learning_rate": 2.1303380043514178e-07, "loss": 0.0006, "step": 108410 }, { "epoch": 1.8323010232966883, "grad_norm": 0.032952189445495605, "learning_rate": 2.1260809654275018e-07, "loss": 0.0012, "step": 108420 }, { "epoch": 1.832470023575539, "grad_norm": 0.05434500053524971, "learning_rate": 2.121828091766498e-07, "loss": 0.0007, "step": 108430 }, { "epoch": 1.8326390238543895, "grad_norm": 0.015353082679212093, "learning_rate": 2.1175793837384217e-07, "loss": 0.0004, "step": 108440 }, { "epoch": 1.8328080241332398, "grad_norm": 0.19077074527740479, "learning_rate": 2.1133348417129384e-07, "loss": 0.0011, "step": 108450 }, { "epoch": 1.8329770244120902, "grad_norm": 0.0716923251748085, "learning_rate": 2.109094466059336e-07, "loss": 0.0007, "step": 108460 }, { "epoch": 1.8331460246909408, "grad_norm": 0.008195516653358936, "learning_rate": 2.104858257146558e-07, "loss": 0.0003, "step": 108470 }, { "epoch": 1.8333150249697912, "grad_norm": 0.016290076076984406, "learning_rate": 2.10062621534316e-07, "loss": 0.0005, "step": 108480 }, { "epoch": 1.8334840252486417, "grad_norm": 0.047955095767974854, "learning_rate": 2.0963983410173583e-07, "loss": 0.0005, "step": 108490 }, { "epoch": 1.833653025527492, "grad_norm": 0.07434266060590744, "learning_rate": 2.0921746345370086e-07, "loss": 0.0011, "step": 108500 }, { "epoch": 1.8338220258063425, "grad_norm": 0.10034968703985214, "learning_rate": 2.0879550962695783e-07, "loss": 0.0008, "step": 108510 }, { "epoch": 1.833991026085193, "grad_norm": 0.030944790691137314, "learning_rate": 2.0837397265821958e-07, "loss": 0.0012, "step": 108520 }, { "epoch": 1.8341600263640436, "grad_norm": 0.04390372335910797, "learning_rate": 2.079528525841612e-07, "loss": 0.0007, "step": 108530 }, { "epoch": 1.834329026642894, "grad_norm": 0.048307739198207855, "learning_rate": 2.075321494414234e-07, "loss": 0.0011, "step": 108540 }, { "epoch": 1.8344980269217444, "grad_norm": 0.13810618221759796, "learning_rate": 2.0711186326660803e-07, "loss": 0.0015, "step": 108550 }, { "epoch": 1.834667027200595, "grad_norm": 0.00930106732994318, "learning_rate": 2.066919940962836e-07, "loss": 0.0003, "step": 108560 }, { "epoch": 1.8348360274794453, "grad_norm": 0.02009192854166031, "learning_rate": 2.0627254196697865e-07, "loss": 0.0016, "step": 108570 }, { "epoch": 1.8350050277582959, "grad_norm": 0.021137626841664314, "learning_rate": 2.0585350691518958e-07, "loss": 0.0005, "step": 108580 }, { "epoch": 1.8351740280371462, "grad_norm": 0.05029303953051567, "learning_rate": 2.054348889773733e-07, "loss": 0.0004, "step": 108590 }, { "epoch": 1.8353430283159966, "grad_norm": 0.03773169964551926, "learning_rate": 2.050166881899518e-07, "loss": 0.0004, "step": 108600 }, { "epoch": 1.8355120285948472, "grad_norm": 0.0030799570959061384, "learning_rate": 2.0459890458931043e-07, "loss": 0.0007, "step": 108610 }, { "epoch": 1.8356810288736978, "grad_norm": 0.01885673590004444, "learning_rate": 2.0418153821179954e-07, "loss": 0.0006, "step": 108620 }, { "epoch": 1.8358500291525481, "grad_norm": 0.026617132127285004, "learning_rate": 2.037645890937301e-07, "loss": 0.0007, "step": 108630 }, { "epoch": 1.8360190294313985, "grad_norm": 0.058524031192064285, "learning_rate": 2.033480572713803e-07, "loss": 0.0009, "step": 108640 }, { "epoch": 1.8361880297102489, "grad_norm": 0.010247909463942051, "learning_rate": 2.0293194278098892e-07, "loss": 0.0007, "step": 108650 }, { "epoch": 1.8363570299890994, "grad_norm": 0.07031330466270447, "learning_rate": 2.0251624565876203e-07, "loss": 0.0005, "step": 108660 }, { "epoch": 1.83652603026795, "grad_norm": 0.0027228700928390026, "learning_rate": 2.0210096594086514e-07, "loss": 0.0007, "step": 108670 }, { "epoch": 1.8366950305468004, "grad_norm": 0.00010234397632302716, "learning_rate": 2.0168610366343099e-07, "loss": 0.0005, "step": 108680 }, { "epoch": 1.8368640308256508, "grad_norm": 0.03394344076514244, "learning_rate": 2.0127165886255407e-07, "loss": 0.0012, "step": 108690 }, { "epoch": 1.8370330311045013, "grad_norm": 0.023084286600351334, "learning_rate": 2.0085763157429328e-07, "loss": 0.0011, "step": 108700 }, { "epoch": 1.837202031383352, "grad_norm": 0.04127196967601776, "learning_rate": 2.0044402183467038e-07, "loss": 0.0005, "step": 108710 }, { "epoch": 1.8373710316622023, "grad_norm": 0.007525291293859482, "learning_rate": 2.0003082967967325e-07, "loss": 0.0003, "step": 108720 }, { "epoch": 1.8375400319410526, "grad_norm": 0.0504603311419487, "learning_rate": 1.9961805514524923e-07, "loss": 0.0003, "step": 108730 }, { "epoch": 1.837709032219903, "grad_norm": 0.03207425773143768, "learning_rate": 1.9920569826731352e-07, "loss": 0.0005, "step": 108740 }, { "epoch": 1.8378780324987536, "grad_norm": 0.013932219706475735, "learning_rate": 1.987937590817418e-07, "loss": 0.0003, "step": 108750 }, { "epoch": 1.8380470327776042, "grad_norm": 0.0014516907976940274, "learning_rate": 1.9838223762437548e-07, "loss": 0.0004, "step": 108760 }, { "epoch": 1.8382160330564545, "grad_norm": 0.05958481505513191, "learning_rate": 1.979711339310192e-07, "loss": 0.0008, "step": 108770 }, { "epoch": 1.838385033335305, "grad_norm": 0.06512217223644257, "learning_rate": 1.9756044803744046e-07, "loss": 0.0008, "step": 108780 }, { "epoch": 1.8385540336141555, "grad_norm": 0.16275621950626373, "learning_rate": 1.9715017997937126e-07, "loss": 0.0007, "step": 108790 }, { "epoch": 1.838723033893006, "grad_norm": 0.029923392459750175, "learning_rate": 1.967403297925069e-07, "loss": 0.0005, "step": 108800 }, { "epoch": 1.8388920341718564, "grad_norm": 0.01783330924808979, "learning_rate": 1.963308975125061e-07, "loss": 0.0002, "step": 108810 }, { "epoch": 1.8390610344507068, "grad_norm": 0.04435092210769653, "learning_rate": 1.9592188317499094e-07, "loss": 0.0008, "step": 108820 }, { "epoch": 1.8392300347295572, "grad_norm": 0.004966772627085447, "learning_rate": 1.9551328681554904e-07, "loss": 0.0007, "step": 108830 }, { "epoch": 1.8393990350084077, "grad_norm": 0.015078980475664139, "learning_rate": 1.9510510846972863e-07, "loss": 0.001, "step": 108840 }, { "epoch": 1.8395680352872583, "grad_norm": 0.03198658302426338, "learning_rate": 1.946973481730452e-07, "loss": 0.0011, "step": 108850 }, { "epoch": 1.8397370355661087, "grad_norm": 0.0027642929926514626, "learning_rate": 1.942900059609737e-07, "loss": 0.0009, "step": 108860 }, { "epoch": 1.839906035844959, "grad_norm": 0.048655200749635696, "learning_rate": 1.9388308186895632e-07, "loss": 0.0004, "step": 108870 }, { "epoch": 1.8400750361238096, "grad_norm": 0.026829611510038376, "learning_rate": 1.9347657593239645e-07, "loss": 0.0005, "step": 108880 }, { "epoch": 1.8402440364026602, "grad_norm": 0.02060195989906788, "learning_rate": 1.9307048818666297e-07, "loss": 0.0012, "step": 108890 }, { "epoch": 1.8404130366815106, "grad_norm": 0.004234641324728727, "learning_rate": 1.9266481866708653e-07, "loss": 0.0005, "step": 108900 }, { "epoch": 1.840582036960361, "grad_norm": 0.009983054362237453, "learning_rate": 1.922595674089639e-07, "loss": 0.0007, "step": 108910 }, { "epoch": 1.8407510372392113, "grad_norm": 0.03913680464029312, "learning_rate": 1.9185473444755242e-07, "loss": 0.001, "step": 108920 }, { "epoch": 1.8409200375180619, "grad_norm": 0.022576579824090004, "learning_rate": 1.914503198180756e-07, "loss": 0.0006, "step": 108930 }, { "epoch": 1.8410890377969125, "grad_norm": 0.06822885572910309, "learning_rate": 1.910463235557186e-07, "loss": 0.0003, "step": 108940 }, { "epoch": 1.8412580380757628, "grad_norm": 0.034167319536209106, "learning_rate": 1.9064274569563168e-07, "loss": 0.0004, "step": 108950 }, { "epoch": 1.8414270383546132, "grad_norm": 0.10746181011199951, "learning_rate": 1.9023958627292672e-07, "loss": 0.0007, "step": 108960 }, { "epoch": 1.8415960386334638, "grad_norm": 0.004286276176571846, "learning_rate": 1.8983684532268287e-07, "loss": 0.0006, "step": 108970 }, { "epoch": 1.8417650389123144, "grad_norm": 0.03944242000579834, "learning_rate": 1.8943452287993823e-07, "loss": 0.0007, "step": 108980 }, { "epoch": 1.8419340391911647, "grad_norm": 0.0013683936558663845, "learning_rate": 1.890326189796987e-07, "loss": 0.0002, "step": 108990 }, { "epoch": 1.842103039470015, "grad_norm": 0.02904042787849903, "learning_rate": 1.886311336569302e-07, "loss": 0.0007, "step": 109000 }, { "epoch": 1.8422720397488654, "grad_norm": 0.018441656604409218, "learning_rate": 1.8823006694656476e-07, "loss": 0.0004, "step": 109010 }, { "epoch": 1.842441040027716, "grad_norm": 0.005052447319030762, "learning_rate": 1.878294188834978e-07, "loss": 0.0009, "step": 109020 }, { "epoch": 1.8426100403065666, "grad_norm": 0.05015619099140167, "learning_rate": 1.8742918950258594e-07, "loss": 0.0007, "step": 109030 }, { "epoch": 1.842779040585417, "grad_norm": 0.010885016061365604, "learning_rate": 1.870293788386529e-07, "loss": 0.0008, "step": 109040 }, { "epoch": 1.8429480408642673, "grad_norm": 0.10773001611232758, "learning_rate": 1.866299869264826e-07, "loss": 0.0007, "step": 109050 }, { "epoch": 1.843117041143118, "grad_norm": 0.014499770477414131, "learning_rate": 1.8623101380082554e-07, "loss": 0.0009, "step": 109060 }, { "epoch": 1.8432860414219685, "grad_norm": 0.0013905505184084177, "learning_rate": 1.8583245949639285e-07, "loss": 0.0003, "step": 109070 }, { "epoch": 1.8434550417008189, "grad_norm": 0.031105399131774902, "learning_rate": 1.854343240478612e-07, "loss": 0.0005, "step": 109080 }, { "epoch": 1.8436240419796692, "grad_norm": 0.10961327701807022, "learning_rate": 1.8503660748987073e-07, "loss": 0.0006, "step": 109090 }, { "epoch": 1.8437930422585196, "grad_norm": 0.0004014700825791806, "learning_rate": 1.8463930985702428e-07, "loss": 0.0003, "step": 109100 }, { "epoch": 1.8439620425373702, "grad_norm": 0.0050053782761096954, "learning_rate": 1.8424243118388918e-07, "loss": 0.0003, "step": 109110 }, { "epoch": 1.8441310428162208, "grad_norm": 0.03133934736251831, "learning_rate": 1.838459715049956e-07, "loss": 0.0019, "step": 109120 }, { "epoch": 1.8443000430950711, "grad_norm": 0.026860803365707397, "learning_rate": 1.8344993085483653e-07, "loss": 0.0004, "step": 109130 }, { "epoch": 1.8444690433739215, "grad_norm": 0.14718422293663025, "learning_rate": 1.8305430926787105e-07, "loss": 0.0026, "step": 109140 }, { "epoch": 1.844638043652772, "grad_norm": 0.003192188451066613, "learning_rate": 1.8265910677851884e-07, "loss": 0.0002, "step": 109150 }, { "epoch": 1.8448070439316226, "grad_norm": 0.06060683727264404, "learning_rate": 1.8226432342116517e-07, "loss": 0.0004, "step": 109160 }, { "epoch": 1.844976044210473, "grad_norm": 0.08313444256782532, "learning_rate": 1.818699592301576e-07, "loss": 0.0019, "step": 109170 }, { "epoch": 1.8451450444893234, "grad_norm": 0.03141210228204727, "learning_rate": 1.814760142398081e-07, "loss": 0.0005, "step": 109180 }, { "epoch": 1.8453140447681737, "grad_norm": 0.0628141239285469, "learning_rate": 1.8108248848439146e-07, "loss": 0.0009, "step": 109190 }, { "epoch": 1.8454830450470243, "grad_norm": 0.07199462503194809, "learning_rate": 1.80689381998147e-07, "loss": 0.0004, "step": 109200 }, { "epoch": 1.845652045325875, "grad_norm": 0.012743060477077961, "learning_rate": 1.8029669481527566e-07, "loss": 0.0008, "step": 109210 }, { "epoch": 1.8458210456047253, "grad_norm": 0.013341827318072319, "learning_rate": 1.7990442696994458e-07, "loss": 0.0003, "step": 109220 }, { "epoch": 1.8459900458835756, "grad_norm": 0.006336951162666082, "learning_rate": 1.79512578496282e-07, "loss": 0.0007, "step": 109230 }, { "epoch": 1.8461590461624262, "grad_norm": 0.023601751774549484, "learning_rate": 1.791211494283812e-07, "loss": 0.0003, "step": 109240 }, { "epoch": 1.8463280464412768, "grad_norm": 0.09381477534770966, "learning_rate": 1.7873013980029718e-07, "loss": 0.002, "step": 109250 }, { "epoch": 1.8464970467201272, "grad_norm": 0.02556357905268669, "learning_rate": 1.7833954964605103e-07, "loss": 0.0003, "step": 109260 }, { "epoch": 1.8466660469989775, "grad_norm": 0.017666513100266457, "learning_rate": 1.7794937899962505e-07, "loss": 0.0004, "step": 109270 }, { "epoch": 1.8468350472778279, "grad_norm": 0.012054857797920704, "learning_rate": 1.775596278949676e-07, "loss": 0.0013, "step": 109280 }, { "epoch": 1.8470040475566785, "grad_norm": 0.09404901415109634, "learning_rate": 1.7717029636598714e-07, "loss": 0.0011, "step": 109290 }, { "epoch": 1.847173047835529, "grad_norm": 0.024840453639626503, "learning_rate": 1.767813844465577e-07, "loss": 0.0003, "step": 109300 }, { "epoch": 1.8473420481143794, "grad_norm": 0.1091921404004097, "learning_rate": 1.763928921705177e-07, "loss": 0.0007, "step": 109310 }, { "epoch": 1.8475110483932298, "grad_norm": 0.005300764925777912, "learning_rate": 1.7600481957166627e-07, "loss": 0.0004, "step": 109320 }, { "epoch": 1.8476800486720804, "grad_norm": 0.0005665574572049081, "learning_rate": 1.7561716668376861e-07, "loss": 0.0008, "step": 109330 }, { "epoch": 1.8478490489509307, "grad_norm": 0.02141609601676464, "learning_rate": 1.752299335405522e-07, "loss": 0.0003, "step": 109340 }, { "epoch": 1.8480180492297813, "grad_norm": 0.0409809872508049, "learning_rate": 1.7484312017570837e-07, "loss": 0.0005, "step": 109350 }, { "epoch": 1.8481870495086317, "grad_norm": 0.03614840656518936, "learning_rate": 1.7445672662289082e-07, "loss": 0.0003, "step": 109360 }, { "epoch": 1.848356049787482, "grad_norm": 0.002984137972816825, "learning_rate": 1.740707529157193e-07, "loss": 0.0006, "step": 109370 }, { "epoch": 1.8485250500663326, "grad_norm": 0.0010859728790819645, "learning_rate": 1.736851990877736e-07, "loss": 0.0006, "step": 109380 }, { "epoch": 1.8486940503451832, "grad_norm": 0.010009992867708206, "learning_rate": 1.7330006517260024e-07, "loss": 0.0004, "step": 109390 }, { "epoch": 1.8488630506240336, "grad_norm": 0.058318689465522766, "learning_rate": 1.7291535120370628e-07, "loss": 0.0002, "step": 109400 }, { "epoch": 1.849032050902884, "grad_norm": 0.009888230822980404, "learning_rate": 1.7253105721456554e-07, "loss": 0.0003, "step": 109410 }, { "epoch": 1.8492010511817345, "grad_norm": 0.05609451234340668, "learning_rate": 1.7214718323861123e-07, "loss": 0.0005, "step": 109420 }, { "epoch": 1.8493700514605849, "grad_norm": 0.046090930700302124, "learning_rate": 1.7176372930924445e-07, "loss": 0.0004, "step": 109430 }, { "epoch": 1.8495390517394354, "grad_norm": 0.027034861966967583, "learning_rate": 1.7138069545982628e-07, "loss": 0.0006, "step": 109440 }, { "epoch": 1.8497080520182858, "grad_norm": 0.001320467097684741, "learning_rate": 1.7099808172368228e-07, "loss": 0.0012, "step": 109450 }, { "epoch": 1.8498770522971362, "grad_norm": 0.020823238417506218, "learning_rate": 1.7061588813410357e-07, "loss": 0.0005, "step": 109460 }, { "epoch": 1.8500460525759868, "grad_norm": 0.008123408071696758, "learning_rate": 1.7023411472434026e-07, "loss": 0.0008, "step": 109470 }, { "epoch": 1.8502150528548373, "grad_norm": 0.10001326352357864, "learning_rate": 1.6985276152760966e-07, "loss": 0.001, "step": 109480 }, { "epoch": 1.8503840531336877, "grad_norm": 0.08047106862068176, "learning_rate": 1.6947182857709242e-07, "loss": 0.0006, "step": 109490 }, { "epoch": 1.850553053412538, "grad_norm": 0.03494994714856148, "learning_rate": 1.690913159059293e-07, "loss": 0.001, "step": 109500 }, { "epoch": 1.8507220536913884, "grad_norm": 0.003561967285349965, "learning_rate": 1.687112235472288e-07, "loss": 0.0003, "step": 109510 }, { "epoch": 1.850891053970239, "grad_norm": 0.058161672204732895, "learning_rate": 1.6833155153406e-07, "loss": 0.0008, "step": 109520 }, { "epoch": 1.8510600542490896, "grad_norm": 0.00724739721044898, "learning_rate": 1.6795229989945594e-07, "loss": 0.0006, "step": 109530 }, { "epoch": 1.85122905452794, "grad_norm": 0.045328930020332336, "learning_rate": 1.675734686764141e-07, "loss": 0.0007, "step": 109540 }, { "epoch": 1.8513980548067903, "grad_norm": 0.03157823905348778, "learning_rate": 1.6719505789789315e-07, "loss": 0.0018, "step": 109550 }, { "epoch": 1.851567055085641, "grad_norm": 0.014154426753520966, "learning_rate": 1.6681706759681892e-07, "loss": 0.0004, "step": 109560 }, { "epoch": 1.8517360553644915, "grad_norm": 0.033427465707063675, "learning_rate": 1.664394978060757e-07, "loss": 0.0007, "step": 109570 }, { "epoch": 1.8519050556433418, "grad_norm": 0.015785370022058487, "learning_rate": 1.6606234855851665e-07, "loss": 0.0011, "step": 109580 }, { "epoch": 1.8520740559221922, "grad_norm": 0.0132228909060359, "learning_rate": 1.656856198869533e-07, "loss": 0.0003, "step": 109590 }, { "epoch": 1.8522430562010426, "grad_norm": 0.19660967588424683, "learning_rate": 1.6530931182416444e-07, "loss": 0.0025, "step": 109600 }, { "epoch": 1.8524120564798932, "grad_norm": 0.02984684519469738, "learning_rate": 1.649334244028894e-07, "loss": 0.0003, "step": 109610 }, { "epoch": 1.8525810567587437, "grad_norm": 0.004236331209540367, "learning_rate": 1.6455795765583372e-07, "loss": 0.0003, "step": 109620 }, { "epoch": 1.852750057037594, "grad_norm": 0.06756148487329483, "learning_rate": 1.6418291161566347e-07, "loss": 0.0005, "step": 109630 }, { "epoch": 1.8529190573164445, "grad_norm": 0.04586157202720642, "learning_rate": 1.6380828631501035e-07, "loss": 0.0007, "step": 109640 }, { "epoch": 1.853088057595295, "grad_norm": 0.003409165423363447, "learning_rate": 1.6343408178646714e-07, "loss": 0.0007, "step": 109650 }, { "epoch": 1.8532570578741456, "grad_norm": 0.04364131763577461, "learning_rate": 1.6306029806259338e-07, "loss": 0.0007, "step": 109660 }, { "epoch": 1.853426058152996, "grad_norm": 0.044084835797548294, "learning_rate": 1.626869351759086e-07, "loss": 0.0002, "step": 109670 }, { "epoch": 1.8535950584318464, "grad_norm": 0.01193965319544077, "learning_rate": 1.6231399315889852e-07, "loss": 0.0003, "step": 109680 }, { "epoch": 1.8537640587106967, "grad_norm": 0.010008934885263443, "learning_rate": 1.619414720440099e-07, "loss": 0.0004, "step": 109690 }, { "epoch": 1.8539330589895473, "grad_norm": 0.04185475409030914, "learning_rate": 1.6156937186365407e-07, "loss": 0.0003, "step": 109700 }, { "epoch": 1.8541020592683979, "grad_norm": 0.012754472903907299, "learning_rate": 1.6119769265020568e-07, "loss": 0.0008, "step": 109710 }, { "epoch": 1.8542710595472482, "grad_norm": 0.027824141085147858, "learning_rate": 1.6082643443600277e-07, "loss": 0.0009, "step": 109720 }, { "epoch": 1.8544400598260986, "grad_norm": 0.0018118839943781495, "learning_rate": 1.6045559725334614e-07, "loss": 0.0004, "step": 109730 }, { "epoch": 1.8546090601049492, "grad_norm": 0.011216542683541775, "learning_rate": 1.6008518113450112e-07, "loss": 0.0004, "step": 109740 }, { "epoch": 1.8547780603837998, "grad_norm": 0.0073775663040578365, "learning_rate": 1.5971518611169467e-07, "loss": 0.0004, "step": 109750 }, { "epoch": 1.8549470606626501, "grad_norm": 0.027089470997452736, "learning_rate": 1.5934561221711942e-07, "loss": 0.0004, "step": 109760 }, { "epoch": 1.8551160609415005, "grad_norm": 0.016810627654194832, "learning_rate": 1.589764594829285e-07, "loss": 0.0006, "step": 109770 }, { "epoch": 1.8552850612203509, "grad_norm": 0.0004215096414554864, "learning_rate": 1.5860772794124125e-07, "loss": 0.0005, "step": 109780 }, { "epoch": 1.8554540614992014, "grad_norm": 0.08347168564796448, "learning_rate": 1.582394176241392e-07, "loss": 0.0008, "step": 109790 }, { "epoch": 1.855623061778052, "grad_norm": 0.027736514806747437, "learning_rate": 1.5787152856366616e-07, "loss": 0.0003, "step": 109800 }, { "epoch": 1.8557920620569024, "grad_norm": 0.04129638522863388, "learning_rate": 1.57504060791831e-07, "loss": 0.0003, "step": 109810 }, { "epoch": 1.8559610623357528, "grad_norm": 0.02299007587134838, "learning_rate": 1.571370143406048e-07, "loss": 0.001, "step": 109820 }, { "epoch": 1.8561300626146033, "grad_norm": 0.07348429411649704, "learning_rate": 1.5677038924192312e-07, "loss": 0.0012, "step": 109830 }, { "epoch": 1.856299062893454, "grad_norm": 0.04478226974606514, "learning_rate": 1.5640418552768266e-07, "loss": 0.0007, "step": 109840 }, { "epoch": 1.8564680631723043, "grad_norm": 0.037637028843164444, "learning_rate": 1.560384032297463e-07, "loss": 0.0009, "step": 109850 }, { "epoch": 1.8566370634511546, "grad_norm": 0.2575816214084625, "learning_rate": 1.5567304237993797e-07, "loss": 0.0005, "step": 109860 }, { "epoch": 1.856806063730005, "grad_norm": 0.010352968238294125, "learning_rate": 1.5530810301004727e-07, "loss": 0.0011, "step": 109870 }, { "epoch": 1.8569750640088556, "grad_norm": 0.07358036190271378, "learning_rate": 1.5494358515182384e-07, "loss": 0.0008, "step": 109880 }, { "epoch": 1.8571440642877062, "grad_norm": 0.1082979217171669, "learning_rate": 1.5457948883698393e-07, "loss": 0.0009, "step": 109890 }, { "epoch": 1.8573130645665565, "grad_norm": 0.02853509411215782, "learning_rate": 1.5421581409720444e-07, "loss": 0.0007, "step": 109900 }, { "epoch": 1.857482064845407, "grad_norm": 0.006572206504642963, "learning_rate": 1.5385256096412838e-07, "loss": 0.0004, "step": 109910 }, { "epoch": 1.8576510651242575, "grad_norm": 0.008874989114701748, "learning_rate": 1.534897294693588e-07, "loss": 0.001, "step": 109920 }, { "epoch": 1.857820065403108, "grad_norm": 0.01589968614280224, "learning_rate": 1.5312731964446548e-07, "loss": 0.0003, "step": 109930 }, { "epoch": 1.8579890656819584, "grad_norm": 0.022283537313342094, "learning_rate": 1.5276533152097816e-07, "loss": 0.001, "step": 109940 }, { "epoch": 1.8581580659608088, "grad_norm": 0.011150048114359379, "learning_rate": 1.524037651303928e-07, "loss": 0.0004, "step": 109950 }, { "epoch": 1.8583270662396592, "grad_norm": 0.006013581529259682, "learning_rate": 1.5204262050416696e-07, "loss": 0.0006, "step": 109960 }, { "epoch": 1.8584960665185097, "grad_norm": 0.1214689388871193, "learning_rate": 1.5168189767372222e-07, "loss": 0.001, "step": 109970 }, { "epoch": 1.8586650667973603, "grad_norm": 0.05199957266449928, "learning_rate": 1.5132159667044233e-07, "loss": 0.0014, "step": 109980 }, { "epoch": 1.8588340670762107, "grad_norm": 0.00684434873983264, "learning_rate": 1.5096171752567667e-07, "loss": 0.0004, "step": 109990 }, { "epoch": 1.859003067355061, "grad_norm": 0.0022178571671247482, "learning_rate": 1.506022602707352e-07, "loss": 0.0005, "step": 110000 }, { "epoch": 1.8591720676339116, "grad_norm": 0.0020311574917286634, "learning_rate": 1.5024322493689348e-07, "loss": 0.0004, "step": 110010 }, { "epoch": 1.8593410679127622, "grad_norm": 0.021953493356704712, "learning_rate": 1.4988461155538813e-07, "loss": 0.0006, "step": 110020 }, { "epoch": 1.8595100681916126, "grad_norm": 0.05131346359848976, "learning_rate": 1.4952642015742091e-07, "loss": 0.0008, "step": 110030 }, { "epoch": 1.859679068470463, "grad_norm": 0.07163525372743607, "learning_rate": 1.4916865077415688e-07, "loss": 0.0004, "step": 110040 }, { "epoch": 1.8598480687493133, "grad_norm": 0.058817777782678604, "learning_rate": 1.4881130343672278e-07, "loss": 0.0009, "step": 110050 }, { "epoch": 1.8600170690281639, "grad_norm": 0.12411772459745407, "learning_rate": 1.4845437817620933e-07, "loss": 0.0005, "step": 110060 }, { "epoch": 1.8601860693070145, "grad_norm": 0.0067708902060985565, "learning_rate": 1.480978750236717e-07, "loss": 0.0007, "step": 110070 }, { "epoch": 1.8603550695858648, "grad_norm": 0.06462078541517258, "learning_rate": 1.4774179401012723e-07, "loss": 0.0005, "step": 110080 }, { "epoch": 1.8605240698647152, "grad_norm": 0.0010421044426038861, "learning_rate": 1.473861351665562e-07, "loss": 0.0015, "step": 110090 }, { "epoch": 1.8606930701435658, "grad_norm": 0.045905958861112595, "learning_rate": 1.470308985239033e-07, "loss": 0.0006, "step": 110100 }, { "epoch": 1.8608620704224164, "grad_norm": 0.02327723614871502, "learning_rate": 1.4667608411307432e-07, "loss": 0.0009, "step": 110110 }, { "epoch": 1.8610310707012667, "grad_norm": 0.06241496652364731, "learning_rate": 1.4632169196494184e-07, "loss": 0.0011, "step": 110120 }, { "epoch": 1.861200070980117, "grad_norm": 0.0030036966782063246, "learning_rate": 1.4596772211033837e-07, "loss": 0.0009, "step": 110130 }, { "epoch": 1.8613690712589674, "grad_norm": 0.020759856328368187, "learning_rate": 1.456141745800621e-07, "loss": 0.0008, "step": 110140 }, { "epoch": 1.861538071537818, "grad_norm": 0.035996537655591965, "learning_rate": 1.4526104940487173e-07, "loss": 0.0004, "step": 110150 }, { "epoch": 1.8617070718166686, "grad_norm": 0.050926003605127335, "learning_rate": 1.4490834661549212e-07, "loss": 0.001, "step": 110160 }, { "epoch": 1.861876072095519, "grad_norm": 0.0453384630382061, "learning_rate": 1.4455606624260987e-07, "loss": 0.0003, "step": 110170 }, { "epoch": 1.8620450723743693, "grad_norm": 0.021722691133618355, "learning_rate": 1.4420420831687542e-07, "loss": 0.0004, "step": 110180 }, { "epoch": 1.86221407265322, "grad_norm": 0.09107028692960739, "learning_rate": 1.4385277286890099e-07, "loss": 0.0012, "step": 110190 }, { "epoch": 1.8623830729320703, "grad_norm": 0.008340844884514809, "learning_rate": 1.435017599292643e-07, "loss": 0.0004, "step": 110200 }, { "epoch": 1.8625520732109209, "grad_norm": 0.004562507849186659, "learning_rate": 1.4315116952850482e-07, "loss": 0.0004, "step": 110210 }, { "epoch": 1.8627210734897712, "grad_norm": 0.01268729753792286, "learning_rate": 1.4280100169712486e-07, "loss": 0.0005, "step": 110220 }, { "epoch": 1.8628900737686216, "grad_norm": 0.1103207990527153, "learning_rate": 1.4245125646559277e-07, "loss": 0.0006, "step": 110230 }, { "epoch": 1.8630590740474722, "grad_norm": 0.05450146272778511, "learning_rate": 1.4210193386433535e-07, "loss": 0.0006, "step": 110240 }, { "epoch": 1.8632280743263228, "grad_norm": 0.06385930627584457, "learning_rate": 1.4175303392374719e-07, "loss": 0.0005, "step": 110250 }, { "epoch": 1.8633970746051731, "grad_norm": 0.0600234791636467, "learning_rate": 1.414045566741845e-07, "loss": 0.0005, "step": 110260 }, { "epoch": 1.8635660748840235, "grad_norm": 0.05960167944431305, "learning_rate": 1.4105650214596478e-07, "loss": 0.0007, "step": 110270 }, { "epoch": 1.863735075162874, "grad_norm": 0.06081349402666092, "learning_rate": 1.407088703693721e-07, "loss": 0.0006, "step": 110280 }, { "epoch": 1.8639040754417244, "grad_norm": 0.021502451971173286, "learning_rate": 1.4036166137465168e-07, "loss": 0.0006, "step": 110290 }, { "epoch": 1.864073075720575, "grad_norm": 0.002646598732098937, "learning_rate": 1.4001487519201162e-07, "loss": 0.0007, "step": 110300 }, { "epoch": 1.8642420759994254, "grad_norm": 0.03035661205649376, "learning_rate": 1.3966851185162556e-07, "loss": 0.0005, "step": 110310 }, { "epoch": 1.8644110762782757, "grad_norm": 0.019656464457511902, "learning_rate": 1.3932257138362658e-07, "loss": 0.001, "step": 110320 }, { "epoch": 1.8645800765571263, "grad_norm": 0.017677245661616325, "learning_rate": 1.3897705381811566e-07, "loss": 0.0004, "step": 110330 }, { "epoch": 1.864749076835977, "grad_norm": 0.08664114028215408, "learning_rate": 1.3863195918515204e-07, "loss": 0.0005, "step": 110340 }, { "epoch": 1.8649180771148273, "grad_norm": 0.02080652117729187, "learning_rate": 1.3828728751476284e-07, "loss": 0.0006, "step": 110350 }, { "epoch": 1.8650870773936776, "grad_norm": 0.026353400200605392, "learning_rate": 1.379430388369346e-07, "loss": 0.0005, "step": 110360 }, { "epoch": 1.8652560776725282, "grad_norm": 0.0249568372964859, "learning_rate": 1.375992131816195e-07, "loss": 0.0008, "step": 110370 }, { "epoch": 1.8654250779513786, "grad_norm": 0.043732523918151855, "learning_rate": 1.372558105787314e-07, "loss": 0.0004, "step": 110380 }, { "epoch": 1.8655940782302292, "grad_norm": 0.042085371911525726, "learning_rate": 1.3691283105814857e-07, "loss": 0.0005, "step": 110390 }, { "epoch": 1.8657630785090795, "grad_norm": 0.01947295106947422, "learning_rate": 1.3657027464971162e-07, "loss": 0.0005, "step": 110400 }, { "epoch": 1.8659320787879299, "grad_norm": 0.07680048793554306, "learning_rate": 1.362281413832245e-07, "loss": 0.001, "step": 110410 }, { "epoch": 1.8661010790667805, "grad_norm": 0.0022646563593298197, "learning_rate": 1.35886431288455e-07, "loss": 0.0006, "step": 110420 }, { "epoch": 1.866270079345631, "grad_norm": 0.06517709791660309, "learning_rate": 1.3554514439513222e-07, "loss": 0.0006, "step": 110430 }, { "epoch": 1.8664390796244814, "grad_norm": 0.09205411374568939, "learning_rate": 1.3520428073295122e-07, "loss": 0.0008, "step": 110440 }, { "epoch": 1.8666080799033318, "grad_norm": 0.10866151005029678, "learning_rate": 1.3486384033156886e-07, "loss": 0.001, "step": 110450 }, { "epoch": 1.8667770801821821, "grad_norm": 0.020667634904384613, "learning_rate": 1.3452382322060366e-07, "loss": 0.0005, "step": 110460 }, { "epoch": 1.8669460804610327, "grad_norm": 0.004244860261678696, "learning_rate": 1.3418422942964027e-07, "loss": 0.0008, "step": 110470 }, { "epoch": 1.8671150807398833, "grad_norm": 0.043460264801979065, "learning_rate": 1.3384505898822343e-07, "loss": 0.0008, "step": 110480 }, { "epoch": 1.8672840810187337, "grad_norm": 0.023740937933325768, "learning_rate": 1.335063119258645e-07, "loss": 0.0003, "step": 110490 }, { "epoch": 1.867453081297584, "grad_norm": 0.04052386432886124, "learning_rate": 1.3316798827203493e-07, "loss": 0.0007, "step": 110500 }, { "epoch": 1.8676220815764346, "grad_norm": 0.07000137120485306, "learning_rate": 1.328300880561706e-07, "loss": 0.0006, "step": 110510 }, { "epoch": 1.8677910818552852, "grad_norm": 0.006084958557039499, "learning_rate": 1.3249261130767022e-07, "loss": 0.0006, "step": 110520 }, { "epoch": 1.8679600821341356, "grad_norm": 0.0023774036671966314, "learning_rate": 1.321555580558964e-07, "loss": 0.0013, "step": 110530 }, { "epoch": 1.868129082412986, "grad_norm": 0.06854826956987381, "learning_rate": 1.3181892833017463e-07, "loss": 0.0005, "step": 110540 }, { "epoch": 1.8682980826918363, "grad_norm": 0.03659120947122574, "learning_rate": 1.3148272215979307e-07, "loss": 0.0006, "step": 110550 }, { "epoch": 1.8684670829706869, "grad_norm": 0.0016194320050999522, "learning_rate": 1.3114693957400283e-07, "loss": 0.0008, "step": 110560 }, { "epoch": 1.8686360832495374, "grad_norm": 0.031096505001187325, "learning_rate": 1.3081158060201883e-07, "loss": 0.0009, "step": 110570 }, { "epoch": 1.8688050835283878, "grad_norm": 0.0014152682851999998, "learning_rate": 1.3047664527301994e-07, "loss": 0.0003, "step": 110580 }, { "epoch": 1.8689740838072382, "grad_norm": 0.06311298161745071, "learning_rate": 1.3014213361614514e-07, "loss": 0.001, "step": 110590 }, { "epoch": 1.8691430840860888, "grad_norm": 0.010693120770156384, "learning_rate": 1.2980804566050054e-07, "loss": 0.0006, "step": 110600 }, { "epoch": 1.8693120843649393, "grad_norm": 0.04796507582068443, "learning_rate": 1.2947438143515234e-07, "loss": 0.0005, "step": 110610 }, { "epoch": 1.8694810846437897, "grad_norm": 0.003044690238311887, "learning_rate": 1.2914114096913122e-07, "loss": 0.0005, "step": 110620 }, { "epoch": 1.86965008492264, "grad_norm": 0.0033997853752225637, "learning_rate": 1.2880832429143008e-07, "loss": 0.0007, "step": 110630 }, { "epoch": 1.8698190852014904, "grad_norm": 0.025361163541674614, "learning_rate": 1.284759314310069e-07, "loss": 0.0006, "step": 110640 }, { "epoch": 1.869988085480341, "grad_norm": 0.016363272443413734, "learning_rate": 1.2814396241678074e-07, "loss": 0.0007, "step": 110650 }, { "epoch": 1.8701570857591916, "grad_norm": 0.07601359486579895, "learning_rate": 1.278124172776346e-07, "loss": 0.001, "step": 110660 }, { "epoch": 1.870326086038042, "grad_norm": 0.03996589034795761, "learning_rate": 1.2748129604241432e-07, "loss": 0.0005, "step": 110670 }, { "epoch": 1.8704950863168923, "grad_norm": 0.05205439031124115, "learning_rate": 1.2715059873992907e-07, "loss": 0.0008, "step": 110680 }, { "epoch": 1.870664086595743, "grad_norm": 0.08824488520622253, "learning_rate": 1.268203253989514e-07, "loss": 0.0006, "step": 110690 }, { "epoch": 1.8708330868745935, "grad_norm": 0.01283740159124136, "learning_rate": 1.2649047604821663e-07, "loss": 0.0006, "step": 110700 }, { "epoch": 1.8710020871534438, "grad_norm": 0.03213268145918846, "learning_rate": 1.2616105071642238e-07, "loss": 0.0004, "step": 110710 }, { "epoch": 1.8711710874322942, "grad_norm": 0.06657550483942032, "learning_rate": 1.258320494322318e-07, "loss": 0.0012, "step": 110720 }, { "epoch": 1.8713400877111446, "grad_norm": 0.027185996994376183, "learning_rate": 1.2550347222426873e-07, "loss": 0.0006, "step": 110730 }, { "epoch": 1.8715090879899952, "grad_norm": 0.05009664222598076, "learning_rate": 1.251753191211208e-07, "loss": 0.0007, "step": 110740 }, { "epoch": 1.8716780882688457, "grad_norm": 0.019768835976719856, "learning_rate": 1.2484759015133906e-07, "loss": 0.0006, "step": 110750 }, { "epoch": 1.871847088547696, "grad_norm": 0.000817089865449816, "learning_rate": 1.2452028534343852e-07, "loss": 0.0005, "step": 110760 }, { "epoch": 1.8720160888265465, "grad_norm": 0.003386547788977623, "learning_rate": 1.2419340472589415e-07, "loss": 0.0008, "step": 110770 }, { "epoch": 1.872185089105397, "grad_norm": 0.006252896040678024, "learning_rate": 1.238669483271482e-07, "loss": 0.0006, "step": 110780 }, { "epoch": 1.8723540893842476, "grad_norm": 0.023714499548077583, "learning_rate": 1.2354091617560292e-07, "loss": 0.0003, "step": 110790 }, { "epoch": 1.872523089663098, "grad_norm": 0.008874212391674519, "learning_rate": 1.2321530829962458e-07, "loss": 0.0006, "step": 110800 }, { "epoch": 1.8726920899419484, "grad_norm": 0.05294567719101906, "learning_rate": 1.2289012472754324e-07, "loss": 0.0003, "step": 110810 }, { "epoch": 1.8728610902207987, "grad_norm": 0.00282698730006814, "learning_rate": 1.225653654876513e-07, "loss": 0.0002, "step": 110820 }, { "epoch": 1.8730300904996493, "grad_norm": 0.03028349205851555, "learning_rate": 1.2224103060820393e-07, "loss": 0.0004, "step": 110830 }, { "epoch": 1.8731990907784999, "grad_norm": 0.030639857053756714, "learning_rate": 1.2191712011742074e-07, "loss": 0.001, "step": 110840 }, { "epoch": 1.8733680910573502, "grad_norm": 0.04265419766306877, "learning_rate": 1.2159363404348256e-07, "loss": 0.0009, "step": 110850 }, { "epoch": 1.8735370913362006, "grad_norm": 0.010115021839737892, "learning_rate": 1.2127057241453465e-07, "loss": 0.0005, "step": 110860 }, { "epoch": 1.8737060916150512, "grad_norm": 0.05838964879512787, "learning_rate": 1.2094793525868564e-07, "loss": 0.0005, "step": 110870 }, { "epoch": 1.8738750918939018, "grad_norm": 0.009496191516518593, "learning_rate": 1.2062572260400472e-07, "loss": 0.0004, "step": 110880 }, { "epoch": 1.8740440921727521, "grad_norm": 0.06020539253950119, "learning_rate": 1.2030393447852784e-07, "loss": 0.0006, "step": 110890 }, { "epoch": 1.8742130924516025, "grad_norm": 0.07069530338048935, "learning_rate": 1.199825709102509e-07, "loss": 0.0007, "step": 110900 }, { "epoch": 1.8743820927304529, "grad_norm": 0.1499357521533966, "learning_rate": 1.1966163192713488e-07, "loss": 0.0007, "step": 110910 }, { "epoch": 1.8745510930093034, "grad_norm": 0.0009730269084684551, "learning_rate": 1.1934111755710242e-07, "loss": 0.0012, "step": 110920 }, { "epoch": 1.874720093288154, "grad_norm": 0.005736366380006075, "learning_rate": 1.1902102782804015e-07, "loss": 0.0009, "step": 110930 }, { "epoch": 1.8748890935670044, "grad_norm": 0.003876183880493045, "learning_rate": 1.1870136276779742e-07, "loss": 0.0009, "step": 110940 }, { "epoch": 1.8750580938458548, "grad_norm": 0.01212595496326685, "learning_rate": 1.1838212240418645e-07, "loss": 0.0007, "step": 110950 }, { "epoch": 1.8752270941247053, "grad_norm": 0.0345769077539444, "learning_rate": 1.1806330676498279e-07, "loss": 0.0004, "step": 110960 }, { "epoch": 1.875396094403556, "grad_norm": 0.0129469595849514, "learning_rate": 1.1774491587792536e-07, "loss": 0.0008, "step": 110970 }, { "epoch": 1.8755650946824063, "grad_norm": 0.10325078666210175, "learning_rate": 1.1742694977071479e-07, "loss": 0.0003, "step": 110980 }, { "epoch": 1.8757340949612566, "grad_norm": 0.037547655403614044, "learning_rate": 1.1710940847101615e-07, "loss": 0.0017, "step": 110990 }, { "epoch": 1.875903095240107, "grad_norm": 0.04398932680487633, "learning_rate": 1.167922920064568e-07, "loss": 0.0004, "step": 111000 }, { "epoch": 1.8760720955189576, "grad_norm": 0.018700091168284416, "learning_rate": 1.1647560040462857e-07, "loss": 0.0005, "step": 111010 }, { "epoch": 1.8762410957978082, "grad_norm": 0.048042118549346924, "learning_rate": 1.1615933369308385e-07, "loss": 0.0005, "step": 111020 }, { "epoch": 1.8764100960766585, "grad_norm": 0.002763420110568404, "learning_rate": 1.1584349189934063e-07, "loss": 0.0007, "step": 111030 }, { "epoch": 1.876579096355509, "grad_norm": 0.03974473103880882, "learning_rate": 1.1552807505087694e-07, "loss": 0.0008, "step": 111040 }, { "epoch": 1.8767480966343595, "grad_norm": 0.007572009228169918, "learning_rate": 1.1521308317513691e-07, "loss": 0.0003, "step": 111050 }, { "epoch": 1.87691709691321, "grad_norm": 0.04790041968226433, "learning_rate": 1.1489851629952587e-07, "loss": 0.001, "step": 111060 }, { "epoch": 1.8770860971920604, "grad_norm": 0.0019335891120135784, "learning_rate": 1.1458437445141301e-07, "loss": 0.0003, "step": 111070 }, { "epoch": 1.8772550974709108, "grad_norm": 0.04101487621665001, "learning_rate": 1.1427065765812983e-07, "loss": 0.0008, "step": 111080 }, { "epoch": 1.8774240977497612, "grad_norm": 0.046852122992277145, "learning_rate": 1.1395736594697116e-07, "loss": 0.0005, "step": 111090 }, { "epoch": 1.8775930980286117, "grad_norm": 0.0006627286784350872, "learning_rate": 1.1364449934519572e-07, "loss": 0.0006, "step": 111100 }, { "epoch": 1.8777620983074623, "grad_norm": 0.04942318797111511, "learning_rate": 1.1333205788002289e-07, "loss": 0.0006, "step": 111110 }, { "epoch": 1.8779310985863127, "grad_norm": 0.04318777099251747, "learning_rate": 1.1302004157863811e-07, "loss": 0.0012, "step": 111120 }, { "epoch": 1.878100098865163, "grad_norm": 0.06974602490663528, "learning_rate": 1.1270845046818745e-07, "loss": 0.0015, "step": 111130 }, { "epoch": 1.8782690991440136, "grad_norm": 0.08759629726409912, "learning_rate": 1.1239728457578092e-07, "loss": 0.0005, "step": 111140 }, { "epoch": 1.878438099422864, "grad_norm": 0.018597111105918884, "learning_rate": 1.1208654392849183e-07, "loss": 0.0006, "step": 111150 }, { "epoch": 1.8786070997017146, "grad_norm": 0.04458988830447197, "learning_rate": 1.1177622855335579e-07, "loss": 0.0015, "step": 111160 }, { "epoch": 1.878776099980565, "grad_norm": 0.0010363217443227768, "learning_rate": 1.1146633847737176e-07, "loss": 0.0005, "step": 111170 }, { "epoch": 1.8789451002594153, "grad_norm": 0.016656868159770966, "learning_rate": 1.1115687372750151e-07, "loss": 0.0006, "step": 111180 }, { "epoch": 1.8791141005382659, "grad_norm": 0.034169506281614304, "learning_rate": 1.1084783433067125e-07, "loss": 0.0001, "step": 111190 }, { "epoch": 1.8792831008171165, "grad_norm": 0.00958054419606924, "learning_rate": 1.1053922031376673e-07, "loss": 0.0007, "step": 111200 }, { "epoch": 1.8794521010959668, "grad_norm": 0.03231184184551239, "learning_rate": 1.1023103170364035e-07, "loss": 0.0006, "step": 111210 }, { "epoch": 1.8796211013748172, "grad_norm": 0.04403608292341232, "learning_rate": 1.0992326852710622e-07, "loss": 0.0008, "step": 111220 }, { "epoch": 1.8797901016536678, "grad_norm": 0.05286185070872307, "learning_rate": 1.0961593081094012e-07, "loss": 0.0008, "step": 111230 }, { "epoch": 1.8799591019325181, "grad_norm": 0.014492380432784557, "learning_rate": 1.0930901858188347e-07, "loss": 0.0004, "step": 111240 }, { "epoch": 1.8801281022113687, "grad_norm": 0.0011498809326440096, "learning_rate": 1.0900253186663712e-07, "loss": 0.0003, "step": 111250 }, { "epoch": 1.880297102490219, "grad_norm": 0.010239934548735619, "learning_rate": 1.0869647069186861e-07, "loss": 0.0003, "step": 111260 }, { "epoch": 1.8804661027690694, "grad_norm": 0.08053793013095856, "learning_rate": 1.08390835084205e-07, "loss": 0.0007, "step": 111270 }, { "epoch": 1.88063510304792, "grad_norm": 0.00035482627572491765, "learning_rate": 1.0808562507024056e-07, "loss": 0.0004, "step": 111280 }, { "epoch": 1.8808041033267706, "grad_norm": 0.0689387395977974, "learning_rate": 1.0778084067652739e-07, "loss": 0.0005, "step": 111290 }, { "epoch": 1.880973103605621, "grad_norm": 0.23589535057544708, "learning_rate": 1.0747648192958482e-07, "loss": 0.0003, "step": 111300 }, { "epoch": 1.8811421038844713, "grad_norm": 0.04281485453248024, "learning_rate": 1.0717254885589334e-07, "loss": 0.0004, "step": 111310 }, { "epoch": 1.881311104163322, "grad_norm": 0.11141208559274673, "learning_rate": 1.0686904148189625e-07, "loss": 0.0009, "step": 111320 }, { "epoch": 1.8814801044421723, "grad_norm": 0.017679790034890175, "learning_rate": 1.065659598340002e-07, "loss": 0.0008, "step": 111330 }, { "epoch": 1.8816491047210229, "grad_norm": 0.021234925836324692, "learning_rate": 1.062633039385752e-07, "loss": 0.0004, "step": 111340 }, { "epoch": 1.8818181049998732, "grad_norm": 0.03269995003938675, "learning_rate": 1.0596107382195352e-07, "loss": 0.0005, "step": 111350 }, { "epoch": 1.8819871052787236, "grad_norm": 0.07181331515312195, "learning_rate": 1.0565926951043026e-07, "loss": 0.0004, "step": 111360 }, { "epoch": 1.8821561055575742, "grad_norm": 0.010781611315906048, "learning_rate": 1.0535789103026439e-07, "loss": 0.0003, "step": 111370 }, { "epoch": 1.8823251058364248, "grad_norm": 0.1765391230583191, "learning_rate": 1.0505693840767661e-07, "loss": 0.0007, "step": 111380 }, { "epoch": 1.8824941061152751, "grad_norm": 0.020220760256052017, "learning_rate": 1.0475641166885209e-07, "loss": 0.0007, "step": 111390 }, { "epoch": 1.8826631063941255, "grad_norm": 0.0009305546409450471, "learning_rate": 1.0445631083993768e-07, "loss": 0.0007, "step": 111400 }, { "epoch": 1.8828321066729758, "grad_norm": 0.005653336178511381, "learning_rate": 1.0415663594704362e-07, "loss": 0.0002, "step": 111410 }, { "epoch": 1.8830011069518264, "grad_norm": 0.028162632137537003, "learning_rate": 1.0385738701624349e-07, "loss": 0.0005, "step": 111420 }, { "epoch": 1.883170107230677, "grad_norm": 0.005178904160857201, "learning_rate": 1.0355856407357312e-07, "loss": 0.0002, "step": 111430 }, { "epoch": 1.8833391075095274, "grad_norm": 0.0075320713222026825, "learning_rate": 1.0326016714503062e-07, "loss": 0.001, "step": 111440 }, { "epoch": 1.8835081077883777, "grad_norm": 0.028461139649152756, "learning_rate": 1.0296219625657966e-07, "loss": 0.0009, "step": 111450 }, { "epoch": 1.8836771080672283, "grad_norm": 0.04403156787157059, "learning_rate": 1.0266465143414395e-07, "loss": 0.0006, "step": 111460 }, { "epoch": 1.883846108346079, "grad_norm": 0.0008579157874919474, "learning_rate": 1.0236753270361222e-07, "loss": 0.0004, "step": 111470 }, { "epoch": 1.8840151086249293, "grad_norm": 0.0005617240676656365, "learning_rate": 1.0207084009083379e-07, "loss": 0.0005, "step": 111480 }, { "epoch": 1.8841841089037796, "grad_norm": 0.02151833474636078, "learning_rate": 1.0177457362162414e-07, "loss": 0.0006, "step": 111490 }, { "epoch": 1.88435310918263, "grad_norm": 0.044212453067302704, "learning_rate": 1.0147873332175873e-07, "loss": 0.0003, "step": 111500 }, { "epoch": 1.8845221094614806, "grad_norm": 0.02930149808526039, "learning_rate": 1.0118331921697755e-07, "loss": 0.0005, "step": 111510 }, { "epoch": 1.8846911097403312, "grad_norm": 0.10000759363174438, "learning_rate": 1.0088833133298226e-07, "loss": 0.0008, "step": 111520 }, { "epoch": 1.8848601100191815, "grad_norm": 0.0430021695792675, "learning_rate": 1.005937696954401e-07, "loss": 0.0005, "step": 111530 }, { "epoch": 1.8850291102980319, "grad_norm": 0.0028830496594309807, "learning_rate": 1.0029963432997725e-07, "loss": 0.0004, "step": 111540 }, { "epoch": 1.8851981105768825, "grad_norm": 0.004205740988254547, "learning_rate": 1.0000592526218544e-07, "loss": 0.0006, "step": 111550 }, { "epoch": 1.885367110855733, "grad_norm": 0.051776617765426636, "learning_rate": 9.971264251761981e-08, "loss": 0.0008, "step": 111560 }, { "epoch": 1.8855361111345834, "grad_norm": 0.04432372376322746, "learning_rate": 9.941978612179659e-08, "loss": 0.0005, "step": 111570 }, { "epoch": 1.8857051114134338, "grad_norm": 0.03758419677615166, "learning_rate": 9.912735610019541e-08, "loss": 0.0008, "step": 111580 }, { "epoch": 1.8858741116922841, "grad_norm": 0.016436981037259102, "learning_rate": 9.883535247825982e-08, "loss": 0.0005, "step": 111590 }, { "epoch": 1.8860431119711347, "grad_norm": 0.0603216327726841, "learning_rate": 9.854377528139558e-08, "loss": 0.0009, "step": 111600 }, { "epoch": 1.8862121122499853, "grad_norm": 0.010252782143652439, "learning_rate": 9.825262453497075e-08, "loss": 0.0013, "step": 111610 }, { "epoch": 1.8863811125288357, "grad_norm": 0.13375674188137054, "learning_rate": 9.796190026431729e-08, "loss": 0.0008, "step": 111620 }, { "epoch": 1.886550112807686, "grad_norm": 0.024899881333112717, "learning_rate": 9.767160249472941e-08, "loss": 0.0003, "step": 111630 }, { "epoch": 1.8867191130865366, "grad_norm": 0.00046924164053052664, "learning_rate": 9.73817312514641e-08, "loss": 0.0003, "step": 111640 }, { "epoch": 1.8868881133653872, "grad_norm": 0.04159824922680855, "learning_rate": 9.709228655974235e-08, "loss": 0.002, "step": 111650 }, { "epoch": 1.8870571136442376, "grad_norm": 0.027793768793344498, "learning_rate": 9.680326844474675e-08, "loss": 0.0008, "step": 111660 }, { "epoch": 1.887226113923088, "grad_norm": 0.03554021567106247, "learning_rate": 9.651467693162276e-08, "loss": 0.0004, "step": 111670 }, { "epoch": 1.8873951142019383, "grad_norm": 0.028292205184698105, "learning_rate": 9.622651204548028e-08, "loss": 0.0003, "step": 111680 }, { "epoch": 1.8875641144807889, "grad_norm": 0.03103375807404518, "learning_rate": 9.593877381139039e-08, "loss": 0.0003, "step": 111690 }, { "epoch": 1.8877331147596395, "grad_norm": 0.05996498093008995, "learning_rate": 9.56514622543886e-08, "loss": 0.0007, "step": 111700 }, { "epoch": 1.8879021150384898, "grad_norm": 0.016905006021261215, "learning_rate": 9.536457739947047e-08, "loss": 0.0008, "step": 111710 }, { "epoch": 1.8880711153173402, "grad_norm": 0.004696437623351812, "learning_rate": 9.507811927159882e-08, "loss": 0.0007, "step": 111720 }, { "epoch": 1.8882401155961908, "grad_norm": 0.026309417560696602, "learning_rate": 9.479208789569484e-08, "loss": 0.0004, "step": 111730 }, { "epoch": 1.8884091158750413, "grad_norm": 0.06356547027826309, "learning_rate": 9.45064832966458e-08, "loss": 0.0004, "step": 111740 }, { "epoch": 1.8885781161538917, "grad_norm": 0.012746340595185757, "learning_rate": 9.42213054993002e-08, "loss": 0.0007, "step": 111750 }, { "epoch": 1.888747116432742, "grad_norm": 0.02876432053744793, "learning_rate": 9.393655452847039e-08, "loss": 0.0005, "step": 111760 }, { "epoch": 1.8889161167115924, "grad_norm": 0.007819334976375103, "learning_rate": 9.365223040893046e-08, "loss": 0.0003, "step": 111770 }, { "epoch": 1.889085116990443, "grad_norm": 0.01989988051354885, "learning_rate": 9.336833316541838e-08, "loss": 0.0011, "step": 111780 }, { "epoch": 1.8892541172692936, "grad_norm": 0.010571062564849854, "learning_rate": 9.30848628226344e-08, "loss": 0.0004, "step": 111790 }, { "epoch": 1.889423117548144, "grad_norm": 0.008451041765511036, "learning_rate": 9.280181940524214e-08, "loss": 0.0011, "step": 111800 }, { "epoch": 1.8895921178269943, "grad_norm": 0.001930863130837679, "learning_rate": 9.251920293786742e-08, "loss": 0.0006, "step": 111810 }, { "epoch": 1.889761118105845, "grad_norm": 0.010977152734994888, "learning_rate": 9.223701344509894e-08, "loss": 0.0008, "step": 111820 }, { "epoch": 1.8899301183846955, "grad_norm": 0.017599212005734444, "learning_rate": 9.195525095148983e-08, "loss": 0.0006, "step": 111830 }, { "epoch": 1.8900991186635459, "grad_norm": 0.06310348212718964, "learning_rate": 9.167391548155324e-08, "loss": 0.0008, "step": 111840 }, { "epoch": 1.8902681189423962, "grad_norm": 0.07708850502967834, "learning_rate": 9.139300705976739e-08, "loss": 0.0006, "step": 111850 }, { "epoch": 1.8904371192212466, "grad_norm": 0.00364290876314044, "learning_rate": 9.111252571057272e-08, "loss": 0.0004, "step": 111860 }, { "epoch": 1.8906061195000972, "grad_norm": 0.004606438800692558, "learning_rate": 9.083247145837303e-08, "loss": 0.001, "step": 111870 }, { "epoch": 1.8907751197789477, "grad_norm": 0.18687938153743744, "learning_rate": 9.05528443275333e-08, "loss": 0.0009, "step": 111880 }, { "epoch": 1.890944120057798, "grad_norm": 0.17379017174243927, "learning_rate": 9.027364434238294e-08, "loss": 0.0021, "step": 111890 }, { "epoch": 1.8911131203366485, "grad_norm": 0.00936430599540472, "learning_rate": 8.999487152721365e-08, "loss": 0.0004, "step": 111900 }, { "epoch": 1.891282120615499, "grad_norm": 0.07176022231578827, "learning_rate": 8.971652590628043e-08, "loss": 0.0007, "step": 111910 }, { "epoch": 1.8914511208943496, "grad_norm": 0.2520315945148468, "learning_rate": 8.943860750379952e-08, "loss": 0.0008, "step": 111920 }, { "epoch": 1.8916201211732, "grad_norm": 0.00437377393245697, "learning_rate": 8.916111634395264e-08, "loss": 0.001, "step": 111930 }, { "epoch": 1.8917891214520504, "grad_norm": 0.0005836533382534981, "learning_rate": 8.888405245088217e-08, "loss": 0.0006, "step": 111940 }, { "epoch": 1.8919581217309007, "grad_norm": 0.08991707861423492, "learning_rate": 8.860741584869325e-08, "loss": 0.0008, "step": 111950 }, { "epoch": 1.8921271220097513, "grad_norm": 0.053484927862882614, "learning_rate": 8.833120656145667e-08, "loss": 0.0007, "step": 111960 }, { "epoch": 1.892296122288602, "grad_norm": 0.1418548822402954, "learning_rate": 8.805542461320149e-08, "loss": 0.002, "step": 111970 }, { "epoch": 1.8924651225674523, "grad_norm": 0.028906142339110374, "learning_rate": 8.778007002792411e-08, "loss": 0.0004, "step": 111980 }, { "epoch": 1.8926341228463026, "grad_norm": 0.004131465218961239, "learning_rate": 8.750514282958089e-08, "loss": 0.0005, "step": 111990 }, { "epoch": 1.8928031231251532, "grad_norm": 0.014240585267543793, "learning_rate": 8.723064304209106e-08, "loss": 0.0006, "step": 112000 }, { "epoch": 1.8929721234040038, "grad_norm": 0.0017454311018809676, "learning_rate": 8.695657068933883e-08, "loss": 0.0005, "step": 112010 }, { "epoch": 1.8931411236828541, "grad_norm": 0.055087897926568985, "learning_rate": 8.668292579516902e-08, "loss": 0.0011, "step": 112020 }, { "epoch": 1.8933101239617045, "grad_norm": 0.0882839784026146, "learning_rate": 8.640970838339036e-08, "loss": 0.0005, "step": 112030 }, { "epoch": 1.8934791242405549, "grad_norm": 0.09185419976711273, "learning_rate": 8.613691847777384e-08, "loss": 0.0009, "step": 112040 }, { "epoch": 1.8936481245194055, "grad_norm": 0.04476842284202576, "learning_rate": 8.58645561020538e-08, "loss": 0.0009, "step": 112050 }, { "epoch": 1.893817124798256, "grad_norm": 0.05075672268867493, "learning_rate": 8.559262127992685e-08, "loss": 0.0006, "step": 112060 }, { "epoch": 1.8939861250771064, "grad_norm": 0.037351932376623154, "learning_rate": 8.532111403505238e-08, "loss": 0.0012, "step": 112070 }, { "epoch": 1.8941551253559568, "grad_norm": 0.011177490465342999, "learning_rate": 8.505003439105375e-08, "loss": 0.0005, "step": 112080 }, { "epoch": 1.8943241256348073, "grad_norm": 0.026821009814739227, "learning_rate": 8.477938237151539e-08, "loss": 0.0004, "step": 112090 }, { "epoch": 1.8944931259136577, "grad_norm": 0.05947759002447128, "learning_rate": 8.450915799998516e-08, "loss": 0.0007, "step": 112100 }, { "epoch": 1.8946621261925083, "grad_norm": 0.020872613415122032, "learning_rate": 8.423936129997479e-08, "loss": 0.0004, "step": 112110 }, { "epoch": 1.8948311264713587, "grad_norm": 0.037279363721609116, "learning_rate": 8.396999229495718e-08, "loss": 0.0006, "step": 112120 }, { "epoch": 1.895000126750209, "grad_norm": 0.1426037698984146, "learning_rate": 8.370105100836911e-08, "loss": 0.0007, "step": 112130 }, { "epoch": 1.8951691270290596, "grad_norm": 0.03487572818994522, "learning_rate": 8.343253746360968e-08, "loss": 0.0059, "step": 112140 }, { "epoch": 1.8953381273079102, "grad_norm": 0.0009698563371784985, "learning_rate": 8.31644516840402e-08, "loss": 0.0004, "step": 112150 }, { "epoch": 1.8955071275867605, "grad_norm": 0.01319518405944109, "learning_rate": 8.289679369298642e-08, "loss": 0.0008, "step": 112160 }, { "epoch": 1.895676127865611, "grad_norm": 0.0071914829313755035, "learning_rate": 8.26295635137353e-08, "loss": 0.0006, "step": 112170 }, { "epoch": 1.8958451281444615, "grad_norm": 0.011561156250536442, "learning_rate": 8.236276116953767e-08, "loss": 0.0006, "step": 112180 }, { "epoch": 1.8960141284233119, "grad_norm": 0.004148657899349928, "learning_rate": 8.209638668360609e-08, "loss": 0.0006, "step": 112190 }, { "epoch": 1.8961831287021624, "grad_norm": 0.018528766930103302, "learning_rate": 8.1830440079117e-08, "loss": 0.0003, "step": 112200 }, { "epoch": 1.8963521289810128, "grad_norm": 0.0021219479385763407, "learning_rate": 8.156492137920857e-08, "loss": 0.0006, "step": 112210 }, { "epoch": 1.8965211292598632, "grad_norm": 0.03522593155503273, "learning_rate": 8.129983060698233e-08, "loss": 0.0005, "step": 112220 }, { "epoch": 1.8966901295387137, "grad_norm": 0.01392155047506094, "learning_rate": 8.103516778550202e-08, "loss": 0.0006, "step": 112230 }, { "epoch": 1.8968591298175643, "grad_norm": 0.0011064070276916027, "learning_rate": 8.077093293779592e-08, "loss": 0.0004, "step": 112240 }, { "epoch": 1.8970281300964147, "grad_norm": 0.09907397627830505, "learning_rate": 8.05071260868523e-08, "loss": 0.0006, "step": 112250 }, { "epoch": 1.897197130375265, "grad_norm": 0.17797499895095825, "learning_rate": 8.024374725562445e-08, "loss": 0.0011, "step": 112260 }, { "epoch": 1.8973661306541156, "grad_norm": 0.026572344824671745, "learning_rate": 7.998079646702683e-08, "loss": 0.0003, "step": 112270 }, { "epoch": 1.897535130932966, "grad_norm": 0.001539503806270659, "learning_rate": 7.971827374393837e-08, "loss": 0.0007, "step": 112280 }, { "epoch": 1.8977041312118166, "grad_norm": 0.005540081299841404, "learning_rate": 7.945617910919967e-08, "loss": 0.0007, "step": 112290 }, { "epoch": 1.897873131490667, "grad_norm": 0.028968367725610733, "learning_rate": 7.919451258561361e-08, "loss": 0.0005, "step": 112300 }, { "epoch": 1.8980421317695173, "grad_norm": 0.09642549604177475, "learning_rate": 7.893327419594699e-08, "loss": 0.0006, "step": 112310 }, { "epoch": 1.8982111320483679, "grad_norm": 0.03285757452249527, "learning_rate": 7.867246396292827e-08, "loss": 0.0005, "step": 112320 }, { "epoch": 1.8983801323272185, "grad_norm": 0.03382939100265503, "learning_rate": 7.841208190924988e-08, "loss": 0.0023, "step": 112330 }, { "epoch": 1.8985491326060688, "grad_norm": 0.016068004071712494, "learning_rate": 7.815212805756589e-08, "loss": 0.0007, "step": 112340 }, { "epoch": 1.8987181328849192, "grad_norm": 0.04922647401690483, "learning_rate": 7.789260243049324e-08, "loss": 0.0009, "step": 112350 }, { "epoch": 1.8988871331637696, "grad_norm": 0.04650222882628441, "learning_rate": 7.763350505061273e-08, "loss": 0.0004, "step": 112360 }, { "epoch": 1.8990561334426201, "grad_norm": 0.05765797942876816, "learning_rate": 7.73748359404669e-08, "loss": 0.0007, "step": 112370 }, { "epoch": 1.8992251337214707, "grad_norm": 0.16425466537475586, "learning_rate": 7.711659512256109e-08, "loss": 0.0006, "step": 112380 }, { "epoch": 1.899394134000321, "grad_norm": 0.10192276537418365, "learning_rate": 7.685878261936341e-08, "loss": 0.0006, "step": 112390 }, { "epoch": 1.8995631342791715, "grad_norm": 0.042300738394260406, "learning_rate": 7.660139845330428e-08, "loss": 0.0016, "step": 112400 }, { "epoch": 1.899732134558022, "grad_norm": 0.02796986885368824, "learning_rate": 7.634444264677854e-08, "loss": 0.0008, "step": 112410 }, { "epoch": 1.8999011348368726, "grad_norm": 0.1012326180934906, "learning_rate": 7.60879152221411e-08, "loss": 0.0005, "step": 112420 }, { "epoch": 1.900070135115723, "grad_norm": 0.008573319762945175, "learning_rate": 7.583181620171298e-08, "loss": 0.0003, "step": 112430 }, { "epoch": 1.9002391353945733, "grad_norm": 0.0624210424721241, "learning_rate": 7.557614560777416e-08, "loss": 0.0007, "step": 112440 }, { "epoch": 1.9004081356734237, "grad_norm": 0.073053739964962, "learning_rate": 7.532090346257071e-08, "loss": 0.0007, "step": 112450 }, { "epoch": 1.9005771359522743, "grad_norm": 0.03362878039479256, "learning_rate": 7.506608978830876e-08, "loss": 0.0009, "step": 112460 }, { "epoch": 1.9007461362311249, "grad_norm": 0.032777752727270126, "learning_rate": 7.481170460715947e-08, "loss": 0.0007, "step": 112470 }, { "epoch": 1.9009151365099752, "grad_norm": 0.08562567830085754, "learning_rate": 7.455774794125403e-08, "loss": 0.0007, "step": 112480 }, { "epoch": 1.9010841367888256, "grad_norm": 0.048164382576942444, "learning_rate": 7.430421981268976e-08, "loss": 0.0003, "step": 112490 }, { "epoch": 1.9012531370676762, "grad_norm": 0.010092861019074917, "learning_rate": 7.405112024352346e-08, "loss": 0.0009, "step": 112500 }, { "epoch": 1.9014221373465268, "grad_norm": 0.07001367956399918, "learning_rate": 7.379844925577639e-08, "loss": 0.0012, "step": 112510 }, { "epoch": 1.9015911376253771, "grad_norm": 0.024450233206152916, "learning_rate": 7.35462068714321e-08, "loss": 0.0003, "step": 112520 }, { "epoch": 1.9017601379042275, "grad_norm": 0.014951656572520733, "learning_rate": 7.329439311243747e-08, "loss": 0.0003, "step": 112530 }, { "epoch": 1.9019291381830778, "grad_norm": 0.021235046908259392, "learning_rate": 7.304300800070053e-08, "loss": 0.0009, "step": 112540 }, { "epoch": 1.9020981384619284, "grad_norm": 0.022454794496297836, "learning_rate": 7.279205155809432e-08, "loss": 0.0006, "step": 112550 }, { "epoch": 1.902267138740779, "grad_norm": 0.04021525755524635, "learning_rate": 7.254152380645196e-08, "loss": 0.0006, "step": 112560 }, { "epoch": 1.9024361390196294, "grad_norm": 0.036809828132390976, "learning_rate": 7.229142476757101e-08, "loss": 0.0005, "step": 112570 }, { "epoch": 1.9026051392984797, "grad_norm": 0.022405659779906273, "learning_rate": 7.204175446321182e-08, "loss": 0.0005, "step": 112580 }, { "epoch": 1.9027741395773303, "grad_norm": 0.0018231738358736038, "learning_rate": 7.179251291509593e-08, "loss": 0.0004, "step": 112590 }, { "epoch": 1.902943139856181, "grad_norm": 0.00038132930058054626, "learning_rate": 7.154370014490986e-08, "loss": 0.0009, "step": 112600 }, { "epoch": 1.9031121401350313, "grad_norm": 0.032419078052043915, "learning_rate": 7.129531617430019e-08, "loss": 0.0012, "step": 112610 }, { "epoch": 1.9032811404138816, "grad_norm": 0.05176890268921852, "learning_rate": 7.104736102487852e-08, "loss": 0.0003, "step": 112620 }, { "epoch": 1.903450140692732, "grad_norm": 0.07563209533691406, "learning_rate": 7.079983471821761e-08, "loss": 0.0005, "step": 112630 }, { "epoch": 1.9036191409715826, "grad_norm": 0.009525924921035767, "learning_rate": 7.05527372758541e-08, "loss": 0.0008, "step": 112640 }, { "epoch": 1.9037881412504332, "grad_norm": 0.07802736014127731, "learning_rate": 7.030606871928525e-08, "loss": 0.0007, "step": 112650 }, { "epoch": 1.9039571415292835, "grad_norm": 0.001047362806275487, "learning_rate": 7.005982906997389e-08, "loss": 0.0006, "step": 112660 }, { "epoch": 1.9041261418081339, "grad_norm": 0.010851298458874226, "learning_rate": 6.981401834934343e-08, "loss": 0.0009, "step": 112670 }, { "epoch": 1.9042951420869845, "grad_norm": 0.05091039463877678, "learning_rate": 6.956863657878065e-08, "loss": 0.0003, "step": 112680 }, { "epoch": 1.904464142365835, "grad_norm": 0.026290442794561386, "learning_rate": 6.932368377963517e-08, "loss": 0.0011, "step": 112690 }, { "epoch": 1.9046331426446854, "grad_norm": 0.05944421887397766, "learning_rate": 6.90791599732188e-08, "loss": 0.0005, "step": 112700 }, { "epoch": 1.9048021429235358, "grad_norm": 0.009202356450259686, "learning_rate": 6.883506518080619e-08, "loss": 0.0006, "step": 112710 }, { "epoch": 1.9049711432023861, "grad_norm": 0.021600691601634026, "learning_rate": 6.85913994236348e-08, "loss": 0.0004, "step": 112720 }, { "epoch": 1.9051401434812367, "grad_norm": 0.028977418318390846, "learning_rate": 6.834816272290546e-08, "loss": 0.0006, "step": 112730 }, { "epoch": 1.9053091437600873, "grad_norm": 0.02023027464747429, "learning_rate": 6.810535509977956e-08, "loss": 0.0007, "step": 112740 }, { "epoch": 1.9054781440389377, "grad_norm": 0.022253448143601418, "learning_rate": 6.786297657538355e-08, "loss": 0.0005, "step": 112750 }, { "epoch": 1.905647144317788, "grad_norm": 0.003908324986696243, "learning_rate": 6.76210271708061e-08, "loss": 0.0006, "step": 112760 }, { "epoch": 1.9058161445966386, "grad_norm": 0.0015063261380419135, "learning_rate": 6.73795069070965e-08, "loss": 0.0005, "step": 112770 }, { "epoch": 1.9059851448754892, "grad_norm": 0.017922574654221535, "learning_rate": 6.713841580526903e-08, "loss": 0.0004, "step": 112780 }, { "epoch": 1.9061541451543396, "grad_norm": 0.07381732761859894, "learning_rate": 6.689775388629971e-08, "loss": 0.0008, "step": 112790 }, { "epoch": 1.90632314543319, "grad_norm": 0.003931921906769276, "learning_rate": 6.665752117112733e-08, "loss": 0.0006, "step": 112800 }, { "epoch": 1.9064921457120403, "grad_norm": 0.05270419269800186, "learning_rate": 6.641771768065298e-08, "loss": 0.0005, "step": 112810 }, { "epoch": 1.9066611459908909, "grad_norm": 0.030747007578611374, "learning_rate": 6.617834343574159e-08, "loss": 0.001, "step": 112820 }, { "epoch": 1.9068301462697415, "grad_norm": 0.025559140369296074, "learning_rate": 6.59393984572182e-08, "loss": 0.0005, "step": 112830 }, { "epoch": 1.9069991465485918, "grad_norm": 0.028459573164582253, "learning_rate": 6.570088276587394e-08, "loss": 0.0008, "step": 112840 }, { "epoch": 1.9071681468274422, "grad_norm": 0.00013460649643093348, "learning_rate": 6.546279638246e-08, "loss": 0.0006, "step": 112850 }, { "epoch": 1.9073371471062928, "grad_norm": 0.02334650419652462, "learning_rate": 6.522513932769093e-08, "loss": 0.0003, "step": 112860 }, { "epoch": 1.9075061473851433, "grad_norm": 0.12924088537693024, "learning_rate": 6.49879116222446e-08, "loss": 0.0005, "step": 112870 }, { "epoch": 1.9076751476639937, "grad_norm": 0.016007723286747932, "learning_rate": 6.475111328676009e-08, "loss": 0.0006, "step": 112880 }, { "epoch": 1.907844147942844, "grad_norm": 0.05216017737984657, "learning_rate": 6.45147443418409e-08, "loss": 0.0008, "step": 112890 }, { "epoch": 1.9080131482216944, "grad_norm": 0.07244875282049179, "learning_rate": 6.427880480805226e-08, "loss": 0.0004, "step": 112900 }, { "epoch": 1.908182148500545, "grad_norm": 0.005763023626059294, "learning_rate": 6.404329470592108e-08, "loss": 0.0004, "step": 112910 }, { "epoch": 1.9083511487793956, "grad_norm": 0.030477937310934067, "learning_rate": 6.380821405593929e-08, "loss": 0.0009, "step": 112920 }, { "epoch": 1.908520149058246, "grad_norm": 0.02988293394446373, "learning_rate": 6.357356287855832e-08, "loss": 0.0005, "step": 112930 }, { "epoch": 1.9086891493370963, "grad_norm": 0.04334455728530884, "learning_rate": 6.333934119419516e-08, "loss": 0.0005, "step": 112940 }, { "epoch": 1.908858149615947, "grad_norm": 0.0667472705245018, "learning_rate": 6.310554902322852e-08, "loss": 0.001, "step": 112950 }, { "epoch": 1.9090271498947975, "grad_norm": 0.06299169361591339, "learning_rate": 6.287218638599879e-08, "loss": 0.0004, "step": 112960 }, { "epoch": 1.9091961501736479, "grad_norm": 0.0009392150095663965, "learning_rate": 6.263925330280973e-08, "loss": 0.0003, "step": 112970 }, { "epoch": 1.9093651504524982, "grad_norm": 0.018651852384209633, "learning_rate": 6.240674979392736e-08, "loss": 0.0012, "step": 112980 }, { "epoch": 1.9095341507313486, "grad_norm": 0.015531927347183228, "learning_rate": 6.217467587958159e-08, "loss": 0.0005, "step": 112990 }, { "epoch": 1.9097031510101992, "grad_norm": 0.008683258667588234, "learning_rate": 6.19430315799624e-08, "loss": 0.0007, "step": 113000 }, { "epoch": 1.9098721512890497, "grad_norm": 0.004097287543118, "learning_rate": 6.171181691522587e-08, "loss": 0.0004, "step": 113010 }, { "epoch": 1.9100411515679, "grad_norm": 0.03216277435421944, "learning_rate": 6.148103190548705e-08, "loss": 0.0004, "step": 113020 }, { "epoch": 1.9102101518467505, "grad_norm": 0.020455561578273773, "learning_rate": 6.125067657082706e-08, "loss": 0.0005, "step": 113030 }, { "epoch": 1.910379152125601, "grad_norm": 0.031674377620220184, "learning_rate": 6.102075093128601e-08, "loss": 0.0005, "step": 113040 }, { "epoch": 1.9105481524044514, "grad_norm": 0.018984632566571236, "learning_rate": 6.079125500687012e-08, "loss": 0.0004, "step": 113050 }, { "epoch": 1.910717152683302, "grad_norm": 0.01681148260831833, "learning_rate": 6.05621888175456e-08, "loss": 0.0006, "step": 113060 }, { "epoch": 1.9108861529621524, "grad_norm": 0.03032616339623928, "learning_rate": 6.033355238324324e-08, "loss": 0.0003, "step": 113070 }, { "epoch": 1.9110551532410027, "grad_norm": 0.04922556132078171, "learning_rate": 6.010534572385485e-08, "loss": 0.0005, "step": 113080 }, { "epoch": 1.9112241535198533, "grad_norm": 0.11718804389238358, "learning_rate": 5.987756885923568e-08, "loss": 0.0025, "step": 113090 }, { "epoch": 1.911393153798704, "grad_norm": 0.03614810109138489, "learning_rate": 5.965022180920377e-08, "loss": 0.0008, "step": 113100 }, { "epoch": 1.9115621540775543, "grad_norm": 0.0017513089114800096, "learning_rate": 5.942330459353884e-08, "loss": 0.0007, "step": 113110 }, { "epoch": 1.9117311543564046, "grad_norm": 0.01735696755349636, "learning_rate": 5.9196817231984e-08, "loss": 0.0009, "step": 113120 }, { "epoch": 1.9119001546352552, "grad_norm": 0.032418813556432724, "learning_rate": 5.89707597442446e-08, "loss": 0.0014, "step": 113130 }, { "epoch": 1.9120691549141056, "grad_norm": 0.10654094070196152, "learning_rate": 5.8745132149989335e-08, "loss": 0.0006, "step": 113140 }, { "epoch": 1.9122381551929561, "grad_norm": 0.028356071561574936, "learning_rate": 5.851993446884807e-08, "loss": 0.0006, "step": 113150 }, { "epoch": 1.9124071554718065, "grad_norm": 0.01239448320120573, "learning_rate": 5.829516672041513e-08, "loss": 0.0004, "step": 113160 }, { "epoch": 1.9125761557506569, "grad_norm": 0.07548968493938446, "learning_rate": 5.807082892424543e-08, "loss": 0.0003, "step": 113170 }, { "epoch": 1.9127451560295075, "grad_norm": 0.0353437177836895, "learning_rate": 5.784692109985834e-08, "loss": 0.0004, "step": 113180 }, { "epoch": 1.912914156308358, "grad_norm": 0.026465613394975662, "learning_rate": 5.7623443266733856e-08, "loss": 0.0004, "step": 113190 }, { "epoch": 1.9130831565872084, "grad_norm": 0.0006597606698051095, "learning_rate": 5.7400395444316415e-08, "loss": 0.0006, "step": 113200 }, { "epoch": 1.9132521568660588, "grad_norm": 0.0004718085110653192, "learning_rate": 5.71777776520116e-08, "loss": 0.0004, "step": 113210 }, { "epoch": 1.9134211571449091, "grad_norm": 0.10736609250307083, "learning_rate": 5.695558990918892e-08, "loss": 0.0007, "step": 113220 }, { "epoch": 1.9135901574237597, "grad_norm": 0.01379178836941719, "learning_rate": 5.673383223517959e-08, "loss": 0.0004, "step": 113230 }, { "epoch": 1.9137591577026103, "grad_norm": 0.0035380241461098194, "learning_rate": 5.65125046492776e-08, "loss": 0.0008, "step": 113240 }, { "epoch": 1.9139281579814607, "grad_norm": 0.06314975768327713, "learning_rate": 5.6291607170739226e-08, "loss": 0.0007, "step": 113250 }, { "epoch": 1.914097158260311, "grad_norm": 0.015247541479766369, "learning_rate": 5.6071139818784095e-08, "loss": 0.0005, "step": 113260 }, { "epoch": 1.9142661585391616, "grad_norm": 0.022014690563082695, "learning_rate": 5.5851102612593525e-08, "loss": 0.0009, "step": 113270 }, { "epoch": 1.9144351588180122, "grad_norm": 0.008971292525529861, "learning_rate": 5.56314955713122e-08, "loss": 0.0004, "step": 113280 }, { "epoch": 1.9146041590968625, "grad_norm": 0.09483298659324646, "learning_rate": 5.541231871404651e-08, "loss": 0.0006, "step": 113290 }, { "epoch": 1.914773159375713, "grad_norm": 0.07216836512088776, "learning_rate": 5.519357205986564e-08, "loss": 0.0014, "step": 113300 }, { "epoch": 1.9149421596545633, "grad_norm": 0.04808535799384117, "learning_rate": 5.49752556278027e-08, "loss": 0.0006, "step": 113310 }, { "epoch": 1.9151111599334139, "grad_norm": 0.014555119909346104, "learning_rate": 5.475736943685195e-08, "loss": 0.0023, "step": 113320 }, { "epoch": 1.9152801602122644, "grad_norm": 0.10032723098993301, "learning_rate": 5.4539913505969876e-08, "loss": 0.0006, "step": 113330 }, { "epoch": 1.9154491604911148, "grad_norm": 0.06952448934316635, "learning_rate": 5.432288785407691e-08, "loss": 0.0008, "step": 113340 }, { "epoch": 1.9156181607699652, "grad_norm": 0.04177826642990112, "learning_rate": 5.410629250005517e-08, "loss": 0.0011, "step": 113350 }, { "epoch": 1.9157871610488157, "grad_norm": 0.0009109816746786237, "learning_rate": 5.389012746274902e-08, "loss": 0.0009, "step": 113360 }, { "epoch": 1.9159561613276663, "grad_norm": 0.004139615222811699, "learning_rate": 5.367439276096675e-08, "loss": 0.0005, "step": 113370 }, { "epoch": 1.9161251616065167, "grad_norm": 0.00573534332215786, "learning_rate": 5.3459088413477245e-08, "loss": 0.0006, "step": 113380 }, { "epoch": 1.916294161885367, "grad_norm": 0.021768197417259216, "learning_rate": 5.3244214439014396e-08, "loss": 0.0005, "step": 113390 }, { "epoch": 1.9164631621642174, "grad_norm": 0.00780107406899333, "learning_rate": 5.302977085627159e-08, "loss": 0.0004, "step": 113400 }, { "epoch": 1.916632162443068, "grad_norm": 0.14811588823795319, "learning_rate": 5.281575768390779e-08, "loss": 0.0008, "step": 113410 }, { "epoch": 1.9168011627219186, "grad_norm": 0.032275181263685226, "learning_rate": 5.260217494054254e-08, "loss": 0.0007, "step": 113420 }, { "epoch": 1.916970163000769, "grad_norm": 0.011303196661174297, "learning_rate": 5.238902264475876e-08, "loss": 0.0008, "step": 113430 }, { "epoch": 1.9171391632796193, "grad_norm": 0.00017243089678231627, "learning_rate": 5.217630081510161e-08, "loss": 0.0003, "step": 113440 }, { "epoch": 1.91730816355847, "grad_norm": 0.0003776454250328243, "learning_rate": 5.196400947007962e-08, "loss": 0.0006, "step": 113450 }, { "epoch": 1.9174771638373205, "grad_norm": 0.006594053935259581, "learning_rate": 5.1752148628161917e-08, "loss": 0.0009, "step": 113460 }, { "epoch": 1.9176461641161708, "grad_norm": 0.024989726021885872, "learning_rate": 5.1540718307782624e-08, "loss": 0.0003, "step": 113470 }, { "epoch": 1.9178151643950212, "grad_norm": 0.012359109707176685, "learning_rate": 5.132971852733648e-08, "loss": 0.0003, "step": 113480 }, { "epoch": 1.9179841646738716, "grad_norm": 0.009366139769554138, "learning_rate": 5.1119149305181584e-08, "loss": 0.001, "step": 113490 }, { "epoch": 1.9181531649527221, "grad_norm": 0.025234593078494072, "learning_rate": 5.090901065963827e-08, "loss": 0.0005, "step": 113500 }, { "epoch": 1.9183221652315727, "grad_norm": 0.04369024187326431, "learning_rate": 5.069930260899081e-08, "loss": 0.0007, "step": 113510 }, { "epoch": 1.918491165510423, "grad_norm": 0.011204466223716736, "learning_rate": 5.049002517148349e-08, "loss": 0.0004, "step": 113520 }, { "epoch": 1.9186601657892735, "grad_norm": 0.12482357025146484, "learning_rate": 5.028117836532453e-08, "loss": 0.0007, "step": 113530 }, { "epoch": 1.918829166068124, "grad_norm": 0.04277415573596954, "learning_rate": 5.007276220868551e-08, "loss": 0.0005, "step": 113540 }, { "epoch": 1.9189981663469746, "grad_norm": 0.004969023633748293, "learning_rate": 4.986477671969914e-08, "loss": 0.0004, "step": 113550 }, { "epoch": 1.919167166625825, "grad_norm": 0.0015870180213823915, "learning_rate": 4.965722191646094e-08, "loss": 0.0007, "step": 113560 }, { "epoch": 1.9193361669046753, "grad_norm": 0.036459941416978836, "learning_rate": 4.945009781702925e-08, "loss": 0.0003, "step": 113570 }, { "epoch": 1.9195051671835257, "grad_norm": 0.023583462461829185, "learning_rate": 4.924340443942521e-08, "loss": 0.0006, "step": 113580 }, { "epoch": 1.9196741674623763, "grad_norm": 0.05077134445309639, "learning_rate": 4.903714180163221e-08, "loss": 0.0016, "step": 113590 }, { "epoch": 1.9198431677412269, "grad_norm": 0.03774239867925644, "learning_rate": 4.883130992159535e-08, "loss": 0.0005, "step": 113600 }, { "epoch": 1.9200121680200772, "grad_norm": 0.021962564438581467, "learning_rate": 4.862590881722362e-08, "loss": 0.0006, "step": 113610 }, { "epoch": 1.9201811682989276, "grad_norm": 0.04520101100206375, "learning_rate": 4.8420938506387736e-08, "loss": 0.0005, "step": 113620 }, { "epoch": 1.9203501685777782, "grad_norm": 0.03361218050122261, "learning_rate": 4.821639900692121e-08, "loss": 0.0005, "step": 113630 }, { "epoch": 1.9205191688566288, "grad_norm": 0.005732972640544176, "learning_rate": 4.801229033662036e-08, "loss": 0.0003, "step": 113640 }, { "epoch": 1.9206881691354791, "grad_norm": 0.03359975293278694, "learning_rate": 4.7808612513242644e-08, "loss": 0.0002, "step": 113650 }, { "epoch": 1.9208571694143295, "grad_norm": 0.03723776340484619, "learning_rate": 4.760536555450945e-08, "loss": 0.001, "step": 113660 }, { "epoch": 1.9210261696931799, "grad_norm": 0.007026666309684515, "learning_rate": 4.740254947810441e-08, "loss": 0.0002, "step": 113670 }, { "epoch": 1.9211951699720304, "grad_norm": 0.04161248356103897, "learning_rate": 4.720016430167396e-08, "loss": 0.0006, "step": 113680 }, { "epoch": 1.921364170250881, "grad_norm": 0.002371510025113821, "learning_rate": 4.699821004282568e-08, "loss": 0.0006, "step": 113690 }, { "epoch": 1.9215331705297314, "grad_norm": 0.03056161478161812, "learning_rate": 4.679668671913107e-08, "loss": 0.0011, "step": 113700 }, { "epoch": 1.9217021708085817, "grad_norm": 0.04500853270292282, "learning_rate": 4.659559434812333e-08, "loss": 0.0008, "step": 113710 }, { "epoch": 1.9218711710874323, "grad_norm": 0.025726649910211563, "learning_rate": 4.639493294729902e-08, "loss": 0.0008, "step": 113720 }, { "epoch": 1.922040171366283, "grad_norm": 0.0328289270401001, "learning_rate": 4.61947025341164e-08, "loss": 0.0022, "step": 113730 }, { "epoch": 1.9222091716451333, "grad_norm": 0.04501111060380936, "learning_rate": 4.5994903125995974e-08, "loss": 0.0008, "step": 113740 }, { "epoch": 1.9223781719239836, "grad_norm": 0.12412121891975403, "learning_rate": 4.579553474032161e-08, "loss": 0.0014, "step": 113750 }, { "epoch": 1.922547172202834, "grad_norm": 0.06506326049566269, "learning_rate": 4.559659739444e-08, "loss": 0.0011, "step": 113760 }, { "epoch": 1.9227161724816846, "grad_norm": 0.023001553490757942, "learning_rate": 4.5398091105658956e-08, "loss": 0.0016, "step": 113770 }, { "epoch": 1.9228851727605352, "grad_norm": 0.01615159958600998, "learning_rate": 4.5200015891249115e-08, "loss": 0.0013, "step": 113780 }, { "epoch": 1.9230541730393855, "grad_norm": 0.003368740202859044, "learning_rate": 4.500237176844447e-08, "loss": 0.0008, "step": 113790 }, { "epoch": 1.923223173318236, "grad_norm": 0.012499597854912281, "learning_rate": 4.4805158754441804e-08, "loss": 0.0002, "step": 113800 }, { "epoch": 1.9233921735970865, "grad_norm": 0.019146615639328957, "learning_rate": 4.460837686639796e-08, "loss": 0.0003, "step": 113810 }, { "epoch": 1.923561173875937, "grad_norm": 0.017188021913170815, "learning_rate": 4.441202612143536e-08, "loss": 0.0006, "step": 113820 }, { "epoch": 1.9237301741547874, "grad_norm": 0.0036173483822494745, "learning_rate": 4.421610653663588e-08, "loss": 0.0006, "step": 113830 }, { "epoch": 1.9238991744336378, "grad_norm": 0.005861383862793446, "learning_rate": 4.4020618129047565e-08, "loss": 0.0008, "step": 113840 }, { "epoch": 1.9240681747124881, "grad_norm": 0.16966085135936737, "learning_rate": 4.38255609156768e-08, "loss": 0.0011, "step": 113850 }, { "epoch": 1.9242371749913387, "grad_norm": 0.028564633801579475, "learning_rate": 4.363093491349612e-08, "loss": 0.0003, "step": 113860 }, { "epoch": 1.9244061752701893, "grad_norm": 0.015617894940078259, "learning_rate": 4.3436740139438104e-08, "loss": 0.0004, "step": 113870 }, { "epoch": 1.9245751755490397, "grad_norm": 0.038057055324316025, "learning_rate": 4.324297661039867e-08, "loss": 0.0007, "step": 113880 }, { "epoch": 1.92474417582789, "grad_norm": 0.009012563154101372, "learning_rate": 4.3049644343236554e-08, "loss": 0.0004, "step": 113890 }, { "epoch": 1.9249131761067406, "grad_norm": 0.017938757315278053, "learning_rate": 4.2856743354771636e-08, "loss": 0.0003, "step": 113900 }, { "epoch": 1.925082176385591, "grad_norm": 0.0009190856944769621, "learning_rate": 4.266427366178882e-08, "loss": 0.0007, "step": 113910 }, { "epoch": 1.9252511766644416, "grad_norm": 0.026434002444148064, "learning_rate": 4.2472235281032506e-08, "loss": 0.0003, "step": 113920 }, { "epoch": 1.925420176943292, "grad_norm": 0.049956440925598145, "learning_rate": 4.2280628229212086e-08, "loss": 0.0004, "step": 113930 }, { "epoch": 1.9255891772221423, "grad_norm": 0.046455077826976776, "learning_rate": 4.208945252299701e-08, "loss": 0.0007, "step": 113940 }, { "epoch": 1.9257581775009929, "grad_norm": 0.005489541217684746, "learning_rate": 4.189870817902175e-08, "loss": 0.0004, "step": 113950 }, { "epoch": 1.9259271777798435, "grad_norm": 0.09641050547361374, "learning_rate": 4.170839521388137e-08, "loss": 0.001, "step": 113960 }, { "epoch": 1.9260961780586938, "grad_norm": 0.00044007538235746324, "learning_rate": 4.151851364413373e-08, "loss": 0.0005, "step": 113970 }, { "epoch": 1.9262651783375442, "grad_norm": 0.25061318278312683, "learning_rate": 4.132906348630006e-08, "loss": 0.0005, "step": 113980 }, { "epoch": 1.9264341786163948, "grad_norm": 0.02810671553015709, "learning_rate": 4.114004475686328e-08, "loss": 0.0004, "step": 113990 }, { "epoch": 1.9266031788952451, "grad_norm": 0.04363776743412018, "learning_rate": 4.0951457472268584e-08, "loss": 0.0009, "step": 114000 }, { "epoch": 1.9267721791740957, "grad_norm": 0.04865751415491104, "learning_rate": 4.07633016489245e-08, "loss": 0.0002, "step": 114010 }, { "epoch": 1.926941179452946, "grad_norm": 0.004051418509334326, "learning_rate": 4.057557730320072e-08, "loss": 0.0007, "step": 114020 }, { "epoch": 1.9271101797317964, "grad_norm": 0.06691235303878784, "learning_rate": 4.038828445143139e-08, "loss": 0.0006, "step": 114030 }, { "epoch": 1.927279180010647, "grad_norm": 0.05675894394516945, "learning_rate": 4.020142310991071e-08, "loss": 0.0009, "step": 114040 }, { "epoch": 1.9274481802894976, "grad_norm": 0.07560726255178452, "learning_rate": 4.0014993294896776e-08, "loss": 0.0004, "step": 114050 }, { "epoch": 1.927617180568348, "grad_norm": 0.004082811065018177, "learning_rate": 3.9828995022610506e-08, "loss": 0.0005, "step": 114060 }, { "epoch": 1.9277861808471983, "grad_norm": 0.10598105937242508, "learning_rate": 3.964342830923451e-08, "loss": 0.0006, "step": 114070 }, { "epoch": 1.927955181126049, "grad_norm": 0.0012448799097910523, "learning_rate": 3.9458293170913096e-08, "loss": 0.0005, "step": 114080 }, { "epoch": 1.9281241814048993, "grad_norm": 0.002484311815351248, "learning_rate": 3.9273589623755045e-08, "loss": 0.0003, "step": 114090 }, { "epoch": 1.9282931816837499, "grad_norm": 0.027685141190886497, "learning_rate": 3.908931768382973e-08, "loss": 0.0009, "step": 114100 }, { "epoch": 1.9284621819626002, "grad_norm": 0.024779699742794037, "learning_rate": 3.8905477367169876e-08, "loss": 0.0005, "step": 114110 }, { "epoch": 1.9286311822414506, "grad_norm": 0.06355228275060654, "learning_rate": 3.8722068689770464e-08, "loss": 0.001, "step": 114120 }, { "epoch": 1.9288001825203012, "grad_norm": 0.050809379667043686, "learning_rate": 3.853909166758929e-08, "loss": 0.0005, "step": 114130 }, { "epoch": 1.9289691827991517, "grad_norm": 0.017558986321091652, "learning_rate": 3.835654631654584e-08, "loss": 0.0007, "step": 114140 }, { "epoch": 1.9291381830780021, "grad_norm": 0.040090251713991165, "learning_rate": 3.817443265252296e-08, "loss": 0.0008, "step": 114150 }, { "epoch": 1.9293071833568525, "grad_norm": 0.012433486990630627, "learning_rate": 3.799275069136466e-08, "loss": 0.0007, "step": 114160 }, { "epoch": 1.9294761836357028, "grad_norm": 0.045401155948638916, "learning_rate": 3.781150044887827e-08, "loss": 0.0008, "step": 114170 }, { "epoch": 1.9296451839145534, "grad_norm": 0.03455643728375435, "learning_rate": 3.763068194083452e-08, "loss": 0.0005, "step": 114180 }, { "epoch": 1.929814184193404, "grad_norm": 0.018427403643727303, "learning_rate": 3.7450295182963616e-08, "loss": 0.0005, "step": 114190 }, { "epoch": 1.9299831844722544, "grad_norm": 0.03793042525649071, "learning_rate": 3.7270340190961875e-08, "loss": 0.0005, "step": 114200 }, { "epoch": 1.9301521847511047, "grad_norm": 0.0003596784081310034, "learning_rate": 3.709081698048511e-08, "loss": 0.0008, "step": 114210 }, { "epoch": 1.9303211850299553, "grad_norm": 0.0029309610836207867, "learning_rate": 3.691172556715361e-08, "loss": 0.0004, "step": 114220 }, { "epoch": 1.930490185308806, "grad_norm": 0.07106602191925049, "learning_rate": 3.6733065966548244e-08, "loss": 0.0001, "step": 114230 }, { "epoch": 1.9306591855876563, "grad_norm": 0.09174923598766327, "learning_rate": 3.6554838194214345e-08, "loss": 0.0005, "step": 114240 }, { "epoch": 1.9308281858665066, "grad_norm": 0.051286231726408005, "learning_rate": 3.63770422656573e-08, "loss": 0.0004, "step": 114250 }, { "epoch": 1.930997186145357, "grad_norm": 0.05942637473344803, "learning_rate": 3.619967819634695e-08, "loss": 0.0006, "step": 114260 }, { "epoch": 1.9311661864242076, "grad_norm": 0.01240352913737297, "learning_rate": 3.602274600171485e-08, "loss": 0.0006, "step": 114270 }, { "epoch": 1.9313351867030581, "grad_norm": 0.035853300243616104, "learning_rate": 3.584624569715478e-08, "loss": 0.0003, "step": 114280 }, { "epoch": 1.9315041869819085, "grad_norm": 0.00031042128102853894, "learning_rate": 3.5670177298023356e-08, "loss": 0.0013, "step": 114290 }, { "epoch": 1.9316731872607589, "grad_norm": 0.013527607545256615, "learning_rate": 3.549454081963943e-08, "loss": 0.0013, "step": 114300 }, { "epoch": 1.9318421875396095, "grad_norm": 0.007355086971074343, "learning_rate": 3.5319336277284125e-08, "loss": 0.0012, "step": 114310 }, { "epoch": 1.93201118781846, "grad_norm": 0.09528569877147675, "learning_rate": 3.5144563686200784e-08, "loss": 0.0008, "step": 114320 }, { "epoch": 1.9321801880973104, "grad_norm": 0.012314501218497753, "learning_rate": 3.497022306159559e-08, "loss": 0.0004, "step": 114330 }, { "epoch": 1.9323491883761608, "grad_norm": 0.011436869390308857, "learning_rate": 3.479631441863696e-08, "loss": 0.001, "step": 114340 }, { "epoch": 1.9325181886550111, "grad_norm": 0.05101019889116287, "learning_rate": 3.462283777245612e-08, "loss": 0.0006, "step": 114350 }, { "epoch": 1.9326871889338617, "grad_norm": 0.018834391608834267, "learning_rate": 3.4449793138146e-08, "loss": 0.0005, "step": 114360 }, { "epoch": 1.9328561892127123, "grad_norm": 0.04030166193842888, "learning_rate": 3.4277180530762896e-08, "loss": 0.0007, "step": 114370 }, { "epoch": 1.9330251894915627, "grad_norm": 0.026856768876314163, "learning_rate": 3.410499996532424e-08, "loss": 0.0006, "step": 114380 }, { "epoch": 1.933194189770413, "grad_norm": 0.05108177289366722, "learning_rate": 3.393325145681137e-08, "loss": 0.0006, "step": 114390 }, { "epoch": 1.9333631900492636, "grad_norm": 0.02175753563642502, "learning_rate": 3.3761935020166224e-08, "loss": 0.0003, "step": 114400 }, { "epoch": 1.9335321903281142, "grad_norm": 0.00420082313939929, "learning_rate": 3.3591050670295224e-08, "loss": 0.0005, "step": 114410 }, { "epoch": 1.9337011906069645, "grad_norm": 0.03318898379802704, "learning_rate": 3.342059842206535e-08, "loss": 0.001, "step": 114420 }, { "epoch": 1.933870190885815, "grad_norm": 0.0321153961122036, "learning_rate": 3.325057829030698e-08, "loss": 0.0004, "step": 114430 }, { "epoch": 1.9340391911646653, "grad_norm": 0.0018499215366318822, "learning_rate": 3.308099028981271e-08, "loss": 0.0002, "step": 114440 }, { "epoch": 1.9342081914435159, "grad_norm": 0.009329008869826794, "learning_rate": 3.291183443533741e-08, "loss": 0.0004, "step": 114450 }, { "epoch": 1.9343771917223664, "grad_norm": 0.2155163586139679, "learning_rate": 3.27431107415993e-08, "loss": 0.0008, "step": 114460 }, { "epoch": 1.9345461920012168, "grad_norm": 0.03188404440879822, "learning_rate": 3.257481922327665e-08, "loss": 0.0006, "step": 114470 }, { "epoch": 1.9347151922800672, "grad_norm": 0.025516286492347717, "learning_rate": 3.240695989501275e-08, "loss": 0.0005, "step": 114480 }, { "epoch": 1.9348841925589177, "grad_norm": 0.019352493807673454, "learning_rate": 3.223953277141201e-08, "loss": 0.001, "step": 114490 }, { "epoch": 1.9350531928377683, "grad_norm": 0.02275828830897808, "learning_rate": 3.207253786704112e-08, "loss": 0.0006, "step": 114500 }, { "epoch": 1.9352221931166187, "grad_norm": 0.04524204879999161, "learning_rate": 3.190597519643013e-08, "loss": 0.0009, "step": 114510 }, { "epoch": 1.935391193395469, "grad_norm": 0.013399074785411358, "learning_rate": 3.173984477406966e-08, "loss": 0.0005, "step": 114520 }, { "epoch": 1.9355601936743194, "grad_norm": 0.09556085616350174, "learning_rate": 3.1574146614414826e-08, "loss": 0.0012, "step": 114530 }, { "epoch": 1.93572919395317, "grad_norm": 0.19229021668434143, "learning_rate": 3.140888073188131e-08, "loss": 0.0008, "step": 114540 }, { "epoch": 1.9358981942320206, "grad_norm": 0.09433633834123611, "learning_rate": 3.1244047140849274e-08, "loss": 0.0006, "step": 114550 }, { "epoch": 1.936067194510871, "grad_norm": 0.044848211109638214, "learning_rate": 3.1079645855658926e-08, "loss": 0.0006, "step": 114560 }, { "epoch": 1.9362361947897213, "grad_norm": 0.028230784460902214, "learning_rate": 3.091567689061437e-08, "loss": 0.0008, "step": 114570 }, { "epoch": 1.936405195068572, "grad_norm": 0.03477562591433525, "learning_rate": 3.075214025998141e-08, "loss": 0.0002, "step": 114580 }, { "epoch": 1.9365741953474225, "grad_norm": 0.016347402706742287, "learning_rate": 3.0589035977989235e-08, "loss": 0.0006, "step": 114590 }, { "epoch": 1.9367431956262728, "grad_norm": 0.056321483105421066, "learning_rate": 3.042636405882815e-08, "loss": 0.0006, "step": 114600 }, { "epoch": 1.9369121959051232, "grad_norm": 0.07306868582963943, "learning_rate": 3.0264124516651285e-08, "loss": 0.0008, "step": 114610 }, { "epoch": 1.9370811961839736, "grad_norm": 0.1208910420536995, "learning_rate": 3.010231736557512e-08, "loss": 0.0009, "step": 114620 }, { "epoch": 1.9372501964628241, "grad_norm": 0.01394712645560503, "learning_rate": 2.994094261967617e-08, "loss": 0.0009, "step": 114630 }, { "epoch": 1.9374191967416747, "grad_norm": 0.0054717701859772205, "learning_rate": 2.978000029299599e-08, "loss": 0.0006, "step": 114640 }, { "epoch": 1.937588197020525, "grad_norm": 0.018918504938483238, "learning_rate": 2.961949039953782e-08, "loss": 0.0003, "step": 114650 }, { "epoch": 1.9377571972993755, "grad_norm": 0.04983522742986679, "learning_rate": 2.9459412953264933e-08, "loss": 0.0004, "step": 114660 }, { "epoch": 1.937926197578226, "grad_norm": 0.042717378586530685, "learning_rate": 2.929976796810674e-08, "loss": 0.0005, "step": 114670 }, { "epoch": 1.9380951978570766, "grad_norm": 0.00874321162700653, "learning_rate": 2.9140555457952135e-08, "loss": 0.0002, "step": 114680 }, { "epoch": 1.938264198135927, "grad_norm": 0.021173780784010887, "learning_rate": 2.898177543665337e-08, "loss": 0.0005, "step": 114690 }, { "epoch": 1.9384331984147773, "grad_norm": 0.0008916796068660915, "learning_rate": 2.8823427918025505e-08, "loss": 0.0006, "step": 114700 }, { "epoch": 1.9386021986936277, "grad_norm": 0.06250502914190292, "learning_rate": 2.8665512915845295e-08, "loss": 0.0008, "step": 114710 }, { "epoch": 1.9387711989724783, "grad_norm": 0.00023853992752265185, "learning_rate": 2.8508030443851752e-08, "loss": 0.0002, "step": 114720 }, { "epoch": 1.9389401992513289, "grad_norm": 0.016771283000707626, "learning_rate": 2.835098051574725e-08, "loss": 0.0007, "step": 114730 }, { "epoch": 1.9391091995301792, "grad_norm": 0.050640370696783066, "learning_rate": 2.8194363145195857e-08, "loss": 0.001, "step": 114740 }, { "epoch": 1.9392781998090296, "grad_norm": 0.03314594551920891, "learning_rate": 2.8038178345823895e-08, "loss": 0.0006, "step": 114750 }, { "epoch": 1.9394472000878802, "grad_norm": 0.011672952212393284, "learning_rate": 2.7882426131219943e-08, "loss": 0.0003, "step": 114760 }, { "epoch": 1.9396162003667308, "grad_norm": 0.05314328148961067, "learning_rate": 2.7727106514935375e-08, "loss": 0.0005, "step": 114770 }, { "epoch": 1.9397852006455811, "grad_norm": 0.00885872170329094, "learning_rate": 2.7572219510483832e-08, "loss": 0.0007, "step": 114780 }, { "epoch": 1.9399542009244315, "grad_norm": 0.07471611350774765, "learning_rate": 2.7417765131340647e-08, "loss": 0.0006, "step": 114790 }, { "epoch": 1.9401232012032819, "grad_norm": 0.04525149613618851, "learning_rate": 2.7263743390945062e-08, "loss": 0.0006, "step": 114800 }, { "epoch": 1.9402922014821324, "grad_norm": 0.007435646373778582, "learning_rate": 2.711015430269748e-08, "loss": 0.0002, "step": 114810 }, { "epoch": 1.940461201760983, "grad_norm": 0.06884460896253586, "learning_rate": 2.695699787995998e-08, "loss": 0.0005, "step": 114820 }, { "epoch": 1.9406302020398334, "grad_norm": 0.00087527692085132, "learning_rate": 2.6804274136059128e-08, "loss": 0.0004, "step": 114830 }, { "epoch": 1.9407992023186837, "grad_norm": 0.02839229255914688, "learning_rate": 2.6651983084282075e-08, "loss": 0.0005, "step": 114840 }, { "epoch": 1.9409682025975343, "grad_norm": 0.023673107847571373, "learning_rate": 2.6500124737878775e-08, "loss": 0.0004, "step": 114850 }, { "epoch": 1.9411372028763847, "grad_norm": 0.0012992864940315485, "learning_rate": 2.6348699110061992e-08, "loss": 0.0008, "step": 114860 }, { "epoch": 1.9413062031552353, "grad_norm": 0.007297023665159941, "learning_rate": 2.6197706214006192e-08, "loss": 0.0005, "step": 114870 }, { "epoch": 1.9414752034340856, "grad_norm": 0.05078316107392311, "learning_rate": 2.6047146062848083e-08, "loss": 0.0005, "step": 114880 }, { "epoch": 1.941644203712936, "grad_norm": 0.013788257725536823, "learning_rate": 2.5897018669688302e-08, "loss": 0.0006, "step": 114890 }, { "epoch": 1.9418132039917866, "grad_norm": 0.015619263984262943, "learning_rate": 2.5747324047587508e-08, "loss": 0.0007, "step": 114900 }, { "epoch": 1.9419822042706372, "grad_norm": 0.005703152623027563, "learning_rate": 2.559806220957084e-08, "loss": 0.0006, "step": 114910 }, { "epoch": 1.9421512045494875, "grad_norm": 0.03437591344118118, "learning_rate": 2.544923316862402e-08, "loss": 0.0009, "step": 114920 }, { "epoch": 1.942320204828338, "grad_norm": 0.013513635843992233, "learning_rate": 2.5300836937696138e-08, "loss": 0.0002, "step": 114930 }, { "epoch": 1.9424892051071885, "grad_norm": 0.005370032507926226, "learning_rate": 2.5152873529698528e-08, "loss": 0.0007, "step": 114940 }, { "epoch": 1.9426582053860388, "grad_norm": 0.010226094163954258, "learning_rate": 2.5005342957504785e-08, "loss": 0.0002, "step": 114950 }, { "epoch": 1.9428272056648894, "grad_norm": 0.02545175701379776, "learning_rate": 2.4858245233950194e-08, "loss": 0.0004, "step": 114960 }, { "epoch": 1.9429962059437398, "grad_norm": 0.07027272880077362, "learning_rate": 2.4711580371833966e-08, "loss": 0.0003, "step": 114970 }, { "epoch": 1.9431652062225901, "grad_norm": 0.014199493452906609, "learning_rate": 2.456534838391533e-08, "loss": 0.0004, "step": 114980 }, { "epoch": 1.9433342065014407, "grad_norm": 0.02236546389758587, "learning_rate": 2.4419549282918565e-08, "loss": 0.0007, "step": 114990 }, { "epoch": 1.9435032067802913, "grad_norm": 0.022393174469470978, "learning_rate": 2.4274183081527403e-08, "loss": 0.0004, "step": 115000 }, { "epoch": 1.9436722070591417, "grad_norm": 0.03282710537314415, "learning_rate": 2.4129249792391173e-08, "loss": 0.0006, "step": 115010 }, { "epoch": 1.943841207337992, "grad_norm": 0.02575145661830902, "learning_rate": 2.3984749428118124e-08, "loss": 0.0003, "step": 115020 }, { "epoch": 1.9440102076168426, "grad_norm": 0.004959126468747854, "learning_rate": 2.3840682001281534e-08, "loss": 0.0003, "step": 115030 }, { "epoch": 1.944179207895693, "grad_norm": 0.10797479748725891, "learning_rate": 2.3697047524415818e-08, "loss": 0.0005, "step": 115040 }, { "epoch": 1.9443482081745436, "grad_norm": 0.03880942985415459, "learning_rate": 2.3553846010017644e-08, "loss": 0.0005, "step": 115050 }, { "epoch": 1.944517208453394, "grad_norm": 0.01588328368961811, "learning_rate": 2.3411077470546494e-08, "loss": 0.0017, "step": 115060 }, { "epoch": 1.9446862087322443, "grad_norm": 0.0011270693503320217, "learning_rate": 2.3268741918423543e-08, "loss": 0.0002, "step": 115070 }, { "epoch": 1.9448552090110949, "grad_norm": 0.07981029152870178, "learning_rate": 2.3126839366032772e-08, "loss": 0.0006, "step": 115080 }, { "epoch": 1.9450242092899455, "grad_norm": 0.0014120024861767888, "learning_rate": 2.2985369825720416e-08, "loss": 0.0005, "step": 115090 }, { "epoch": 1.9451932095687958, "grad_norm": 0.04195106029510498, "learning_rate": 2.2844333309794965e-08, "loss": 0.0013, "step": 115100 }, { "epoch": 1.9453622098476462, "grad_norm": 0.017905879765748978, "learning_rate": 2.270372983052771e-08, "loss": 0.0006, "step": 115110 }, { "epoch": 1.9455312101264965, "grad_norm": 0.015927642583847046, "learning_rate": 2.2563559400151647e-08, "loss": 0.0007, "step": 115120 }, { "epoch": 1.9457002104053471, "grad_norm": 0.000682278536260128, "learning_rate": 2.2423822030861462e-08, "loss": 0.0006, "step": 115130 }, { "epoch": 1.9458692106841977, "grad_norm": 0.03333627060055733, "learning_rate": 2.2284517734816324e-08, "loss": 0.0004, "step": 115140 }, { "epoch": 1.946038210963048, "grad_norm": 0.013326448388397694, "learning_rate": 2.2145646524135423e-08, "loss": 0.0011, "step": 115150 }, { "epoch": 1.9462072112418984, "grad_norm": 0.009179934859275818, "learning_rate": 2.200720841090187e-08, "loss": 0.001, "step": 115160 }, { "epoch": 1.946376211520749, "grad_norm": 0.018448440358042717, "learning_rate": 2.1869203407159924e-08, "loss": 0.0007, "step": 115170 }, { "epoch": 1.9465452117995996, "grad_norm": 0.008957796730101109, "learning_rate": 2.173163152491664e-08, "loss": 0.0008, "step": 115180 }, { "epoch": 1.94671421207845, "grad_norm": 0.06361734122037888, "learning_rate": 2.1594492776141894e-08, "loss": 0.0005, "step": 115190 }, { "epoch": 1.9468832123573003, "grad_norm": 0.00624101934954524, "learning_rate": 2.1457787172767253e-08, "loss": 0.0008, "step": 115200 }, { "epoch": 1.9470522126361507, "grad_norm": 0.01926502026617527, "learning_rate": 2.132151472668653e-08, "loss": 0.0003, "step": 115210 }, { "epoch": 1.9472212129150013, "grad_norm": 0.06001385673880577, "learning_rate": 2.118567544975636e-08, "loss": 0.0005, "step": 115220 }, { "epoch": 1.9473902131938519, "grad_norm": 0.010368529707193375, "learning_rate": 2.1050269353795616e-08, "loss": 0.0005, "step": 115230 }, { "epoch": 1.9475592134727022, "grad_norm": 0.06347133964300156, "learning_rate": 2.0915296450584878e-08, "loss": 0.0005, "step": 115240 }, { "epoch": 1.9477282137515526, "grad_norm": 0.07419361174106598, "learning_rate": 2.0780756751867526e-08, "loss": 0.0005, "step": 115250 }, { "epoch": 1.9478972140304032, "grad_norm": 0.07868395000696182, "learning_rate": 2.06466502693492e-08, "loss": 0.001, "step": 115260 }, { "epoch": 1.9480662143092538, "grad_norm": 0.11085677891969681, "learning_rate": 2.0512977014697788e-08, "loss": 0.001, "step": 115270 }, { "epoch": 1.9482352145881041, "grad_norm": 0.034024741500616074, "learning_rate": 2.0379736999543433e-08, "loss": 0.0003, "step": 115280 }, { "epoch": 1.9484042148669545, "grad_norm": 0.03799097239971161, "learning_rate": 2.0246930235478524e-08, "loss": 0.0004, "step": 115290 }, { "epoch": 1.9485732151458048, "grad_norm": 0.018938124179840088, "learning_rate": 2.0114556734058822e-08, "loss": 0.0007, "step": 115300 }, { "epoch": 1.9487422154246554, "grad_norm": 0.09261860698461533, "learning_rate": 1.9982616506800668e-08, "loss": 0.001, "step": 115310 }, { "epoch": 1.948911215703506, "grad_norm": 0.006570580881088972, "learning_rate": 1.9851109565183212e-08, "loss": 0.0004, "step": 115320 }, { "epoch": 1.9490802159823564, "grad_norm": 0.010035947896540165, "learning_rate": 1.9720035920648973e-08, "loss": 0.0009, "step": 115330 }, { "epoch": 1.9492492162612067, "grad_norm": 0.016145925968885422, "learning_rate": 1.95893955846016e-08, "loss": 0.0005, "step": 115340 }, { "epoch": 1.9494182165400573, "grad_norm": 0.05869593098759651, "learning_rate": 1.9459188568407007e-08, "loss": 0.0004, "step": 115350 }, { "epoch": 1.949587216818908, "grad_norm": 0.040348123759031296, "learning_rate": 1.932941488339446e-08, "loss": 0.0013, "step": 115360 }, { "epoch": 1.9497562170977583, "grad_norm": 0.026913391426205635, "learning_rate": 1.920007454085493e-08, "loss": 0.0005, "step": 115370 }, { "epoch": 1.9499252173766086, "grad_norm": 0.002638952573761344, "learning_rate": 1.907116755204108e-08, "loss": 0.0009, "step": 115380 }, { "epoch": 1.950094217655459, "grad_norm": 0.00859399139881134, "learning_rate": 1.8942693928169496e-08, "loss": 0.0005, "step": 115390 }, { "epoch": 1.9502632179343096, "grad_norm": 0.09759102761745453, "learning_rate": 1.8814653680416794e-08, "loss": 0.0007, "step": 115400 }, { "epoch": 1.9504322182131602, "grad_norm": 0.0072417533956468105, "learning_rate": 1.8687046819923504e-08, "loss": 0.0002, "step": 115410 }, { "epoch": 1.9506012184920105, "grad_norm": 0.0020835737232118845, "learning_rate": 1.8559873357792413e-08, "loss": 0.0008, "step": 115420 }, { "epoch": 1.9507702187708609, "grad_norm": 0.0367913618683815, "learning_rate": 1.8433133305088003e-08, "loss": 0.0027, "step": 115430 }, { "epoch": 1.9509392190497115, "grad_norm": 0.014195457100868225, "learning_rate": 1.830682667283701e-08, "loss": 0.0008, "step": 115440 }, { "epoch": 1.951108219328562, "grad_norm": 0.003432026132941246, "learning_rate": 1.818095347202953e-08, "loss": 0.0005, "step": 115450 }, { "epoch": 1.9512772196074124, "grad_norm": 0.010386678390204906, "learning_rate": 1.8055513713615693e-08, "loss": 0.0004, "step": 115460 }, { "epoch": 1.9514462198862628, "grad_norm": 0.05468853563070297, "learning_rate": 1.793050740851121e-08, "loss": 0.0007, "step": 115470 }, { "epoch": 1.9516152201651131, "grad_norm": 0.014295250177383423, "learning_rate": 1.7805934567590723e-08, "loss": 0.0006, "step": 115480 }, { "epoch": 1.9517842204439637, "grad_norm": 0.07433163374662399, "learning_rate": 1.768179520169333e-08, "loss": 0.0009, "step": 115490 }, { "epoch": 1.9519532207228143, "grad_norm": 0.014535046182572842, "learning_rate": 1.7558089321619286e-08, "loss": 0.0003, "step": 115500 }, { "epoch": 1.9521222210016647, "grad_norm": 0.05266273766756058, "learning_rate": 1.7434816938132205e-08, "loss": 0.0007, "step": 115510 }, { "epoch": 1.952291221280515, "grad_norm": 0.023370753973722458, "learning_rate": 1.7311978061957392e-08, "loss": 0.0011, "step": 115520 }, { "epoch": 1.9524602215593656, "grad_norm": 0.06951917707920074, "learning_rate": 1.7189572703781855e-08, "loss": 0.0007, "step": 115530 }, { "epoch": 1.9526292218382162, "grad_norm": 0.025955677032470703, "learning_rate": 1.7067600874255963e-08, "loss": 0.0005, "step": 115540 }, { "epoch": 1.9527982221170666, "grad_norm": 0.007542863488197327, "learning_rate": 1.694606258399123e-08, "loss": 0.0001, "step": 115550 }, { "epoch": 1.952967222395917, "grad_norm": 0.0012686381815001369, "learning_rate": 1.6824957843563083e-08, "loss": 0.0003, "step": 115560 }, { "epoch": 1.9531362226747673, "grad_norm": 0.0691961944103241, "learning_rate": 1.670428666350754e-08, "loss": 0.0011, "step": 115570 }, { "epoch": 1.9533052229536179, "grad_norm": 0.01200967188924551, "learning_rate": 1.6584049054323426e-08, "loss": 0.0003, "step": 115580 }, { "epoch": 1.9534742232324684, "grad_norm": 0.14624682068824768, "learning_rate": 1.646424502647237e-08, "loss": 0.0005, "step": 115590 }, { "epoch": 1.9536432235113188, "grad_norm": 0.007319706957787275, "learning_rate": 1.63448745903777e-08, "loss": 0.0005, "step": 115600 }, { "epoch": 1.9538122237901692, "grad_norm": 0.021409472450613976, "learning_rate": 1.6225937756425005e-08, "loss": 0.0008, "step": 115610 }, { "epoch": 1.9539812240690198, "grad_norm": 0.0024789045564830303, "learning_rate": 1.6107434534963218e-08, "loss": 0.0004, "step": 115620 }, { "epoch": 1.9541502243478703, "grad_norm": 0.025097843259572983, "learning_rate": 1.5989364936301877e-08, "loss": 0.0007, "step": 115630 }, { "epoch": 1.9543192246267207, "grad_norm": 0.0783858373761177, "learning_rate": 1.5871728970713873e-08, "loss": 0.0009, "step": 115640 }, { "epoch": 1.954488224905571, "grad_norm": 0.01838279701769352, "learning_rate": 1.5754526648434355e-08, "loss": 0.0002, "step": 115650 }, { "epoch": 1.9546572251844214, "grad_norm": 0.02446633391082287, "learning_rate": 1.563775797966016e-08, "loss": 0.0007, "step": 115660 }, { "epoch": 1.954826225463272, "grad_norm": 0.03329815715551376, "learning_rate": 1.5521422974550392e-08, "loss": 0.001, "step": 115670 }, { "epoch": 1.9549952257421226, "grad_norm": 0.01897369883954525, "learning_rate": 1.5405521643227505e-08, "loss": 0.001, "step": 115680 }, { "epoch": 1.955164226020973, "grad_norm": 0.011578774079680443, "learning_rate": 1.52900539957751e-08, "loss": 0.0009, "step": 115690 }, { "epoch": 1.9553332262998233, "grad_norm": 0.04486700892448425, "learning_rate": 1.5175020042239586e-08, "loss": 0.0007, "step": 115700 }, { "epoch": 1.955502226578674, "grad_norm": 0.012665933929383755, "learning_rate": 1.506041979262962e-08, "loss": 0.0006, "step": 115710 }, { "epoch": 1.9556712268575245, "grad_norm": 0.04158971831202507, "learning_rate": 1.4946253256915567e-08, "loss": 0.0005, "step": 115720 }, { "epoch": 1.9558402271363748, "grad_norm": 0.01785312034189701, "learning_rate": 1.4832520445030029e-08, "loss": 0.0005, "step": 115730 }, { "epoch": 1.9560092274152252, "grad_norm": 0.02986268885433674, "learning_rate": 1.4719221366869541e-08, "loss": 0.0005, "step": 115740 }, { "epoch": 1.9561782276940756, "grad_norm": 0.0688711553812027, "learning_rate": 1.4606356032290659e-08, "loss": 0.0005, "step": 115750 }, { "epoch": 1.9563472279729262, "grad_norm": 0.028957679867744446, "learning_rate": 1.449392445111386e-08, "loss": 0.0005, "step": 115760 }, { "epoch": 1.9565162282517767, "grad_norm": 0.004538863431662321, "learning_rate": 1.4381926633120769e-08, "loss": 0.0003, "step": 115770 }, { "epoch": 1.956685228530627, "grad_norm": 0.010118100792169571, "learning_rate": 1.4270362588055808e-08, "loss": 0.0006, "step": 115780 }, { "epoch": 1.9568542288094775, "grad_norm": 0.01838894560933113, "learning_rate": 1.4159232325626215e-08, "loss": 0.0018, "step": 115790 }, { "epoch": 1.957023229088328, "grad_norm": 0.03047986514866352, "learning_rate": 1.4048535855500366e-08, "loss": 0.0008, "step": 115800 }, { "epoch": 1.9571922293671784, "grad_norm": 0.02254408970475197, "learning_rate": 1.3938273187308892e-08, "loss": 0.0002, "step": 115810 }, { "epoch": 1.957361229646029, "grad_norm": 0.09422022104263306, "learning_rate": 1.3828444330645785e-08, "loss": 0.0008, "step": 115820 }, { "epoch": 1.9575302299248794, "grad_norm": 0.09769272804260254, "learning_rate": 1.3719049295066732e-08, "loss": 0.0008, "step": 115830 }, { "epoch": 1.9576992302037297, "grad_norm": 0.05070381611585617, "learning_rate": 1.3610088090089679e-08, "loss": 0.0005, "step": 115840 }, { "epoch": 1.9578682304825803, "grad_norm": 0.035554975271224976, "learning_rate": 1.3501560725194263e-08, "loss": 0.0005, "step": 115850 }, { "epoch": 1.9580372307614309, "grad_norm": 0.030171066522598267, "learning_rate": 1.339346720982293e-08, "loss": 0.0008, "step": 115860 }, { "epoch": 1.9582062310402812, "grad_norm": 0.025329411029815674, "learning_rate": 1.3285807553380937e-08, "loss": 0.0003, "step": 115870 }, { "epoch": 1.9583752313191316, "grad_norm": 0.022418124601244926, "learning_rate": 1.3178581765235232e-08, "loss": 0.001, "step": 115880 }, { "epoch": 1.9585442315979822, "grad_norm": 0.07500436156988144, "learning_rate": 1.3071789854713912e-08, "loss": 0.001, "step": 115890 }, { "epoch": 1.9587132318768326, "grad_norm": 0.046022024005651474, "learning_rate": 1.2965431831109542e-08, "loss": 0.0003, "step": 115900 }, { "epoch": 1.9588822321556831, "grad_norm": 0.011159485206007957, "learning_rate": 1.2859507703675279e-08, "loss": 0.0005, "step": 115910 }, { "epoch": 1.9590512324345335, "grad_norm": 0.03546522185206413, "learning_rate": 1.275401748162708e-08, "loss": 0.0003, "step": 115920 }, { "epoch": 1.9592202327133839, "grad_norm": 0.028044404461979866, "learning_rate": 1.264896117414316e-08, "loss": 0.0008, "step": 115930 }, { "epoch": 1.9593892329922344, "grad_norm": 0.010457886382937431, "learning_rate": 1.2544338790363986e-08, "loss": 0.0003, "step": 115940 }, { "epoch": 1.959558233271085, "grad_norm": 0.13008491694927216, "learning_rate": 1.2440150339392276e-08, "loss": 0.0015, "step": 115950 }, { "epoch": 1.9597272335499354, "grad_norm": 0.025522135198116302, "learning_rate": 1.2336395830292447e-08, "loss": 0.0008, "step": 115960 }, { "epoch": 1.9598962338287857, "grad_norm": 0.019671659916639328, "learning_rate": 1.223307527209172e-08, "loss": 0.0003, "step": 115970 }, { "epoch": 1.9600652341076361, "grad_norm": 0.2073049694299698, "learning_rate": 1.213018867378013e-08, "loss": 0.0009, "step": 115980 }, { "epoch": 1.9602342343864867, "grad_norm": 0.01109260879456997, "learning_rate": 1.2027736044308846e-08, "loss": 0.0006, "step": 115990 }, { "epoch": 1.9604032346653373, "grad_norm": 0.03754560649394989, "learning_rate": 1.1925717392591297e-08, "loss": 0.0006, "step": 116000 }, { "epoch": 1.9605722349441876, "grad_norm": 0.03922279179096222, "learning_rate": 1.1824132727504822e-08, "loss": 0.0017, "step": 116010 }, { "epoch": 1.960741235223038, "grad_norm": 0.0020950832404196262, "learning_rate": 1.17229820578868e-08, "loss": 0.0003, "step": 116020 }, { "epoch": 1.9609102355018886, "grad_norm": 0.02770181931555271, "learning_rate": 1.1622265392537967e-08, "loss": 0.0004, "step": 116030 }, { "epoch": 1.9610792357807392, "grad_norm": 0.0313149057328701, "learning_rate": 1.1521982740221316e-08, "loss": 0.0008, "step": 116040 }, { "epoch": 1.9612482360595895, "grad_norm": 0.08879216015338898, "learning_rate": 1.1422134109662642e-08, "loss": 0.0007, "step": 116050 }, { "epoch": 1.96141723633844, "grad_norm": 0.029225923120975494, "learning_rate": 1.1322719509547775e-08, "loss": 0.0007, "step": 116060 }, { "epoch": 1.9615862366172903, "grad_norm": 0.008420857600867748, "learning_rate": 1.1223738948527018e-08, "loss": 0.0006, "step": 116070 }, { "epoch": 1.9617552368961408, "grad_norm": 0.01581859588623047, "learning_rate": 1.1125192435212373e-08, "loss": 0.0001, "step": 116080 }, { "epoch": 1.9619242371749914, "grad_norm": 0.07278886437416077, "learning_rate": 1.1027079978177535e-08, "loss": 0.0007, "step": 116090 }, { "epoch": 1.9620932374538418, "grad_norm": 0.00021185188961680979, "learning_rate": 1.0929401585958454e-08, "loss": 0.0003, "step": 116100 }, { "epoch": 1.9622622377326921, "grad_norm": 0.015291017480194569, "learning_rate": 1.0832157267054999e-08, "loss": 0.001, "step": 116110 }, { "epoch": 1.9624312380115427, "grad_norm": 0.039209552109241486, "learning_rate": 1.0735347029925958e-08, "loss": 0.0003, "step": 116120 }, { "epoch": 1.9626002382903933, "grad_norm": 0.069135382771492, "learning_rate": 1.0638970882995704e-08, "loss": 0.0008, "step": 116130 }, { "epoch": 1.9627692385692437, "grad_norm": 0.04019022360444069, "learning_rate": 1.0543028834649194e-08, "loss": 0.0009, "step": 116140 }, { "epoch": 1.962938238848094, "grad_norm": 0.08860526233911514, "learning_rate": 1.0447520893233087e-08, "loss": 0.0006, "step": 116150 }, { "epoch": 1.9631072391269444, "grad_norm": 0.02146318554878235, "learning_rate": 1.0352447067057958e-08, "loss": 0.0011, "step": 116160 }, { "epoch": 1.963276239405795, "grad_norm": 0.04622725397348404, "learning_rate": 1.0257807364395522e-08, "loss": 0.0007, "step": 116170 }, { "epoch": 1.9634452396846456, "grad_norm": 0.015559851191937923, "learning_rate": 1.0163601793479194e-08, "loss": 0.0007, "step": 116180 }, { "epoch": 1.963614239963496, "grad_norm": 0.05115272477269173, "learning_rate": 1.0069830362506306e-08, "loss": 0.001, "step": 116190 }, { "epoch": 1.9637832402423463, "grad_norm": 0.010766013525426388, "learning_rate": 9.976493079634775e-09, "loss": 0.0004, "step": 116200 }, { "epoch": 1.9639522405211969, "grad_norm": 0.016246164217591286, "learning_rate": 9.883589952985884e-09, "loss": 0.0018, "step": 116210 }, { "epoch": 1.9641212408000475, "grad_norm": 0.06242336705327034, "learning_rate": 9.791120990642056e-09, "loss": 0.0005, "step": 116220 }, { "epoch": 1.9642902410788978, "grad_norm": 0.013016702607274055, "learning_rate": 9.699086200648522e-09, "loss": 0.0003, "step": 116230 }, { "epoch": 1.9644592413577482, "grad_norm": 0.017408093437552452, "learning_rate": 9.607485591013322e-09, "loss": 0.0011, "step": 116240 }, { "epoch": 1.9646282416365985, "grad_norm": 0.03357071802020073, "learning_rate": 9.516319169705635e-09, "loss": 0.0006, "step": 116250 }, { "epoch": 1.9647972419154491, "grad_norm": 0.020204557105898857, "learning_rate": 9.425586944658006e-09, "loss": 0.0003, "step": 116260 }, { "epoch": 1.9649662421942997, "grad_norm": 0.01886160299181938, "learning_rate": 9.335288923764118e-09, "loss": 0.0018, "step": 116270 }, { "epoch": 1.96513524247315, "grad_norm": 0.04038720577955246, "learning_rate": 9.245425114880469e-09, "loss": 0.0007, "step": 116280 }, { "epoch": 1.9653042427520004, "grad_norm": 0.07107829302549362, "learning_rate": 9.155995525825245e-09, "loss": 0.0013, "step": 116290 }, { "epoch": 1.965473243030851, "grad_norm": 0.016410810872912407, "learning_rate": 9.067000164380003e-09, "loss": 0.0004, "step": 116300 }, { "epoch": 1.9656422433097016, "grad_norm": 0.05313790962100029, "learning_rate": 8.978439038287435e-09, "loss": 0.0018, "step": 116310 }, { "epoch": 1.965811243588552, "grad_norm": 0.026425667107105255, "learning_rate": 8.89031215525249e-09, "loss": 0.0012, "step": 116320 }, { "epoch": 1.9659802438674023, "grad_norm": 0.015971403568983078, "learning_rate": 8.802619522942924e-09, "loss": 0.0008, "step": 116330 }, { "epoch": 1.9661492441462527, "grad_norm": 0.04588667303323746, "learning_rate": 8.715361148988188e-09, "loss": 0.0015, "step": 116340 }, { "epoch": 1.9663182444251033, "grad_norm": 0.08584762364625931, "learning_rate": 8.628537040980545e-09, "loss": 0.0004, "step": 116350 }, { "epoch": 1.9664872447039539, "grad_norm": 0.05310555920004845, "learning_rate": 8.54214720647395e-09, "loss": 0.0008, "step": 116360 }, { "epoch": 1.9666562449828042, "grad_norm": 0.04578254744410515, "learning_rate": 8.456191652984614e-09, "loss": 0.0007, "step": 116370 }, { "epoch": 1.9668252452616546, "grad_norm": 0.06566239148378372, "learning_rate": 8.370670387991553e-09, "loss": 0.0004, "step": 116380 }, { "epoch": 1.9669942455405052, "grad_norm": 0.009525866247713566, "learning_rate": 8.285583418934373e-09, "loss": 0.0012, "step": 116390 }, { "epoch": 1.9671632458193558, "grad_norm": 0.0021493479143828154, "learning_rate": 8.200930753217706e-09, "loss": 0.0008, "step": 116400 }, { "epoch": 1.9673322460982061, "grad_norm": 0.141135573387146, "learning_rate": 8.116712398205106e-09, "loss": 0.0014, "step": 116410 }, { "epoch": 1.9675012463770565, "grad_norm": 0.10668013244867325, "learning_rate": 8.032928361225156e-09, "loss": 0.0005, "step": 116420 }, { "epoch": 1.9676702466559068, "grad_norm": 0.029699115082621574, "learning_rate": 7.94957864956758e-09, "loss": 0.0009, "step": 116430 }, { "epoch": 1.9678392469347574, "grad_norm": 0.09455449134111404, "learning_rate": 7.866663270483243e-09, "loss": 0.0028, "step": 116440 }, { "epoch": 1.968008247213608, "grad_norm": 0.079514279961586, "learning_rate": 7.784182231186377e-09, "loss": 0.0006, "step": 116450 }, { "epoch": 1.9681772474924584, "grad_norm": 0.0032341405749320984, "learning_rate": 7.702135538853461e-09, "loss": 0.0006, "step": 116460 }, { "epoch": 1.9683462477713087, "grad_norm": 0.22825823724269867, "learning_rate": 7.620523200623786e-09, "loss": 0.0007, "step": 116470 }, { "epoch": 1.9685152480501593, "grad_norm": 0.029165804386138916, "learning_rate": 7.539345223596672e-09, "loss": 0.0006, "step": 116480 }, { "epoch": 1.96868424832901, "grad_norm": 0.022949257865548134, "learning_rate": 7.45860161483536e-09, "loss": 0.0008, "step": 116490 }, { "epoch": 1.9688532486078603, "grad_norm": 7.957002526381984e-05, "learning_rate": 7.378292381365338e-09, "loss": 0.0009, "step": 116500 }, { "epoch": 1.9690222488867106, "grad_norm": 0.0328722819685936, "learning_rate": 7.298417530173796e-09, "loss": 0.0011, "step": 116510 }, { "epoch": 1.969191249165561, "grad_norm": 0.02899502031505108, "learning_rate": 7.218977068210175e-09, "loss": 0.0006, "step": 116520 }, { "epoch": 1.9693602494444116, "grad_norm": 0.011906208470463753, "learning_rate": 7.13997100238617e-09, "loss": 0.0005, "step": 116530 }, { "epoch": 1.9695292497232622, "grad_norm": 0.0017242592293769121, "learning_rate": 7.06139933957517e-09, "loss": 0.0009, "step": 116540 }, { "epoch": 1.9696982500021125, "grad_norm": 0.020453786477446556, "learning_rate": 6.983262086613929e-09, "loss": 0.0004, "step": 116550 }, { "epoch": 1.9698672502809629, "grad_norm": 0.0002729600528255105, "learning_rate": 6.905559250300897e-09, "loss": 0.0003, "step": 116560 }, { "epoch": 1.9700362505598135, "grad_norm": 0.003292068839073181, "learning_rate": 6.8282908373962234e-09, "loss": 0.0003, "step": 116570 }, { "epoch": 1.970205250838664, "grad_norm": 0.041888050734996796, "learning_rate": 6.751456854622307e-09, "loss": 0.001, "step": 116580 }, { "epoch": 1.9703742511175144, "grad_norm": 0.009963915683329105, "learning_rate": 6.6750573086649116e-09, "loss": 0.0007, "step": 116590 }, { "epoch": 1.9705432513963648, "grad_norm": 0.019223950803279877, "learning_rate": 6.599092206170942e-09, "loss": 0.0004, "step": 116600 }, { "epoch": 1.9707122516752151, "grad_norm": 0.07051514834165573, "learning_rate": 6.523561553749558e-09, "loss": 0.0004, "step": 116610 }, { "epoch": 1.9708812519540657, "grad_norm": 0.06629837304353714, "learning_rate": 6.448465357971612e-09, "loss": 0.0007, "step": 116620 }, { "epoch": 1.9710502522329163, "grad_norm": 0.007716650143265724, "learning_rate": 6.373803625371877e-09, "loss": 0.0008, "step": 116630 }, { "epoch": 1.9712192525117667, "grad_norm": 0.1488904505968094, "learning_rate": 6.299576362445714e-09, "loss": 0.0011, "step": 116640 }, { "epoch": 1.971388252790617, "grad_norm": 0.19914045929908752, "learning_rate": 6.225783575651845e-09, "loss": 0.0013, "step": 116650 }, { "epoch": 1.9715572530694676, "grad_norm": 0.06798209995031357, "learning_rate": 6.152425271410134e-09, "loss": 0.0003, "step": 116660 }, { "epoch": 1.971726253348318, "grad_norm": 0.03254680335521698, "learning_rate": 6.079501456102699e-09, "loss": 0.0002, "step": 116670 }, { "epoch": 1.9718952536271686, "grad_norm": 0.023305783048272133, "learning_rate": 6.007012136075019e-09, "loss": 0.0004, "step": 116680 }, { "epoch": 1.972064253906019, "grad_norm": 0.0026318468153476715, "learning_rate": 5.934957317633716e-09, "loss": 0.0004, "step": 116690 }, { "epoch": 1.9722332541848693, "grad_norm": 0.04221843183040619, "learning_rate": 5.86333700704822e-09, "loss": 0.0009, "step": 116700 }, { "epoch": 1.9724022544637199, "grad_norm": 0.015916381031274796, "learning_rate": 5.7921512105491014e-09, "loss": 0.0002, "step": 116710 }, { "epoch": 1.9725712547425704, "grad_norm": 0.07472866773605347, "learning_rate": 5.721399934330851e-09, "loss": 0.001, "step": 116720 }, { "epoch": 1.9727402550214208, "grad_norm": 0.011363265104591846, "learning_rate": 5.6510831845485446e-09, "loss": 0.0002, "step": 116730 }, { "epoch": 1.9729092553002712, "grad_norm": 0.01561447698622942, "learning_rate": 5.581200967319511e-09, "loss": 0.0004, "step": 116740 }, { "epoch": 1.9730782555791218, "grad_norm": 0.0016669128090143204, "learning_rate": 5.511753288724997e-09, "loss": 0.0004, "step": 116750 }, { "epoch": 1.9732472558579721, "grad_norm": 0.050874121487140656, "learning_rate": 5.442740154806836e-09, "loss": 0.0003, "step": 116760 }, { "epoch": 1.9734162561368227, "grad_norm": 0.010174013674259186, "learning_rate": 5.374161571569114e-09, "loss": 0.0003, "step": 116770 }, { "epoch": 1.973585256415673, "grad_norm": 0.15439996123313904, "learning_rate": 5.30601754497928e-09, "loss": 0.0009, "step": 116780 }, { "epoch": 1.9737542566945234, "grad_norm": 0.030735179781913757, "learning_rate": 5.238308080965926e-09, "loss": 0.0002, "step": 116790 }, { "epoch": 1.973923256973374, "grad_norm": 0.009195013903081417, "learning_rate": 5.171033185419339e-09, "loss": 0.0005, "step": 116800 }, { "epoch": 1.9740922572522246, "grad_norm": 0.016738492995500565, "learning_rate": 5.1041928641937245e-09, "loss": 0.0006, "step": 116810 }, { "epoch": 1.974261257531075, "grad_norm": 0.12038344144821167, "learning_rate": 5.037787123104432e-09, "loss": 0.0007, "step": 116820 }, { "epoch": 1.9744302578099253, "grad_norm": 0.0054839253425598145, "learning_rate": 4.971815967928506e-09, "loss": 0.0007, "step": 116830 }, { "epoch": 1.974599258088776, "grad_norm": 0.0022869163658469915, "learning_rate": 4.9062794044058005e-09, "loss": 0.0005, "step": 116840 }, { "epoch": 1.9747682583676263, "grad_norm": 0.029526591300964355, "learning_rate": 4.841177438238975e-09, "loss": 0.0005, "step": 116850 }, { "epoch": 1.9749372586464768, "grad_norm": 0.049096524715423584, "learning_rate": 4.776510075091834e-09, "loss": 0.0004, "step": 116860 }, { "epoch": 1.9751062589253272, "grad_norm": 0.013877199031412601, "learning_rate": 4.712277320590431e-09, "loss": 0.0008, "step": 116870 }, { "epoch": 1.9752752592041776, "grad_norm": 0.028061717748641968, "learning_rate": 4.648479180323628e-09, "loss": 0.0005, "step": 116880 }, { "epoch": 1.9754442594830282, "grad_norm": 0.0018484432948753238, "learning_rate": 4.5851156598419875e-09, "loss": 0.0015, "step": 116890 }, { "epoch": 1.9756132597618787, "grad_norm": 0.05465136468410492, "learning_rate": 4.522186764659431e-09, "loss": 0.0019, "step": 116900 }, { "epoch": 1.975782260040729, "grad_norm": 0.07190567255020142, "learning_rate": 4.459692500249357e-09, "loss": 0.0003, "step": 116910 }, { "epoch": 1.9759512603195795, "grad_norm": 0.0014867663849145174, "learning_rate": 4.39763287205075e-09, "loss": 0.0002, "step": 116920 }, { "epoch": 1.9761202605984298, "grad_norm": 0.0075894673354923725, "learning_rate": 4.336007885461513e-09, "loss": 0.0013, "step": 116930 }, { "epoch": 1.9762892608772804, "grad_norm": 0.03782112896442413, "learning_rate": 4.274817545844579e-09, "loss": 0.0007, "step": 116940 }, { "epoch": 1.976458261156131, "grad_norm": 0.008285623043775558, "learning_rate": 4.214061858523466e-09, "loss": 0.0002, "step": 116950 }, { "epoch": 1.9766272614349814, "grad_norm": 0.03550859913229942, "learning_rate": 4.153740828783948e-09, "loss": 0.0003, "step": 116960 }, { "epoch": 1.9767962617138317, "grad_norm": 0.0036612313706427813, "learning_rate": 4.093854461874602e-09, "loss": 0.0006, "step": 116970 }, { "epoch": 1.9769652619926823, "grad_norm": 0.03043430857360363, "learning_rate": 4.034402763005707e-09, "loss": 0.0007, "step": 116980 }, { "epoch": 1.9771342622715329, "grad_norm": 0.06967907398939133, "learning_rate": 3.975385737349791e-09, "loss": 0.0012, "step": 116990 }, { "epoch": 1.9773032625503832, "grad_norm": 0.04638737067580223, "learning_rate": 3.916803390041079e-09, "loss": 0.0008, "step": 117000 }, { "epoch": 1.9774722628292336, "grad_norm": 0.0009102841722778976, "learning_rate": 3.858655726177718e-09, "loss": 0.0004, "step": 117010 }, { "epoch": 1.977641263108084, "grad_norm": 0.03580011427402496, "learning_rate": 3.800942750817882e-09, "loss": 0.0015, "step": 117020 }, { "epoch": 1.9778102633869346, "grad_norm": 0.013440757989883423, "learning_rate": 3.74366446898311e-09, "loss": 0.0004, "step": 117030 }, { "epoch": 1.9779792636657851, "grad_norm": 0.11920007318258286, "learning_rate": 3.686820885656639e-09, "loss": 0.0018, "step": 117040 }, { "epoch": 1.9781482639446355, "grad_norm": 0.0707445740699768, "learning_rate": 3.6304120057850666e-09, "loss": 0.0006, "step": 117050 }, { "epoch": 1.9783172642234859, "grad_norm": 0.030176930129528046, "learning_rate": 3.574437834275024e-09, "loss": 0.0003, "step": 117060 }, { "epoch": 1.9784862645023364, "grad_norm": 0.0009361015981994569, "learning_rate": 3.5188983759976148e-09, "loss": 0.001, "step": 117070 }, { "epoch": 1.978655264781187, "grad_norm": 0.006318709347397089, "learning_rate": 3.4637936357845294e-09, "loss": 0.0006, "step": 117080 }, { "epoch": 1.9788242650600374, "grad_norm": 0.009384984150528908, "learning_rate": 3.4091236184297104e-09, "loss": 0.0003, "step": 117090 }, { "epoch": 1.9789932653388878, "grad_norm": 0.0048936703242361546, "learning_rate": 3.3548883286904643e-09, "loss": 0.0006, "step": 117100 }, { "epoch": 1.9791622656177381, "grad_norm": 0.2528400421142578, "learning_rate": 3.3010877712857935e-09, "loss": 0.0007, "step": 117110 }, { "epoch": 1.9793312658965887, "grad_norm": 0.0070115034468472, "learning_rate": 3.2477219508952884e-09, "loss": 0.0003, "step": 117120 }, { "epoch": 1.9795002661754393, "grad_norm": 0.06373371928930283, "learning_rate": 3.1947908721630118e-09, "loss": 0.0007, "step": 117130 }, { "epoch": 1.9796692664542896, "grad_norm": 0.012733974494040012, "learning_rate": 3.1422945396936134e-09, "loss": 0.0006, "step": 117140 }, { "epoch": 1.97983826673314, "grad_norm": 0.015270589850842953, "learning_rate": 3.090232958055661e-09, "loss": 0.0006, "step": 117150 }, { "epoch": 1.9800072670119906, "grad_norm": 0.04006693512201309, "learning_rate": 3.0386061317777547e-09, "loss": 0.0003, "step": 117160 }, { "epoch": 1.9801762672908412, "grad_norm": 0.04597114399075508, "learning_rate": 2.987414065351857e-09, "loss": 0.0008, "step": 117170 }, { "epoch": 1.9803452675696915, "grad_norm": 0.05991408973932266, "learning_rate": 2.936656763232182e-09, "loss": 0.0009, "step": 117180 }, { "epoch": 1.980514267848542, "grad_norm": 0.0410887710750103, "learning_rate": 2.886334229834087e-09, "loss": 0.0006, "step": 117190 }, { "epoch": 1.9806832681273923, "grad_norm": 0.02065216936171055, "learning_rate": 2.8364464695374016e-09, "loss": 0.0005, "step": 117200 }, { "epoch": 1.9808522684062428, "grad_norm": 0.028431864455342293, "learning_rate": 2.786993486680878e-09, "loss": 0.0005, "step": 117210 }, { "epoch": 1.9810212686850934, "grad_norm": 0.0024671663995832205, "learning_rate": 2.73797528556885e-09, "loss": 0.0003, "step": 117220 }, { "epoch": 1.9811902689639438, "grad_norm": 0.08679312467575073, "learning_rate": 2.689391870464575e-09, "loss": 0.0005, "step": 117230 }, { "epoch": 1.9813592692427942, "grad_norm": 0.015164055861532688, "learning_rate": 2.6412432455957815e-09, "loss": 0.0007, "step": 117240 }, { "epoch": 1.9815282695216447, "grad_norm": 0.09785941988229752, "learning_rate": 2.5935294151518962e-09, "loss": 0.0004, "step": 117250 }, { "epoch": 1.9816972698004953, "grad_norm": 0.008396162651479244, "learning_rate": 2.5462503832834885e-09, "loss": 0.0006, "step": 117260 }, { "epoch": 1.9818662700793457, "grad_norm": 0.06484334170818329, "learning_rate": 2.499406154105044e-09, "loss": 0.0008, "step": 117270 }, { "epoch": 1.982035270358196, "grad_norm": 0.00378989614546299, "learning_rate": 2.452996731691637e-09, "loss": 0.0008, "step": 117280 }, { "epoch": 1.9822042706370464, "grad_norm": 0.02313057705760002, "learning_rate": 2.407022120080593e-09, "loss": 0.0003, "step": 117290 }, { "epoch": 1.982373270915897, "grad_norm": 0.019154351204633713, "learning_rate": 2.3614823232731566e-09, "loss": 0.001, "step": 117300 }, { "epoch": 1.9825422711947476, "grad_norm": 0.00604259455576539, "learning_rate": 2.3163773452306025e-09, "loss": 0.0003, "step": 117310 }, { "epoch": 1.982711271473598, "grad_norm": 0.002542471280321479, "learning_rate": 2.27170718987757e-09, "loss": 0.0006, "step": 117320 }, { "epoch": 1.9828802717524483, "grad_norm": 0.023002471774816513, "learning_rate": 2.2274718611009492e-09, "loss": 0.0002, "step": 117330 }, { "epoch": 1.9830492720312989, "grad_norm": 0.013058079406619072, "learning_rate": 2.1836713627487737e-09, "loss": 0.0009, "step": 117340 }, { "epoch": 1.9832182723101495, "grad_norm": 0.002443569479510188, "learning_rate": 2.1403056986318837e-09, "loss": 0.0005, "step": 117350 }, { "epoch": 1.9833872725889998, "grad_norm": 0.0016154100885614753, "learning_rate": 2.0973748725239276e-09, "loss": 0.0003, "step": 117360 }, { "epoch": 1.9835562728678502, "grad_norm": 0.1025247722864151, "learning_rate": 2.054878888159695e-09, "loss": 0.001, "step": 117370 }, { "epoch": 1.9837252731467006, "grad_norm": 0.03784005716443062, "learning_rate": 2.0128177492362288e-09, "loss": 0.0006, "step": 117380 }, { "epoch": 1.9838942734255511, "grad_norm": 0.0560203418135643, "learning_rate": 1.9711914594139347e-09, "loss": 0.0005, "step": 117390 }, { "epoch": 1.9840632737044017, "grad_norm": 0.08814750611782074, "learning_rate": 1.9300000223138047e-09, "loss": 0.0011, "step": 117400 }, { "epoch": 1.984232273983252, "grad_norm": 0.030366649851202965, "learning_rate": 1.889243441519639e-09, "loss": 0.0005, "step": 117410 }, { "epoch": 1.9844012742621024, "grad_norm": 0.010514308698475361, "learning_rate": 1.8489217205780453e-09, "loss": 0.0004, "step": 117420 }, { "epoch": 1.984570274540953, "grad_norm": 0.02470279298722744, "learning_rate": 1.8090348629967724e-09, "loss": 0.0006, "step": 117430 }, { "epoch": 1.9847392748198036, "grad_norm": 0.033508431166410446, "learning_rate": 1.769582872245823e-09, "loss": 0.0005, "step": 117440 }, { "epoch": 1.984908275098654, "grad_norm": 0.030686290934681892, "learning_rate": 1.7305657517585616e-09, "loss": 0.0003, "step": 117450 }, { "epoch": 1.9850772753775043, "grad_norm": 0.051201045513153076, "learning_rate": 1.6919835049294952e-09, "loss": 0.0007, "step": 117460 }, { "epoch": 1.9852462756563547, "grad_norm": 0.07473219931125641, "learning_rate": 1.653836135114828e-09, "loss": 0.0004, "step": 117470 }, { "epoch": 1.9854152759352053, "grad_norm": 0.08911773562431335, "learning_rate": 1.6161236456341268e-09, "loss": 0.0006, "step": 117480 }, { "epoch": 1.9855842762140559, "grad_norm": 0.03183027356863022, "learning_rate": 1.5788460397686555e-09, "loss": 0.0005, "step": 117490 }, { "epoch": 1.9857532764929062, "grad_norm": 0.01687448099255562, "learning_rate": 1.542003320760821e-09, "loss": 0.0007, "step": 117500 }, { "epoch": 1.9859222767717566, "grad_norm": 0.0014734736178070307, "learning_rate": 1.505595491817502e-09, "loss": 0.0007, "step": 117510 }, { "epoch": 1.9860912770506072, "grad_norm": 0.011509898118674755, "learning_rate": 1.4696225561050548e-09, "loss": 0.0004, "step": 117520 }, { "epoch": 1.9862602773294578, "grad_norm": 0.19320784509181976, "learning_rate": 1.4340845167543082e-09, "loss": 0.002, "step": 117530 }, { "epoch": 1.9864292776083081, "grad_norm": 0.09815225750207901, "learning_rate": 1.3989813768566785e-09, "loss": 0.0003, "step": 117540 }, { "epoch": 1.9865982778871585, "grad_norm": 0.002282196655869484, "learning_rate": 1.3643131394663888e-09, "loss": 0.0001, "step": 117550 }, { "epoch": 1.9867672781660088, "grad_norm": 0.0062440186738967896, "learning_rate": 1.3300798075993604e-09, "loss": 0.0007, "step": 117560 }, { "epoch": 1.9869362784448594, "grad_norm": 0.08370395749807358, "learning_rate": 1.2962813842354316e-09, "loss": 0.001, "step": 117570 }, { "epoch": 1.98710527872371, "grad_norm": 0.014962395653128624, "learning_rate": 1.2629178723133628e-09, "loss": 0.0003, "step": 117580 }, { "epoch": 1.9872742790025604, "grad_norm": 0.021809512749314308, "learning_rate": 1.2299892747374975e-09, "loss": 0.0004, "step": 117590 }, { "epoch": 1.9874432792814107, "grad_norm": 0.06284677982330322, "learning_rate": 1.197495594372211e-09, "loss": 0.0003, "step": 117600 }, { "epoch": 1.9876122795602613, "grad_norm": 0.16132612526416779, "learning_rate": 1.1654368340441313e-09, "loss": 0.0007, "step": 117610 }, { "epoch": 1.9877812798391117, "grad_norm": 0.00846133567392826, "learning_rate": 1.1338129965432488e-09, "loss": 0.0003, "step": 117620 }, { "epoch": 1.9879502801179623, "grad_norm": 0.07285846769809723, "learning_rate": 1.1026240846206959e-09, "loss": 0.0005, "step": 117630 }, { "epoch": 1.9881192803968126, "grad_norm": 0.03198720142245293, "learning_rate": 1.071870100989858e-09, "loss": 0.0005, "step": 117640 }, { "epoch": 1.988288280675663, "grad_norm": 0.013117395341396332, "learning_rate": 1.0415510483269276e-09, "loss": 0.0002, "step": 117650 }, { "epoch": 1.9884572809545136, "grad_norm": 0.0321161225438118, "learning_rate": 1.0116669292692393e-09, "loss": 0.0019, "step": 117660 }, { "epoch": 1.9886262812333642, "grad_norm": 0.002161074196919799, "learning_rate": 9.82217746417491e-10, "loss": 0.0009, "step": 117670 }, { "epoch": 1.9887952815122145, "grad_norm": 0.025001564994454384, "learning_rate": 9.532035023335219e-10, "loss": 0.0024, "step": 117680 }, { "epoch": 1.9889642817910649, "grad_norm": 0.04951312392950058, "learning_rate": 9.246241995419791e-10, "loss": 0.0003, "step": 117690 }, { "epoch": 1.9891332820699155, "grad_norm": 0.05861971899867058, "learning_rate": 8.964798405292074e-10, "loss": 0.0005, "step": 117700 }, { "epoch": 1.9893022823487658, "grad_norm": 0.04036771133542061, "learning_rate": 8.687704277432485e-10, "loss": 0.0007, "step": 117710 }, { "epoch": 1.9894712826276164, "grad_norm": 0.0003445586480665952, "learning_rate": 8.414959635960618e-10, "loss": 0.0006, "step": 117720 }, { "epoch": 1.9896402829064668, "grad_norm": 0.09235519915819168, "learning_rate": 8.146564504601939e-10, "loss": 0.0007, "step": 117730 }, { "epoch": 1.9898092831853171, "grad_norm": 0.07501112669706345, "learning_rate": 7.882518906709991e-10, "loss": 0.0006, "step": 117740 }, { "epoch": 1.9899782834641677, "grad_norm": 0.009308923967182636, "learning_rate": 7.622822865255286e-10, "loss": 0.0012, "step": 117750 }, { "epoch": 1.9901472837430183, "grad_norm": 0.059830378741025925, "learning_rate": 7.367476402830864e-10, "loss": 0.0009, "step": 117760 }, { "epoch": 1.9903162840218687, "grad_norm": 0.015614648349583149, "learning_rate": 7.116479541657839e-10, "loss": 0.0003, "step": 117770 }, { "epoch": 1.990485284300719, "grad_norm": 0.06734458357095718, "learning_rate": 6.869832303574298e-10, "loss": 0.0006, "step": 117780 }, { "epoch": 1.9906542845795696, "grad_norm": 0.006938898470252752, "learning_rate": 6.627534710035299e-10, "loss": 0.0024, "step": 117790 }, { "epoch": 1.99082328485842, "grad_norm": 0.036555707454681396, "learning_rate": 6.38958678212398e-10, "loss": 0.0009, "step": 117800 }, { "epoch": 1.9909922851372706, "grad_norm": 0.07078488916158676, "learning_rate": 6.155988540545999e-10, "loss": 0.0009, "step": 117810 }, { "epoch": 1.991161285416121, "grad_norm": 0.004905702080577612, "learning_rate": 5.926740005618436e-10, "loss": 0.001, "step": 117820 }, { "epoch": 1.9913302856949713, "grad_norm": 0.11632024496793747, "learning_rate": 5.701841197297553e-10, "loss": 0.0013, "step": 117830 }, { "epoch": 1.9914992859738219, "grad_norm": 0.010307352989912033, "learning_rate": 5.481292135139926e-10, "loss": 0.0004, "step": 117840 }, { "epoch": 1.9916682862526724, "grad_norm": 0.1011987254023552, "learning_rate": 5.265092838335762e-10, "loss": 0.0007, "step": 117850 }, { "epoch": 1.9918372865315228, "grad_norm": 0.00017056192154996097, "learning_rate": 5.053243325703339e-10, "loss": 0.0006, "step": 117860 }, { "epoch": 1.9920062868103732, "grad_norm": 0.00032645714236423373, "learning_rate": 4.84574361567236e-10, "loss": 0.0003, "step": 117870 }, { "epoch": 1.9921752870892235, "grad_norm": 0.006950442213565111, "learning_rate": 4.6425937262895017e-10, "loss": 0.0008, "step": 117880 }, { "epoch": 1.9923442873680741, "grad_norm": 0.019113315269351006, "learning_rate": 4.443793675235064e-10, "loss": 0.0005, "step": 117890 }, { "epoch": 1.9925132876469247, "grad_norm": 0.01378389447927475, "learning_rate": 4.2493434798007715e-10, "loss": 0.0006, "step": 117900 }, { "epoch": 1.992682287925775, "grad_norm": 0.03983868658542633, "learning_rate": 4.059243156911974e-10, "loss": 0.0005, "step": 117910 }, { "epoch": 1.9928512882046254, "grad_norm": 0.017251068726181984, "learning_rate": 3.873492723105443e-10, "loss": 0.0003, "step": 117920 }, { "epoch": 1.993020288483476, "grad_norm": 0.0006921543390490115, "learning_rate": 3.692092194540475e-10, "loss": 0.0006, "step": 117930 }, { "epoch": 1.9931892887623266, "grad_norm": 0.08097910135984421, "learning_rate": 3.515041587004442e-10, "loss": 0.0007, "step": 117940 }, { "epoch": 1.993358289041177, "grad_norm": 0.01713497005403042, "learning_rate": 3.342340915896136e-10, "loss": 0.0005, "step": 117950 }, { "epoch": 1.9935272893200273, "grad_norm": 0.018909644335508347, "learning_rate": 3.173990196242427e-10, "loss": 0.0011, "step": 117960 }, { "epoch": 1.9936962895988777, "grad_norm": 0.0007610021275468171, "learning_rate": 3.0099894426927066e-10, "loss": 0.0013, "step": 117970 }, { "epoch": 1.9938652898777283, "grad_norm": 0.0057690683752298355, "learning_rate": 2.850338669518893e-10, "loss": 0.0005, "step": 117980 }, { "epoch": 1.9940342901565788, "grad_norm": 0.0039121732115745544, "learning_rate": 2.695037890604324e-10, "loss": 0.0004, "step": 117990 }, { "epoch": 1.9942032904354292, "grad_norm": 0.05107155814766884, "learning_rate": 2.5440871194604144e-10, "loss": 0.0006, "step": 118000 }, { "epoch": 1.9943722907142796, "grad_norm": 0.058372240513563156, "learning_rate": 2.3974863692266537e-10, "loss": 0.0009, "step": 118010 }, { "epoch": 1.9945412909931302, "grad_norm": 0.03363621607422829, "learning_rate": 2.255235652653953e-10, "loss": 0.0005, "step": 118020 }, { "epoch": 1.9947102912719807, "grad_norm": 0.015248659998178482, "learning_rate": 2.1173349821268508e-10, "loss": 0.0005, "step": 118030 }, { "epoch": 1.994879291550831, "grad_norm": 0.10280773043632507, "learning_rate": 1.9837843696302039e-10, "loss": 0.002, "step": 118040 }, { "epoch": 1.9950482918296815, "grad_norm": 0.07708601653575897, "learning_rate": 1.854583826793599e-10, "loss": 0.0013, "step": 118050 }, { "epoch": 1.9952172921085318, "grad_norm": 0.02261054702103138, "learning_rate": 1.7297333648524928e-10, "loss": 0.0032, "step": 118060 }, { "epoch": 1.9953862923873824, "grad_norm": 0.026327621191740036, "learning_rate": 1.6092329946704178e-10, "loss": 0.0008, "step": 118070 }, { "epoch": 1.995555292666233, "grad_norm": 0.019617615267634392, "learning_rate": 1.4930827267389814e-10, "loss": 0.0003, "step": 118080 }, { "epoch": 1.9957242929450834, "grad_norm": 0.11332027614116669, "learning_rate": 1.3812825711556622e-10, "loss": 0.0008, "step": 118090 }, { "epoch": 1.9958932932239337, "grad_norm": 0.01800505258142948, "learning_rate": 1.2738325376460136e-10, "loss": 0.0004, "step": 118100 }, { "epoch": 1.9960622935027843, "grad_norm": 0.0007165533606894314, "learning_rate": 1.170732635563665e-10, "loss": 0.0007, "step": 118110 }, { "epoch": 1.9962312937816349, "grad_norm": 0.19624696671962738, "learning_rate": 1.0719828738792182e-10, "loss": 0.0006, "step": 118120 }, { "epoch": 1.9964002940604852, "grad_norm": 0.03249282389879227, "learning_rate": 9.775832611802483e-11, "loss": 0.0006, "step": 118130 }, { "epoch": 1.9965692943393356, "grad_norm": 0.04424285143613815, "learning_rate": 8.875338056824057e-11, "loss": 0.0003, "step": 118140 }, { "epoch": 1.996738294618186, "grad_norm": 0.0007160080131143332, "learning_rate": 8.018345152238649e-11, "loss": 0.0008, "step": 118150 }, { "epoch": 1.9969072948970366, "grad_norm": 0.08032648265361786, "learning_rate": 7.204853972542225e-11, "loss": 0.0005, "step": 118160 }, { "epoch": 1.9970762951758871, "grad_norm": 0.014756135642528534, "learning_rate": 6.434864588567013e-11, "loss": 0.0007, "step": 118170 }, { "epoch": 1.9972452954547375, "grad_norm": 0.05486934632062912, "learning_rate": 5.708377067259463e-11, "loss": 0.0013, "step": 118180 }, { "epoch": 1.9974142957335879, "grad_norm": 0.07138478010892868, "learning_rate": 5.0253914718467746e-11, "loss": 0.0018, "step": 118190 }, { "epoch": 1.9975832960124384, "grad_norm": 0.03233276307582855, "learning_rate": 4.385907861781391e-11, "loss": 0.0003, "step": 118200 }, { "epoch": 1.997752296291289, "grad_norm": 0.05701679736375809, "learning_rate": 3.789926292685486e-11, "loss": 0.0008, "step": 118210 }, { "epoch": 1.9979212965701394, "grad_norm": 0.15732277929782867, "learning_rate": 3.237446816350964e-11, "loss": 0.0004, "step": 118220 }, { "epoch": 1.9980902968489898, "grad_norm": 0.02079329639673233, "learning_rate": 2.7284694809615042e-11, "loss": 0.001, "step": 118230 }, { "epoch": 1.9982592971278401, "grad_norm": 0.04122069850564003, "learning_rate": 2.2629943307039827e-11, "loss": 0.001, "step": 118240 }, { "epoch": 1.9984282974066907, "grad_norm": 0.01309211365878582, "learning_rate": 1.8410214061015396e-11, "loss": 0.0003, "step": 118250 }, { "epoch": 1.9985972976855413, "grad_norm": 0.09661834686994553, "learning_rate": 1.4625507439025577e-11, "loss": 0.0009, "step": 118260 }, { "epoch": 1.9987662979643916, "grad_norm": 0.16295121610164642, "learning_rate": 1.1275823770251493e-11, "loss": 0.0006, "step": 118270 }, { "epoch": 1.998935298243242, "grad_norm": 0.011920265853404999, "learning_rate": 8.361163345571577e-12, "loss": 0.0012, "step": 118280 }, { "epoch": 1.9991042985220926, "grad_norm": 0.009564779698848724, "learning_rate": 5.881526419226902e-12, "loss": 0.0012, "step": 118290 }, { "epoch": 1.9992732988009432, "grad_norm": 0.05826535448431969, "learning_rate": 3.836913206600734e-12, "loss": 0.0008, "step": 118300 }, { "epoch": 1.9994422990797935, "grad_norm": 0.009812925010919571, "learning_rate": 2.2273238858838697e-12, "loss": 0.0007, "step": 118310 }, { "epoch": 1.999611299358644, "grad_norm": 0.0485992468893528, "learning_rate": 1.0527585964092979e-12, "loss": 0.0004, "step": 118320 }, { "epoch": 1.9997802996374943, "grad_norm": 0.030909573659300804, "learning_rate": 3.1321744142776e-13, "loss": 0.0006, "step": 118330 }, { "epoch": 1.9999492999163448, "grad_norm": 0.021162638440728188, "learning_rate": 8.700484777079965e-15, "loss": 0.0006, "step": 118340 }, { "epoch": 1.9999830999721149, "step": 118342, "total_flos": 1.4764570273982185e+19, "train_loss": 0.003653812557283815, "train_runtime": 130211.2924, "train_samples_per_second": 7.271, "train_steps_per_second": 0.909 } ], "logging_steps": 10, "max_steps": 118342, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 62000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.4764570273982185e+19, "train_batch_size": 1, "trial_name": null, "trial_params": null }