{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.2219753360737697, "eval_steps": 500, "global_step": 2500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0008887901344295079, "grad_norm": 0.8810775279998779, "learning_rate": 0.0, "loss": 1.9172, "step": 1 }, { "epoch": 0.0017775802688590157, "grad_norm": 0.7469067573547363, "learning_rate": 2.1533827903669654e-05, "loss": 1.8244, "step": 2 }, { "epoch": 0.0026663704032885236, "grad_norm": 0.6811410784721375, "learning_rate": 3.413030972429927e-05, "loss": 1.926, "step": 3 }, { "epoch": 0.0035551605377180315, "grad_norm": 0.5220848321914673, "learning_rate": 4.306765580733931e-05, "loss": 1.8387, "step": 4 }, { "epoch": 0.004443950672147539, "grad_norm": 0.5415656566619873, "learning_rate": 5e-05, "loss": 1.8832, "step": 5 }, { "epoch": 0.005332740806577047, "grad_norm": 0.3646649718284607, "learning_rate": 5.5664137627968925e-05, "loss": 1.8791, "step": 6 }, { "epoch": 0.006221530941006555, "grad_norm": 0.42480552196502686, "learning_rate": 6.0453097756108376e-05, "loss": 1.7676, "step": 7 }, { "epoch": 0.007110321075436063, "grad_norm": 0.38259413838386536, "learning_rate": 6.460148371100896e-05, "loss": 1.7888, "step": 8 }, { "epoch": 0.00799911120986557, "grad_norm": 0.3880155086517334, "learning_rate": 6.826061944859854e-05, "loss": 1.7697, "step": 9 }, { "epoch": 0.008887901344295079, "grad_norm": 0.5344939231872559, "learning_rate": 7.153382790366967e-05, "loss": 1.6064, "step": 10 }, { "epoch": 0.009776691478724587, "grad_norm": 0.42493271827697754, "learning_rate": 7.449480512024892e-05, "loss": 1.7261, "step": 11 }, { "epoch": 0.010665481613154094, "grad_norm": 0.49020832777023315, "learning_rate": 7.719796553163858e-05, "loss": 1.7062, "step": 12 }, { "epoch": 0.011554271747583602, "grad_norm": 0.5008671283721924, "learning_rate": 7.968463205835412e-05, "loss": 1.7161, "step": 13 }, { "epoch": 0.01244306188201311, "grad_norm": 0.6378142833709717, "learning_rate": 8.198692565977803e-05, "loss": 1.5226, "step": 14 }, { "epoch": 0.013331852016442618, "grad_norm": 0.6101044416427612, "learning_rate": 8.413030972429928e-05, "loss": 1.5718, "step": 15 }, { "epoch": 0.014220642150872126, "grad_norm": 0.4299439787864685, "learning_rate": 8.613531161467861e-05, "loss": 1.7133, "step": 16 }, { "epoch": 0.015109432285301634, "grad_norm": 0.48655468225479126, "learning_rate": 8.80187213861294e-05, "loss": 1.5746, "step": 17 }, { "epoch": 0.01599822241973114, "grad_norm": 0.577294647693634, "learning_rate": 8.979444735226819e-05, "loss": 1.4959, "step": 18 }, { "epoch": 0.016887012554160648, "grad_norm": 0.5080670714378357, "learning_rate": 9.147414002175752e-05, "loss": 1.4947, "step": 19 }, { "epoch": 0.017775802688590157, "grad_norm": 0.6289512515068054, "learning_rate": 9.306765580733931e-05, "loss": 1.5576, "step": 20 }, { "epoch": 0.018664592823019664, "grad_norm": 0.5262102484703064, "learning_rate": 9.458340748040766e-05, "loss": 1.5228, "step": 21 }, { "epoch": 0.019553382957449173, "grad_norm": 0.4739653170108795, "learning_rate": 9.602863302391859e-05, "loss": 1.5751, "step": 22 }, { "epoch": 0.02044217309187868, "grad_norm": 0.4563744068145752, "learning_rate": 9.740960467331899e-05, "loss": 1.4833, "step": 23 }, { "epoch": 0.02133096322630819, "grad_norm": 0.5045209527015686, "learning_rate": 9.873179343530825e-05, "loss": 1.5092, "step": 24 }, { "epoch": 0.022219753360737695, "grad_norm": 0.3677147328853607, "learning_rate": 0.0001, "loss": 1.5253, "step": 25 }, { "epoch": 0.023108543495167205, "grad_norm": 0.5037134289741516, "learning_rate": 0.0001, "loss": 1.4579, "step": 26 }, { "epoch": 0.02399733362959671, "grad_norm": 0.5134053230285645, "learning_rate": 0.0001, "loss": 1.5509, "step": 27 }, { "epoch": 0.02488612376402622, "grad_norm": 0.4475038945674896, "learning_rate": 0.0001, "loss": 1.4419, "step": 28 }, { "epoch": 0.025774913898455726, "grad_norm": 0.4869753122329712, "learning_rate": 0.0001, "loss": 1.4236, "step": 29 }, { "epoch": 0.026663704032885236, "grad_norm": 0.3995779752731323, "learning_rate": 0.0001, "loss": 1.4746, "step": 30 }, { "epoch": 0.027552494167314742, "grad_norm": 0.43385857343673706, "learning_rate": 0.0001, "loss": 1.4692, "step": 31 }, { "epoch": 0.028441284301744252, "grad_norm": 0.46782711148262024, "learning_rate": 0.0001, "loss": 1.414, "step": 32 }, { "epoch": 0.029330074436173758, "grad_norm": 0.5026598572731018, "learning_rate": 0.0001, "loss": 1.348, "step": 33 }, { "epoch": 0.030218864570603268, "grad_norm": 0.3947531580924988, "learning_rate": 0.0001, "loss": 1.325, "step": 34 }, { "epoch": 0.031107654705032774, "grad_norm": 0.4685068428516388, "learning_rate": 0.0001, "loss": 1.3592, "step": 35 }, { "epoch": 0.03199644483946228, "grad_norm": 0.5283563733100891, "learning_rate": 0.0001, "loss": 1.4594, "step": 36 }, { "epoch": 0.03288523497389179, "grad_norm": 0.48610830307006836, "learning_rate": 0.0001, "loss": 1.4441, "step": 37 }, { "epoch": 0.033774025108321296, "grad_norm": 0.46293529868125916, "learning_rate": 0.0001, "loss": 1.3483, "step": 38 }, { "epoch": 0.034662815242750805, "grad_norm": 0.5796862244606018, "learning_rate": 0.0001, "loss": 1.3338, "step": 39 }, { "epoch": 0.035551605377180315, "grad_norm": 0.5200364589691162, "learning_rate": 0.0001, "loss": 1.3805, "step": 40 }, { "epoch": 0.036440395511609824, "grad_norm": 0.4546230137348175, "learning_rate": 0.0001, "loss": 1.3318, "step": 41 }, { "epoch": 0.03732918564603933, "grad_norm": 0.5082724094390869, "learning_rate": 0.0001, "loss": 1.2617, "step": 42 }, { "epoch": 0.03821797578046884, "grad_norm": 0.4639153480529785, "learning_rate": 0.0001, "loss": 1.2998, "step": 43 }, { "epoch": 0.039106765914898346, "grad_norm": 0.6027429699897766, "learning_rate": 0.0001, "loss": 1.3551, "step": 44 }, { "epoch": 0.039995556049327856, "grad_norm": 0.4588245451450348, "learning_rate": 0.0001, "loss": 1.297, "step": 45 }, { "epoch": 0.04088434618375736, "grad_norm": 0.49096405506134033, "learning_rate": 0.0001, "loss": 1.3503, "step": 46 }, { "epoch": 0.04177313631818687, "grad_norm": 0.5192633867263794, "learning_rate": 0.0001, "loss": 1.3139, "step": 47 }, { "epoch": 0.04266192645261638, "grad_norm": 0.5095855593681335, "learning_rate": 0.0001, "loss": 1.2607, "step": 48 }, { "epoch": 0.04355071658704588, "grad_norm": 0.5086390376091003, "learning_rate": 0.0001, "loss": 1.294, "step": 49 }, { "epoch": 0.04443950672147539, "grad_norm": 0.5870110988616943, "learning_rate": 0.0001, "loss": 1.3086, "step": 50 }, { "epoch": 0.0453282968559049, "grad_norm": 0.5581070184707642, "learning_rate": 0.0001, "loss": 1.2506, "step": 51 }, { "epoch": 0.04621708699033441, "grad_norm": 0.5818192362785339, "learning_rate": 0.0001, "loss": 1.3169, "step": 52 }, { "epoch": 0.04710587712476391, "grad_norm": 0.612249493598938, "learning_rate": 0.0001, "loss": 1.2275, "step": 53 }, { "epoch": 0.04799466725919342, "grad_norm": 0.712792158126831, "learning_rate": 0.0001, "loss": 1.2525, "step": 54 }, { "epoch": 0.04888345739362293, "grad_norm": 0.6237956285476685, "learning_rate": 0.0001, "loss": 1.21, "step": 55 }, { "epoch": 0.04977224752805244, "grad_norm": 0.5647529363632202, "learning_rate": 0.0001, "loss": 1.2615, "step": 56 }, { "epoch": 0.05066103766248194, "grad_norm": 0.5913922786712646, "learning_rate": 0.0001, "loss": 1.227, "step": 57 }, { "epoch": 0.05154982779691145, "grad_norm": 0.5439374446868896, "learning_rate": 0.0001, "loss": 1.2046, "step": 58 }, { "epoch": 0.05243861793134096, "grad_norm": 0.4831898510456085, "learning_rate": 0.0001, "loss": 1.2106, "step": 59 }, { "epoch": 0.05332740806577047, "grad_norm": 0.47741156816482544, "learning_rate": 0.0001, "loss": 1.2144, "step": 60 }, { "epoch": 0.054216198200199975, "grad_norm": 0.4893709123134613, "learning_rate": 0.0001, "loss": 1.1871, "step": 61 }, { "epoch": 0.055104988334629484, "grad_norm": 0.5424261689186096, "learning_rate": 0.0001, "loss": 1.238, "step": 62 }, { "epoch": 0.055993778469058994, "grad_norm": 0.47771304845809937, "learning_rate": 0.0001, "loss": 1.2568, "step": 63 }, { "epoch": 0.056882568603488504, "grad_norm": 0.6287993788719177, "learning_rate": 0.0001, "loss": 1.2713, "step": 64 }, { "epoch": 0.057771358737918006, "grad_norm": 0.5192376971244812, "learning_rate": 0.0001, "loss": 1.1294, "step": 65 }, { "epoch": 0.058660148872347516, "grad_norm": 0.5453503131866455, "learning_rate": 0.0001, "loss": 1.1706, "step": 66 }, { "epoch": 0.059548939006777026, "grad_norm": 0.44728267192840576, "learning_rate": 0.0001, "loss": 1.2275, "step": 67 }, { "epoch": 0.060437729141206535, "grad_norm": 0.561209499835968, "learning_rate": 0.0001, "loss": 1.1971, "step": 68 }, { "epoch": 0.06132651927563604, "grad_norm": 0.518390417098999, "learning_rate": 0.0001, "loss": 1.2264, "step": 69 }, { "epoch": 0.06221530941006555, "grad_norm": 0.5499495267868042, "learning_rate": 0.0001, "loss": 1.1627, "step": 70 }, { "epoch": 0.06310409954449506, "grad_norm": 0.46714553236961365, "learning_rate": 0.0001, "loss": 1.1767, "step": 71 }, { "epoch": 0.06399288967892457, "grad_norm": 0.5339136123657227, "learning_rate": 0.0001, "loss": 1.129, "step": 72 }, { "epoch": 0.06488167981335408, "grad_norm": 0.6434009671211243, "learning_rate": 0.0001, "loss": 1.213, "step": 73 }, { "epoch": 0.06577046994778359, "grad_norm": 0.6940732002258301, "learning_rate": 0.0001, "loss": 1.1562, "step": 74 }, { "epoch": 0.06665926008221308, "grad_norm": 0.5630552768707275, "learning_rate": 0.0001, "loss": 1.208, "step": 75 }, { "epoch": 0.06754805021664259, "grad_norm": 0.561046838760376, "learning_rate": 0.0001, "loss": 1.168, "step": 76 }, { "epoch": 0.0684368403510721, "grad_norm": 0.4617985486984253, "learning_rate": 0.0001, "loss": 1.1431, "step": 77 }, { "epoch": 0.06932563048550161, "grad_norm": 0.5184434652328491, "learning_rate": 0.0001, "loss": 1.1934, "step": 78 }, { "epoch": 0.07021442061993112, "grad_norm": 0.6089707016944885, "learning_rate": 0.0001, "loss": 1.2368, "step": 79 }, { "epoch": 0.07110321075436063, "grad_norm": 0.5018100738525391, "learning_rate": 0.0001, "loss": 1.1722, "step": 80 }, { "epoch": 0.07199200088879014, "grad_norm": 0.49356648325920105, "learning_rate": 0.0001, "loss": 1.1999, "step": 81 }, { "epoch": 0.07288079102321965, "grad_norm": 0.544902503490448, "learning_rate": 0.0001, "loss": 1.1496, "step": 82 }, { "epoch": 0.07376958115764914, "grad_norm": 0.534565806388855, "learning_rate": 0.0001, "loss": 1.0753, "step": 83 }, { "epoch": 0.07465837129207865, "grad_norm": 0.6407784223556519, "learning_rate": 0.0001, "loss": 1.1728, "step": 84 }, { "epoch": 0.07554716142650816, "grad_norm": 0.619192361831665, "learning_rate": 0.0001, "loss": 1.179, "step": 85 }, { "epoch": 0.07643595156093767, "grad_norm": 0.5248496532440186, "learning_rate": 0.0001, "loss": 1.1668, "step": 86 }, { "epoch": 0.07732474169536718, "grad_norm": 0.5901935696601868, "learning_rate": 0.0001, "loss": 1.1634, "step": 87 }, { "epoch": 0.07821353182979669, "grad_norm": 0.5717945694923401, "learning_rate": 0.0001, "loss": 1.046, "step": 88 }, { "epoch": 0.0791023219642262, "grad_norm": 0.5313799381256104, "learning_rate": 0.0001, "loss": 1.1541, "step": 89 }, { "epoch": 0.07999111209865571, "grad_norm": 0.5816580057144165, "learning_rate": 0.0001, "loss": 1.1505, "step": 90 }, { "epoch": 0.08087990223308521, "grad_norm": 0.6545852422714233, "learning_rate": 0.0001, "loss": 1.1687, "step": 91 }, { "epoch": 0.08176869236751472, "grad_norm": 0.5649644136428833, "learning_rate": 0.0001, "loss": 1.2657, "step": 92 }, { "epoch": 0.08265748250194423, "grad_norm": 0.5403311848640442, "learning_rate": 0.0001, "loss": 1.1041, "step": 93 }, { "epoch": 0.08354627263637374, "grad_norm": 0.6377498507499695, "learning_rate": 0.0001, "loss": 1.1621, "step": 94 }, { "epoch": 0.08443506277080325, "grad_norm": 0.5470308065414429, "learning_rate": 0.0001, "loss": 1.0845, "step": 95 }, { "epoch": 0.08532385290523276, "grad_norm": 0.4540124833583832, "learning_rate": 0.0001, "loss": 1.1723, "step": 96 }, { "epoch": 0.08621264303966227, "grad_norm": 0.49754512310028076, "learning_rate": 0.0001, "loss": 1.165, "step": 97 }, { "epoch": 0.08710143317409176, "grad_norm": 0.5652042031288147, "learning_rate": 0.0001, "loss": 1.0729, "step": 98 }, { "epoch": 0.08799022330852127, "grad_norm": 0.5990537405014038, "learning_rate": 0.0001, "loss": 1.1192, "step": 99 }, { "epoch": 0.08887901344295078, "grad_norm": 0.49589407444000244, "learning_rate": 0.0001, "loss": 1.0975, "step": 100 }, { "epoch": 0.08976780357738029, "grad_norm": 0.4846036732196808, "learning_rate": 0.0001, "loss": 1.1206, "step": 101 }, { "epoch": 0.0906565937118098, "grad_norm": 0.5197208523750305, "learning_rate": 0.0001, "loss": 1.1676, "step": 102 }, { "epoch": 0.09154538384623931, "grad_norm": 0.500135600566864, "learning_rate": 0.0001, "loss": 1.0938, "step": 103 }, { "epoch": 0.09243417398066882, "grad_norm": 0.48206326365470886, "learning_rate": 0.0001, "loss": 1.1561, "step": 104 }, { "epoch": 0.09332296411509833, "grad_norm": 0.5363126397132874, "learning_rate": 0.0001, "loss": 1.1397, "step": 105 }, { "epoch": 0.09421175424952782, "grad_norm": 0.5342210531234741, "learning_rate": 0.0001, "loss": 1.1213, "step": 106 }, { "epoch": 0.09510054438395733, "grad_norm": 0.47415241599082947, "learning_rate": 0.0001, "loss": 1.1009, "step": 107 }, { "epoch": 0.09598933451838684, "grad_norm": 0.5735252499580383, "learning_rate": 0.0001, "loss": 1.0854, "step": 108 }, { "epoch": 0.09687812465281635, "grad_norm": 0.6142700910568237, "learning_rate": 0.0001, "loss": 1.2542, "step": 109 }, { "epoch": 0.09776691478724586, "grad_norm": 0.5673418045043945, "learning_rate": 0.0001, "loss": 1.0388, "step": 110 }, { "epoch": 0.09865570492167537, "grad_norm": 0.5416164398193359, "learning_rate": 0.0001, "loss": 1.0302, "step": 111 }, { "epoch": 0.09954449505610488, "grad_norm": 0.48338207602500916, "learning_rate": 0.0001, "loss": 1.1465, "step": 112 }, { "epoch": 0.10043328519053439, "grad_norm": 0.49852466583251953, "learning_rate": 0.0001, "loss": 1.1088, "step": 113 }, { "epoch": 0.10132207532496389, "grad_norm": 0.5490244626998901, "learning_rate": 0.0001, "loss": 1.0804, "step": 114 }, { "epoch": 0.1022108654593934, "grad_norm": 7.056009769439697, "learning_rate": 0.0001, "loss": 1.094, "step": 115 }, { "epoch": 0.1030996555938229, "grad_norm": 0.5697766542434692, "learning_rate": 0.0001, "loss": 1.0271, "step": 116 }, { "epoch": 0.10398844572825242, "grad_norm": 0.5138140320777893, "learning_rate": 0.0001, "loss": 1.0057, "step": 117 }, { "epoch": 0.10487723586268193, "grad_norm": 0.4911749064922333, "learning_rate": 0.0001, "loss": 1.1375, "step": 118 }, { "epoch": 0.10576602599711143, "grad_norm": 0.45733195543289185, "learning_rate": 0.0001, "loss": 1.0889, "step": 119 }, { "epoch": 0.10665481613154094, "grad_norm": 0.4950946569442749, "learning_rate": 0.0001, "loss": 1.1143, "step": 120 }, { "epoch": 0.10754360626597045, "grad_norm": 0.5536718964576721, "learning_rate": 0.0001, "loss": 1.1188, "step": 121 }, { "epoch": 0.10843239640039995, "grad_norm": 0.5105839967727661, "learning_rate": 0.0001, "loss": 1.0501, "step": 122 }, { "epoch": 0.10932118653482946, "grad_norm": 0.5183722972869873, "learning_rate": 0.0001, "loss": 1.0766, "step": 123 }, { "epoch": 0.11020997666925897, "grad_norm": 0.5385981798171997, "learning_rate": 0.0001, "loss": 1.0419, "step": 124 }, { "epoch": 0.11109876680368848, "grad_norm": 0.6438923478126526, "learning_rate": 0.0001, "loss": 1.1333, "step": 125 }, { "epoch": 0.11198755693811799, "grad_norm": 0.4993588328361511, "learning_rate": 0.0001, "loss": 1.0504, "step": 126 }, { "epoch": 0.1128763470725475, "grad_norm": 0.5645109415054321, "learning_rate": 0.0001, "loss": 1.0817, "step": 127 }, { "epoch": 0.11376513720697701, "grad_norm": 0.5064727067947388, "learning_rate": 0.0001, "loss": 1.1588, "step": 128 }, { "epoch": 0.11465392734140652, "grad_norm": 0.4977114498615265, "learning_rate": 0.0001, "loss": 1.0903, "step": 129 }, { "epoch": 0.11554271747583601, "grad_norm": 0.5678680539131165, "learning_rate": 0.0001, "loss": 1.1107, "step": 130 }, { "epoch": 0.11643150761026552, "grad_norm": 0.5574091076850891, "learning_rate": 0.0001, "loss": 1.1081, "step": 131 }, { "epoch": 0.11732029774469503, "grad_norm": 0.5073116421699524, "learning_rate": 0.0001, "loss": 1.0487, "step": 132 }, { "epoch": 0.11820908787912454, "grad_norm": 0.4510885179042816, "learning_rate": 0.0001, "loss": 1.0262, "step": 133 }, { "epoch": 0.11909787801355405, "grad_norm": 0.5130484104156494, "learning_rate": 0.0001, "loss": 1.0612, "step": 134 }, { "epoch": 0.11998666814798356, "grad_norm": 0.55837082862854, "learning_rate": 0.0001, "loss": 1.1003, "step": 135 }, { "epoch": 0.12087545828241307, "grad_norm": 0.563966691493988, "learning_rate": 0.0001, "loss": 1.1106, "step": 136 }, { "epoch": 0.12176424841684258, "grad_norm": 0.495710164308548, "learning_rate": 0.0001, "loss": 1.077, "step": 137 }, { "epoch": 0.12265303855127208, "grad_norm": 0.4716346561908722, "learning_rate": 0.0001, "loss": 1.0224, "step": 138 }, { "epoch": 0.12354182868570159, "grad_norm": 0.5098469257354736, "learning_rate": 0.0001, "loss": 1.0821, "step": 139 }, { "epoch": 0.1244306188201311, "grad_norm": 0.4919681251049042, "learning_rate": 0.0001, "loss": 1.0588, "step": 140 }, { "epoch": 0.12531940895456062, "grad_norm": 0.4506363570690155, "learning_rate": 0.0001, "loss": 1.0579, "step": 141 }, { "epoch": 0.12620819908899011, "grad_norm": 0.6020563244819641, "learning_rate": 0.0001, "loss": 1.0176, "step": 142 }, { "epoch": 0.1270969892234196, "grad_norm": 0.6274687051773071, "learning_rate": 0.0001, "loss": 1.2257, "step": 143 }, { "epoch": 0.12798577935784913, "grad_norm": 0.49272266030311584, "learning_rate": 0.0001, "loss": 1.0729, "step": 144 }, { "epoch": 0.12887456949227863, "grad_norm": 0.5412996411323547, "learning_rate": 0.0001, "loss": 1.1025, "step": 145 }, { "epoch": 0.12976335962670815, "grad_norm": 0.5278550386428833, "learning_rate": 0.0001, "loss": 1.136, "step": 146 }, { "epoch": 0.13065214976113765, "grad_norm": 0.5578097701072693, "learning_rate": 0.0001, "loss": 1.0407, "step": 147 }, { "epoch": 0.13154093989556717, "grad_norm": 0.5495776534080505, "learning_rate": 0.0001, "loss": 1.084, "step": 148 }, { "epoch": 0.13242973002999667, "grad_norm": 0.6431498527526855, "learning_rate": 0.0001, "loss": 1.0953, "step": 149 }, { "epoch": 0.13331852016442616, "grad_norm": 0.5592057108879089, "learning_rate": 0.0001, "loss": 1.0847, "step": 150 }, { "epoch": 0.1342073102988557, "grad_norm": 0.6379444599151611, "learning_rate": 0.0001, "loss": 1.0631, "step": 151 }, { "epoch": 0.13509610043328518, "grad_norm": 0.5859350562095642, "learning_rate": 0.0001, "loss": 1.069, "step": 152 }, { "epoch": 0.1359848905677147, "grad_norm": 0.5171725153923035, "learning_rate": 0.0001, "loss": 1.0135, "step": 153 }, { "epoch": 0.1368736807021442, "grad_norm": 0.4534023702144623, "learning_rate": 0.0001, "loss": 1.0122, "step": 154 }, { "epoch": 0.13776247083657372, "grad_norm": 0.49811944365501404, "learning_rate": 0.0001, "loss": 1.0196, "step": 155 }, { "epoch": 0.13865126097100322, "grad_norm": 0.46456894278526306, "learning_rate": 0.0001, "loss": 1.0723, "step": 156 }, { "epoch": 0.13954005110543272, "grad_norm": 0.5349675416946411, "learning_rate": 0.0001, "loss": 1.1131, "step": 157 }, { "epoch": 0.14042884123986224, "grad_norm": 0.466975599527359, "learning_rate": 0.0001, "loss": 1.0752, "step": 158 }, { "epoch": 0.14131763137429174, "grad_norm": 0.5469770431518555, "learning_rate": 0.0001, "loss": 1.0607, "step": 159 }, { "epoch": 0.14220642150872126, "grad_norm": 0.5208465456962585, "learning_rate": 0.0001, "loss": 0.9892, "step": 160 }, { "epoch": 0.14309521164315075, "grad_norm": 0.543846845626831, "learning_rate": 0.0001, "loss": 1.0861, "step": 161 }, { "epoch": 0.14398400177758028, "grad_norm": 0.5700931549072266, "learning_rate": 0.0001, "loss": 0.9992, "step": 162 }, { "epoch": 0.14487279191200977, "grad_norm": 0.5593817830085754, "learning_rate": 0.0001, "loss": 1.0803, "step": 163 }, { "epoch": 0.1457615820464393, "grad_norm": 0.5960149765014648, "learning_rate": 0.0001, "loss": 1.1054, "step": 164 }, { "epoch": 0.1466503721808688, "grad_norm": 0.5227658152580261, "learning_rate": 0.0001, "loss": 1.1041, "step": 165 }, { "epoch": 0.1475391623152983, "grad_norm": 0.5318516492843628, "learning_rate": 0.0001, "loss": 1.1705, "step": 166 }, { "epoch": 0.1484279524497278, "grad_norm": 0.49774935841560364, "learning_rate": 0.0001, "loss": 1.0942, "step": 167 }, { "epoch": 0.1493167425841573, "grad_norm": 0.5588480234146118, "learning_rate": 0.0001, "loss": 1.1274, "step": 168 }, { "epoch": 0.15020553271858683, "grad_norm": 0.5155318379402161, "learning_rate": 0.0001, "loss": 1.0727, "step": 169 }, { "epoch": 0.15109432285301633, "grad_norm": 0.5063637495040894, "learning_rate": 0.0001, "loss": 1.1389, "step": 170 }, { "epoch": 0.15198311298744585, "grad_norm": 0.5275964736938477, "learning_rate": 0.0001, "loss": 1.0594, "step": 171 }, { "epoch": 0.15287190312187535, "grad_norm": 0.48888012766838074, "learning_rate": 0.0001, "loss": 1.0238, "step": 172 }, { "epoch": 0.15376069325630484, "grad_norm": 0.6187731623649597, "learning_rate": 0.0001, "loss": 1.0535, "step": 173 }, { "epoch": 0.15464948339073437, "grad_norm": 0.53126460313797, "learning_rate": 0.0001, "loss": 1.026, "step": 174 }, { "epoch": 0.15553827352516386, "grad_norm": 0.5398485064506531, "learning_rate": 0.0001, "loss": 1.069, "step": 175 }, { "epoch": 0.15642706365959338, "grad_norm": 0.5406534075737, "learning_rate": 0.0001, "loss": 1.0529, "step": 176 }, { "epoch": 0.15731585379402288, "grad_norm": 0.48404720425605774, "learning_rate": 0.0001, "loss": 1.0038, "step": 177 }, { "epoch": 0.1582046439284524, "grad_norm": 0.5885335206985474, "learning_rate": 0.0001, "loss": 1.0029, "step": 178 }, { "epoch": 0.1590934340628819, "grad_norm": 0.5530521869659424, "learning_rate": 0.0001, "loss": 1.1171, "step": 179 }, { "epoch": 0.15998222419731142, "grad_norm": 0.5618231892585754, "learning_rate": 0.0001, "loss": 1.1232, "step": 180 }, { "epoch": 0.16087101433174092, "grad_norm": 0.5092931389808655, "learning_rate": 0.0001, "loss": 1.0364, "step": 181 }, { "epoch": 0.16175980446617041, "grad_norm": 0.44638335704803467, "learning_rate": 0.0001, "loss": 1.0221, "step": 182 }, { "epoch": 0.16264859460059994, "grad_norm": 0.4906376004219055, "learning_rate": 0.0001, "loss": 1.023, "step": 183 }, { "epoch": 0.16353738473502943, "grad_norm": 0.5148441195487976, "learning_rate": 0.0001, "loss": 1.0451, "step": 184 }, { "epoch": 0.16442617486945896, "grad_norm": 0.5504122972488403, "learning_rate": 0.0001, "loss": 1.1301, "step": 185 }, { "epoch": 0.16531496500388845, "grad_norm": 0.5042161345481873, "learning_rate": 0.0001, "loss": 1.0545, "step": 186 }, { "epoch": 0.16620375513831798, "grad_norm": 0.4888613820075989, "learning_rate": 0.0001, "loss": 1.0766, "step": 187 }, { "epoch": 0.16709254527274747, "grad_norm": 0.46498018503189087, "learning_rate": 0.0001, "loss": 1.0935, "step": 188 }, { "epoch": 0.16798133540717697, "grad_norm": 0.4573257863521576, "learning_rate": 0.0001, "loss": 1.0511, "step": 189 }, { "epoch": 0.1688701255416065, "grad_norm": 0.4621419608592987, "learning_rate": 0.0001, "loss": 1.026, "step": 190 }, { "epoch": 0.169758915676036, "grad_norm": 0.5240814685821533, "learning_rate": 0.0001, "loss": 0.9897, "step": 191 }, { "epoch": 0.1706477058104655, "grad_norm": 0.4366868734359741, "learning_rate": 0.0001, "loss": 1.0274, "step": 192 }, { "epoch": 0.171536495944895, "grad_norm": 0.574884831905365, "learning_rate": 0.0001, "loss": 1.0192, "step": 193 }, { "epoch": 0.17242528607932453, "grad_norm": 0.5254986882209778, "learning_rate": 0.0001, "loss": 1.0043, "step": 194 }, { "epoch": 0.17331407621375403, "grad_norm": 0.6957873106002808, "learning_rate": 0.0001, "loss": 1.0281, "step": 195 }, { "epoch": 0.17420286634818352, "grad_norm": 0.5131996273994446, "learning_rate": 0.0001, "loss": 1.0363, "step": 196 }, { "epoch": 0.17509165648261305, "grad_norm": 0.5054270029067993, "learning_rate": 0.0001, "loss": 1.0708, "step": 197 }, { "epoch": 0.17598044661704254, "grad_norm": 0.5029573440551758, "learning_rate": 0.0001, "loss": 1.0562, "step": 198 }, { "epoch": 0.17686923675147206, "grad_norm": 0.5139032602310181, "learning_rate": 0.0001, "loss": 0.995, "step": 199 }, { "epoch": 0.17775802688590156, "grad_norm": 0.4500792324542999, "learning_rate": 0.0001, "loss": 1.1012, "step": 200 }, { "epoch": 0.17864681702033108, "grad_norm": 0.5789426565170288, "learning_rate": 0.0001, "loss": 1.1203, "step": 201 }, { "epoch": 0.17953560715476058, "grad_norm": 0.48242732882499695, "learning_rate": 0.0001, "loss": 1.0759, "step": 202 }, { "epoch": 0.1804243972891901, "grad_norm": 0.5667662024497986, "learning_rate": 0.0001, "loss": 1.0595, "step": 203 }, { "epoch": 0.1813131874236196, "grad_norm": 0.6725609302520752, "learning_rate": 0.0001, "loss": 1.0049, "step": 204 }, { "epoch": 0.1822019775580491, "grad_norm": 0.5460247993469238, "learning_rate": 0.0001, "loss": 1.0646, "step": 205 }, { "epoch": 0.18309076769247862, "grad_norm": 0.49915874004364014, "learning_rate": 0.0001, "loss": 1.1236, "step": 206 }, { "epoch": 0.1839795578269081, "grad_norm": 0.522266149520874, "learning_rate": 0.0001, "loss": 1.0688, "step": 207 }, { "epoch": 0.18486834796133764, "grad_norm": 0.4875168800354004, "learning_rate": 0.0001, "loss": 1.0596, "step": 208 }, { "epoch": 0.18575713809576713, "grad_norm": 0.5118414759635925, "learning_rate": 0.0001, "loss": 1.0139, "step": 209 }, { "epoch": 0.18664592823019666, "grad_norm": 0.5071278214454651, "learning_rate": 0.0001, "loss": 1.1415, "step": 210 }, { "epoch": 0.18753471836462615, "grad_norm": 0.5847178101539612, "learning_rate": 0.0001, "loss": 1.1495, "step": 211 }, { "epoch": 0.18842350849905565, "grad_norm": 0.45916759967803955, "learning_rate": 0.0001, "loss": 1.0898, "step": 212 }, { "epoch": 0.18931229863348517, "grad_norm": 0.4967547655105591, "learning_rate": 0.0001, "loss": 0.946, "step": 213 }, { "epoch": 0.19020108876791467, "grad_norm": 0.48724737763404846, "learning_rate": 0.0001, "loss": 1.0353, "step": 214 }, { "epoch": 0.1910898789023442, "grad_norm": 0.4389554262161255, "learning_rate": 0.0001, "loss": 1.0129, "step": 215 }, { "epoch": 0.19197866903677369, "grad_norm": 0.4678506553173065, "learning_rate": 0.0001, "loss": 1.0903, "step": 216 }, { "epoch": 0.1928674591712032, "grad_norm": 0.5293782353401184, "learning_rate": 0.0001, "loss": 1.0739, "step": 217 }, { "epoch": 0.1937562493056327, "grad_norm": 0.45177680253982544, "learning_rate": 0.0001, "loss": 1.1057, "step": 218 }, { "epoch": 0.19464503944006223, "grad_norm": 0.4647086560726166, "learning_rate": 0.0001, "loss": 1.0894, "step": 219 }, { "epoch": 0.19553382957449172, "grad_norm": 0.5464223027229309, "learning_rate": 0.0001, "loss": 1.0606, "step": 220 }, { "epoch": 0.19642261970892122, "grad_norm": 0.5713245868682861, "learning_rate": 0.0001, "loss": 1.0621, "step": 221 }, { "epoch": 0.19731140984335074, "grad_norm": 0.558101236820221, "learning_rate": 0.0001, "loss": 1.0829, "step": 222 }, { "epoch": 0.19820019997778024, "grad_norm": 0.775971531867981, "learning_rate": 0.0001, "loss": 0.9982, "step": 223 }, { "epoch": 0.19908899011220976, "grad_norm": 0.4644327163696289, "learning_rate": 0.0001, "loss": 1.0883, "step": 224 }, { "epoch": 0.19997778024663926, "grad_norm": 0.4939100444316864, "learning_rate": 0.0001, "loss": 1.0367, "step": 225 }, { "epoch": 0.20086657038106878, "grad_norm": 0.43425729870796204, "learning_rate": 0.0001, "loss": 1.0038, "step": 226 }, { "epoch": 0.20175536051549828, "grad_norm": 0.508567750453949, "learning_rate": 0.0001, "loss": 1.1134, "step": 227 }, { "epoch": 0.20264415064992777, "grad_norm": 0.397948294878006, "learning_rate": 0.0001, "loss": 0.9837, "step": 228 }, { "epoch": 0.2035329407843573, "grad_norm": 0.5290727019309998, "learning_rate": 0.0001, "loss": 1.0965, "step": 229 }, { "epoch": 0.2044217309187868, "grad_norm": 0.5163105130195618, "learning_rate": 0.0001, "loss": 0.9927, "step": 230 }, { "epoch": 0.20531052105321632, "grad_norm": 0.4882635474205017, "learning_rate": 0.0001, "loss": 1.0316, "step": 231 }, { "epoch": 0.2061993111876458, "grad_norm": 0.471646785736084, "learning_rate": 0.0001, "loss": 1.0645, "step": 232 }, { "epoch": 0.20708810132207534, "grad_norm": 0.5159053206443787, "learning_rate": 0.0001, "loss": 1.0898, "step": 233 }, { "epoch": 0.20797689145650483, "grad_norm": 0.5597699880599976, "learning_rate": 0.0001, "loss": 1.0726, "step": 234 }, { "epoch": 0.20886568159093435, "grad_norm": 0.463168203830719, "learning_rate": 0.0001, "loss": 1.0349, "step": 235 }, { "epoch": 0.20975447172536385, "grad_norm": 0.47213372588157654, "learning_rate": 0.0001, "loss": 1.08, "step": 236 }, { "epoch": 0.21064326185979335, "grad_norm": 0.46547219157218933, "learning_rate": 0.0001, "loss": 1.084, "step": 237 }, { "epoch": 0.21153205199422287, "grad_norm": 0.4608129858970642, "learning_rate": 0.0001, "loss": 1.0441, "step": 238 }, { "epoch": 0.21242084212865237, "grad_norm": 0.5103969573974609, "learning_rate": 0.0001, "loss": 1.0845, "step": 239 }, { "epoch": 0.2133096322630819, "grad_norm": 0.48750364780426025, "learning_rate": 0.0001, "loss": 1.01, "step": 240 }, { "epoch": 0.21419842239751138, "grad_norm": 0.4955364465713501, "learning_rate": 0.0001, "loss": 1.0824, "step": 241 }, { "epoch": 0.2150872125319409, "grad_norm": 0.4669419527053833, "learning_rate": 0.0001, "loss": 1.0061, "step": 242 }, { "epoch": 0.2159760026663704, "grad_norm": 0.4938861131668091, "learning_rate": 0.0001, "loss": 0.9739, "step": 243 }, { "epoch": 0.2168647928007999, "grad_norm": 0.5369840860366821, "learning_rate": 0.0001, "loss": 1.1085, "step": 244 }, { "epoch": 0.21775358293522942, "grad_norm": 0.5239328742027283, "learning_rate": 0.0001, "loss": 0.9869, "step": 245 }, { "epoch": 0.21864237306965892, "grad_norm": 0.518187403678894, "learning_rate": 0.0001, "loss": 1.046, "step": 246 }, { "epoch": 0.21953116320408844, "grad_norm": 0.44194117188453674, "learning_rate": 0.0001, "loss": 1.0234, "step": 247 }, { "epoch": 0.22041995333851794, "grad_norm": 0.44847285747528076, "learning_rate": 0.0001, "loss": 1.0692, "step": 248 }, { "epoch": 0.22130874347294746, "grad_norm": 0.4702758491039276, "learning_rate": 0.0001, "loss": 1.1231, "step": 249 }, { "epoch": 0.22219753360737696, "grad_norm": 0.43607068061828613, "learning_rate": 0.0001, "loss": 1.0197, "step": 250 }, { "epoch": 0.22308632374180645, "grad_norm": 0.5397356748580933, "learning_rate": 0.0001, "loss": 1.0527, "step": 251 }, { "epoch": 0.22397511387623598, "grad_norm": 0.5345639586448669, "learning_rate": 0.0001, "loss": 1.0119, "step": 252 }, { "epoch": 0.22486390401066547, "grad_norm": 0.45763713121414185, "learning_rate": 0.0001, "loss": 1.0296, "step": 253 }, { "epoch": 0.225752694145095, "grad_norm": 0.49265143275260925, "learning_rate": 0.0001, "loss": 1.0171, "step": 254 }, { "epoch": 0.2266414842795245, "grad_norm": 0.5668004751205444, "learning_rate": 0.0001, "loss": 1.0085, "step": 255 }, { "epoch": 0.22753027441395401, "grad_norm": 0.5550284385681152, "learning_rate": 0.0001, "loss": 1.0006, "step": 256 }, { "epoch": 0.2284190645483835, "grad_norm": 0.45947736501693726, "learning_rate": 0.0001, "loss": 1.0089, "step": 257 }, { "epoch": 0.22930785468281303, "grad_norm": 0.5964300632476807, "learning_rate": 0.0001, "loss": 1.0854, "step": 258 }, { "epoch": 0.23019664481724253, "grad_norm": 0.5322341322898865, "learning_rate": 0.0001, "loss": 1.0521, "step": 259 }, { "epoch": 0.23108543495167203, "grad_norm": 0.48153069615364075, "learning_rate": 0.0001, "loss": 0.9444, "step": 260 }, { "epoch": 0.23197422508610155, "grad_norm": 0.45373499393463135, "learning_rate": 0.0001, "loss": 1.047, "step": 261 }, { "epoch": 0.23286301522053104, "grad_norm": 0.43222519755363464, "learning_rate": 0.0001, "loss": 0.973, "step": 262 }, { "epoch": 0.23375180535496057, "grad_norm": 0.5555695295333862, "learning_rate": 0.0001, "loss": 1.0127, "step": 263 }, { "epoch": 0.23464059548939006, "grad_norm": 0.47663983702659607, "learning_rate": 0.0001, "loss": 1.0688, "step": 264 }, { "epoch": 0.2355293856238196, "grad_norm": 0.5549951791763306, "learning_rate": 0.0001, "loss": 1.0433, "step": 265 }, { "epoch": 0.23641817575824908, "grad_norm": 0.4606119990348816, "learning_rate": 0.0001, "loss": 0.9985, "step": 266 }, { "epoch": 0.23730696589267858, "grad_norm": 0.5273025631904602, "learning_rate": 0.0001, "loss": 1.018, "step": 267 }, { "epoch": 0.2381957560271081, "grad_norm": 0.49982166290283203, "learning_rate": 0.0001, "loss": 0.9592, "step": 268 }, { "epoch": 0.2390845461615376, "grad_norm": 0.5359534621238708, "learning_rate": 0.0001, "loss": 1.0568, "step": 269 }, { "epoch": 0.23997333629596712, "grad_norm": 0.48276546597480774, "learning_rate": 0.0001, "loss": 1.0315, "step": 270 }, { "epoch": 0.24086212643039662, "grad_norm": 0.48728588223457336, "learning_rate": 0.0001, "loss": 1.0827, "step": 271 }, { "epoch": 0.24175091656482614, "grad_norm": 2.621769428253174, "learning_rate": 0.0001, "loss": 0.997, "step": 272 }, { "epoch": 0.24263970669925564, "grad_norm": 0.5768371820449829, "learning_rate": 0.0001, "loss": 0.9872, "step": 273 }, { "epoch": 0.24352849683368516, "grad_norm": 0.5989674925804138, "learning_rate": 0.0001, "loss": 0.9232, "step": 274 }, { "epoch": 0.24441728696811466, "grad_norm": 0.5455823540687561, "learning_rate": 0.0001, "loss": 1.0062, "step": 275 }, { "epoch": 0.24530607710254415, "grad_norm": 0.5664008855819702, "learning_rate": 0.0001, "loss": 1.012, "step": 276 }, { "epoch": 0.24619486723697367, "grad_norm": 0.5294925570487976, "learning_rate": 0.0001, "loss": 1.0285, "step": 277 }, { "epoch": 0.24708365737140317, "grad_norm": 0.5203514099121094, "learning_rate": 0.0001, "loss": 1.0388, "step": 278 }, { "epoch": 0.2479724475058327, "grad_norm": 0.5114679336547852, "learning_rate": 0.0001, "loss": 1.0388, "step": 279 }, { "epoch": 0.2488612376402622, "grad_norm": 0.48817357420921326, "learning_rate": 0.0001, "loss": 0.9768, "step": 280 }, { "epoch": 0.2497500277746917, "grad_norm": 0.4958358108997345, "learning_rate": 0.0001, "loss": 1.0239, "step": 281 }, { "epoch": 0.25063881790912124, "grad_norm": 0.4603129029273987, "learning_rate": 0.0001, "loss": 1.0371, "step": 282 }, { "epoch": 0.25152760804355073, "grad_norm": 0.48228880763053894, "learning_rate": 0.0001, "loss": 1.04, "step": 283 }, { "epoch": 0.25241639817798023, "grad_norm": 0.5364854335784912, "learning_rate": 0.0001, "loss": 1.0049, "step": 284 }, { "epoch": 0.2533051883124097, "grad_norm": 0.46868863701820374, "learning_rate": 0.0001, "loss": 0.9842, "step": 285 }, { "epoch": 0.2541939784468392, "grad_norm": 0.4731464385986328, "learning_rate": 0.0001, "loss": 1.0195, "step": 286 }, { "epoch": 0.25508276858126877, "grad_norm": 0.5144749879837036, "learning_rate": 0.0001, "loss": 1.0673, "step": 287 }, { "epoch": 0.25597155871569827, "grad_norm": 0.44826894998550415, "learning_rate": 0.0001, "loss": 0.9981, "step": 288 }, { "epoch": 0.25686034885012776, "grad_norm": 0.4612467885017395, "learning_rate": 0.0001, "loss": 1.0318, "step": 289 }, { "epoch": 0.25774913898455726, "grad_norm": 0.4774060845375061, "learning_rate": 0.0001, "loss": 0.9806, "step": 290 }, { "epoch": 0.25863792911898675, "grad_norm": 0.4614105820655823, "learning_rate": 0.0001, "loss": 1.0404, "step": 291 }, { "epoch": 0.2595267192534163, "grad_norm": 0.47314900159835815, "learning_rate": 0.0001, "loss": 0.9932, "step": 292 }, { "epoch": 0.2604155093878458, "grad_norm": 0.5009798407554626, "learning_rate": 0.0001, "loss": 1.1416, "step": 293 }, { "epoch": 0.2613042995222753, "grad_norm": 0.4358547329902649, "learning_rate": 0.0001, "loss": 1.0379, "step": 294 }, { "epoch": 0.2621930896567048, "grad_norm": 0.5527064204216003, "learning_rate": 0.0001, "loss": 0.9843, "step": 295 }, { "epoch": 0.26308187979113434, "grad_norm": 0.47958534955978394, "learning_rate": 0.0001, "loss": 1.0235, "step": 296 }, { "epoch": 0.26397066992556384, "grad_norm": 0.4394091069698334, "learning_rate": 0.0001, "loss": 0.9711, "step": 297 }, { "epoch": 0.26485946005999333, "grad_norm": 0.5663338899612427, "learning_rate": 0.0001, "loss": 1.029, "step": 298 }, { "epoch": 0.26574825019442283, "grad_norm": 0.5038536787033081, "learning_rate": 0.0001, "loss": 1.0097, "step": 299 }, { "epoch": 0.2666370403288523, "grad_norm": 0.5386257767677307, "learning_rate": 0.0001, "loss": 1.0539, "step": 300 }, { "epoch": 0.2675258304632819, "grad_norm": 0.4892950654029846, "learning_rate": 0.0001, "loss": 0.99, "step": 301 }, { "epoch": 0.2684146205977114, "grad_norm": 0.5311320424079895, "learning_rate": 0.0001, "loss": 1.0922, "step": 302 }, { "epoch": 0.26930341073214087, "grad_norm": 0.4981628656387329, "learning_rate": 0.0001, "loss": 1.0332, "step": 303 }, { "epoch": 0.27019220086657036, "grad_norm": 0.6175600290298462, "learning_rate": 0.0001, "loss": 1.0552, "step": 304 }, { "epoch": 0.2710809910009999, "grad_norm": 0.5665920376777649, "learning_rate": 0.0001, "loss": 0.9807, "step": 305 }, { "epoch": 0.2719697811354294, "grad_norm": 0.4487343430519104, "learning_rate": 0.0001, "loss": 0.9961, "step": 306 }, { "epoch": 0.2728585712698589, "grad_norm": 0.4994884133338928, "learning_rate": 0.0001, "loss": 0.9809, "step": 307 }, { "epoch": 0.2737473614042884, "grad_norm": 0.5361630916595459, "learning_rate": 0.0001, "loss": 1.0396, "step": 308 }, { "epoch": 0.2746361515387179, "grad_norm": 0.49830833077430725, "learning_rate": 0.0001, "loss": 0.966, "step": 309 }, { "epoch": 0.27552494167314745, "grad_norm": 0.5319890975952148, "learning_rate": 0.0001, "loss": 1.0121, "step": 310 }, { "epoch": 0.27641373180757695, "grad_norm": 0.5802849531173706, "learning_rate": 0.0001, "loss": 1.0198, "step": 311 }, { "epoch": 0.27730252194200644, "grad_norm": 0.5048151016235352, "learning_rate": 0.0001, "loss": 0.9996, "step": 312 }, { "epoch": 0.27819131207643594, "grad_norm": 0.44712725281715393, "learning_rate": 0.0001, "loss": 0.9757, "step": 313 }, { "epoch": 0.27908010221086543, "grad_norm": 0.48854947090148926, "learning_rate": 0.0001, "loss": 1.0514, "step": 314 }, { "epoch": 0.279968892345295, "grad_norm": 0.479911208152771, "learning_rate": 0.0001, "loss": 0.9588, "step": 315 }, { "epoch": 0.2808576824797245, "grad_norm": 0.46088626980781555, "learning_rate": 0.0001, "loss": 1.0577, "step": 316 }, { "epoch": 0.281746472614154, "grad_norm": 0.5798197388648987, "learning_rate": 0.0001, "loss": 1.0011, "step": 317 }, { "epoch": 0.28263526274858347, "grad_norm": 0.45959633588790894, "learning_rate": 0.0001, "loss": 0.9358, "step": 318 }, { "epoch": 0.283524052883013, "grad_norm": 0.4906099736690521, "learning_rate": 0.0001, "loss": 0.9432, "step": 319 }, { "epoch": 0.2844128430174425, "grad_norm": 0.4779829680919647, "learning_rate": 0.0001, "loss": 0.9471, "step": 320 }, { "epoch": 0.285301633151872, "grad_norm": 0.5950011014938354, "learning_rate": 0.0001, "loss": 1.0712, "step": 321 }, { "epoch": 0.2861904232863015, "grad_norm": 0.5539191365242004, "learning_rate": 0.0001, "loss": 1.0329, "step": 322 }, { "epoch": 0.287079213420731, "grad_norm": 0.44738510251045227, "learning_rate": 0.0001, "loss": 0.9935, "step": 323 }, { "epoch": 0.28796800355516056, "grad_norm": 0.4675084054470062, "learning_rate": 0.0001, "loss": 0.9979, "step": 324 }, { "epoch": 0.28885679368959005, "grad_norm": 0.38852187991142273, "learning_rate": 0.0001, "loss": 0.9497, "step": 325 }, { "epoch": 0.28974558382401955, "grad_norm": 0.5008799433708191, "learning_rate": 0.0001, "loss": 0.9904, "step": 326 }, { "epoch": 0.29063437395844904, "grad_norm": 0.4492400288581848, "learning_rate": 0.0001, "loss": 1.0489, "step": 327 }, { "epoch": 0.2915231640928786, "grad_norm": 0.48956772685050964, "learning_rate": 0.0001, "loss": 1.0061, "step": 328 }, { "epoch": 0.2924119542273081, "grad_norm": 0.5612773299217224, "learning_rate": 0.0001, "loss": 1.0947, "step": 329 }, { "epoch": 0.2933007443617376, "grad_norm": 0.5352462530136108, "learning_rate": 0.0001, "loss": 1.1113, "step": 330 }, { "epoch": 0.2941895344961671, "grad_norm": 0.43017029762268066, "learning_rate": 0.0001, "loss": 1.0291, "step": 331 }, { "epoch": 0.2950783246305966, "grad_norm": 0.5087767839431763, "learning_rate": 0.0001, "loss": 1.0897, "step": 332 }, { "epoch": 0.29596711476502613, "grad_norm": 0.38609907031059265, "learning_rate": 0.0001, "loss": 0.9629, "step": 333 }, { "epoch": 0.2968559048994556, "grad_norm": 0.4797438979148865, "learning_rate": 0.0001, "loss": 0.9808, "step": 334 }, { "epoch": 0.2977446950338851, "grad_norm": 0.4882568418979645, "learning_rate": 0.0001, "loss": 1.0812, "step": 335 }, { "epoch": 0.2986334851683146, "grad_norm": 0.4409843385219574, "learning_rate": 0.0001, "loss": 1.0034, "step": 336 }, { "epoch": 0.2995222753027441, "grad_norm": 0.5104953646659851, "learning_rate": 0.0001, "loss": 0.9186, "step": 337 }, { "epoch": 0.30041106543717366, "grad_norm": 0.45493751764297485, "learning_rate": 0.0001, "loss": 0.9059, "step": 338 }, { "epoch": 0.30129985557160316, "grad_norm": 0.4311971068382263, "learning_rate": 0.0001, "loss": 1.042, "step": 339 }, { "epoch": 0.30218864570603265, "grad_norm": 0.43054771423339844, "learning_rate": 0.0001, "loss": 1.0336, "step": 340 }, { "epoch": 0.30307743584046215, "grad_norm": 0.4950178861618042, "learning_rate": 0.0001, "loss": 1.0515, "step": 341 }, { "epoch": 0.3039662259748917, "grad_norm": 0.4074246287345886, "learning_rate": 0.0001, "loss": 0.9163, "step": 342 }, { "epoch": 0.3048550161093212, "grad_norm": 0.45394015312194824, "learning_rate": 0.0001, "loss": 1.0499, "step": 343 }, { "epoch": 0.3057438062437507, "grad_norm": 0.453685998916626, "learning_rate": 0.0001, "loss": 1.021, "step": 344 }, { "epoch": 0.3066325963781802, "grad_norm": 0.4687478542327881, "learning_rate": 0.0001, "loss": 1.0305, "step": 345 }, { "epoch": 0.3075213865126097, "grad_norm": 0.4402327239513397, "learning_rate": 0.0001, "loss": 0.9547, "step": 346 }, { "epoch": 0.30841017664703924, "grad_norm": 0.41369926929473877, "learning_rate": 0.0001, "loss": 0.981, "step": 347 }, { "epoch": 0.30929896678146873, "grad_norm": 0.4821121096611023, "learning_rate": 0.0001, "loss": 0.9427, "step": 348 }, { "epoch": 0.3101877569158982, "grad_norm": 0.4524393379688263, "learning_rate": 0.0001, "loss": 1.0277, "step": 349 }, { "epoch": 0.3110765470503277, "grad_norm": 0.49870559573173523, "learning_rate": 0.0001, "loss": 1.0075, "step": 350 }, { "epoch": 0.3119653371847573, "grad_norm": 0.5591082572937012, "learning_rate": 0.0001, "loss": 0.9901, "step": 351 }, { "epoch": 0.31285412731918677, "grad_norm": 0.4724654257297516, "learning_rate": 0.0001, "loss": 0.9909, "step": 352 }, { "epoch": 0.31374291745361627, "grad_norm": 0.4514271020889282, "learning_rate": 0.0001, "loss": 1.0259, "step": 353 }, { "epoch": 0.31463170758804576, "grad_norm": 0.5684020519256592, "learning_rate": 0.0001, "loss": 0.9933, "step": 354 }, { "epoch": 0.31552049772247526, "grad_norm": 0.442047119140625, "learning_rate": 0.0001, "loss": 1.0394, "step": 355 }, { "epoch": 0.3164092878569048, "grad_norm": 0.47112616896629333, "learning_rate": 0.0001, "loss": 0.9287, "step": 356 }, { "epoch": 0.3172980779913343, "grad_norm": 0.5187697410583496, "learning_rate": 0.0001, "loss": 0.9476, "step": 357 }, { "epoch": 0.3181868681257638, "grad_norm": 0.43961942195892334, "learning_rate": 0.0001, "loss": 0.9474, "step": 358 }, { "epoch": 0.3190756582601933, "grad_norm": 0.47375670075416565, "learning_rate": 0.0001, "loss": 1.0088, "step": 359 }, { "epoch": 0.31996444839462285, "grad_norm": 0.4810321629047394, "learning_rate": 0.0001, "loss": 0.9628, "step": 360 }, { "epoch": 0.32085323852905234, "grad_norm": 0.5187336802482605, "learning_rate": 0.0001, "loss": 1.0055, "step": 361 }, { "epoch": 0.32174202866348184, "grad_norm": 0.47308310866355896, "learning_rate": 0.0001, "loss": 1.0005, "step": 362 }, { "epoch": 0.32263081879791133, "grad_norm": 1.5481101274490356, "learning_rate": 0.0001, "loss": 1.0754, "step": 363 }, { "epoch": 0.32351960893234083, "grad_norm": 0.4808347523212433, "learning_rate": 0.0001, "loss": 1.0567, "step": 364 }, { "epoch": 0.3244083990667704, "grad_norm": 0.40874922275543213, "learning_rate": 0.0001, "loss": 1.0153, "step": 365 }, { "epoch": 0.3252971892011999, "grad_norm": 0.5171230435371399, "learning_rate": 0.0001, "loss": 0.9808, "step": 366 }, { "epoch": 0.3261859793356294, "grad_norm": 0.48159992694854736, "learning_rate": 0.0001, "loss": 0.9873, "step": 367 }, { "epoch": 0.32707476947005887, "grad_norm": 0.44044238328933716, "learning_rate": 0.0001, "loss": 0.9608, "step": 368 }, { "epoch": 0.32796355960448836, "grad_norm": 0.4674980342388153, "learning_rate": 0.0001, "loss": 0.9263, "step": 369 }, { "epoch": 0.3288523497389179, "grad_norm": 0.5395987033843994, "learning_rate": 0.0001, "loss": 0.9548, "step": 370 }, { "epoch": 0.3297411398733474, "grad_norm": 0.49539071321487427, "learning_rate": 0.0001, "loss": 1.0202, "step": 371 }, { "epoch": 0.3306299300077769, "grad_norm": 0.4859803020954132, "learning_rate": 0.0001, "loss": 0.9388, "step": 372 }, { "epoch": 0.3315187201422064, "grad_norm": 0.4504952132701874, "learning_rate": 0.0001, "loss": 1.0092, "step": 373 }, { "epoch": 0.33240751027663595, "grad_norm": 0.5386714339256287, "learning_rate": 0.0001, "loss": 1.0461, "step": 374 }, { "epoch": 0.33329630041106545, "grad_norm": 0.5611424446105957, "learning_rate": 0.0001, "loss": 1.0967, "step": 375 }, { "epoch": 0.33418509054549494, "grad_norm": 0.44047975540161133, "learning_rate": 0.0001, "loss": 0.8973, "step": 376 }, { "epoch": 0.33507388067992444, "grad_norm": 0.5137032866477966, "learning_rate": 0.0001, "loss": 1.0303, "step": 377 }, { "epoch": 0.33596267081435394, "grad_norm": 0.47674351930618286, "learning_rate": 0.0001, "loss": 1.0713, "step": 378 }, { "epoch": 0.3368514609487835, "grad_norm": 0.4222189486026764, "learning_rate": 0.0001, "loss": 0.9894, "step": 379 }, { "epoch": 0.337740251083213, "grad_norm": 0.41975629329681396, "learning_rate": 0.0001, "loss": 0.9526, "step": 380 }, { "epoch": 0.3386290412176425, "grad_norm": 0.4654853045940399, "learning_rate": 0.0001, "loss": 0.8748, "step": 381 }, { "epoch": 0.339517831352072, "grad_norm": 0.39208483695983887, "learning_rate": 0.0001, "loss": 0.9799, "step": 382 }, { "epoch": 0.3404066214865015, "grad_norm": 0.5432955026626587, "learning_rate": 0.0001, "loss": 1.061, "step": 383 }, { "epoch": 0.341295411620931, "grad_norm": 0.44643473625183105, "learning_rate": 0.0001, "loss": 0.9823, "step": 384 }, { "epoch": 0.3421842017553605, "grad_norm": 0.5307298302650452, "learning_rate": 0.0001, "loss": 1.0406, "step": 385 }, { "epoch": 0.34307299188979, "grad_norm": 0.5106935501098633, "learning_rate": 0.0001, "loss": 1.0473, "step": 386 }, { "epoch": 0.3439617820242195, "grad_norm": 0.4799475371837616, "learning_rate": 0.0001, "loss": 1.0546, "step": 387 }, { "epoch": 0.34485057215864906, "grad_norm": 0.37563350796699524, "learning_rate": 0.0001, "loss": 1.0025, "step": 388 }, { "epoch": 0.34573936229307856, "grad_norm": 0.43214109539985657, "learning_rate": 0.0001, "loss": 0.9771, "step": 389 }, { "epoch": 0.34662815242750805, "grad_norm": 0.41184356808662415, "learning_rate": 0.0001, "loss": 0.9997, "step": 390 }, { "epoch": 0.34751694256193755, "grad_norm": 0.49145662784576416, "learning_rate": 0.0001, "loss": 1.0838, "step": 391 }, { "epoch": 0.34840573269636704, "grad_norm": 0.39192360639572144, "learning_rate": 0.0001, "loss": 0.9671, "step": 392 }, { "epoch": 0.3492945228307966, "grad_norm": 0.49615415930747986, "learning_rate": 0.0001, "loss": 0.9974, "step": 393 }, { "epoch": 0.3501833129652261, "grad_norm": 0.48595911264419556, "learning_rate": 0.0001, "loss": 0.9552, "step": 394 }, { "epoch": 0.3510721030996556, "grad_norm": 0.4774535894393921, "learning_rate": 0.0001, "loss": 0.9908, "step": 395 }, { "epoch": 0.3519608932340851, "grad_norm": 0.42019304633140564, "learning_rate": 0.0001, "loss": 0.9919, "step": 396 }, { "epoch": 0.35284968336851463, "grad_norm": 0.4863130450248718, "learning_rate": 0.0001, "loss": 0.9888, "step": 397 }, { "epoch": 0.35373847350294413, "grad_norm": 0.5684654712677002, "learning_rate": 0.0001, "loss": 1.0284, "step": 398 }, { "epoch": 0.3546272636373736, "grad_norm": 0.466160386800766, "learning_rate": 0.0001, "loss": 0.999, "step": 399 }, { "epoch": 0.3555160537718031, "grad_norm": 0.4259321689605713, "learning_rate": 0.0001, "loss": 0.9475, "step": 400 }, { "epoch": 0.3564048439062326, "grad_norm": 0.4329473376274109, "learning_rate": 0.0001, "loss": 0.971, "step": 401 }, { "epoch": 0.35729363404066217, "grad_norm": 0.44069400429725647, "learning_rate": 0.0001, "loss": 1.0213, "step": 402 }, { "epoch": 0.35818242417509166, "grad_norm": 0.46225759387016296, "learning_rate": 0.0001, "loss": 0.9891, "step": 403 }, { "epoch": 0.35907121430952116, "grad_norm": 0.4280588924884796, "learning_rate": 0.0001, "loss": 1.1054, "step": 404 }, { "epoch": 0.35996000444395065, "grad_norm": 0.4039415419101715, "learning_rate": 0.0001, "loss": 0.9852, "step": 405 }, { "epoch": 0.3608487945783802, "grad_norm": 0.45364150404930115, "learning_rate": 0.0001, "loss": 1.0471, "step": 406 }, { "epoch": 0.3617375847128097, "grad_norm": 0.5891258716583252, "learning_rate": 0.0001, "loss": 1.103, "step": 407 }, { "epoch": 0.3626263748472392, "grad_norm": 0.47017595171928406, "learning_rate": 0.0001, "loss": 0.8947, "step": 408 }, { "epoch": 0.3635151649816687, "grad_norm": 0.43023166060447693, "learning_rate": 0.0001, "loss": 1.0324, "step": 409 }, { "epoch": 0.3644039551160982, "grad_norm": 0.45753541588783264, "learning_rate": 0.0001, "loss": 0.9737, "step": 410 }, { "epoch": 0.36529274525052774, "grad_norm": 0.48378103971481323, "learning_rate": 0.0001, "loss": 1.059, "step": 411 }, { "epoch": 0.36618153538495724, "grad_norm": 0.4665151834487915, "learning_rate": 0.0001, "loss": 0.9722, "step": 412 }, { "epoch": 0.36707032551938673, "grad_norm": 5.9081315994262695, "learning_rate": 0.0001, "loss": 0.9235, "step": 413 }, { "epoch": 0.3679591156538162, "grad_norm": 0.42533057928085327, "learning_rate": 0.0001, "loss": 0.9788, "step": 414 }, { "epoch": 0.3688479057882458, "grad_norm": 0.5004814267158508, "learning_rate": 0.0001, "loss": 1.003, "step": 415 }, { "epoch": 0.3697366959226753, "grad_norm": 0.610554039478302, "learning_rate": 0.0001, "loss": 1.0865, "step": 416 }, { "epoch": 0.37062548605710477, "grad_norm": 0.49171337485313416, "learning_rate": 0.0001, "loss": 1.0182, "step": 417 }, { "epoch": 0.37151427619153427, "grad_norm": 0.47732532024383545, "learning_rate": 0.0001, "loss": 0.9888, "step": 418 }, { "epoch": 0.37240306632596376, "grad_norm": 0.418824702501297, "learning_rate": 0.0001, "loss": 1.0082, "step": 419 }, { "epoch": 0.3732918564603933, "grad_norm": 0.4232107698917389, "learning_rate": 0.0001, "loss": 0.9977, "step": 420 }, { "epoch": 0.3741806465948228, "grad_norm": 0.45239681005477905, "learning_rate": 0.0001, "loss": 0.9741, "step": 421 }, { "epoch": 0.3750694367292523, "grad_norm": 0.4842644929885864, "learning_rate": 0.0001, "loss": 0.9677, "step": 422 }, { "epoch": 0.3759582268636818, "grad_norm": 0.478947252035141, "learning_rate": 0.0001, "loss": 0.9718, "step": 423 }, { "epoch": 0.3768470169981113, "grad_norm": 0.5387521386146545, "learning_rate": 0.0001, "loss": 1.0737, "step": 424 }, { "epoch": 0.37773580713254085, "grad_norm": 0.5804430246353149, "learning_rate": 0.0001, "loss": 1.0273, "step": 425 }, { "epoch": 0.37862459726697034, "grad_norm": 0.4965243935585022, "learning_rate": 0.0001, "loss": 1.1163, "step": 426 }, { "epoch": 0.37951338740139984, "grad_norm": 0.5330107808113098, "learning_rate": 0.0001, "loss": 1.0256, "step": 427 }, { "epoch": 0.38040217753582933, "grad_norm": 0.4444815218448639, "learning_rate": 0.0001, "loss": 0.9643, "step": 428 }, { "epoch": 0.3812909676702589, "grad_norm": 0.5239233374595642, "learning_rate": 0.0001, "loss": 0.9893, "step": 429 }, { "epoch": 0.3821797578046884, "grad_norm": 0.4272307753562927, "learning_rate": 0.0001, "loss": 0.9896, "step": 430 }, { "epoch": 0.3830685479391179, "grad_norm": 0.422820121049881, "learning_rate": 0.0001, "loss": 0.9722, "step": 431 }, { "epoch": 0.38395733807354737, "grad_norm": 0.45066556334495544, "learning_rate": 0.0001, "loss": 0.9266, "step": 432 }, { "epoch": 0.38484612820797687, "grad_norm": 0.40709954500198364, "learning_rate": 0.0001, "loss": 0.9006, "step": 433 }, { "epoch": 0.3857349183424064, "grad_norm": 0.45301303267478943, "learning_rate": 0.0001, "loss": 0.9844, "step": 434 }, { "epoch": 0.3866237084768359, "grad_norm": 0.4441263675689697, "learning_rate": 0.0001, "loss": 1.0039, "step": 435 }, { "epoch": 0.3875124986112654, "grad_norm": 0.4564574062824249, "learning_rate": 0.0001, "loss": 1.0397, "step": 436 }, { "epoch": 0.3884012887456949, "grad_norm": 0.5104243755340576, "learning_rate": 0.0001, "loss": 1.06, "step": 437 }, { "epoch": 0.38929007888012446, "grad_norm": 0.4639466106891632, "learning_rate": 0.0001, "loss": 1.028, "step": 438 }, { "epoch": 0.39017886901455395, "grad_norm": 0.4268662929534912, "learning_rate": 0.0001, "loss": 0.9971, "step": 439 }, { "epoch": 0.39106765914898345, "grad_norm": 0.4981948435306549, "learning_rate": 0.0001, "loss": 0.9946, "step": 440 }, { "epoch": 0.39195644928341294, "grad_norm": 0.4488162696361542, "learning_rate": 0.0001, "loss": 0.9536, "step": 441 }, { "epoch": 0.39284523941784244, "grad_norm": 0.4636482298374176, "learning_rate": 0.0001, "loss": 1.1277, "step": 442 }, { "epoch": 0.393734029552272, "grad_norm": 0.46133679151535034, "learning_rate": 0.0001, "loss": 0.9451, "step": 443 }, { "epoch": 0.3946228196867015, "grad_norm": 0.4021439850330353, "learning_rate": 0.0001, "loss": 1.0127, "step": 444 }, { "epoch": 0.395511609821131, "grad_norm": 0.46771371364593506, "learning_rate": 0.0001, "loss": 1.0103, "step": 445 }, { "epoch": 0.3964003999555605, "grad_norm": 0.5152266025543213, "learning_rate": 0.0001, "loss": 1.0373, "step": 446 }, { "epoch": 0.39728919008999, "grad_norm": 0.42209741473197937, "learning_rate": 0.0001, "loss": 0.9229, "step": 447 }, { "epoch": 0.3981779802244195, "grad_norm": 0.46712151169776917, "learning_rate": 0.0001, "loss": 0.9512, "step": 448 }, { "epoch": 0.399066770358849, "grad_norm": 4.181483268737793, "learning_rate": 0.0001, "loss": 0.9367, "step": 449 }, { "epoch": 0.3999555604932785, "grad_norm": 0.3845648467540741, "learning_rate": 0.0001, "loss": 0.9741, "step": 450 }, { "epoch": 0.400844350627708, "grad_norm": 2.03074049949646, "learning_rate": 0.0001, "loss": 0.9775, "step": 451 }, { "epoch": 0.40173314076213756, "grad_norm": 0.5015456080436707, "learning_rate": 0.0001, "loss": 1.0234, "step": 452 }, { "epoch": 0.40262193089656706, "grad_norm": 0.4838273525238037, "learning_rate": 0.0001, "loss": 0.9653, "step": 453 }, { "epoch": 0.40351072103099656, "grad_norm": 0.5604076981544495, "learning_rate": 0.0001, "loss": 0.9732, "step": 454 }, { "epoch": 0.40439951116542605, "grad_norm": 0.5440881252288818, "learning_rate": 0.0001, "loss": 1.0301, "step": 455 }, { "epoch": 0.40528830129985555, "grad_norm": 0.9089745879173279, "learning_rate": 0.0001, "loss": 1.0371, "step": 456 }, { "epoch": 0.4061770914342851, "grad_norm": 0.4472905993461609, "learning_rate": 0.0001, "loss": 1.0375, "step": 457 }, { "epoch": 0.4070658815687146, "grad_norm": 0.4706718325614929, "learning_rate": 0.0001, "loss": 0.9966, "step": 458 }, { "epoch": 0.4079546717031441, "grad_norm": 0.48551270365715027, "learning_rate": 0.0001, "loss": 0.9577, "step": 459 }, { "epoch": 0.4088434618375736, "grad_norm": 0.45593079924583435, "learning_rate": 0.0001, "loss": 0.99, "step": 460 }, { "epoch": 0.40973225197200314, "grad_norm": 0.4240357577800751, "learning_rate": 0.0001, "loss": 1.0105, "step": 461 }, { "epoch": 0.41062104210643263, "grad_norm": 0.44513946771621704, "learning_rate": 0.0001, "loss": 0.9697, "step": 462 }, { "epoch": 0.41150983224086213, "grad_norm": 0.43077144026756287, "learning_rate": 0.0001, "loss": 1.0168, "step": 463 }, { "epoch": 0.4123986223752916, "grad_norm": 0.47029492259025574, "learning_rate": 0.0001, "loss": 0.94, "step": 464 }, { "epoch": 0.4132874125097211, "grad_norm": 0.44067198038101196, "learning_rate": 0.0001, "loss": 0.9443, "step": 465 }, { "epoch": 0.41417620264415067, "grad_norm": 0.6291742324829102, "learning_rate": 0.0001, "loss": 0.9927, "step": 466 }, { "epoch": 0.41506499277858017, "grad_norm": 0.49682438373565674, "learning_rate": 0.0001, "loss": 0.9411, "step": 467 }, { "epoch": 0.41595378291300966, "grad_norm": 0.4587923586368561, "learning_rate": 0.0001, "loss": 1.0423, "step": 468 }, { "epoch": 0.41684257304743916, "grad_norm": 0.5601244568824768, "learning_rate": 0.0001, "loss": 1.0682, "step": 469 }, { "epoch": 0.4177313631818687, "grad_norm": 0.4534125328063965, "learning_rate": 0.0001, "loss": 1.0591, "step": 470 }, { "epoch": 0.4186201533162982, "grad_norm": 0.469804972410202, "learning_rate": 0.0001, "loss": 1.0893, "step": 471 }, { "epoch": 0.4195089434507277, "grad_norm": 0.4769747257232666, "learning_rate": 0.0001, "loss": 0.9974, "step": 472 }, { "epoch": 0.4203977335851572, "grad_norm": 0.4488048553466797, "learning_rate": 0.0001, "loss": 0.9681, "step": 473 }, { "epoch": 0.4212865237195867, "grad_norm": 0.4423130750656128, "learning_rate": 0.0001, "loss": 1.0732, "step": 474 }, { "epoch": 0.42217531385401624, "grad_norm": 0.40248462557792664, "learning_rate": 0.0001, "loss": 0.9631, "step": 475 }, { "epoch": 0.42306410398844574, "grad_norm": 0.4668256640434265, "learning_rate": 0.0001, "loss": 0.9771, "step": 476 }, { "epoch": 0.42395289412287523, "grad_norm": 0.46433717012405396, "learning_rate": 0.0001, "loss": 1.02, "step": 477 }, { "epoch": 0.42484168425730473, "grad_norm": 0.8196445107460022, "learning_rate": 0.0001, "loss": 0.9596, "step": 478 }, { "epoch": 0.4257304743917342, "grad_norm": 0.5228718519210815, "learning_rate": 0.0001, "loss": 1.0023, "step": 479 }, { "epoch": 0.4266192645261638, "grad_norm": 0.4933236241340637, "learning_rate": 0.0001, "loss": 0.936, "step": 480 }, { "epoch": 0.4275080546605933, "grad_norm": 0.4738243818283081, "learning_rate": 0.0001, "loss": 0.9954, "step": 481 }, { "epoch": 0.42839684479502277, "grad_norm": 0.4303346872329712, "learning_rate": 0.0001, "loss": 1.0311, "step": 482 }, { "epoch": 0.42928563492945226, "grad_norm": 0.3946980834007263, "learning_rate": 0.0001, "loss": 0.9655, "step": 483 }, { "epoch": 0.4301744250638818, "grad_norm": 0.4781205952167511, "learning_rate": 0.0001, "loss": 1.0161, "step": 484 }, { "epoch": 0.4310632151983113, "grad_norm": 0.41279280185699463, "learning_rate": 0.0001, "loss": 0.984, "step": 485 }, { "epoch": 0.4319520053327408, "grad_norm": 0.42448437213897705, "learning_rate": 0.0001, "loss": 1.0044, "step": 486 }, { "epoch": 0.4328407954671703, "grad_norm": 0.42571067810058594, "learning_rate": 0.0001, "loss": 0.9963, "step": 487 }, { "epoch": 0.4337295856015998, "grad_norm": 0.43724721670150757, "learning_rate": 0.0001, "loss": 0.9868, "step": 488 }, { "epoch": 0.43461837573602935, "grad_norm": 0.506829023361206, "learning_rate": 0.0001, "loss": 1.045, "step": 489 }, { "epoch": 0.43550716587045885, "grad_norm": 0.3961758315563202, "learning_rate": 0.0001, "loss": 0.9235, "step": 490 }, { "epoch": 0.43639595600488834, "grad_norm": 0.41079434752464294, "learning_rate": 0.0001, "loss": 0.9944, "step": 491 }, { "epoch": 0.43728474613931784, "grad_norm": 0.4370504915714264, "learning_rate": 0.0001, "loss": 0.9925, "step": 492 }, { "epoch": 0.4381735362737474, "grad_norm": 0.4316272735595703, "learning_rate": 0.0001, "loss": 1.0229, "step": 493 }, { "epoch": 0.4390623264081769, "grad_norm": 0.4933658540248871, "learning_rate": 0.0001, "loss": 1.0974, "step": 494 }, { "epoch": 0.4399511165426064, "grad_norm": 0.42228391766548157, "learning_rate": 0.0001, "loss": 1.0277, "step": 495 }, { "epoch": 0.4408399066770359, "grad_norm": 0.45421552658081055, "learning_rate": 0.0001, "loss": 0.8842, "step": 496 }, { "epoch": 0.44172869681146537, "grad_norm": 0.4748377501964569, "learning_rate": 0.0001, "loss": 0.9488, "step": 497 }, { "epoch": 0.4426174869458949, "grad_norm": 0.4040847718715668, "learning_rate": 0.0001, "loss": 0.9696, "step": 498 }, { "epoch": 0.4435062770803244, "grad_norm": 0.42285656929016113, "learning_rate": 0.0001, "loss": 0.9637, "step": 499 }, { "epoch": 0.4443950672147539, "grad_norm": 0.3885044455528259, "learning_rate": 0.0001, "loss": 0.985, "step": 500 }, { "epoch": 0.4452838573491834, "grad_norm": 0.45078715682029724, "learning_rate": 0.0001, "loss": 0.9538, "step": 501 }, { "epoch": 0.4461726474836129, "grad_norm": 0.4214499294757843, "learning_rate": 0.0001, "loss": 0.8488, "step": 502 }, { "epoch": 0.44706143761804246, "grad_norm": 0.6104442477226257, "learning_rate": 0.0001, "loss": 1.0681, "step": 503 }, { "epoch": 0.44795022775247195, "grad_norm": 0.49673840403556824, "learning_rate": 0.0001, "loss": 0.9875, "step": 504 }, { "epoch": 0.44883901788690145, "grad_norm": 0.4738848805427551, "learning_rate": 0.0001, "loss": 0.9738, "step": 505 }, { "epoch": 0.44972780802133094, "grad_norm": 0.4084794223308563, "learning_rate": 0.0001, "loss": 0.9617, "step": 506 }, { "epoch": 0.4506165981557605, "grad_norm": 0.43893682956695557, "learning_rate": 0.0001, "loss": 0.9553, "step": 507 }, { "epoch": 0.45150538829019, "grad_norm": 0.4741009771823883, "learning_rate": 0.0001, "loss": 0.9863, "step": 508 }, { "epoch": 0.4523941784246195, "grad_norm": 0.42300963401794434, "learning_rate": 0.0001, "loss": 0.9669, "step": 509 }, { "epoch": 0.453282968559049, "grad_norm": 0.3663196265697479, "learning_rate": 0.0001, "loss": 0.9606, "step": 510 }, { "epoch": 0.4541717586934785, "grad_norm": 0.4289178252220154, "learning_rate": 0.0001, "loss": 0.9093, "step": 511 }, { "epoch": 0.45506054882790803, "grad_norm": 0.41236793994903564, "learning_rate": 0.0001, "loss": 0.9582, "step": 512 }, { "epoch": 0.4559493389623375, "grad_norm": 0.38569167256355286, "learning_rate": 0.0001, "loss": 0.9804, "step": 513 }, { "epoch": 0.456838129096767, "grad_norm": 0.42629268765449524, "learning_rate": 0.0001, "loss": 0.9111, "step": 514 }, { "epoch": 0.4577269192311965, "grad_norm": 0.4302125573158264, "learning_rate": 0.0001, "loss": 0.9435, "step": 515 }, { "epoch": 0.45861570936562607, "grad_norm": 0.46809178590774536, "learning_rate": 0.0001, "loss": 1.0059, "step": 516 }, { "epoch": 0.45950449950005556, "grad_norm": 0.4712200164794922, "learning_rate": 0.0001, "loss": 0.9681, "step": 517 }, { "epoch": 0.46039328963448506, "grad_norm": 0.4966319501399994, "learning_rate": 0.0001, "loss": 1.0278, "step": 518 }, { "epoch": 0.46128207976891455, "grad_norm": 0.41810038685798645, "learning_rate": 0.0001, "loss": 1.0128, "step": 519 }, { "epoch": 0.46217086990334405, "grad_norm": 0.4766371250152588, "learning_rate": 0.0001, "loss": 0.9306, "step": 520 }, { "epoch": 0.4630596600377736, "grad_norm": 0.47531601786613464, "learning_rate": 0.0001, "loss": 1.0213, "step": 521 }, { "epoch": 0.4639484501722031, "grad_norm": 0.4246899485588074, "learning_rate": 0.0001, "loss": 0.9415, "step": 522 }, { "epoch": 0.4648372403066326, "grad_norm": 0.38327568769454956, "learning_rate": 0.0001, "loss": 0.976, "step": 523 }, { "epoch": 0.4657260304410621, "grad_norm": 0.45601171255111694, "learning_rate": 0.0001, "loss": 0.9481, "step": 524 }, { "epoch": 0.4666148205754916, "grad_norm": 0.4424237608909607, "learning_rate": 0.0001, "loss": 0.974, "step": 525 }, { "epoch": 0.46750361070992114, "grad_norm": 0.45187127590179443, "learning_rate": 0.0001, "loss": 1.1636, "step": 526 }, { "epoch": 0.46839240084435063, "grad_norm": 0.44865912199020386, "learning_rate": 0.0001, "loss": 0.9951, "step": 527 }, { "epoch": 0.4692811909787801, "grad_norm": 0.40876081585884094, "learning_rate": 0.0001, "loss": 0.9765, "step": 528 }, { "epoch": 0.4701699811132096, "grad_norm": 0.3936661183834076, "learning_rate": 0.0001, "loss": 0.9935, "step": 529 }, { "epoch": 0.4710587712476392, "grad_norm": 0.422152578830719, "learning_rate": 0.0001, "loss": 0.9956, "step": 530 }, { "epoch": 0.47194756138206867, "grad_norm": 0.40520817041397095, "learning_rate": 0.0001, "loss": 0.9609, "step": 531 }, { "epoch": 0.47283635151649817, "grad_norm": 0.43614640831947327, "learning_rate": 0.0001, "loss": 1.0283, "step": 532 }, { "epoch": 0.47372514165092766, "grad_norm": 0.4783385694026947, "learning_rate": 0.0001, "loss": 1.0388, "step": 533 }, { "epoch": 0.47461393178535716, "grad_norm": 0.4622490704059601, "learning_rate": 0.0001, "loss": 1.088, "step": 534 }, { "epoch": 0.4755027219197867, "grad_norm": 0.43488460779190063, "learning_rate": 0.0001, "loss": 0.9386, "step": 535 }, { "epoch": 0.4763915120542162, "grad_norm": 0.40164855122566223, "learning_rate": 0.0001, "loss": 0.9476, "step": 536 }, { "epoch": 0.4772803021886457, "grad_norm": 0.35324132442474365, "learning_rate": 0.0001, "loss": 0.9957, "step": 537 }, { "epoch": 0.4781690923230752, "grad_norm": 0.39218297600746155, "learning_rate": 0.0001, "loss": 0.9682, "step": 538 }, { "epoch": 0.47905788245750475, "grad_norm": 0.4563474953174591, "learning_rate": 0.0001, "loss": 0.9479, "step": 539 }, { "epoch": 0.47994667259193424, "grad_norm": 0.4176347255706787, "learning_rate": 0.0001, "loss": 0.9101, "step": 540 }, { "epoch": 0.48083546272636374, "grad_norm": 0.38945290446281433, "learning_rate": 0.0001, "loss": 0.9083, "step": 541 }, { "epoch": 0.48172425286079323, "grad_norm": 0.35722264647483826, "learning_rate": 0.0001, "loss": 0.8814, "step": 542 }, { "epoch": 0.48261304299522273, "grad_norm": 0.43666157126426697, "learning_rate": 0.0001, "loss": 0.9738, "step": 543 }, { "epoch": 0.4835018331296523, "grad_norm": 0.462503045797348, "learning_rate": 0.0001, "loss": 0.9315, "step": 544 }, { "epoch": 0.4843906232640818, "grad_norm": 0.48999452590942383, "learning_rate": 0.0001, "loss": 1.024, "step": 545 }, { "epoch": 0.4852794133985113, "grad_norm": 0.5173038840293884, "learning_rate": 0.0001, "loss": 0.885, "step": 546 }, { "epoch": 0.48616820353294077, "grad_norm": 0.442202627658844, "learning_rate": 0.0001, "loss": 0.9667, "step": 547 }, { "epoch": 0.4870569936673703, "grad_norm": 0.5005183219909668, "learning_rate": 0.0001, "loss": 1.0566, "step": 548 }, { "epoch": 0.4879457838017998, "grad_norm": 0.3552423417568207, "learning_rate": 0.0001, "loss": 0.9279, "step": 549 }, { "epoch": 0.4888345739362293, "grad_norm": 0.46777617931365967, "learning_rate": 0.0001, "loss": 0.9534, "step": 550 }, { "epoch": 0.4897233640706588, "grad_norm": 0.43491941690444946, "learning_rate": 0.0001, "loss": 1.069, "step": 551 }, { "epoch": 0.4906121542050883, "grad_norm": 0.4217972755432129, "learning_rate": 0.0001, "loss": 1.0236, "step": 552 }, { "epoch": 0.49150094433951785, "grad_norm": 0.43809306621551514, "learning_rate": 0.0001, "loss": 0.9967, "step": 553 }, { "epoch": 0.49238973447394735, "grad_norm": 0.41759732365608215, "learning_rate": 0.0001, "loss": 0.9486, "step": 554 }, { "epoch": 0.49327852460837684, "grad_norm": 0.4331734776496887, "learning_rate": 0.0001, "loss": 0.9531, "step": 555 }, { "epoch": 0.49416731474280634, "grad_norm": 0.4199782907962799, "learning_rate": 0.0001, "loss": 0.9904, "step": 556 }, { "epoch": 0.49505610487723584, "grad_norm": 0.4305680990219116, "learning_rate": 0.0001, "loss": 0.909, "step": 557 }, { "epoch": 0.4959448950116654, "grad_norm": 0.5136706233024597, "learning_rate": 0.0001, "loss": 1.121, "step": 558 }, { "epoch": 0.4968336851460949, "grad_norm": 0.429557204246521, "learning_rate": 0.0001, "loss": 0.9809, "step": 559 }, { "epoch": 0.4977224752805244, "grad_norm": 0.41944217681884766, "learning_rate": 0.0001, "loss": 0.9802, "step": 560 }, { "epoch": 0.4986112654149539, "grad_norm": 0.43246370553970337, "learning_rate": 0.0001, "loss": 0.9973, "step": 561 }, { "epoch": 0.4995000555493834, "grad_norm": 0.3798202872276306, "learning_rate": 0.0001, "loss": 0.9367, "step": 562 }, { "epoch": 0.5003888456838129, "grad_norm": 0.43042704463005066, "learning_rate": 0.0001, "loss": 1.075, "step": 563 }, { "epoch": 0.5012776358182425, "grad_norm": 0.3733251392841339, "learning_rate": 0.0001, "loss": 0.9731, "step": 564 }, { "epoch": 0.5021664259526719, "grad_norm": 0.423252135515213, "learning_rate": 0.0001, "loss": 0.9788, "step": 565 }, { "epoch": 0.5030552160871015, "grad_norm": 0.43651118874549866, "learning_rate": 0.0001, "loss": 0.9289, "step": 566 }, { "epoch": 0.5039440062215309, "grad_norm": 0.44451677799224854, "learning_rate": 0.0001, "loss": 0.938, "step": 567 }, { "epoch": 0.5048327963559605, "grad_norm": 0.4594007730484009, "learning_rate": 0.0001, "loss": 0.9562, "step": 568 }, { "epoch": 0.50572158649039, "grad_norm": 0.5364981889724731, "learning_rate": 0.0001, "loss": 0.9239, "step": 569 }, { "epoch": 0.5066103766248194, "grad_norm": 0.45558059215545654, "learning_rate": 0.0001, "loss": 1.0017, "step": 570 }, { "epoch": 0.507499166759249, "grad_norm": 0.42612048983573914, "learning_rate": 0.0001, "loss": 0.9401, "step": 571 }, { "epoch": 0.5083879568936784, "grad_norm": 0.4538538157939911, "learning_rate": 0.0001, "loss": 0.9979, "step": 572 }, { "epoch": 0.509276747028108, "grad_norm": 1.2720234394073486, "learning_rate": 0.0001, "loss": 1.0537, "step": 573 }, { "epoch": 0.5101655371625375, "grad_norm": 0.4332892894744873, "learning_rate": 0.0001, "loss": 0.9529, "step": 574 }, { "epoch": 0.511054327296967, "grad_norm": 0.42090901732444763, "learning_rate": 0.0001, "loss": 0.8948, "step": 575 }, { "epoch": 0.5119431174313965, "grad_norm": 0.4180974066257477, "learning_rate": 0.0001, "loss": 0.9472, "step": 576 }, { "epoch": 0.512831907565826, "grad_norm": 0.44674041867256165, "learning_rate": 0.0001, "loss": 1.0032, "step": 577 }, { "epoch": 0.5137206977002555, "grad_norm": 0.38132181763648987, "learning_rate": 0.0001, "loss": 0.9722, "step": 578 }, { "epoch": 0.5146094878346851, "grad_norm": 0.425112247467041, "learning_rate": 0.0001, "loss": 1.0104, "step": 579 }, { "epoch": 0.5154982779691145, "grad_norm": 0.4420212507247925, "learning_rate": 0.0001, "loss": 0.8951, "step": 580 }, { "epoch": 0.5163870681035441, "grad_norm": 0.4047195613384247, "learning_rate": 0.0001, "loss": 0.9103, "step": 581 }, { "epoch": 0.5172758582379735, "grad_norm": 0.4206266701221466, "learning_rate": 0.0001, "loss": 0.9872, "step": 582 }, { "epoch": 0.5181646483724031, "grad_norm": 0.46447429060935974, "learning_rate": 0.0001, "loss": 0.9636, "step": 583 }, { "epoch": 0.5190534385068326, "grad_norm": 0.467122882604599, "learning_rate": 0.0001, "loss": 0.9556, "step": 584 }, { "epoch": 0.519942228641262, "grad_norm": 0.4438915252685547, "learning_rate": 0.0001, "loss": 0.9775, "step": 585 }, { "epoch": 0.5208310187756916, "grad_norm": 0.4225422441959381, "learning_rate": 0.0001, "loss": 0.9315, "step": 586 }, { "epoch": 0.5217198089101212, "grad_norm": 0.3874359726905823, "learning_rate": 0.0001, "loss": 0.9652, "step": 587 }, { "epoch": 0.5226085990445506, "grad_norm": 0.4789721369743347, "learning_rate": 0.0001, "loss": 0.9336, "step": 588 }, { "epoch": 0.5234973891789801, "grad_norm": 0.4012058675289154, "learning_rate": 0.0001, "loss": 0.9792, "step": 589 }, { "epoch": 0.5243861793134096, "grad_norm": 0.4494520425796509, "learning_rate": 0.0001, "loss": 0.9143, "step": 590 }, { "epoch": 0.5252749694478391, "grad_norm": 0.44614073634147644, "learning_rate": 0.0001, "loss": 0.9286, "step": 591 }, { "epoch": 0.5261637595822687, "grad_norm": 0.4147653579711914, "learning_rate": 0.0001, "loss": 0.8982, "step": 592 }, { "epoch": 0.5270525497166981, "grad_norm": 0.4356852173805237, "learning_rate": 0.0001, "loss": 0.9263, "step": 593 }, { "epoch": 0.5279413398511277, "grad_norm": 0.4420524537563324, "learning_rate": 0.0001, "loss": 0.969, "step": 594 }, { "epoch": 0.5288301299855571, "grad_norm": 0.4878003001213074, "learning_rate": 0.0001, "loss": 1.0299, "step": 595 }, { "epoch": 0.5297189201199867, "grad_norm": 0.4248897433280945, "learning_rate": 0.0001, "loss": 1.0166, "step": 596 }, { "epoch": 0.5306077102544162, "grad_norm": 0.3686140179634094, "learning_rate": 0.0001, "loss": 0.9452, "step": 597 }, { "epoch": 0.5314965003888457, "grad_norm": 0.3767171800136566, "learning_rate": 0.0001, "loss": 1.0059, "step": 598 }, { "epoch": 0.5323852905232752, "grad_norm": 0.41753292083740234, "learning_rate": 0.0001, "loss": 0.9117, "step": 599 }, { "epoch": 0.5332740806577047, "grad_norm": 0.45836883783340454, "learning_rate": 0.0001, "loss": 1.0302, "step": 600 }, { "epoch": 0.5341628707921342, "grad_norm": 0.40416955947875977, "learning_rate": 0.0001, "loss": 0.9387, "step": 601 }, { "epoch": 0.5350516609265638, "grad_norm": 0.4028719365596771, "learning_rate": 0.0001, "loss": 1.0046, "step": 602 }, { "epoch": 0.5359404510609932, "grad_norm": 0.42004886269569397, "learning_rate": 0.0001, "loss": 1.0475, "step": 603 }, { "epoch": 0.5368292411954227, "grad_norm": 0.44581007957458496, "learning_rate": 0.0001, "loss": 0.8798, "step": 604 }, { "epoch": 0.5377180313298522, "grad_norm": 0.4007730782032013, "learning_rate": 0.0001, "loss": 0.9578, "step": 605 }, { "epoch": 0.5386068214642817, "grad_norm": 0.45472684502601624, "learning_rate": 0.0001, "loss": 0.9609, "step": 606 }, { "epoch": 0.5394956115987113, "grad_norm": 0.45508822798728943, "learning_rate": 0.0001, "loss": 0.9699, "step": 607 }, { "epoch": 0.5403844017331407, "grad_norm": 0.40522894263267517, "learning_rate": 0.0001, "loss": 0.9791, "step": 608 }, { "epoch": 0.5412731918675703, "grad_norm": 0.42480531334877014, "learning_rate": 0.0001, "loss": 1.041, "step": 609 }, { "epoch": 0.5421619820019998, "grad_norm": 0.3351707458496094, "learning_rate": 0.0001, "loss": 1.0673, "step": 610 }, { "epoch": 0.5430507721364293, "grad_norm": 0.5073234438896179, "learning_rate": 0.0001, "loss": 0.9928, "step": 611 }, { "epoch": 0.5439395622708588, "grad_norm": 0.4208507239818573, "learning_rate": 0.0001, "loss": 0.9584, "step": 612 }, { "epoch": 0.5448283524052883, "grad_norm": 0.4468097686767578, "learning_rate": 0.0001, "loss": 0.9225, "step": 613 }, { "epoch": 0.5457171425397178, "grad_norm": 0.44044622778892517, "learning_rate": 0.0001, "loss": 0.9906, "step": 614 }, { "epoch": 0.5466059326741474, "grad_norm": 0.5258206725120544, "learning_rate": 0.0001, "loss": 0.9886, "step": 615 }, { "epoch": 0.5474947228085768, "grad_norm": 0.4392997622489929, "learning_rate": 0.0001, "loss": 1.0121, "step": 616 }, { "epoch": 0.5483835129430064, "grad_norm": 0.4318770170211792, "learning_rate": 0.0001, "loss": 1.0073, "step": 617 }, { "epoch": 0.5492723030774358, "grad_norm": 0.4536917805671692, "learning_rate": 0.0001, "loss": 0.9971, "step": 618 }, { "epoch": 0.5501610932118653, "grad_norm": 0.39293742179870605, "learning_rate": 0.0001, "loss": 0.9785, "step": 619 }, { "epoch": 0.5510498833462949, "grad_norm": 0.4010220766067505, "learning_rate": 0.0001, "loss": 0.9656, "step": 620 }, { "epoch": 0.5519386734807243, "grad_norm": 0.43340232968330383, "learning_rate": 0.0001, "loss": 0.8983, "step": 621 }, { "epoch": 0.5528274636151539, "grad_norm": 0.3749978840351105, "learning_rate": 0.0001, "loss": 0.8759, "step": 622 }, { "epoch": 0.5537162537495833, "grad_norm": 0.47160354256629944, "learning_rate": 0.0001, "loss": 1.0252, "step": 623 }, { "epoch": 0.5546050438840129, "grad_norm": 0.3324509859085083, "learning_rate": 0.0001, "loss": 0.9065, "step": 624 }, { "epoch": 0.5554938340184424, "grad_norm": 0.3825232982635498, "learning_rate": 0.0001, "loss": 0.9536, "step": 625 }, { "epoch": 0.5563826241528719, "grad_norm": 0.4211728870868683, "learning_rate": 0.0001, "loss": 0.8948, "step": 626 }, { "epoch": 0.5572714142873014, "grad_norm": 0.40888741612434387, "learning_rate": 0.0001, "loss": 1.0129, "step": 627 }, { "epoch": 0.5581602044217309, "grad_norm": 0.3999336063861847, "learning_rate": 0.0001, "loss": 0.9486, "step": 628 }, { "epoch": 0.5590489945561604, "grad_norm": 0.3953676223754883, "learning_rate": 0.0001, "loss": 0.8835, "step": 629 }, { "epoch": 0.55993778469059, "grad_norm": 3.312321901321411, "learning_rate": 0.0001, "loss": 1.0447, "step": 630 }, { "epoch": 0.5608265748250194, "grad_norm": 0.6516892313957214, "learning_rate": 0.0001, "loss": 0.8964, "step": 631 }, { "epoch": 0.561715364959449, "grad_norm": 0.43332597613334656, "learning_rate": 0.0001, "loss": 0.9386, "step": 632 }, { "epoch": 0.5626041550938785, "grad_norm": 0.36072951555252075, "learning_rate": 0.0001, "loss": 0.9489, "step": 633 }, { "epoch": 0.563492945228308, "grad_norm": 0.4039455056190491, "learning_rate": 0.0001, "loss": 0.9512, "step": 634 }, { "epoch": 0.5643817353627375, "grad_norm": 0.40944796800613403, "learning_rate": 0.0001, "loss": 1.0909, "step": 635 }, { "epoch": 0.5652705254971669, "grad_norm": 0.4490579068660736, "learning_rate": 0.0001, "loss": 0.955, "step": 636 }, { "epoch": 0.5661593156315965, "grad_norm": 0.4226183593273163, "learning_rate": 0.0001, "loss": 0.9833, "step": 637 }, { "epoch": 0.567048105766026, "grad_norm": 0.4099627733230591, "learning_rate": 0.0001, "loss": 0.9909, "step": 638 }, { "epoch": 0.5679368959004555, "grad_norm": 0.39125704765319824, "learning_rate": 0.0001, "loss": 0.9875, "step": 639 }, { "epoch": 0.568825686034885, "grad_norm": 0.3506604731082916, "learning_rate": 0.0001, "loss": 0.8747, "step": 640 }, { "epoch": 0.5697144761693145, "grad_norm": 0.6626682281494141, "learning_rate": 0.0001, "loss": 1.0328, "step": 641 }, { "epoch": 0.570603266303744, "grad_norm": 0.3994938135147095, "learning_rate": 0.0001, "loss": 0.9255, "step": 642 }, { "epoch": 0.5714920564381736, "grad_norm": 0.4206639528274536, "learning_rate": 0.0001, "loss": 0.9222, "step": 643 }, { "epoch": 0.572380846572603, "grad_norm": 0.4152818024158478, "learning_rate": 0.0001, "loss": 0.9515, "step": 644 }, { "epoch": 0.5732696367070326, "grad_norm": 0.3834103047847748, "learning_rate": 0.0001, "loss": 0.9008, "step": 645 }, { "epoch": 0.574158426841462, "grad_norm": 0.3906621038913727, "learning_rate": 0.0001, "loss": 1.0196, "step": 646 }, { "epoch": 0.5750472169758916, "grad_norm": 0.4065912067890167, "learning_rate": 0.0001, "loss": 0.9741, "step": 647 }, { "epoch": 0.5759360071103211, "grad_norm": 0.3870736360549927, "learning_rate": 0.0001, "loss": 0.9403, "step": 648 }, { "epoch": 0.5768247972447506, "grad_norm": 0.40144529938697815, "learning_rate": 0.0001, "loss": 0.9931, "step": 649 }, { "epoch": 0.5777135873791801, "grad_norm": 0.40022167563438416, "learning_rate": 0.0001, "loss": 0.9741, "step": 650 }, { "epoch": 0.5786023775136095, "grad_norm": 0.4081610143184662, "learning_rate": 0.0001, "loss": 0.9509, "step": 651 }, { "epoch": 0.5794911676480391, "grad_norm": 0.3786165714263916, "learning_rate": 0.0001, "loss": 0.9435, "step": 652 }, { "epoch": 0.5803799577824686, "grad_norm": 0.3807113468647003, "learning_rate": 0.0001, "loss": 0.9534, "step": 653 }, { "epoch": 0.5812687479168981, "grad_norm": 0.4126400053501129, "learning_rate": 0.0001, "loss": 0.8815, "step": 654 }, { "epoch": 0.5821575380513276, "grad_norm": 0.46695202589035034, "learning_rate": 0.0001, "loss": 1.0453, "step": 655 }, { "epoch": 0.5830463281857572, "grad_norm": 0.43947726488113403, "learning_rate": 0.0001, "loss": 0.9558, "step": 656 }, { "epoch": 0.5839351183201866, "grad_norm": 0.4180644154548645, "learning_rate": 0.0001, "loss": 0.9144, "step": 657 }, { "epoch": 0.5848239084546162, "grad_norm": 0.48162519931793213, "learning_rate": 0.0001, "loss": 1.0027, "step": 658 }, { "epoch": 0.5857126985890456, "grad_norm": 0.42843928933143616, "learning_rate": 0.0001, "loss": 0.9259, "step": 659 }, { "epoch": 0.5866014887234752, "grad_norm": 0.42587754130363464, "learning_rate": 0.0001, "loss": 0.9438, "step": 660 }, { "epoch": 0.5874902788579047, "grad_norm": 0.42820873856544495, "learning_rate": 0.0001, "loss": 0.9788, "step": 661 }, { "epoch": 0.5883790689923342, "grad_norm": 0.44260185956954956, "learning_rate": 0.0001, "loss": 0.951, "step": 662 }, { "epoch": 0.5892678591267637, "grad_norm": 0.38898608088493347, "learning_rate": 0.0001, "loss": 0.9226, "step": 663 }, { "epoch": 0.5901566492611932, "grad_norm": 0.4538962244987488, "learning_rate": 0.0001, "loss": 0.9951, "step": 664 }, { "epoch": 0.5910454393956227, "grad_norm": 0.4434801936149597, "learning_rate": 0.0001, "loss": 0.9968, "step": 665 }, { "epoch": 0.5919342295300523, "grad_norm": 2.7702479362487793, "learning_rate": 0.0001, "loss": 0.9657, "step": 666 }, { "epoch": 0.5928230196644817, "grad_norm": 0.4107935130596161, "learning_rate": 0.0001, "loss": 0.8901, "step": 667 }, { "epoch": 0.5937118097989112, "grad_norm": 5.212909698486328, "learning_rate": 0.0001, "loss": 0.8686, "step": 668 }, { "epoch": 0.5946005999333407, "grad_norm": 0.4474343955516815, "learning_rate": 0.0001, "loss": 0.8641, "step": 669 }, { "epoch": 0.5954893900677702, "grad_norm": 0.39664575457572937, "learning_rate": 0.0001, "loss": 0.9605, "step": 670 }, { "epoch": 0.5963781802021998, "grad_norm": 0.38908809423446655, "learning_rate": 0.0001, "loss": 0.8851, "step": 671 }, { "epoch": 0.5972669703366292, "grad_norm": 0.39720067381858826, "learning_rate": 0.0001, "loss": 0.9376, "step": 672 }, { "epoch": 0.5981557604710588, "grad_norm": 0.444224089384079, "learning_rate": 0.0001, "loss": 0.9879, "step": 673 }, { "epoch": 0.5990445506054882, "grad_norm": 0.461745023727417, "learning_rate": 0.0001, "loss": 1.0298, "step": 674 }, { "epoch": 0.5999333407399178, "grad_norm": 0.38060104846954346, "learning_rate": 0.0001, "loss": 0.9133, "step": 675 }, { "epoch": 0.6008221308743473, "grad_norm": 0.4152204990386963, "learning_rate": 0.0001, "loss": 1.0037, "step": 676 }, { "epoch": 0.6017109210087768, "grad_norm": 0.4251076281070709, "learning_rate": 0.0001, "loss": 0.9217, "step": 677 }, { "epoch": 0.6025997111432063, "grad_norm": 0.4050005376338959, "learning_rate": 0.0001, "loss": 1.0411, "step": 678 }, { "epoch": 0.6034885012776359, "grad_norm": 0.4802723228931427, "learning_rate": 0.0001, "loss": 0.9966, "step": 679 }, { "epoch": 0.6043772914120653, "grad_norm": 0.4158213436603546, "learning_rate": 0.0001, "loss": 0.9684, "step": 680 }, { "epoch": 0.6052660815464949, "grad_norm": 0.4008488059043884, "learning_rate": 0.0001, "loss": 0.9684, "step": 681 }, { "epoch": 0.6061548716809243, "grad_norm": 0.4021485149860382, "learning_rate": 0.0001, "loss": 0.8944, "step": 682 }, { "epoch": 0.6070436618153539, "grad_norm": 0.36115503311157227, "learning_rate": 0.0001, "loss": 0.9677, "step": 683 }, { "epoch": 0.6079324519497834, "grad_norm": 0.3866066336631775, "learning_rate": 0.0001, "loss": 0.9938, "step": 684 }, { "epoch": 0.6088212420842128, "grad_norm": 0.47491082549095154, "learning_rate": 0.0001, "loss": 0.9778, "step": 685 }, { "epoch": 0.6097100322186424, "grad_norm": 0.44795575737953186, "learning_rate": 0.0001, "loss": 0.9964, "step": 686 }, { "epoch": 0.6105988223530718, "grad_norm": 0.48861074447631836, "learning_rate": 0.0001, "loss": 0.9276, "step": 687 }, { "epoch": 0.6114876124875014, "grad_norm": 0.4077586233615875, "learning_rate": 0.0001, "loss": 0.9454, "step": 688 }, { "epoch": 0.6123764026219309, "grad_norm": 0.3845427930355072, "learning_rate": 0.0001, "loss": 0.9704, "step": 689 }, { "epoch": 0.6132651927563604, "grad_norm": 0.44110408425331116, "learning_rate": 0.0001, "loss": 1.0017, "step": 690 }, { "epoch": 0.6141539828907899, "grad_norm": 0.38786497712135315, "learning_rate": 0.0001, "loss": 0.9824, "step": 691 }, { "epoch": 0.6150427730252194, "grad_norm": 0.40381374955177307, "learning_rate": 0.0001, "loss": 0.9184, "step": 692 }, { "epoch": 0.6159315631596489, "grad_norm": 0.4073936641216278, "learning_rate": 0.0001, "loss": 0.948, "step": 693 }, { "epoch": 0.6168203532940785, "grad_norm": 0.39875441789627075, "learning_rate": 0.0001, "loss": 0.9917, "step": 694 }, { "epoch": 0.6177091434285079, "grad_norm": 0.4240407943725586, "learning_rate": 0.0001, "loss": 0.9696, "step": 695 }, { "epoch": 0.6185979335629375, "grad_norm": 0.4107338488101959, "learning_rate": 0.0001, "loss": 1.0108, "step": 696 }, { "epoch": 0.619486723697367, "grad_norm": 0.4649637043476105, "learning_rate": 0.0001, "loss": 1.0222, "step": 697 }, { "epoch": 0.6203755138317965, "grad_norm": 0.398387610912323, "learning_rate": 0.0001, "loss": 0.9952, "step": 698 }, { "epoch": 0.621264303966226, "grad_norm": 0.4626375734806061, "learning_rate": 0.0001, "loss": 1.0333, "step": 699 }, { "epoch": 0.6221530941006554, "grad_norm": 0.39919617772102356, "learning_rate": 0.0001, "loss": 0.8835, "step": 700 }, { "epoch": 0.623041884235085, "grad_norm": 0.3973129391670227, "learning_rate": 0.0001, "loss": 1.0366, "step": 701 }, { "epoch": 0.6239306743695145, "grad_norm": 0.4347308576107025, "learning_rate": 0.0001, "loss": 0.98, "step": 702 }, { "epoch": 0.624819464503944, "grad_norm": 0.42371174693107605, "learning_rate": 0.0001, "loss": 0.9625, "step": 703 }, { "epoch": 0.6257082546383735, "grad_norm": 0.3887942135334015, "learning_rate": 0.0001, "loss": 0.9995, "step": 704 }, { "epoch": 0.626597044772803, "grad_norm": 0.41830766201019287, "learning_rate": 0.0001, "loss": 1.0613, "step": 705 }, { "epoch": 0.6274858349072325, "grad_norm": 0.37216201424598694, "learning_rate": 0.0001, "loss": 0.9003, "step": 706 }, { "epoch": 0.6283746250416621, "grad_norm": 0.3734080493450165, "learning_rate": 0.0001, "loss": 1.0583, "step": 707 }, { "epoch": 0.6292634151760915, "grad_norm": 0.4222297966480255, "learning_rate": 0.0001, "loss": 0.8877, "step": 708 }, { "epoch": 0.6301522053105211, "grad_norm": 0.4466676712036133, "learning_rate": 0.0001, "loss": 0.9868, "step": 709 }, { "epoch": 0.6310409954449505, "grad_norm": 0.4170881509780884, "learning_rate": 0.0001, "loss": 0.9235, "step": 710 }, { "epoch": 0.6319297855793801, "grad_norm": 0.42569923400878906, "learning_rate": 0.0001, "loss": 1.0111, "step": 711 }, { "epoch": 0.6328185757138096, "grad_norm": 0.469657838344574, "learning_rate": 0.0001, "loss": 1.1188, "step": 712 }, { "epoch": 0.6337073658482391, "grad_norm": 0.4612179696559906, "learning_rate": 0.0001, "loss": 1.0049, "step": 713 }, { "epoch": 0.6345961559826686, "grad_norm": 0.4614596664905548, "learning_rate": 0.0001, "loss": 0.9497, "step": 714 }, { "epoch": 0.635484946117098, "grad_norm": 0.4139200747013092, "learning_rate": 0.0001, "loss": 0.9691, "step": 715 }, { "epoch": 0.6363737362515276, "grad_norm": 0.42002081871032715, "learning_rate": 0.0001, "loss": 0.9838, "step": 716 }, { "epoch": 0.6372625263859572, "grad_norm": 0.4355138838291168, "learning_rate": 0.0001, "loss": 1.0074, "step": 717 }, { "epoch": 0.6381513165203866, "grad_norm": 0.42147886753082275, "learning_rate": 0.0001, "loss": 0.9929, "step": 718 }, { "epoch": 0.6390401066548161, "grad_norm": 0.4365898370742798, "learning_rate": 0.0001, "loss": 1.0325, "step": 719 }, { "epoch": 0.6399288967892457, "grad_norm": 0.4107733964920044, "learning_rate": 0.0001, "loss": 0.9229, "step": 720 }, { "epoch": 0.6408176869236751, "grad_norm": 0.4189467430114746, "learning_rate": 0.0001, "loss": 1.0041, "step": 721 }, { "epoch": 0.6417064770581047, "grad_norm": 0.4112345278263092, "learning_rate": 0.0001, "loss": 1.017, "step": 722 }, { "epoch": 0.6425952671925341, "grad_norm": 0.39325135946273804, "learning_rate": 0.0001, "loss": 0.9264, "step": 723 }, { "epoch": 0.6434840573269637, "grad_norm": 0.38210329413414, "learning_rate": 0.0001, "loss": 0.9947, "step": 724 }, { "epoch": 0.6443728474613932, "grad_norm": 0.4400337040424347, "learning_rate": 0.0001, "loss": 0.9118, "step": 725 }, { "epoch": 0.6452616375958227, "grad_norm": 0.41924870014190674, "learning_rate": 0.0001, "loss": 1.0176, "step": 726 }, { "epoch": 0.6461504277302522, "grad_norm": 0.37929868698120117, "learning_rate": 0.0001, "loss": 0.906, "step": 727 }, { "epoch": 0.6470392178646817, "grad_norm": 0.41478589177131653, "learning_rate": 0.0001, "loss": 0.9984, "step": 728 }, { "epoch": 0.6479280079991112, "grad_norm": 0.3930301368236542, "learning_rate": 0.0001, "loss": 0.9251, "step": 729 }, { "epoch": 0.6488167981335408, "grad_norm": 0.3816480338573456, "learning_rate": 0.0001, "loss": 0.9501, "step": 730 }, { "epoch": 0.6497055882679702, "grad_norm": 0.40965738892555237, "learning_rate": 0.0001, "loss": 0.8565, "step": 731 }, { "epoch": 0.6505943784023998, "grad_norm": 0.42315876483917236, "learning_rate": 0.0001, "loss": 1.0507, "step": 732 }, { "epoch": 0.6514831685368292, "grad_norm": 0.3918554484844208, "learning_rate": 0.0001, "loss": 0.9513, "step": 733 }, { "epoch": 0.6523719586712587, "grad_norm": 0.42279428243637085, "learning_rate": 0.0001, "loss": 0.9699, "step": 734 }, { "epoch": 0.6532607488056883, "grad_norm": 0.41292804479599, "learning_rate": 0.0001, "loss": 0.9509, "step": 735 }, { "epoch": 0.6541495389401177, "grad_norm": 0.3694443106651306, "learning_rate": 0.0001, "loss": 0.8812, "step": 736 }, { "epoch": 0.6550383290745473, "grad_norm": 0.42552173137664795, "learning_rate": 0.0001, "loss": 0.9628, "step": 737 }, { "epoch": 0.6559271192089767, "grad_norm": 0.4035997986793518, "learning_rate": 0.0001, "loss": 0.9955, "step": 738 }, { "epoch": 0.6568159093434063, "grad_norm": 0.38716575503349304, "learning_rate": 0.0001, "loss": 0.9339, "step": 739 }, { "epoch": 0.6577046994778358, "grad_norm": 0.37816059589385986, "learning_rate": 0.0001, "loss": 1.0141, "step": 740 }, { "epoch": 0.6585934896122653, "grad_norm": 0.39885637164115906, "learning_rate": 0.0001, "loss": 0.9982, "step": 741 }, { "epoch": 0.6594822797466948, "grad_norm": 0.41604557633399963, "learning_rate": 0.0001, "loss": 0.9505, "step": 742 }, { "epoch": 0.6603710698811244, "grad_norm": 0.3828152120113373, "learning_rate": 0.0001, "loss": 0.9055, "step": 743 }, { "epoch": 0.6612598600155538, "grad_norm": 0.39052262902259827, "learning_rate": 0.0001, "loss": 0.9922, "step": 744 }, { "epoch": 0.6621486501499834, "grad_norm": 0.36966073513031006, "learning_rate": 0.0001, "loss": 1.0097, "step": 745 }, { "epoch": 0.6630374402844128, "grad_norm": 0.40489739179611206, "learning_rate": 0.0001, "loss": 0.9734, "step": 746 }, { "epoch": 0.6639262304188424, "grad_norm": 0.4752523899078369, "learning_rate": 0.0001, "loss": 1.0276, "step": 747 }, { "epoch": 0.6648150205532719, "grad_norm": 0.42752546072006226, "learning_rate": 0.0001, "loss": 0.9565, "step": 748 }, { "epoch": 0.6657038106877013, "grad_norm": 0.3752210736274719, "learning_rate": 0.0001, "loss": 0.9709, "step": 749 }, { "epoch": 0.6665926008221309, "grad_norm": 0.4873553514480591, "learning_rate": 0.0001, "loss": 1.0433, "step": 750 }, { "epoch": 0.6674813909565603, "grad_norm": 0.4026888906955719, "learning_rate": 0.0001, "loss": 1.0882, "step": 751 }, { "epoch": 0.6683701810909899, "grad_norm": 0.42212575674057007, "learning_rate": 0.0001, "loss": 0.9615, "step": 752 }, { "epoch": 0.6692589712254194, "grad_norm": 0.40002021193504333, "learning_rate": 0.0001, "loss": 1.0106, "step": 753 }, { "epoch": 0.6701477613598489, "grad_norm": 0.4148339331150055, "learning_rate": 0.0001, "loss": 0.9292, "step": 754 }, { "epoch": 0.6710365514942784, "grad_norm": 0.37192678451538086, "learning_rate": 0.0001, "loss": 0.9844, "step": 755 }, { "epoch": 0.6719253416287079, "grad_norm": 0.3779515326023102, "learning_rate": 0.0001, "loss": 0.8999, "step": 756 }, { "epoch": 0.6728141317631374, "grad_norm": 0.38838016986846924, "learning_rate": 0.0001, "loss": 0.9855, "step": 757 }, { "epoch": 0.673702921897567, "grad_norm": 0.3978157937526703, "learning_rate": 0.0001, "loss": 0.9253, "step": 758 }, { "epoch": 0.6745917120319964, "grad_norm": 0.38334494829177856, "learning_rate": 0.0001, "loss": 0.8678, "step": 759 }, { "epoch": 0.675480502166426, "grad_norm": 0.3856591582298279, "learning_rate": 0.0001, "loss": 0.9614, "step": 760 }, { "epoch": 0.6763692923008554, "grad_norm": 0.6821391582489014, "learning_rate": 0.0001, "loss": 0.9884, "step": 761 }, { "epoch": 0.677258082435285, "grad_norm": 0.4022596478462219, "learning_rate": 0.0001, "loss": 0.9512, "step": 762 }, { "epoch": 0.6781468725697145, "grad_norm": 0.3981231153011322, "learning_rate": 0.0001, "loss": 0.9457, "step": 763 }, { "epoch": 0.679035662704144, "grad_norm": 0.466782808303833, "learning_rate": 0.0001, "loss": 1.0009, "step": 764 }, { "epoch": 0.6799244528385735, "grad_norm": 0.39416226744651794, "learning_rate": 0.0001, "loss": 0.9204, "step": 765 }, { "epoch": 0.680813242973003, "grad_norm": 0.3830525279045105, "learning_rate": 0.0001, "loss": 0.9206, "step": 766 }, { "epoch": 0.6817020331074325, "grad_norm": 0.3653806447982788, "learning_rate": 0.0001, "loss": 0.9601, "step": 767 }, { "epoch": 0.682590823241862, "grad_norm": 0.4308638274669647, "learning_rate": 0.0001, "loss": 0.9666, "step": 768 }, { "epoch": 0.6834796133762915, "grad_norm": 0.3821423649787903, "learning_rate": 0.0001, "loss": 1.0252, "step": 769 }, { "epoch": 0.684368403510721, "grad_norm": 0.459150493144989, "learning_rate": 0.0001, "loss": 0.8923, "step": 770 }, { "epoch": 0.6852571936451506, "grad_norm": 0.4109600782394409, "learning_rate": 0.0001, "loss": 0.9337, "step": 771 }, { "epoch": 0.68614598377958, "grad_norm": 0.4196016788482666, "learning_rate": 0.0001, "loss": 1.0004, "step": 772 }, { "epoch": 0.6870347739140096, "grad_norm": 0.39674344658851624, "learning_rate": 0.0001, "loss": 0.9676, "step": 773 }, { "epoch": 0.687923564048439, "grad_norm": 0.3917883634567261, "learning_rate": 0.0001, "loss": 0.9255, "step": 774 }, { "epoch": 0.6888123541828686, "grad_norm": 0.4198206663131714, "learning_rate": 0.0001, "loss": 0.9159, "step": 775 }, { "epoch": 0.6897011443172981, "grad_norm": 0.4153868556022644, "learning_rate": 0.0001, "loss": 1.0006, "step": 776 }, { "epoch": 0.6905899344517276, "grad_norm": 0.425462007522583, "learning_rate": 0.0001, "loss": 0.843, "step": 777 }, { "epoch": 0.6914787245861571, "grad_norm": 0.40152454376220703, "learning_rate": 0.0001, "loss": 0.8695, "step": 778 }, { "epoch": 0.6923675147205866, "grad_norm": 0.39553624391555786, "learning_rate": 0.0001, "loss": 0.8722, "step": 779 }, { "epoch": 0.6932563048550161, "grad_norm": 0.4519752860069275, "learning_rate": 0.0001, "loss": 1.0222, "step": 780 }, { "epoch": 0.6941450949894457, "grad_norm": 0.3791448473930359, "learning_rate": 0.0001, "loss": 0.8787, "step": 781 }, { "epoch": 0.6950338851238751, "grad_norm": 0.3689649701118469, "learning_rate": 0.0001, "loss": 0.973, "step": 782 }, { "epoch": 0.6959226752583046, "grad_norm": 0.36836689710617065, "learning_rate": 0.0001, "loss": 0.924, "step": 783 }, { "epoch": 0.6968114653927341, "grad_norm": 0.4386933147907257, "learning_rate": 0.0001, "loss": 1.0039, "step": 784 }, { "epoch": 0.6977002555271636, "grad_norm": 0.39001014828681946, "learning_rate": 0.0001, "loss": 0.9655, "step": 785 }, { "epoch": 0.6985890456615932, "grad_norm": 0.36727163195610046, "learning_rate": 0.0001, "loss": 0.9681, "step": 786 }, { "epoch": 0.6994778357960226, "grad_norm": 0.3847578465938568, "learning_rate": 0.0001, "loss": 0.9809, "step": 787 }, { "epoch": 0.7003666259304522, "grad_norm": 0.3912128806114197, "learning_rate": 0.0001, "loss": 0.9828, "step": 788 }, { "epoch": 0.7012554160648817, "grad_norm": 0.3891061842441559, "learning_rate": 0.0001, "loss": 0.9816, "step": 789 }, { "epoch": 0.7021442061993112, "grad_norm": 0.40884703397750854, "learning_rate": 0.0001, "loss": 0.9406, "step": 790 }, { "epoch": 0.7030329963337407, "grad_norm": 0.40863409638404846, "learning_rate": 0.0001, "loss": 0.9473, "step": 791 }, { "epoch": 0.7039217864681702, "grad_norm": 0.4846546947956085, "learning_rate": 0.0001, "loss": 1.0337, "step": 792 }, { "epoch": 0.7048105766025997, "grad_norm": 0.4108925759792328, "learning_rate": 0.0001, "loss": 0.8871, "step": 793 }, { "epoch": 0.7056993667370293, "grad_norm": 0.41259604692459106, "learning_rate": 0.0001, "loss": 0.9616, "step": 794 }, { "epoch": 0.7065881568714587, "grad_norm": 0.39563241600990295, "learning_rate": 0.0001, "loss": 0.917, "step": 795 }, { "epoch": 0.7074769470058883, "grad_norm": 0.44878554344177246, "learning_rate": 0.0001, "loss": 0.9133, "step": 796 }, { "epoch": 0.7083657371403177, "grad_norm": 0.4403840899467468, "learning_rate": 0.0001, "loss": 0.961, "step": 797 }, { "epoch": 0.7092545272747472, "grad_norm": 0.47209668159484863, "learning_rate": 0.0001, "loss": 0.9692, "step": 798 }, { "epoch": 0.7101433174091768, "grad_norm": 0.3841542601585388, "learning_rate": 0.0001, "loss": 0.9458, "step": 799 }, { "epoch": 0.7110321075436062, "grad_norm": 0.46148043870925903, "learning_rate": 0.0001, "loss": 0.8355, "step": 800 }, { "epoch": 0.7119208976780358, "grad_norm": 0.6854037046432495, "learning_rate": 0.0001, "loss": 0.9275, "step": 801 }, { "epoch": 0.7128096878124652, "grad_norm": 0.38869839906692505, "learning_rate": 0.0001, "loss": 0.9882, "step": 802 }, { "epoch": 0.7136984779468948, "grad_norm": 0.4120323956012726, "learning_rate": 0.0001, "loss": 0.87, "step": 803 }, { "epoch": 0.7145872680813243, "grad_norm": 0.43215325474739075, "learning_rate": 0.0001, "loss": 0.9797, "step": 804 }, { "epoch": 0.7154760582157538, "grad_norm": 0.4285464882850647, "learning_rate": 0.0001, "loss": 0.9303, "step": 805 }, { "epoch": 0.7163648483501833, "grad_norm": 0.3982923626899719, "learning_rate": 0.0001, "loss": 0.9645, "step": 806 }, { "epoch": 0.7172536384846128, "grad_norm": 0.3845166563987732, "learning_rate": 0.0001, "loss": 0.8746, "step": 807 }, { "epoch": 0.7181424286190423, "grad_norm": 0.34493544697761536, "learning_rate": 0.0001, "loss": 0.8451, "step": 808 }, { "epoch": 0.7190312187534719, "grad_norm": 0.4371100962162018, "learning_rate": 0.0001, "loss": 1.0098, "step": 809 }, { "epoch": 0.7199200088879013, "grad_norm": 0.4431426525115967, "learning_rate": 0.0001, "loss": 0.9671, "step": 810 }, { "epoch": 0.7208087990223309, "grad_norm": 0.39591118693351746, "learning_rate": 0.0001, "loss": 0.9669, "step": 811 }, { "epoch": 0.7216975891567604, "grad_norm": 0.4144839942455292, "learning_rate": 0.0001, "loss": 1.0489, "step": 812 }, { "epoch": 0.7225863792911899, "grad_norm": 0.36368703842163086, "learning_rate": 0.0001, "loss": 0.9269, "step": 813 }, { "epoch": 0.7234751694256194, "grad_norm": 0.34978145360946655, "learning_rate": 0.0001, "loss": 0.925, "step": 814 }, { "epoch": 0.7243639595600488, "grad_norm": 0.38028684258461, "learning_rate": 0.0001, "loss": 1.0324, "step": 815 }, { "epoch": 0.7252527496944784, "grad_norm": 0.3885200023651123, "learning_rate": 0.0001, "loss": 0.8994, "step": 816 }, { "epoch": 0.726141539828908, "grad_norm": 0.3828847110271454, "learning_rate": 0.0001, "loss": 0.977, "step": 817 }, { "epoch": 0.7270303299633374, "grad_norm": 0.41237783432006836, "learning_rate": 0.0001, "loss": 0.9004, "step": 818 }, { "epoch": 0.7279191200977669, "grad_norm": 0.3976728916168213, "learning_rate": 0.0001, "loss": 0.8497, "step": 819 }, { "epoch": 0.7288079102321964, "grad_norm": 0.3727687895298004, "learning_rate": 0.0001, "loss": 0.9518, "step": 820 }, { "epoch": 0.7296967003666259, "grad_norm": 0.36306801438331604, "learning_rate": 0.0001, "loss": 0.9451, "step": 821 }, { "epoch": 0.7305854905010555, "grad_norm": 0.38123902678489685, "learning_rate": 0.0001, "loss": 0.9649, "step": 822 }, { "epoch": 0.7314742806354849, "grad_norm": 0.4005994200706482, "learning_rate": 0.0001, "loss": 0.9271, "step": 823 }, { "epoch": 0.7323630707699145, "grad_norm": 0.36427778005599976, "learning_rate": 0.0001, "loss": 0.8705, "step": 824 }, { "epoch": 0.7332518609043439, "grad_norm": 0.39238572120666504, "learning_rate": 0.0001, "loss": 1.0382, "step": 825 }, { "epoch": 0.7341406510387735, "grad_norm": 0.3285076320171356, "learning_rate": 0.0001, "loss": 0.8946, "step": 826 }, { "epoch": 0.735029441173203, "grad_norm": 0.4472292363643646, "learning_rate": 0.0001, "loss": 1.0144, "step": 827 }, { "epoch": 0.7359182313076325, "grad_norm": 0.43173158168792725, "learning_rate": 0.0001, "loss": 0.918, "step": 828 }, { "epoch": 0.736807021442062, "grad_norm": 0.3840146064758301, "learning_rate": 0.0001, "loss": 0.825, "step": 829 }, { "epoch": 0.7376958115764916, "grad_norm": 0.3785271644592285, "learning_rate": 0.0001, "loss": 0.9523, "step": 830 }, { "epoch": 0.738584601710921, "grad_norm": 0.4263155460357666, "learning_rate": 0.0001, "loss": 0.9, "step": 831 }, { "epoch": 0.7394733918453505, "grad_norm": 0.3733021020889282, "learning_rate": 0.0001, "loss": 0.9072, "step": 832 }, { "epoch": 0.74036218197978, "grad_norm": 0.4950011074542999, "learning_rate": 0.0001, "loss": 1.0596, "step": 833 }, { "epoch": 0.7412509721142095, "grad_norm": 0.4155547320842743, "learning_rate": 0.0001, "loss": 0.9846, "step": 834 }, { "epoch": 0.7421397622486391, "grad_norm": 0.39016374945640564, "learning_rate": 0.0001, "loss": 0.9511, "step": 835 }, { "epoch": 0.7430285523830685, "grad_norm": 0.44248321652412415, "learning_rate": 0.0001, "loss": 0.9117, "step": 836 }, { "epoch": 0.7439173425174981, "grad_norm": 0.4027865529060364, "learning_rate": 0.0001, "loss": 0.9217, "step": 837 }, { "epoch": 0.7448061326519275, "grad_norm": 0.4021622836589813, "learning_rate": 0.0001, "loss": 0.9234, "step": 838 }, { "epoch": 0.7456949227863571, "grad_norm": 0.4510715901851654, "learning_rate": 0.0001, "loss": 1.0242, "step": 839 }, { "epoch": 0.7465837129207866, "grad_norm": 0.38624054193496704, "learning_rate": 0.0001, "loss": 0.9285, "step": 840 }, { "epoch": 0.7474725030552161, "grad_norm": 0.48192909359931946, "learning_rate": 0.0001, "loss": 1.0023, "step": 841 }, { "epoch": 0.7483612931896456, "grad_norm": 0.4182127118110657, "learning_rate": 0.0001, "loss": 1.04, "step": 842 }, { "epoch": 0.7492500833240751, "grad_norm": 0.3804481029510498, "learning_rate": 0.0001, "loss": 0.9602, "step": 843 }, { "epoch": 0.7501388734585046, "grad_norm": 0.3628024458885193, "learning_rate": 0.0001, "loss": 0.955, "step": 844 }, { "epoch": 0.7510276635929342, "grad_norm": 0.3579307496547699, "learning_rate": 0.0001, "loss": 0.9355, "step": 845 }, { "epoch": 0.7519164537273636, "grad_norm": 0.36626923084259033, "learning_rate": 0.0001, "loss": 0.9324, "step": 846 }, { "epoch": 0.7528052438617931, "grad_norm": 0.37422046065330505, "learning_rate": 0.0001, "loss": 1.0267, "step": 847 }, { "epoch": 0.7536940339962226, "grad_norm": 0.3965827226638794, "learning_rate": 0.0001, "loss": 0.9784, "step": 848 }, { "epoch": 0.7545828241306521, "grad_norm": 0.36925482749938965, "learning_rate": 0.0001, "loss": 0.9801, "step": 849 }, { "epoch": 0.7554716142650817, "grad_norm": 0.372070848941803, "learning_rate": 0.0001, "loss": 0.9474, "step": 850 }, { "epoch": 0.7563604043995111, "grad_norm": 0.4122565984725952, "learning_rate": 0.0001, "loss": 0.9738, "step": 851 }, { "epoch": 0.7572491945339407, "grad_norm": 0.36244523525238037, "learning_rate": 0.0001, "loss": 0.9174, "step": 852 }, { "epoch": 0.7581379846683702, "grad_norm": 0.3738951086997986, "learning_rate": 0.0001, "loss": 0.9236, "step": 853 }, { "epoch": 0.7590267748027997, "grad_norm": 0.3978452980518341, "learning_rate": 0.0001, "loss": 1.0203, "step": 854 }, { "epoch": 0.7599155649372292, "grad_norm": 0.38576042652130127, "learning_rate": 0.0001, "loss": 0.9305, "step": 855 }, { "epoch": 0.7608043550716587, "grad_norm": 0.39469850063323975, "learning_rate": 0.0001, "loss": 0.9165, "step": 856 }, { "epoch": 0.7616931452060882, "grad_norm": 0.4054928719997406, "learning_rate": 0.0001, "loss": 0.8531, "step": 857 }, { "epoch": 0.7625819353405178, "grad_norm": 0.42798909544944763, "learning_rate": 0.0001, "loss": 0.8747, "step": 858 }, { "epoch": 0.7634707254749472, "grad_norm": 0.42001426219940186, "learning_rate": 0.0001, "loss": 0.9606, "step": 859 }, { "epoch": 0.7643595156093768, "grad_norm": 0.3773418366909027, "learning_rate": 0.0001, "loss": 0.9564, "step": 860 }, { "epoch": 0.7652483057438062, "grad_norm": 0.3583545982837677, "learning_rate": 0.0001, "loss": 0.9112, "step": 861 }, { "epoch": 0.7661370958782358, "grad_norm": 0.4381794035434723, "learning_rate": 0.0001, "loss": 1.0014, "step": 862 }, { "epoch": 0.7670258860126653, "grad_norm": 0.40912652015686035, "learning_rate": 0.0001, "loss": 0.9742, "step": 863 }, { "epoch": 0.7679146761470947, "grad_norm": 0.3959810733795166, "learning_rate": 0.0001, "loss": 0.9635, "step": 864 }, { "epoch": 0.7688034662815243, "grad_norm": 0.3853726387023926, "learning_rate": 0.0001, "loss": 0.9379, "step": 865 }, { "epoch": 0.7696922564159537, "grad_norm": 0.4313822388648987, "learning_rate": 0.0001, "loss": 1.0387, "step": 866 }, { "epoch": 0.7705810465503833, "grad_norm": 0.34767740964889526, "learning_rate": 0.0001, "loss": 0.953, "step": 867 }, { "epoch": 0.7714698366848128, "grad_norm": 0.414546936750412, "learning_rate": 0.0001, "loss": 0.9447, "step": 868 }, { "epoch": 0.7723586268192423, "grad_norm": 0.3723791539669037, "learning_rate": 0.0001, "loss": 0.9898, "step": 869 }, { "epoch": 0.7732474169536718, "grad_norm": 0.3587020933628082, "learning_rate": 0.0001, "loss": 0.9793, "step": 870 }, { "epoch": 0.7741362070881013, "grad_norm": 0.37663257122039795, "learning_rate": 0.0001, "loss": 0.898, "step": 871 }, { "epoch": 0.7750249972225308, "grad_norm": 0.4134519398212433, "learning_rate": 0.0001, "loss": 0.8667, "step": 872 }, { "epoch": 0.7759137873569604, "grad_norm": 0.3675340414047241, "learning_rate": 0.0001, "loss": 0.9326, "step": 873 }, { "epoch": 0.7768025774913898, "grad_norm": 0.355490118265152, "learning_rate": 0.0001, "loss": 1.0111, "step": 874 }, { "epoch": 0.7776913676258194, "grad_norm": 0.34475648403167725, "learning_rate": 0.0001, "loss": 0.9417, "step": 875 }, { "epoch": 0.7785801577602489, "grad_norm": 0.3858400285243988, "learning_rate": 0.0001, "loss": 0.9454, "step": 876 }, { "epoch": 0.7794689478946784, "grad_norm": 0.41093963384628296, "learning_rate": 0.0001, "loss": 0.9135, "step": 877 }, { "epoch": 0.7803577380291079, "grad_norm": 0.3808945417404175, "learning_rate": 0.0001, "loss": 1.0214, "step": 878 }, { "epoch": 0.7812465281635373, "grad_norm": 0.3945116400718689, "learning_rate": 0.0001, "loss": 0.8784, "step": 879 }, { "epoch": 0.7821353182979669, "grad_norm": 0.34549954533576965, "learning_rate": 0.0001, "loss": 0.8422, "step": 880 }, { "epoch": 0.7830241084323964, "grad_norm": 0.39158105850219727, "learning_rate": 0.0001, "loss": 0.8915, "step": 881 }, { "epoch": 0.7839128985668259, "grad_norm": 0.41858598589897156, "learning_rate": 0.0001, "loss": 1.0106, "step": 882 }, { "epoch": 0.7848016887012554, "grad_norm": 0.3741881251335144, "learning_rate": 0.0001, "loss": 0.9062, "step": 883 }, { "epoch": 0.7856904788356849, "grad_norm": 0.42688676714897156, "learning_rate": 0.0001, "loss": 1.0078, "step": 884 }, { "epoch": 0.7865792689701144, "grad_norm": 0.4139765799045563, "learning_rate": 0.0001, "loss": 0.9226, "step": 885 }, { "epoch": 0.787468059104544, "grad_norm": 0.34299251437187195, "learning_rate": 0.0001, "loss": 0.8292, "step": 886 }, { "epoch": 0.7883568492389734, "grad_norm": 0.37689974904060364, "learning_rate": 0.0001, "loss": 0.9543, "step": 887 }, { "epoch": 0.789245639373403, "grad_norm": 0.4176059365272522, "learning_rate": 0.0001, "loss": 0.9437, "step": 888 }, { "epoch": 0.7901344295078324, "grad_norm": 0.3841492235660553, "learning_rate": 0.0001, "loss": 1.0038, "step": 889 }, { "epoch": 0.791023219642262, "grad_norm": 0.3431592285633087, "learning_rate": 0.0001, "loss": 0.9337, "step": 890 }, { "epoch": 0.7919120097766915, "grad_norm": 0.4018877148628235, "learning_rate": 0.0001, "loss": 0.9502, "step": 891 }, { "epoch": 0.792800799911121, "grad_norm": 0.4328785836696625, "learning_rate": 0.0001, "loss": 1.0362, "step": 892 }, { "epoch": 0.7936895900455505, "grad_norm": 0.41478991508483887, "learning_rate": 0.0001, "loss": 0.9226, "step": 893 }, { "epoch": 0.79457838017998, "grad_norm": 0.3804970979690552, "learning_rate": 0.0001, "loss": 0.9237, "step": 894 }, { "epoch": 0.7954671703144095, "grad_norm": 0.41232219338417053, "learning_rate": 0.0001, "loss": 0.8931, "step": 895 }, { "epoch": 0.796355960448839, "grad_norm": 0.3760983645915985, "learning_rate": 0.0001, "loss": 0.9839, "step": 896 }, { "epoch": 0.7972447505832685, "grad_norm": 0.3917902410030365, "learning_rate": 0.0001, "loss": 0.8802, "step": 897 }, { "epoch": 0.798133540717698, "grad_norm": 0.3652724623680115, "learning_rate": 0.0001, "loss": 0.9058, "step": 898 }, { "epoch": 0.7990223308521276, "grad_norm": 0.3827455937862396, "learning_rate": 0.0001, "loss": 0.9821, "step": 899 }, { "epoch": 0.799911120986557, "grad_norm": 0.4203377664089203, "learning_rate": 0.0001, "loss": 1.0054, "step": 900 }, { "epoch": 0.8007999111209866, "grad_norm": 0.3544522821903229, "learning_rate": 0.0001, "loss": 1.0044, "step": 901 }, { "epoch": 0.801688701255416, "grad_norm": 0.39687561988830566, "learning_rate": 0.0001, "loss": 0.9896, "step": 902 }, { "epoch": 0.8025774913898456, "grad_norm": 0.3697212040424347, "learning_rate": 0.0001, "loss": 0.9282, "step": 903 }, { "epoch": 0.8034662815242751, "grad_norm": 0.37844914197921753, "learning_rate": 0.0001, "loss": 0.9032, "step": 904 }, { "epoch": 0.8043550716587046, "grad_norm": 0.36820754408836365, "learning_rate": 0.0001, "loss": 0.9314, "step": 905 }, { "epoch": 0.8052438617931341, "grad_norm": 0.4033227264881134, "learning_rate": 0.0001, "loss": 0.9648, "step": 906 }, { "epoch": 0.8061326519275636, "grad_norm": 0.3827173411846161, "learning_rate": 0.0001, "loss": 0.8304, "step": 907 }, { "epoch": 0.8070214420619931, "grad_norm": 0.3617156147956848, "learning_rate": 0.0001, "loss": 0.8478, "step": 908 }, { "epoch": 0.8079102321964227, "grad_norm": 0.38975533843040466, "learning_rate": 0.0001, "loss": 0.8794, "step": 909 }, { "epoch": 0.8087990223308521, "grad_norm": 0.39395904541015625, "learning_rate": 0.0001, "loss": 0.8944, "step": 910 }, { "epoch": 0.8096878124652817, "grad_norm": 0.31278157234191895, "learning_rate": 0.0001, "loss": 0.8736, "step": 911 }, { "epoch": 0.8105766025997111, "grad_norm": 0.3955362141132355, "learning_rate": 0.0001, "loss": 1.012, "step": 912 }, { "epoch": 0.8114653927341406, "grad_norm": 0.41375401616096497, "learning_rate": 0.0001, "loss": 0.9767, "step": 913 }, { "epoch": 0.8123541828685702, "grad_norm": 0.38847875595092773, "learning_rate": 0.0001, "loss": 0.9193, "step": 914 }, { "epoch": 0.8132429730029996, "grad_norm": 0.42238739132881165, "learning_rate": 0.0001, "loss": 1.0099, "step": 915 }, { "epoch": 0.8141317631374292, "grad_norm": 0.3599422872066498, "learning_rate": 0.0001, "loss": 0.8548, "step": 916 }, { "epoch": 0.8150205532718586, "grad_norm": 0.369069367647171, "learning_rate": 0.0001, "loss": 0.9032, "step": 917 }, { "epoch": 0.8159093434062882, "grad_norm": 0.3789387047290802, "learning_rate": 0.0001, "loss": 0.9597, "step": 918 }, { "epoch": 0.8167981335407177, "grad_norm": 0.37680602073669434, "learning_rate": 0.0001, "loss": 0.9284, "step": 919 }, { "epoch": 0.8176869236751472, "grad_norm": 0.4254203140735626, "learning_rate": 0.0001, "loss": 0.9765, "step": 920 }, { "epoch": 0.8185757138095767, "grad_norm": 0.3756076991558075, "learning_rate": 0.0001, "loss": 0.9469, "step": 921 }, { "epoch": 0.8194645039440063, "grad_norm": 0.37767136096954346, "learning_rate": 0.0001, "loss": 0.926, "step": 922 }, { "epoch": 0.8203532940784357, "grad_norm": 0.43672624230384827, "learning_rate": 0.0001, "loss": 0.9305, "step": 923 }, { "epoch": 0.8212420842128653, "grad_norm": 0.36939603090286255, "learning_rate": 0.0001, "loss": 0.8828, "step": 924 }, { "epoch": 0.8221308743472947, "grad_norm": 0.41821223497390747, "learning_rate": 0.0001, "loss": 0.9346, "step": 925 }, { "epoch": 0.8230196644817243, "grad_norm": 0.3761398494243622, "learning_rate": 0.0001, "loss": 0.8826, "step": 926 }, { "epoch": 0.8239084546161538, "grad_norm": 0.4117630124092102, "learning_rate": 0.0001, "loss": 0.9253, "step": 927 }, { "epoch": 0.8247972447505832, "grad_norm": 0.31008240580558777, "learning_rate": 0.0001, "loss": 0.9083, "step": 928 }, { "epoch": 0.8256860348850128, "grad_norm": 0.3615438938140869, "learning_rate": 0.0001, "loss": 0.8833, "step": 929 }, { "epoch": 0.8265748250194422, "grad_norm": 0.3704020082950592, "learning_rate": 0.0001, "loss": 1.0443, "step": 930 }, { "epoch": 0.8274636151538718, "grad_norm": 0.3424414098262787, "learning_rate": 0.0001, "loss": 0.959, "step": 931 }, { "epoch": 0.8283524052883013, "grad_norm": 0.41668984293937683, "learning_rate": 0.0001, "loss": 1.0217, "step": 932 }, { "epoch": 0.8292411954227308, "grad_norm": 0.35622361302375793, "learning_rate": 0.0001, "loss": 0.9546, "step": 933 }, { "epoch": 0.8301299855571603, "grad_norm": 0.36572763323783875, "learning_rate": 0.0001, "loss": 0.9514, "step": 934 }, { "epoch": 0.8310187756915898, "grad_norm": 0.46444347500801086, "learning_rate": 0.0001, "loss": 1.0018, "step": 935 }, { "epoch": 0.8319075658260193, "grad_norm": 0.39167654514312744, "learning_rate": 0.0001, "loss": 0.9338, "step": 936 }, { "epoch": 0.8327963559604489, "grad_norm": 0.4243658781051636, "learning_rate": 0.0001, "loss": 0.9801, "step": 937 }, { "epoch": 0.8336851460948783, "grad_norm": 2.1183691024780273, "learning_rate": 0.0001, "loss": 0.9376, "step": 938 }, { "epoch": 0.8345739362293079, "grad_norm": 0.4078412353992462, "learning_rate": 0.0001, "loss": 0.9533, "step": 939 }, { "epoch": 0.8354627263637374, "grad_norm": 0.36636313796043396, "learning_rate": 0.0001, "loss": 0.993, "step": 940 }, { "epoch": 0.8363515164981669, "grad_norm": 0.42190858721733093, "learning_rate": 0.0001, "loss": 0.9789, "step": 941 }, { "epoch": 0.8372403066325964, "grad_norm": 0.34506893157958984, "learning_rate": 0.0001, "loss": 0.9759, "step": 942 }, { "epoch": 0.8381290967670258, "grad_norm": 0.43273380398750305, "learning_rate": 0.0001, "loss": 0.9753, "step": 943 }, { "epoch": 0.8390178869014554, "grad_norm": 0.36178067326545715, "learning_rate": 0.0001, "loss": 0.9842, "step": 944 }, { "epoch": 0.839906677035885, "grad_norm": 0.3905907869338989, "learning_rate": 0.0001, "loss": 0.9809, "step": 945 }, { "epoch": 0.8407954671703144, "grad_norm": 0.3678842782974243, "learning_rate": 0.0001, "loss": 0.939, "step": 946 }, { "epoch": 0.8416842573047439, "grad_norm": 0.3797389268875122, "learning_rate": 0.0001, "loss": 0.8508, "step": 947 }, { "epoch": 0.8425730474391734, "grad_norm": 0.43192946910858154, "learning_rate": 0.0001, "loss": 0.9417, "step": 948 }, { "epoch": 0.8434618375736029, "grad_norm": 0.4288097620010376, "learning_rate": 0.0001, "loss": 0.9428, "step": 949 }, { "epoch": 0.8443506277080325, "grad_norm": 0.3306112587451935, "learning_rate": 0.0001, "loss": 0.9606, "step": 950 }, { "epoch": 0.8452394178424619, "grad_norm": 0.3389092683792114, "learning_rate": 0.0001, "loss": 0.8848, "step": 951 }, { "epoch": 0.8461282079768915, "grad_norm": 0.38675200939178467, "learning_rate": 0.0001, "loss": 0.9428, "step": 952 }, { "epoch": 0.8470169981113209, "grad_norm": 0.3907453417778015, "learning_rate": 0.0001, "loss": 0.9628, "step": 953 }, { "epoch": 0.8479057882457505, "grad_norm": 0.3519386649131775, "learning_rate": 0.0001, "loss": 0.9262, "step": 954 }, { "epoch": 0.84879457838018, "grad_norm": 0.38831275701522827, "learning_rate": 0.0001, "loss": 0.904, "step": 955 }, { "epoch": 0.8496833685146095, "grad_norm": 0.39892178773880005, "learning_rate": 0.0001, "loss": 0.9897, "step": 956 }, { "epoch": 0.850572158649039, "grad_norm": 0.4237317144870758, "learning_rate": 0.0001, "loss": 0.9679, "step": 957 }, { "epoch": 0.8514609487834685, "grad_norm": 0.35414162278175354, "learning_rate": 0.0001, "loss": 0.9362, "step": 958 }, { "epoch": 0.852349738917898, "grad_norm": 0.34299251437187195, "learning_rate": 0.0001, "loss": 0.9223, "step": 959 }, { "epoch": 0.8532385290523276, "grad_norm": 0.3854987621307373, "learning_rate": 0.0001, "loss": 0.9285, "step": 960 }, { "epoch": 0.854127319186757, "grad_norm": 0.3863328993320465, "learning_rate": 0.0001, "loss": 0.9039, "step": 961 }, { "epoch": 0.8550161093211865, "grad_norm": 0.3316647708415985, "learning_rate": 0.0001, "loss": 0.9445, "step": 962 }, { "epoch": 0.8559048994556161, "grad_norm": 0.39452093839645386, "learning_rate": 0.0001, "loss": 0.8977, "step": 963 }, { "epoch": 0.8567936895900455, "grad_norm": 0.3937751352787018, "learning_rate": 0.0001, "loss": 0.914, "step": 964 }, { "epoch": 0.8576824797244751, "grad_norm": 0.31849080324172974, "learning_rate": 0.0001, "loss": 0.8676, "step": 965 }, { "epoch": 0.8585712698589045, "grad_norm": 0.3330874741077423, "learning_rate": 0.0001, "loss": 0.8927, "step": 966 }, { "epoch": 0.8594600599933341, "grad_norm": 0.3816487789154053, "learning_rate": 0.0001, "loss": 0.9715, "step": 967 }, { "epoch": 0.8603488501277636, "grad_norm": 0.4189969301223755, "learning_rate": 0.0001, "loss": 0.9714, "step": 968 }, { "epoch": 0.8612376402621931, "grad_norm": 0.3965904712677002, "learning_rate": 0.0001, "loss": 0.9532, "step": 969 }, { "epoch": 0.8621264303966226, "grad_norm": 0.3900628089904785, "learning_rate": 0.0001, "loss": 0.9577, "step": 970 }, { "epoch": 0.8630152205310521, "grad_norm": 0.43104588985443115, "learning_rate": 0.0001, "loss": 1.0495, "step": 971 }, { "epoch": 0.8639040106654816, "grad_norm": 0.39500030875205994, "learning_rate": 0.0001, "loss": 0.9637, "step": 972 }, { "epoch": 0.8647928007999112, "grad_norm": 0.3971693813800812, "learning_rate": 0.0001, "loss": 1.0178, "step": 973 }, { "epoch": 0.8656815909343406, "grad_norm": 0.3862766921520233, "learning_rate": 0.0001, "loss": 1.0042, "step": 974 }, { "epoch": 0.8665703810687702, "grad_norm": 0.3328218162059784, "learning_rate": 0.0001, "loss": 0.9846, "step": 975 }, { "epoch": 0.8674591712031996, "grad_norm": 0.36906227469444275, "learning_rate": 0.0001, "loss": 0.9593, "step": 976 }, { "epoch": 0.8683479613376291, "grad_norm": 0.40187719464302063, "learning_rate": 0.0001, "loss": 0.97, "step": 977 }, { "epoch": 0.8692367514720587, "grad_norm": 0.3088963031768799, "learning_rate": 0.0001, "loss": 0.8898, "step": 978 }, { "epoch": 0.8701255416064881, "grad_norm": 0.4896550178527832, "learning_rate": 0.0001, "loss": 0.9039, "step": 979 }, { "epoch": 0.8710143317409177, "grad_norm": 0.39287492632865906, "learning_rate": 0.0001, "loss": 0.8972, "step": 980 }, { "epoch": 0.8719031218753471, "grad_norm": 0.3530255854129791, "learning_rate": 0.0001, "loss": 0.949, "step": 981 }, { "epoch": 0.8727919120097767, "grad_norm": 0.4530492126941681, "learning_rate": 0.0001, "loss": 1.0146, "step": 982 }, { "epoch": 0.8736807021442062, "grad_norm": 0.3938184082508087, "learning_rate": 0.0001, "loss": 0.9926, "step": 983 }, { "epoch": 0.8745694922786357, "grad_norm": 0.41301020979881287, "learning_rate": 0.0001, "loss": 0.9626, "step": 984 }, { "epoch": 0.8754582824130652, "grad_norm": 0.5657033920288086, "learning_rate": 0.0001, "loss": 0.9557, "step": 985 }, { "epoch": 0.8763470725474948, "grad_norm": 0.39124661684036255, "learning_rate": 0.0001, "loss": 0.9334, "step": 986 }, { "epoch": 0.8772358626819242, "grad_norm": 0.367252916097641, "learning_rate": 0.0001, "loss": 0.9568, "step": 987 }, { "epoch": 0.8781246528163538, "grad_norm": 0.45111268758773804, "learning_rate": 0.0001, "loss": 0.9269, "step": 988 }, { "epoch": 0.8790134429507832, "grad_norm": 0.3949245810508728, "learning_rate": 0.0001, "loss": 0.9541, "step": 989 }, { "epoch": 0.8799022330852128, "grad_norm": 0.3697701096534729, "learning_rate": 0.0001, "loss": 0.858, "step": 990 }, { "epoch": 0.8807910232196423, "grad_norm": 0.3903849720954895, "learning_rate": 0.0001, "loss": 0.8917, "step": 991 }, { "epoch": 0.8816798133540718, "grad_norm": 0.3852884769439697, "learning_rate": 0.0001, "loss": 0.9524, "step": 992 }, { "epoch": 0.8825686034885013, "grad_norm": 0.425076425075531, "learning_rate": 0.0001, "loss": 0.9229, "step": 993 }, { "epoch": 0.8834573936229307, "grad_norm": 0.3908687233924866, "learning_rate": 0.0001, "loss": 0.8766, "step": 994 }, { "epoch": 0.8843461837573603, "grad_norm": 0.3858028054237366, "learning_rate": 0.0001, "loss": 0.9996, "step": 995 }, { "epoch": 0.8852349738917898, "grad_norm": 0.4030248820781708, "learning_rate": 0.0001, "loss": 1.0158, "step": 996 }, { "epoch": 0.8861237640262193, "grad_norm": 0.3604998290538788, "learning_rate": 0.0001, "loss": 0.8926, "step": 997 }, { "epoch": 0.8870125541606488, "grad_norm": 0.38563668727874756, "learning_rate": 0.0001, "loss": 0.8513, "step": 998 }, { "epoch": 0.8879013442950783, "grad_norm": 0.40275925397872925, "learning_rate": 0.0001, "loss": 1.0191, "step": 999 }, { "epoch": 0.8887901344295078, "grad_norm": 0.3867271840572357, "learning_rate": 0.0001, "loss": 0.8962, "step": 1000 }, { "epoch": 0.8896789245639374, "grad_norm": 0.39497697353363037, "learning_rate": 0.0001, "loss": 0.983, "step": 1001 }, { "epoch": 0.8905677146983668, "grad_norm": 0.3594026267528534, "learning_rate": 0.0001, "loss": 0.9519, "step": 1002 }, { "epoch": 0.8914565048327964, "grad_norm": 0.3522585332393646, "learning_rate": 0.0001, "loss": 0.9274, "step": 1003 }, { "epoch": 0.8923452949672258, "grad_norm": 0.41415807604789734, "learning_rate": 0.0001, "loss": 0.9674, "step": 1004 }, { "epoch": 0.8932340851016554, "grad_norm": 0.46825456619262695, "learning_rate": 0.0001, "loss": 0.9694, "step": 1005 }, { "epoch": 0.8941228752360849, "grad_norm": 0.3260214328765869, "learning_rate": 0.0001, "loss": 0.9656, "step": 1006 }, { "epoch": 0.8950116653705144, "grad_norm": 0.36279264092445374, "learning_rate": 0.0001, "loss": 0.9663, "step": 1007 }, { "epoch": 0.8959004555049439, "grad_norm": 0.4032383859157562, "learning_rate": 0.0001, "loss": 0.9276, "step": 1008 }, { "epoch": 0.8967892456393735, "grad_norm": 0.4371550679206848, "learning_rate": 0.0001, "loss": 1.0294, "step": 1009 }, { "epoch": 0.8976780357738029, "grad_norm": 0.36339619755744934, "learning_rate": 0.0001, "loss": 0.9453, "step": 1010 }, { "epoch": 0.8985668259082324, "grad_norm": 0.38631105422973633, "learning_rate": 0.0001, "loss": 0.9966, "step": 1011 }, { "epoch": 0.8994556160426619, "grad_norm": 0.34750688076019287, "learning_rate": 0.0001, "loss": 0.8757, "step": 1012 }, { "epoch": 0.9003444061770914, "grad_norm": 0.3377476632595062, "learning_rate": 0.0001, "loss": 0.973, "step": 1013 }, { "epoch": 0.901233196311521, "grad_norm": 0.39975231885910034, "learning_rate": 0.0001, "loss": 0.9392, "step": 1014 }, { "epoch": 0.9021219864459504, "grad_norm": 0.33771857619285583, "learning_rate": 0.0001, "loss": 0.9934, "step": 1015 }, { "epoch": 0.90301077658038, "grad_norm": 0.33681410551071167, "learning_rate": 0.0001, "loss": 0.9024, "step": 1016 }, { "epoch": 0.9038995667148094, "grad_norm": 0.39895641803741455, "learning_rate": 0.0001, "loss": 0.9482, "step": 1017 }, { "epoch": 0.904788356849239, "grad_norm": 0.3800975978374481, "learning_rate": 0.0001, "loss": 0.9504, "step": 1018 }, { "epoch": 0.9056771469836685, "grad_norm": 0.3816840350627899, "learning_rate": 0.0001, "loss": 0.9613, "step": 1019 }, { "epoch": 0.906565937118098, "grad_norm": 0.3548993766307831, "learning_rate": 0.0001, "loss": 0.9234, "step": 1020 }, { "epoch": 0.9074547272525275, "grad_norm": 0.38023990392684937, "learning_rate": 0.0001, "loss": 0.9697, "step": 1021 }, { "epoch": 0.908343517386957, "grad_norm": 0.35391300916671753, "learning_rate": 0.0001, "loss": 0.8196, "step": 1022 }, { "epoch": 0.9092323075213865, "grad_norm": 0.4617893397808075, "learning_rate": 0.0001, "loss": 1.0236, "step": 1023 }, { "epoch": 0.9101210976558161, "grad_norm": 0.3808736503124237, "learning_rate": 0.0001, "loss": 0.9539, "step": 1024 }, { "epoch": 0.9110098877902455, "grad_norm": 0.41231459379196167, "learning_rate": 0.0001, "loss": 0.9251, "step": 1025 }, { "epoch": 0.911898677924675, "grad_norm": 0.34969326853752136, "learning_rate": 0.0001, "loss": 0.9151, "step": 1026 }, { "epoch": 0.9127874680591045, "grad_norm": 0.3669031262397766, "learning_rate": 0.0001, "loss": 0.8832, "step": 1027 }, { "epoch": 0.913676258193534, "grad_norm": 0.3683227598667145, "learning_rate": 0.0001, "loss": 0.9324, "step": 1028 }, { "epoch": 0.9145650483279636, "grad_norm": 0.3768469989299774, "learning_rate": 0.0001, "loss": 0.8713, "step": 1029 }, { "epoch": 0.915453838462393, "grad_norm": 0.40044888854026794, "learning_rate": 0.0001, "loss": 1.0428, "step": 1030 }, { "epoch": 0.9163426285968226, "grad_norm": 0.3713497519493103, "learning_rate": 0.0001, "loss": 0.8803, "step": 1031 }, { "epoch": 0.9172314187312521, "grad_norm": 0.33332690596580505, "learning_rate": 0.0001, "loss": 0.9579, "step": 1032 }, { "epoch": 0.9181202088656816, "grad_norm": 0.3899736702442169, "learning_rate": 0.0001, "loss": 0.9934, "step": 1033 }, { "epoch": 0.9190089990001111, "grad_norm": 0.3549093008041382, "learning_rate": 0.0001, "loss": 0.9093, "step": 1034 }, { "epoch": 0.9198977891345406, "grad_norm": 0.37367865443229675, "learning_rate": 0.0001, "loss": 0.9748, "step": 1035 }, { "epoch": 0.9207865792689701, "grad_norm": 0.3680754601955414, "learning_rate": 0.0001, "loss": 1.0113, "step": 1036 }, { "epoch": 0.9216753694033997, "grad_norm": 0.36346691846847534, "learning_rate": 0.0001, "loss": 0.8892, "step": 1037 }, { "epoch": 0.9225641595378291, "grad_norm": 0.3745006322860718, "learning_rate": 0.0001, "loss": 0.9352, "step": 1038 }, { "epoch": 0.9234529496722587, "grad_norm": 0.3873980641365051, "learning_rate": 0.0001, "loss": 0.8809, "step": 1039 }, { "epoch": 0.9243417398066881, "grad_norm": 0.42303210496902466, "learning_rate": 0.0001, "loss": 1.0008, "step": 1040 }, { "epoch": 0.9252305299411177, "grad_norm": 0.38883471488952637, "learning_rate": 0.0001, "loss": 0.9839, "step": 1041 }, { "epoch": 0.9261193200755472, "grad_norm": 0.39919066429138184, "learning_rate": 0.0001, "loss": 0.9246, "step": 1042 }, { "epoch": 0.9270081102099766, "grad_norm": 0.3721344769001007, "learning_rate": 0.0001, "loss": 0.8927, "step": 1043 }, { "epoch": 0.9278969003444062, "grad_norm": 0.4106079638004303, "learning_rate": 0.0001, "loss": 0.9501, "step": 1044 }, { "epoch": 0.9287856904788356, "grad_norm": 0.35994967818260193, "learning_rate": 0.0001, "loss": 0.9483, "step": 1045 }, { "epoch": 0.9296744806132652, "grad_norm": 0.3131955862045288, "learning_rate": 0.0001, "loss": 0.9175, "step": 1046 }, { "epoch": 0.9305632707476947, "grad_norm": 0.37486889958381653, "learning_rate": 0.0001, "loss": 0.9459, "step": 1047 }, { "epoch": 0.9314520608821242, "grad_norm": 0.36107298731803894, "learning_rate": 0.0001, "loss": 0.9201, "step": 1048 }, { "epoch": 0.9323408510165537, "grad_norm": 0.3575218915939331, "learning_rate": 0.0001, "loss": 0.9254, "step": 1049 }, { "epoch": 0.9332296411509832, "grad_norm": 0.36310702562332153, "learning_rate": 0.0001, "loss": 0.9542, "step": 1050 }, { "epoch": 0.9341184312854127, "grad_norm": 0.3741386830806732, "learning_rate": 0.0001, "loss": 0.8913, "step": 1051 }, { "epoch": 0.9350072214198423, "grad_norm": 0.3636328876018524, "learning_rate": 0.0001, "loss": 0.9218, "step": 1052 }, { "epoch": 0.9358960115542717, "grad_norm": 0.3283315598964691, "learning_rate": 0.0001, "loss": 0.9341, "step": 1053 }, { "epoch": 0.9367848016887013, "grad_norm": 0.4130898118019104, "learning_rate": 0.0001, "loss": 0.9666, "step": 1054 }, { "epoch": 0.9376735918231308, "grad_norm": 0.3228664696216583, "learning_rate": 0.0001, "loss": 0.8924, "step": 1055 }, { "epoch": 0.9385623819575603, "grad_norm": 0.42613714933395386, "learning_rate": 0.0001, "loss": 0.9347, "step": 1056 }, { "epoch": 0.9394511720919898, "grad_norm": 0.4059881567955017, "learning_rate": 0.0001, "loss": 0.906, "step": 1057 }, { "epoch": 0.9403399622264192, "grad_norm": 0.4185310900211334, "learning_rate": 0.0001, "loss": 1.0162, "step": 1058 }, { "epoch": 0.9412287523608488, "grad_norm": 0.38086622953414917, "learning_rate": 0.0001, "loss": 1.0297, "step": 1059 }, { "epoch": 0.9421175424952783, "grad_norm": 0.3949964642524719, "learning_rate": 0.0001, "loss": 0.9184, "step": 1060 }, { "epoch": 0.9430063326297078, "grad_norm": 0.3886110186576843, "learning_rate": 0.0001, "loss": 0.9339, "step": 1061 }, { "epoch": 0.9438951227641373, "grad_norm": 0.37949997186660767, "learning_rate": 0.0001, "loss": 0.9141, "step": 1062 }, { "epoch": 0.9447839128985668, "grad_norm": 0.34117576479911804, "learning_rate": 0.0001, "loss": 0.9153, "step": 1063 }, { "epoch": 0.9456727030329963, "grad_norm": 0.36747774481773376, "learning_rate": 0.0001, "loss": 0.9275, "step": 1064 }, { "epoch": 0.9465614931674259, "grad_norm": 0.3943864703178406, "learning_rate": 0.0001, "loss": 0.8985, "step": 1065 }, { "epoch": 0.9474502833018553, "grad_norm": 0.3849271237850189, "learning_rate": 0.0001, "loss": 0.956, "step": 1066 }, { "epoch": 0.9483390734362849, "grad_norm": 0.3633384108543396, "learning_rate": 0.0001, "loss": 0.9836, "step": 1067 }, { "epoch": 0.9492278635707143, "grad_norm": 0.3905209004878998, "learning_rate": 0.0001, "loss": 0.9753, "step": 1068 }, { "epoch": 0.9501166537051439, "grad_norm": 0.36972206830978394, "learning_rate": 0.0001, "loss": 0.9108, "step": 1069 }, { "epoch": 0.9510054438395734, "grad_norm": 0.38415467739105225, "learning_rate": 0.0001, "loss": 0.9539, "step": 1070 }, { "epoch": 0.9518942339740029, "grad_norm": 0.3597318232059479, "learning_rate": 0.0001, "loss": 0.8953, "step": 1071 }, { "epoch": 0.9527830241084324, "grad_norm": 0.374531090259552, "learning_rate": 0.0001, "loss": 0.981, "step": 1072 }, { "epoch": 0.953671814242862, "grad_norm": 0.3501724302768707, "learning_rate": 0.0001, "loss": 0.9589, "step": 1073 }, { "epoch": 0.9545606043772914, "grad_norm": 0.3414580821990967, "learning_rate": 0.0001, "loss": 0.901, "step": 1074 }, { "epoch": 0.955449394511721, "grad_norm": 0.3523072600364685, "learning_rate": 0.0001, "loss": 0.901, "step": 1075 }, { "epoch": 0.9563381846461504, "grad_norm": 0.38694173097610474, "learning_rate": 0.0001, "loss": 0.9884, "step": 1076 }, { "epoch": 0.9572269747805799, "grad_norm": 0.41436851024627686, "learning_rate": 0.0001, "loss": 0.9123, "step": 1077 }, { "epoch": 0.9581157649150095, "grad_norm": 0.34676891565322876, "learning_rate": 0.0001, "loss": 0.9113, "step": 1078 }, { "epoch": 0.9590045550494389, "grad_norm": 0.3826766908168793, "learning_rate": 0.0001, "loss": 0.9213, "step": 1079 }, { "epoch": 0.9598933451838685, "grad_norm": 0.34997478127479553, "learning_rate": 0.0001, "loss": 0.9178, "step": 1080 }, { "epoch": 0.9607821353182979, "grad_norm": 0.40335193276405334, "learning_rate": 0.0001, "loss": 1.0525, "step": 1081 }, { "epoch": 0.9616709254527275, "grad_norm": 0.4107684791088104, "learning_rate": 0.0001, "loss": 1.0239, "step": 1082 }, { "epoch": 0.962559715587157, "grad_norm": 0.3917597830295563, "learning_rate": 0.0001, "loss": 0.8393, "step": 1083 }, { "epoch": 0.9634485057215865, "grad_norm": 0.3460274040699005, "learning_rate": 0.0001, "loss": 0.9266, "step": 1084 }, { "epoch": 0.964337295856016, "grad_norm": 0.36212509870529175, "learning_rate": 0.0001, "loss": 0.9578, "step": 1085 }, { "epoch": 0.9652260859904455, "grad_norm": 0.4016794264316559, "learning_rate": 0.0001, "loss": 0.9461, "step": 1086 }, { "epoch": 0.966114876124875, "grad_norm": 0.3891691267490387, "learning_rate": 0.0001, "loss": 0.9576, "step": 1087 }, { "epoch": 0.9670036662593046, "grad_norm": 0.35440635681152344, "learning_rate": 0.0001, "loss": 0.9546, "step": 1088 }, { "epoch": 0.967892456393734, "grad_norm": 0.3960229158401489, "learning_rate": 0.0001, "loss": 1.0514, "step": 1089 }, { "epoch": 0.9687812465281636, "grad_norm": 0.38155800104141235, "learning_rate": 0.0001, "loss": 0.8884, "step": 1090 }, { "epoch": 0.969670036662593, "grad_norm": 0.3513757884502411, "learning_rate": 0.0001, "loss": 0.8801, "step": 1091 }, { "epoch": 0.9705588267970225, "grad_norm": 0.38408926129341125, "learning_rate": 0.0001, "loss": 0.8309, "step": 1092 }, { "epoch": 0.9714476169314521, "grad_norm": 0.378099262714386, "learning_rate": 0.0001, "loss": 0.9089, "step": 1093 }, { "epoch": 0.9723364070658815, "grad_norm": 0.3182670474052429, "learning_rate": 0.0001, "loss": 0.9489, "step": 1094 }, { "epoch": 0.9732251972003111, "grad_norm": 0.3988575041294098, "learning_rate": 0.0001, "loss": 0.9364, "step": 1095 }, { "epoch": 0.9741139873347406, "grad_norm": 0.4182455837726593, "learning_rate": 0.0001, "loss": 1.0097, "step": 1096 }, { "epoch": 0.9750027774691701, "grad_norm": 0.3374396562576294, "learning_rate": 0.0001, "loss": 0.9467, "step": 1097 }, { "epoch": 0.9758915676035996, "grad_norm": 0.36527591943740845, "learning_rate": 0.0001, "loss": 0.9365, "step": 1098 }, { "epoch": 0.9767803577380291, "grad_norm": 0.3679245710372925, "learning_rate": 0.0001, "loss": 0.9387, "step": 1099 }, { "epoch": 0.9776691478724586, "grad_norm": 0.3694630265235901, "learning_rate": 0.0001, "loss": 0.8805, "step": 1100 }, { "epoch": 0.9785579380068882, "grad_norm": 0.3750841021537781, "learning_rate": 0.0001, "loss": 0.9024, "step": 1101 }, { "epoch": 0.9794467281413176, "grad_norm": 0.4152679145336151, "learning_rate": 0.0001, "loss": 1.0473, "step": 1102 }, { "epoch": 0.9803355182757472, "grad_norm": 0.3526078164577484, "learning_rate": 0.0001, "loss": 0.9227, "step": 1103 }, { "epoch": 0.9812243084101766, "grad_norm": 0.3585357069969177, "learning_rate": 0.0001, "loss": 0.961, "step": 1104 }, { "epoch": 0.9821130985446062, "grad_norm": 0.38510292768478394, "learning_rate": 0.0001, "loss": 0.9997, "step": 1105 }, { "epoch": 0.9830018886790357, "grad_norm": 0.34511297941207886, "learning_rate": 0.0001, "loss": 0.895, "step": 1106 }, { "epoch": 0.9838906788134651, "grad_norm": 0.36190247535705566, "learning_rate": 0.0001, "loss": 0.9789, "step": 1107 }, { "epoch": 0.9847794689478947, "grad_norm": 0.3606034219264984, "learning_rate": 0.0001, "loss": 0.9512, "step": 1108 }, { "epoch": 0.9856682590823241, "grad_norm": 0.31674253940582275, "learning_rate": 0.0001, "loss": 0.9767, "step": 1109 }, { "epoch": 0.9865570492167537, "grad_norm": 0.3847641944885254, "learning_rate": 0.0001, "loss": 0.9811, "step": 1110 }, { "epoch": 0.9874458393511832, "grad_norm": 0.28332382440567017, "learning_rate": 0.0001, "loss": 0.9064, "step": 1111 }, { "epoch": 0.9883346294856127, "grad_norm": 0.3163788318634033, "learning_rate": 0.0001, "loss": 1.0134, "step": 1112 }, { "epoch": 0.9892234196200422, "grad_norm": 0.37489238381385803, "learning_rate": 0.0001, "loss": 0.9254, "step": 1113 }, { "epoch": 0.9901122097544717, "grad_norm": 0.3543959856033325, "learning_rate": 0.0001, "loss": 0.9415, "step": 1114 }, { "epoch": 0.9910009998889012, "grad_norm": 0.42274850606918335, "learning_rate": 0.0001, "loss": 0.9142, "step": 1115 }, { "epoch": 0.9918897900233308, "grad_norm": 0.38619187474250793, "learning_rate": 0.0001, "loss": 0.9387, "step": 1116 }, { "epoch": 0.9927785801577602, "grad_norm": 0.379362553358078, "learning_rate": 0.0001, "loss": 0.9879, "step": 1117 }, { "epoch": 0.9936673702921898, "grad_norm": 0.4227985441684723, "learning_rate": 0.0001, "loss": 1.0083, "step": 1118 }, { "epoch": 0.9945561604266193, "grad_norm": 0.4064882695674896, "learning_rate": 0.0001, "loss": 0.9726, "step": 1119 }, { "epoch": 0.9954449505610488, "grad_norm": 0.35058096051216125, "learning_rate": 0.0001, "loss": 0.9862, "step": 1120 }, { "epoch": 0.9963337406954783, "grad_norm": 0.3268047869205475, "learning_rate": 0.0001, "loss": 0.8753, "step": 1121 }, { "epoch": 0.9972225308299077, "grad_norm": 0.3554556667804718, "learning_rate": 0.0001, "loss": 1.0007, "step": 1122 }, { "epoch": 0.9981113209643373, "grad_norm": 0.368964284658432, "learning_rate": 0.0001, "loss": 0.9934, "step": 1123 }, { "epoch": 0.9990001110987669, "grad_norm": 0.39982903003692627, "learning_rate": 0.0001, "loss": 0.8933, "step": 1124 }, { "epoch": 0.9998889012331963, "grad_norm": 0.3517761528491974, "learning_rate": 0.0001, "loss": 0.9454, "step": 1125 }, { "epoch": 1.0007776913676258, "grad_norm": 0.3954550623893738, "learning_rate": 0.0001, "loss": 0.9681, "step": 1126 }, { "epoch": 1.0016664815020553, "grad_norm": 0.377174973487854, "learning_rate": 0.0001, "loss": 0.8917, "step": 1127 }, { "epoch": 1.002555271636485, "grad_norm": 0.4547029435634613, "learning_rate": 0.0001, "loss": 0.8837, "step": 1128 }, { "epoch": 1.0034440617709144, "grad_norm": 0.4161812961101532, "learning_rate": 0.0001, "loss": 1.0238, "step": 1129 }, { "epoch": 1.0043328519053438, "grad_norm": 0.398183673620224, "learning_rate": 0.0001, "loss": 0.9245, "step": 1130 }, { "epoch": 1.0052216420397733, "grad_norm": 0.41381561756134033, "learning_rate": 0.0001, "loss": 0.909, "step": 1131 }, { "epoch": 1.006110432174203, "grad_norm": 0.37161487340927124, "learning_rate": 0.0001, "loss": 0.9355, "step": 1132 }, { "epoch": 1.0069992223086324, "grad_norm": 0.378110408782959, "learning_rate": 0.0001, "loss": 0.973, "step": 1133 }, { "epoch": 1.0078880124430618, "grad_norm": 0.3747970163822174, "learning_rate": 0.0001, "loss": 0.8684, "step": 1134 }, { "epoch": 1.0087768025774915, "grad_norm": 0.3934114873409271, "learning_rate": 0.0001, "loss": 0.9361, "step": 1135 }, { "epoch": 1.009665592711921, "grad_norm": 0.41165485978126526, "learning_rate": 0.0001, "loss": 0.9385, "step": 1136 }, { "epoch": 1.0105543828463504, "grad_norm": 0.6741541624069214, "learning_rate": 0.0001, "loss": 0.8731, "step": 1137 }, { "epoch": 1.01144317298078, "grad_norm": 0.3466031551361084, "learning_rate": 0.0001, "loss": 0.8852, "step": 1138 }, { "epoch": 1.0123319631152095, "grad_norm": 0.3946625590324402, "learning_rate": 0.0001, "loss": 0.9609, "step": 1139 }, { "epoch": 1.013220753249639, "grad_norm": 0.3739851713180542, "learning_rate": 0.0001, "loss": 0.7588, "step": 1140 }, { "epoch": 1.0141095433840683, "grad_norm": 0.3292269706726074, "learning_rate": 0.0001, "loss": 0.9439, "step": 1141 }, { "epoch": 1.014998333518498, "grad_norm": 0.4359779953956604, "learning_rate": 0.0001, "loss": 0.9791, "step": 1142 }, { "epoch": 1.0158871236529274, "grad_norm": 0.392539381980896, "learning_rate": 0.0001, "loss": 0.8939, "step": 1143 }, { "epoch": 1.0167759137873569, "grad_norm": 0.36536142230033875, "learning_rate": 0.0001, "loss": 0.9454, "step": 1144 }, { "epoch": 1.0176647039217865, "grad_norm": 0.4121522903442383, "learning_rate": 0.0001, "loss": 0.951, "step": 1145 }, { "epoch": 1.018553494056216, "grad_norm": 0.3721693456172943, "learning_rate": 0.0001, "loss": 0.9443, "step": 1146 }, { "epoch": 1.0194422841906454, "grad_norm": 0.42354851961135864, "learning_rate": 0.0001, "loss": 0.903, "step": 1147 }, { "epoch": 1.020331074325075, "grad_norm": 0.3700781464576721, "learning_rate": 0.0001, "loss": 0.9729, "step": 1148 }, { "epoch": 1.0212198644595045, "grad_norm": 0.37510499358177185, "learning_rate": 0.0001, "loss": 0.9149, "step": 1149 }, { "epoch": 1.022108654593934, "grad_norm": 0.3892500400543213, "learning_rate": 0.0001, "loss": 0.9894, "step": 1150 }, { "epoch": 1.0229974447283636, "grad_norm": 0.4017266631126404, "learning_rate": 0.0001, "loss": 0.9369, "step": 1151 }, { "epoch": 1.023886234862793, "grad_norm": 0.37094780802726746, "learning_rate": 0.0001, "loss": 1.0178, "step": 1152 }, { "epoch": 1.0247750249972225, "grad_norm": 0.3605554401874542, "learning_rate": 0.0001, "loss": 0.8953, "step": 1153 }, { "epoch": 1.025663815131652, "grad_norm": 0.3322463035583496, "learning_rate": 0.0001, "loss": 0.8531, "step": 1154 }, { "epoch": 1.0265526052660816, "grad_norm": 0.35462141036987305, "learning_rate": 0.0001, "loss": 0.9225, "step": 1155 }, { "epoch": 1.027441395400511, "grad_norm": 0.3974057137966156, "learning_rate": 0.0001, "loss": 0.9839, "step": 1156 }, { "epoch": 1.0283301855349405, "grad_norm": 0.3556059002876282, "learning_rate": 0.0001, "loss": 0.9588, "step": 1157 }, { "epoch": 1.0292189756693702, "grad_norm": 0.37689536809921265, "learning_rate": 0.0001, "loss": 0.923, "step": 1158 }, { "epoch": 1.0301077658037996, "grad_norm": 0.36872097849845886, "learning_rate": 0.0001, "loss": 0.9578, "step": 1159 }, { "epoch": 1.030996555938229, "grad_norm": 0.44219520688056946, "learning_rate": 0.0001, "loss": 1.0076, "step": 1160 }, { "epoch": 1.0318853460726587, "grad_norm": 0.3993608355522156, "learning_rate": 0.0001, "loss": 0.9316, "step": 1161 }, { "epoch": 1.0327741362070881, "grad_norm": 0.3941507041454315, "learning_rate": 0.0001, "loss": 0.9119, "step": 1162 }, { "epoch": 1.0336629263415176, "grad_norm": 0.43561699986457825, "learning_rate": 0.0001, "loss": 0.8983, "step": 1163 }, { "epoch": 1.034551716475947, "grad_norm": 0.3520337641239166, "learning_rate": 0.0001, "loss": 0.8171, "step": 1164 }, { "epoch": 1.0354405066103767, "grad_norm": 0.376632958650589, "learning_rate": 0.0001, "loss": 0.8648, "step": 1165 }, { "epoch": 1.0363292967448061, "grad_norm": 0.6677507162094116, "learning_rate": 0.0001, "loss": 0.9802, "step": 1166 }, { "epoch": 1.0372180868792356, "grad_norm": 0.3786594271659851, "learning_rate": 0.0001, "loss": 1.0064, "step": 1167 }, { "epoch": 1.0381068770136652, "grad_norm": 0.3683249056339264, "learning_rate": 0.0001, "loss": 0.8562, "step": 1168 }, { "epoch": 1.0389956671480947, "grad_norm": 0.38416755199432373, "learning_rate": 0.0001, "loss": 0.9727, "step": 1169 }, { "epoch": 1.039884457282524, "grad_norm": 0.3452812135219574, "learning_rate": 0.0001, "loss": 0.8639, "step": 1170 }, { "epoch": 1.0407732474169538, "grad_norm": 0.3969337046146393, "learning_rate": 0.0001, "loss": 0.9049, "step": 1171 }, { "epoch": 1.0416620375513832, "grad_norm": 0.3651241362094879, "learning_rate": 0.0001, "loss": 0.8987, "step": 1172 }, { "epoch": 1.0425508276858126, "grad_norm": 0.4599679410457611, "learning_rate": 0.0001, "loss": 1.0484, "step": 1173 }, { "epoch": 1.0434396178202423, "grad_norm": 0.4104447066783905, "learning_rate": 0.0001, "loss": 0.9513, "step": 1174 }, { "epoch": 1.0443284079546717, "grad_norm": 0.39843717217445374, "learning_rate": 0.0001, "loss": 1.0321, "step": 1175 }, { "epoch": 1.0452171980891012, "grad_norm": 0.3388933837413788, "learning_rate": 0.0001, "loss": 0.8599, "step": 1176 }, { "epoch": 1.0461059882235306, "grad_norm": 0.3476930856704712, "learning_rate": 0.0001, "loss": 0.8383, "step": 1177 }, { "epoch": 1.0469947783579603, "grad_norm": 0.37614160776138306, "learning_rate": 0.0001, "loss": 0.9211, "step": 1178 }, { "epoch": 1.0478835684923897, "grad_norm": 0.37879347801208496, "learning_rate": 0.0001, "loss": 0.9369, "step": 1179 }, { "epoch": 1.0487723586268192, "grad_norm": 0.3917993903160095, "learning_rate": 0.0001, "loss": 0.9324, "step": 1180 }, { "epoch": 1.0496611487612488, "grad_norm": 0.32946088910102844, "learning_rate": 0.0001, "loss": 0.913, "step": 1181 }, { "epoch": 1.0505499388956783, "grad_norm": 0.362324595451355, "learning_rate": 0.0001, "loss": 0.9393, "step": 1182 }, { "epoch": 1.0514387290301077, "grad_norm": 0.411262184381485, "learning_rate": 0.0001, "loss": 0.9152, "step": 1183 }, { "epoch": 1.0523275191645374, "grad_norm": 0.3655692934989929, "learning_rate": 0.0001, "loss": 0.8978, "step": 1184 }, { "epoch": 1.0532163092989668, "grad_norm": 0.4027964174747467, "learning_rate": 0.0001, "loss": 0.954, "step": 1185 }, { "epoch": 1.0541050994333963, "grad_norm": 0.362482488155365, "learning_rate": 0.0001, "loss": 0.8665, "step": 1186 }, { "epoch": 1.0549938895678257, "grad_norm": 0.43920066952705383, "learning_rate": 0.0001, "loss": 0.9462, "step": 1187 }, { "epoch": 1.0558826797022554, "grad_norm": 0.4220573604106903, "learning_rate": 0.0001, "loss": 1.0071, "step": 1188 }, { "epoch": 1.0567714698366848, "grad_norm": 0.3542270064353943, "learning_rate": 0.0001, "loss": 0.8843, "step": 1189 }, { "epoch": 1.0576602599711142, "grad_norm": 0.3698994815349579, "learning_rate": 0.0001, "loss": 0.883, "step": 1190 }, { "epoch": 1.058549050105544, "grad_norm": 0.39016515016555786, "learning_rate": 0.0001, "loss": 1.0091, "step": 1191 }, { "epoch": 1.0594378402399733, "grad_norm": 0.377370685338974, "learning_rate": 0.0001, "loss": 0.9807, "step": 1192 }, { "epoch": 1.0603266303744028, "grad_norm": 0.34474626183509827, "learning_rate": 0.0001, "loss": 0.9282, "step": 1193 }, { "epoch": 1.0612154205088324, "grad_norm": 0.35169529914855957, "learning_rate": 0.0001, "loss": 0.9907, "step": 1194 }, { "epoch": 1.0621042106432619, "grad_norm": 0.3528148829936981, "learning_rate": 0.0001, "loss": 0.8829, "step": 1195 }, { "epoch": 1.0629930007776913, "grad_norm": 0.382842481136322, "learning_rate": 0.0001, "loss": 1.0002, "step": 1196 }, { "epoch": 1.063881790912121, "grad_norm": 0.35213178396224976, "learning_rate": 0.0001, "loss": 0.8423, "step": 1197 }, { "epoch": 1.0647705810465504, "grad_norm": 0.3510130047798157, "learning_rate": 0.0001, "loss": 0.8727, "step": 1198 }, { "epoch": 1.0656593711809799, "grad_norm": 0.3832014501094818, "learning_rate": 0.0001, "loss": 0.9299, "step": 1199 }, { "epoch": 1.0665481613154093, "grad_norm": 0.3763968050479889, "learning_rate": 0.0001, "loss": 0.9403, "step": 1200 }, { "epoch": 1.067436951449839, "grad_norm": 0.4154573678970337, "learning_rate": 0.0001, "loss": 0.9413, "step": 1201 }, { "epoch": 1.0683257415842684, "grad_norm": 0.4002286195755005, "learning_rate": 0.0001, "loss": 1.0054, "step": 1202 }, { "epoch": 1.0692145317186978, "grad_norm": 0.40614718198776245, "learning_rate": 0.0001, "loss": 0.9248, "step": 1203 }, { "epoch": 1.0701033218531275, "grad_norm": 0.3200342357158661, "learning_rate": 0.0001, "loss": 0.9438, "step": 1204 }, { "epoch": 1.070992111987557, "grad_norm": 0.4085826575756073, "learning_rate": 0.0001, "loss": 0.9417, "step": 1205 }, { "epoch": 1.0718809021219864, "grad_norm": 0.3780025541782379, "learning_rate": 0.0001, "loss": 0.9076, "step": 1206 }, { "epoch": 1.072769692256416, "grad_norm": 0.42114147543907166, "learning_rate": 0.0001, "loss": 0.9071, "step": 1207 }, { "epoch": 1.0736584823908455, "grad_norm": 0.41555988788604736, "learning_rate": 0.0001, "loss": 0.9986, "step": 1208 }, { "epoch": 1.074547272525275, "grad_norm": 0.4004509747028351, "learning_rate": 0.0001, "loss": 0.9048, "step": 1209 }, { "epoch": 1.0754360626597044, "grad_norm": 1.0510995388031006, "learning_rate": 0.0001, "loss": 0.9838, "step": 1210 }, { "epoch": 1.076324852794134, "grad_norm": 0.3881635069847107, "learning_rate": 0.0001, "loss": 0.978, "step": 1211 }, { "epoch": 1.0772136429285635, "grad_norm": 0.4103749096393585, "learning_rate": 0.0001, "loss": 0.9817, "step": 1212 }, { "epoch": 1.078102433062993, "grad_norm": 0.4124666750431061, "learning_rate": 0.0001, "loss": 0.9354, "step": 1213 }, { "epoch": 1.0789912231974226, "grad_norm": 0.433636337518692, "learning_rate": 0.0001, "loss": 0.9371, "step": 1214 }, { "epoch": 1.079880013331852, "grad_norm": 0.41402560472488403, "learning_rate": 0.0001, "loss": 0.9656, "step": 1215 }, { "epoch": 1.0807688034662815, "grad_norm": 0.36485034227371216, "learning_rate": 0.0001, "loss": 0.9697, "step": 1216 }, { "epoch": 1.0816575936007111, "grad_norm": 0.41599103808403015, "learning_rate": 0.0001, "loss": 0.8941, "step": 1217 }, { "epoch": 1.0825463837351406, "grad_norm": 0.38060900568962097, "learning_rate": 0.0001, "loss": 0.8354, "step": 1218 }, { "epoch": 1.08343517386957, "grad_norm": 0.40148356556892395, "learning_rate": 0.0001, "loss": 0.9132, "step": 1219 }, { "epoch": 1.0843239640039997, "grad_norm": 0.3883506655693054, "learning_rate": 0.0001, "loss": 0.9233, "step": 1220 }, { "epoch": 1.085212754138429, "grad_norm": 0.34674960374832153, "learning_rate": 0.0001, "loss": 0.8928, "step": 1221 }, { "epoch": 1.0861015442728585, "grad_norm": 0.35441455245018005, "learning_rate": 0.0001, "loss": 0.8653, "step": 1222 }, { "epoch": 1.086990334407288, "grad_norm": 0.37511923909187317, "learning_rate": 0.0001, "loss": 0.9179, "step": 1223 }, { "epoch": 1.0878791245417176, "grad_norm": 0.35857459902763367, "learning_rate": 0.0001, "loss": 0.8866, "step": 1224 }, { "epoch": 1.088767914676147, "grad_norm": 0.3715518116950989, "learning_rate": 0.0001, "loss": 0.9263, "step": 1225 }, { "epoch": 1.0896567048105765, "grad_norm": 0.3738372325897217, "learning_rate": 0.0001, "loss": 0.9759, "step": 1226 }, { "epoch": 1.0905454949450062, "grad_norm": 0.41106364130973816, "learning_rate": 0.0001, "loss": 0.9539, "step": 1227 }, { "epoch": 1.0914342850794356, "grad_norm": 0.36827585101127625, "learning_rate": 0.0001, "loss": 0.949, "step": 1228 }, { "epoch": 1.092323075213865, "grad_norm": 0.39710530638694763, "learning_rate": 0.0001, "loss": 0.9351, "step": 1229 }, { "epoch": 1.0932118653482947, "grad_norm": 0.3501657247543335, "learning_rate": 0.0001, "loss": 0.9026, "step": 1230 }, { "epoch": 1.0941006554827242, "grad_norm": 0.4335416853427887, "learning_rate": 0.0001, "loss": 0.9438, "step": 1231 }, { "epoch": 1.0949894456171536, "grad_norm": 0.3835698962211609, "learning_rate": 0.0001, "loss": 0.8951, "step": 1232 }, { "epoch": 1.095878235751583, "grad_norm": 0.3333055078983307, "learning_rate": 0.0001, "loss": 0.8505, "step": 1233 }, { "epoch": 1.0967670258860127, "grad_norm": 0.4030056595802307, "learning_rate": 0.0001, "loss": 0.9226, "step": 1234 }, { "epoch": 1.0976558160204422, "grad_norm": 0.4055632948875427, "learning_rate": 0.0001, "loss": 0.9892, "step": 1235 }, { "epoch": 1.0985446061548716, "grad_norm": 0.3327998220920563, "learning_rate": 0.0001, "loss": 0.8774, "step": 1236 }, { "epoch": 1.0994333962893013, "grad_norm": 0.4032350182533264, "learning_rate": 0.0001, "loss": 0.8252, "step": 1237 }, { "epoch": 1.1003221864237307, "grad_norm": 0.40369337797164917, "learning_rate": 0.0001, "loss": 0.9785, "step": 1238 }, { "epoch": 1.1012109765581601, "grad_norm": 0.339615136384964, "learning_rate": 0.0001, "loss": 0.9099, "step": 1239 }, { "epoch": 1.1020997666925898, "grad_norm": 0.4322150945663452, "learning_rate": 0.0001, "loss": 0.9437, "step": 1240 }, { "epoch": 1.1029885568270192, "grad_norm": 0.4031563103199005, "learning_rate": 0.0001, "loss": 0.9856, "step": 1241 }, { "epoch": 1.1038773469614487, "grad_norm": 0.36764100193977356, "learning_rate": 0.0001, "loss": 0.9202, "step": 1242 }, { "epoch": 1.1047661370958783, "grad_norm": 0.35683104395866394, "learning_rate": 0.0001, "loss": 0.8976, "step": 1243 }, { "epoch": 1.1056549272303078, "grad_norm": 0.3336647152900696, "learning_rate": 0.0001, "loss": 0.875, "step": 1244 }, { "epoch": 1.1065437173647372, "grad_norm": 0.36215344071388245, "learning_rate": 0.0001, "loss": 0.8985, "step": 1245 }, { "epoch": 1.1074325074991667, "grad_norm": 0.3436198830604553, "learning_rate": 0.0001, "loss": 0.8672, "step": 1246 }, { "epoch": 1.1083212976335963, "grad_norm": 0.3478955328464508, "learning_rate": 0.0001, "loss": 0.833, "step": 1247 }, { "epoch": 1.1092100877680258, "grad_norm": 0.3481768071651459, "learning_rate": 0.0001, "loss": 0.891, "step": 1248 }, { "epoch": 1.1100988779024552, "grad_norm": 0.37578824162483215, "learning_rate": 0.0001, "loss": 0.88, "step": 1249 }, { "epoch": 1.1109876680368849, "grad_norm": 0.3523572087287903, "learning_rate": 0.0001, "loss": 0.8152, "step": 1250 }, { "epoch": 1.1118764581713143, "grad_norm": 0.37987327575683594, "learning_rate": 0.0001, "loss": 0.9859, "step": 1251 }, { "epoch": 1.1127652483057437, "grad_norm": 0.34919604659080505, "learning_rate": 0.0001, "loss": 0.948, "step": 1252 }, { "epoch": 1.1136540384401734, "grad_norm": 0.401996910572052, "learning_rate": 0.0001, "loss": 0.9123, "step": 1253 }, { "epoch": 1.1145428285746029, "grad_norm": 0.3942583501338959, "learning_rate": 0.0001, "loss": 0.8524, "step": 1254 }, { "epoch": 1.1154316187090323, "grad_norm": 0.3758942484855652, "learning_rate": 0.0001, "loss": 0.8813, "step": 1255 }, { "epoch": 1.1163204088434617, "grad_norm": 0.3327169716358185, "learning_rate": 0.0001, "loss": 0.8493, "step": 1256 }, { "epoch": 1.1172091989778914, "grad_norm": 0.34999555349349976, "learning_rate": 0.0001, "loss": 0.8986, "step": 1257 }, { "epoch": 1.1180979891123208, "grad_norm": 0.3059004843235016, "learning_rate": 0.0001, "loss": 0.8299, "step": 1258 }, { "epoch": 1.1189867792467503, "grad_norm": 0.40918049216270447, "learning_rate": 0.0001, "loss": 1.0235, "step": 1259 }, { "epoch": 1.11987556938118, "grad_norm": 0.45679351687431335, "learning_rate": 0.0001, "loss": 0.9245, "step": 1260 }, { "epoch": 1.1207643595156094, "grad_norm": 0.36050447821617126, "learning_rate": 0.0001, "loss": 0.7914, "step": 1261 }, { "epoch": 1.1216531496500388, "grad_norm": 0.3530547320842743, "learning_rate": 0.0001, "loss": 0.8892, "step": 1262 }, { "epoch": 1.1225419397844685, "grad_norm": 0.39871805906295776, "learning_rate": 0.0001, "loss": 0.9088, "step": 1263 }, { "epoch": 1.123430729918898, "grad_norm": 0.37267911434173584, "learning_rate": 0.0001, "loss": 0.9331, "step": 1264 }, { "epoch": 1.1243195200533274, "grad_norm": 0.3619190752506256, "learning_rate": 0.0001, "loss": 0.8432, "step": 1265 }, { "epoch": 1.125208310187757, "grad_norm": 0.3668549656867981, "learning_rate": 0.0001, "loss": 0.9503, "step": 1266 }, { "epoch": 1.1260971003221865, "grad_norm": 0.34030482172966003, "learning_rate": 0.0001, "loss": 0.8852, "step": 1267 }, { "epoch": 1.126985890456616, "grad_norm": 0.4020557403564453, "learning_rate": 0.0001, "loss": 0.9593, "step": 1268 }, { "epoch": 1.1278746805910456, "grad_norm": 0.34024256467819214, "learning_rate": 0.0001, "loss": 0.8787, "step": 1269 }, { "epoch": 1.128763470725475, "grad_norm": 0.4241231083869934, "learning_rate": 0.0001, "loss": 0.9793, "step": 1270 }, { "epoch": 1.1296522608599044, "grad_norm": 0.4005233645439148, "learning_rate": 0.0001, "loss": 0.9619, "step": 1271 }, { "epoch": 1.1305410509943339, "grad_norm": 0.38167819380760193, "learning_rate": 0.0001, "loss": 0.9449, "step": 1272 }, { "epoch": 1.1314298411287635, "grad_norm": 0.34920740127563477, "learning_rate": 0.0001, "loss": 0.9811, "step": 1273 }, { "epoch": 1.132318631263193, "grad_norm": 0.3310723900794983, "learning_rate": 0.0001, "loss": 0.8865, "step": 1274 }, { "epoch": 1.1332074213976224, "grad_norm": 0.3370254933834076, "learning_rate": 0.0001, "loss": 0.8304, "step": 1275 }, { "epoch": 1.134096211532052, "grad_norm": 0.35583510994911194, "learning_rate": 0.0001, "loss": 0.9203, "step": 1276 }, { "epoch": 1.1349850016664815, "grad_norm": 0.34542956948280334, "learning_rate": 0.0001, "loss": 0.9403, "step": 1277 }, { "epoch": 1.135873791800911, "grad_norm": 0.3754197657108307, "learning_rate": 0.0001, "loss": 0.9866, "step": 1278 }, { "epoch": 1.1367625819353404, "grad_norm": 0.3555785119533539, "learning_rate": 0.0001, "loss": 0.8689, "step": 1279 }, { "epoch": 1.13765137206977, "grad_norm": 0.33429041504859924, "learning_rate": 0.0001, "loss": 0.833, "step": 1280 }, { "epoch": 1.1385401622041995, "grad_norm": 0.3541424572467804, "learning_rate": 0.0001, "loss": 0.8759, "step": 1281 }, { "epoch": 1.139428952338629, "grad_norm": 0.36240896582603455, "learning_rate": 0.0001, "loss": 0.9841, "step": 1282 }, { "epoch": 1.1403177424730586, "grad_norm": 0.3360092043876648, "learning_rate": 0.0001, "loss": 0.9414, "step": 1283 }, { "epoch": 1.141206532607488, "grad_norm": 0.3737250864505768, "learning_rate": 0.0001, "loss": 0.9729, "step": 1284 }, { "epoch": 1.1420953227419175, "grad_norm": 0.37672123312950134, "learning_rate": 0.0001, "loss": 0.8692, "step": 1285 }, { "epoch": 1.1429841128763472, "grad_norm": 0.3979622721672058, "learning_rate": 0.0001, "loss": 0.9644, "step": 1286 }, { "epoch": 1.1438729030107766, "grad_norm": 0.4209362268447876, "learning_rate": 0.0001, "loss": 0.9097, "step": 1287 }, { "epoch": 1.144761693145206, "grad_norm": 0.3987758159637451, "learning_rate": 0.0001, "loss": 0.8919, "step": 1288 }, { "epoch": 1.1456504832796357, "grad_norm": 0.3739655613899231, "learning_rate": 0.0001, "loss": 0.9623, "step": 1289 }, { "epoch": 1.1465392734140651, "grad_norm": 0.3652169406414032, "learning_rate": 0.0001, "loss": 0.9033, "step": 1290 }, { "epoch": 1.1474280635484946, "grad_norm": 0.3900451958179474, "learning_rate": 0.0001, "loss": 0.9795, "step": 1291 }, { "epoch": 1.1483168536829242, "grad_norm": 0.36547282338142395, "learning_rate": 0.0001, "loss": 0.9595, "step": 1292 }, { "epoch": 1.1492056438173537, "grad_norm": 0.3713114559650421, "learning_rate": 0.0001, "loss": 0.8854, "step": 1293 }, { "epoch": 1.1500944339517831, "grad_norm": 0.3759624660015106, "learning_rate": 0.0001, "loss": 0.9053, "step": 1294 }, { "epoch": 1.1509832240862126, "grad_norm": 0.3536144196987152, "learning_rate": 0.0001, "loss": 1.0054, "step": 1295 }, { "epoch": 1.1518720142206422, "grad_norm": 0.36850425601005554, "learning_rate": 0.0001, "loss": 0.9134, "step": 1296 }, { "epoch": 1.1527608043550717, "grad_norm": 0.3645191192626953, "learning_rate": 0.0001, "loss": 1.0067, "step": 1297 }, { "epoch": 1.153649594489501, "grad_norm": 0.3773975670337677, "learning_rate": 0.0001, "loss": 0.9362, "step": 1298 }, { "epoch": 1.1545383846239308, "grad_norm": 0.35518959164619446, "learning_rate": 0.0001, "loss": 0.9289, "step": 1299 }, { "epoch": 1.1554271747583602, "grad_norm": 0.4167252779006958, "learning_rate": 0.0001, "loss": 0.8655, "step": 1300 }, { "epoch": 1.1563159648927896, "grad_norm": 0.35073405504226685, "learning_rate": 0.0001, "loss": 0.8565, "step": 1301 }, { "epoch": 1.157204755027219, "grad_norm": 0.3294246196746826, "learning_rate": 0.0001, "loss": 0.8501, "step": 1302 }, { "epoch": 1.1580935451616488, "grad_norm": 0.3594052493572235, "learning_rate": 0.0001, "loss": 0.96, "step": 1303 }, { "epoch": 1.1589823352960782, "grad_norm": 0.40062353014945984, "learning_rate": 0.0001, "loss": 0.9908, "step": 1304 }, { "epoch": 1.1598711254305076, "grad_norm": 0.38618043065071106, "learning_rate": 0.0001, "loss": 0.9185, "step": 1305 }, { "epoch": 1.1607599155649373, "grad_norm": 0.36038386821746826, "learning_rate": 0.0001, "loss": 0.9733, "step": 1306 }, { "epoch": 1.1616487056993667, "grad_norm": 0.3996288776397705, "learning_rate": 0.0001, "loss": 0.9534, "step": 1307 }, { "epoch": 1.1625374958337962, "grad_norm": 0.3757215440273285, "learning_rate": 0.0001, "loss": 0.9219, "step": 1308 }, { "epoch": 1.1634262859682258, "grad_norm": 0.3682938814163208, "learning_rate": 0.0001, "loss": 0.9141, "step": 1309 }, { "epoch": 1.1643150761026553, "grad_norm": 0.36964985728263855, "learning_rate": 0.0001, "loss": 0.9956, "step": 1310 }, { "epoch": 1.1652038662370847, "grad_norm": 0.39142096042633057, "learning_rate": 0.0001, "loss": 0.8502, "step": 1311 }, { "epoch": 1.1660926563715144, "grad_norm": 0.31965819001197815, "learning_rate": 0.0001, "loss": 0.8842, "step": 1312 }, { "epoch": 1.1669814465059438, "grad_norm": 0.40179508924484253, "learning_rate": 0.0001, "loss": 0.9441, "step": 1313 }, { "epoch": 1.1678702366403733, "grad_norm": 0.3406355679035187, "learning_rate": 0.0001, "loss": 0.9468, "step": 1314 }, { "epoch": 1.168759026774803, "grad_norm": 0.3638407289981842, "learning_rate": 0.0001, "loss": 0.9296, "step": 1315 }, { "epoch": 1.1696478169092324, "grad_norm": 0.34185290336608887, "learning_rate": 0.0001, "loss": 0.8647, "step": 1316 }, { "epoch": 1.1705366070436618, "grad_norm": 0.3356599807739258, "learning_rate": 0.0001, "loss": 0.9484, "step": 1317 }, { "epoch": 1.1714253971780912, "grad_norm": 0.3953014314174652, "learning_rate": 0.0001, "loss": 0.947, "step": 1318 }, { "epoch": 1.172314187312521, "grad_norm": 0.4338318109512329, "learning_rate": 0.0001, "loss": 0.9389, "step": 1319 }, { "epoch": 1.1732029774469503, "grad_norm": 0.38250255584716797, "learning_rate": 0.0001, "loss": 0.9113, "step": 1320 }, { "epoch": 1.1740917675813798, "grad_norm": 1.4235469102859497, "learning_rate": 0.0001, "loss": 0.837, "step": 1321 }, { "epoch": 1.1749805577158094, "grad_norm": 0.4314570724964142, "learning_rate": 0.0001, "loss": 0.955, "step": 1322 }, { "epoch": 1.1758693478502389, "grad_norm": 0.3919623792171478, "learning_rate": 0.0001, "loss": 0.9662, "step": 1323 }, { "epoch": 1.1767581379846683, "grad_norm": 0.3859492540359497, "learning_rate": 0.0001, "loss": 0.8974, "step": 1324 }, { "epoch": 1.1776469281190978, "grad_norm": 0.39512211084365845, "learning_rate": 0.0001, "loss": 0.9592, "step": 1325 }, { "epoch": 1.1785357182535274, "grad_norm": 0.34336525201797485, "learning_rate": 0.0001, "loss": 0.8636, "step": 1326 }, { "epoch": 1.1794245083879569, "grad_norm": 0.39709898829460144, "learning_rate": 0.0001, "loss": 0.8655, "step": 1327 }, { "epoch": 1.1803132985223863, "grad_norm": 0.4077267348766327, "learning_rate": 0.0001, "loss": 0.9687, "step": 1328 }, { "epoch": 1.181202088656816, "grad_norm": 0.3338770866394043, "learning_rate": 0.0001, "loss": 0.9293, "step": 1329 }, { "epoch": 1.1820908787912454, "grad_norm": 0.3750726580619812, "learning_rate": 0.0001, "loss": 0.922, "step": 1330 }, { "epoch": 1.1829796689256749, "grad_norm": 0.35356971621513367, "learning_rate": 0.0001, "loss": 0.8574, "step": 1331 }, { "epoch": 1.1838684590601045, "grad_norm": 0.41766199469566345, "learning_rate": 0.0001, "loss": 0.9395, "step": 1332 }, { "epoch": 1.184757249194534, "grad_norm": 0.37516269087791443, "learning_rate": 0.0001, "loss": 0.9011, "step": 1333 }, { "epoch": 1.1856460393289634, "grad_norm": 0.3945169746875763, "learning_rate": 0.0001, "loss": 0.9384, "step": 1334 }, { "epoch": 1.186534829463393, "grad_norm": 0.33570146560668945, "learning_rate": 0.0001, "loss": 0.9534, "step": 1335 }, { "epoch": 1.1874236195978225, "grad_norm": 0.35922086238861084, "learning_rate": 0.0001, "loss": 0.9344, "step": 1336 }, { "epoch": 1.188312409732252, "grad_norm": 0.33384042978286743, "learning_rate": 0.0001, "loss": 0.9778, "step": 1337 }, { "epoch": 1.1892011998666816, "grad_norm": 0.3461971879005432, "learning_rate": 0.0001, "loss": 0.9317, "step": 1338 }, { "epoch": 1.190089990001111, "grad_norm": 0.366767019033432, "learning_rate": 0.0001, "loss": 0.923, "step": 1339 }, { "epoch": 1.1909787801355405, "grad_norm": 0.35670191049575806, "learning_rate": 0.0001, "loss": 0.8958, "step": 1340 }, { "epoch": 1.19186757026997, "grad_norm": 0.3564472496509552, "learning_rate": 0.0001, "loss": 0.8879, "step": 1341 }, { "epoch": 1.1927563604043996, "grad_norm": 0.34850582480430603, "learning_rate": 0.0001, "loss": 0.9472, "step": 1342 }, { "epoch": 1.193645150538829, "grad_norm": 0.3118721842765808, "learning_rate": 0.0001, "loss": 0.9182, "step": 1343 }, { "epoch": 1.1945339406732585, "grad_norm": 0.4474131464958191, "learning_rate": 0.0001, "loss": 0.9986, "step": 1344 }, { "epoch": 1.1954227308076881, "grad_norm": 0.4048672616481781, "learning_rate": 0.0001, "loss": 0.924, "step": 1345 }, { "epoch": 1.1963115209421176, "grad_norm": 0.3493656814098358, "learning_rate": 0.0001, "loss": 0.8614, "step": 1346 }, { "epoch": 1.197200311076547, "grad_norm": 0.32247281074523926, "learning_rate": 0.0001, "loss": 0.8659, "step": 1347 }, { "epoch": 1.1980891012109764, "grad_norm": 0.4119790196418762, "learning_rate": 0.0001, "loss": 0.9521, "step": 1348 }, { "epoch": 1.198977891345406, "grad_norm": 0.40390363335609436, "learning_rate": 0.0001, "loss": 0.9203, "step": 1349 }, { "epoch": 1.1998666814798356, "grad_norm": 0.40085315704345703, "learning_rate": 0.0001, "loss": 1.0135, "step": 1350 }, { "epoch": 1.200755471614265, "grad_norm": 0.3355647325515747, "learning_rate": 0.0001, "loss": 0.9248, "step": 1351 }, { "epoch": 1.2016442617486947, "grad_norm": 0.3319724500179291, "learning_rate": 0.0001, "loss": 0.8671, "step": 1352 }, { "epoch": 1.202533051883124, "grad_norm": 0.4024242162704468, "learning_rate": 0.0001, "loss": 0.9497, "step": 1353 }, { "epoch": 1.2034218420175535, "grad_norm": 0.3751072287559509, "learning_rate": 0.0001, "loss": 0.9692, "step": 1354 }, { "epoch": 1.2043106321519832, "grad_norm": 0.3578762114048004, "learning_rate": 0.0001, "loss": 0.9306, "step": 1355 }, { "epoch": 1.2051994222864126, "grad_norm": 0.41604581475257874, "learning_rate": 0.0001, "loss": 0.9746, "step": 1356 }, { "epoch": 1.206088212420842, "grad_norm": 0.38614901900291443, "learning_rate": 0.0001, "loss": 0.9475, "step": 1357 }, { "epoch": 1.2069770025552717, "grad_norm": 0.3261745274066925, "learning_rate": 0.0001, "loss": 0.9745, "step": 1358 }, { "epoch": 1.2078657926897012, "grad_norm": 0.34567147493362427, "learning_rate": 0.0001, "loss": 0.9237, "step": 1359 }, { "epoch": 1.2087545828241306, "grad_norm": 0.3168199360370636, "learning_rate": 0.0001, "loss": 0.9789, "step": 1360 }, { "epoch": 1.2096433729585603, "grad_norm": 0.32394152879714966, "learning_rate": 0.0001, "loss": 0.9087, "step": 1361 }, { "epoch": 1.2105321630929897, "grad_norm": 0.41741707921028137, "learning_rate": 0.0001, "loss": 0.9361, "step": 1362 }, { "epoch": 1.2114209532274192, "grad_norm": 0.37223905324935913, "learning_rate": 0.0001, "loss": 0.9193, "step": 1363 }, { "epoch": 1.2123097433618486, "grad_norm": 0.3394053876399994, "learning_rate": 0.0001, "loss": 0.9014, "step": 1364 }, { "epoch": 1.2131985334962783, "grad_norm": 0.43821749091148376, "learning_rate": 0.0001, "loss": 0.9964, "step": 1365 }, { "epoch": 1.2140873236307077, "grad_norm": 0.3783372938632965, "learning_rate": 0.0001, "loss": 0.9388, "step": 1366 }, { "epoch": 1.2149761137651371, "grad_norm": 0.3495936691761017, "learning_rate": 0.0001, "loss": 0.8109, "step": 1367 }, { "epoch": 1.2158649038995668, "grad_norm": 0.3929634690284729, "learning_rate": 0.0001, "loss": 0.9825, "step": 1368 }, { "epoch": 1.2167536940339962, "grad_norm": 0.3742099702358246, "learning_rate": 0.0001, "loss": 0.9098, "step": 1369 }, { "epoch": 1.2176424841684257, "grad_norm": 0.36973753571510315, "learning_rate": 0.0001, "loss": 0.9053, "step": 1370 }, { "epoch": 1.2185312743028551, "grad_norm": 0.362594872713089, "learning_rate": 0.0001, "loss": 0.9906, "step": 1371 }, { "epoch": 1.2194200644372848, "grad_norm": 0.442941278219223, "learning_rate": 0.0001, "loss": 1.0028, "step": 1372 }, { "epoch": 1.2203088545717142, "grad_norm": 0.41126692295074463, "learning_rate": 0.0001, "loss": 0.9192, "step": 1373 }, { "epoch": 1.2211976447061437, "grad_norm": 0.3734437823295593, "learning_rate": 0.0001, "loss": 0.919, "step": 1374 }, { "epoch": 1.2220864348405733, "grad_norm": 0.37153056263923645, "learning_rate": 0.0001, "loss": 0.9253, "step": 1375 }, { "epoch": 1.2229752249750028, "grad_norm": 0.4021115303039551, "learning_rate": 0.0001, "loss": 0.9013, "step": 1376 }, { "epoch": 1.2238640151094322, "grad_norm": 0.3954538404941559, "learning_rate": 0.0001, "loss": 0.9907, "step": 1377 }, { "epoch": 1.2247528052438619, "grad_norm": 0.707315981388092, "learning_rate": 0.0001, "loss": 0.9124, "step": 1378 }, { "epoch": 1.2256415953782913, "grad_norm": 0.2907516658306122, "learning_rate": 0.0001, "loss": 0.8792, "step": 1379 }, { "epoch": 1.2265303855127208, "grad_norm": 0.32730191946029663, "learning_rate": 0.0001, "loss": 0.8724, "step": 1380 }, { "epoch": 1.2274191756471504, "grad_norm": 0.3393136262893677, "learning_rate": 0.0001, "loss": 0.9822, "step": 1381 }, { "epoch": 1.2283079657815799, "grad_norm": 0.3186226189136505, "learning_rate": 0.0001, "loss": 0.9026, "step": 1382 }, { "epoch": 1.2291967559160093, "grad_norm": 0.3546365797519684, "learning_rate": 0.0001, "loss": 0.9419, "step": 1383 }, { "epoch": 1.230085546050439, "grad_norm": 0.33756113052368164, "learning_rate": 0.0001, "loss": 0.9292, "step": 1384 }, { "epoch": 1.2309743361848684, "grad_norm": 0.357030987739563, "learning_rate": 0.0001, "loss": 0.8748, "step": 1385 }, { "epoch": 1.2318631263192978, "grad_norm": 0.3271646797657013, "learning_rate": 0.0001, "loss": 0.9411, "step": 1386 }, { "epoch": 1.2327519164537273, "grad_norm": 0.3888196647167206, "learning_rate": 0.0001, "loss": 0.9634, "step": 1387 }, { "epoch": 1.233640706588157, "grad_norm": 0.3897395730018616, "learning_rate": 0.0001, "loss": 0.9404, "step": 1388 }, { "epoch": 1.2345294967225864, "grad_norm": 0.36448606848716736, "learning_rate": 0.0001, "loss": 0.9543, "step": 1389 }, { "epoch": 1.2354182868570158, "grad_norm": 0.35686194896698, "learning_rate": 0.0001, "loss": 0.8772, "step": 1390 }, { "epoch": 1.2363070769914455, "grad_norm": 0.3667612373828888, "learning_rate": 0.0001, "loss": 0.8449, "step": 1391 }, { "epoch": 1.237195867125875, "grad_norm": 0.35356754064559937, "learning_rate": 0.0001, "loss": 0.9099, "step": 1392 }, { "epoch": 1.2380846572603044, "grad_norm": 0.3918624818325043, "learning_rate": 0.0001, "loss": 0.9164, "step": 1393 }, { "epoch": 1.2389734473947338, "grad_norm": 0.347768634557724, "learning_rate": 0.0001, "loss": 0.9423, "step": 1394 }, { "epoch": 1.2398622375291635, "grad_norm": 0.3811168372631073, "learning_rate": 0.0001, "loss": 1.0301, "step": 1395 }, { "epoch": 1.240751027663593, "grad_norm": 0.3333447575569153, "learning_rate": 0.0001, "loss": 0.9424, "step": 1396 }, { "epoch": 1.2416398177980223, "grad_norm": 0.3668173551559448, "learning_rate": 0.0001, "loss": 0.9513, "step": 1397 }, { "epoch": 1.242528607932452, "grad_norm": 0.4245815575122833, "learning_rate": 0.0001, "loss": 0.8381, "step": 1398 }, { "epoch": 1.2434173980668815, "grad_norm": 0.38784492015838623, "learning_rate": 0.0001, "loss": 0.9348, "step": 1399 }, { "epoch": 1.244306188201311, "grad_norm": 0.3626404404640198, "learning_rate": 0.0001, "loss": 0.891, "step": 1400 }, { "epoch": 1.2451949783357406, "grad_norm": 0.3952024579048157, "learning_rate": 0.0001, "loss": 0.8988, "step": 1401 }, { "epoch": 1.24608376847017, "grad_norm": 0.3714921474456787, "learning_rate": 0.0001, "loss": 0.9597, "step": 1402 }, { "epoch": 1.2469725586045994, "grad_norm": 0.3674130141735077, "learning_rate": 0.0001, "loss": 0.8814, "step": 1403 }, { "epoch": 1.247861348739029, "grad_norm": 0.332264244556427, "learning_rate": 0.0001, "loss": 0.8723, "step": 1404 }, { "epoch": 1.2487501388734585, "grad_norm": 0.44990214705467224, "learning_rate": 0.0001, "loss": 0.8562, "step": 1405 }, { "epoch": 1.249638929007888, "grad_norm": 0.3691662847995758, "learning_rate": 0.0001, "loss": 0.9808, "step": 1406 }, { "epoch": 1.2505277191423176, "grad_norm": 0.4463985562324524, "learning_rate": 0.0001, "loss": 1.0113, "step": 1407 }, { "epoch": 1.251416509276747, "grad_norm": 0.40863966941833496, "learning_rate": 0.0001, "loss": 0.8999, "step": 1408 }, { "epoch": 1.2523052994111765, "grad_norm": 0.39324069023132324, "learning_rate": 0.0001, "loss": 0.9607, "step": 1409 }, { "epoch": 1.2531940895456062, "grad_norm": 0.4144541919231415, "learning_rate": 0.0001, "loss": 0.9257, "step": 1410 }, { "epoch": 1.2540828796800356, "grad_norm": 0.3502121567726135, "learning_rate": 0.0001, "loss": 0.9035, "step": 1411 }, { "epoch": 1.254971669814465, "grad_norm": 0.3595639765262604, "learning_rate": 0.0001, "loss": 0.958, "step": 1412 }, { "epoch": 1.2558604599488945, "grad_norm": 0.31337282061576843, "learning_rate": 0.0001, "loss": 0.8338, "step": 1413 }, { "epoch": 1.256749250083324, "grad_norm": 0.38238781690597534, "learning_rate": 0.0001, "loss": 0.9705, "step": 1414 }, { "epoch": 1.2576380402177536, "grad_norm": 0.34189799427986145, "learning_rate": 0.0001, "loss": 0.9685, "step": 1415 }, { "epoch": 1.258526830352183, "grad_norm": 0.3783426582813263, "learning_rate": 0.0001, "loss": 0.8579, "step": 1416 }, { "epoch": 1.2594156204866125, "grad_norm": 0.35170289874076843, "learning_rate": 0.0001, "loss": 1.0055, "step": 1417 }, { "epoch": 1.2603044106210421, "grad_norm": 0.3678469657897949, "learning_rate": 0.0001, "loss": 0.9932, "step": 1418 }, { "epoch": 1.2611932007554716, "grad_norm": 0.35460302233695984, "learning_rate": 0.0001, "loss": 0.8314, "step": 1419 }, { "epoch": 1.262081990889901, "grad_norm": 0.3786843419075012, "learning_rate": 0.0001, "loss": 0.9391, "step": 1420 }, { "epoch": 1.2629707810243307, "grad_norm": 0.37704455852508545, "learning_rate": 0.0001, "loss": 0.9314, "step": 1421 }, { "epoch": 1.2638595711587601, "grad_norm": 0.3811683654785156, "learning_rate": 0.0001, "loss": 0.9184, "step": 1422 }, { "epoch": 1.2647483612931896, "grad_norm": 0.3323057293891907, "learning_rate": 0.0001, "loss": 0.8478, "step": 1423 }, { "epoch": 1.2656371514276192, "grad_norm": 0.33943992853164673, "learning_rate": 0.0001, "loss": 0.7872, "step": 1424 }, { "epoch": 1.2665259415620487, "grad_norm": 0.370331346988678, "learning_rate": 0.0001, "loss": 1.0004, "step": 1425 }, { "epoch": 1.2674147316964781, "grad_norm": 0.3509312868118286, "learning_rate": 0.0001, "loss": 0.8777, "step": 1426 }, { "epoch": 1.2683035218309078, "grad_norm": 0.35262539982795715, "learning_rate": 0.0001, "loss": 0.8772, "step": 1427 }, { "epoch": 1.2691923119653372, "grad_norm": 0.3514516055583954, "learning_rate": 0.0001, "loss": 0.9092, "step": 1428 }, { "epoch": 1.2700811020997667, "grad_norm": 0.33917155861854553, "learning_rate": 0.0001, "loss": 0.9088, "step": 1429 }, { "epoch": 1.2709698922341963, "grad_norm": 0.37379592657089233, "learning_rate": 0.0001, "loss": 0.8811, "step": 1430 }, { "epoch": 1.2718586823686258, "grad_norm": 0.37309855222702026, "learning_rate": 0.0001, "loss": 0.9243, "step": 1431 }, { "epoch": 1.2727474725030552, "grad_norm": 0.3677520751953125, "learning_rate": 0.0001, "loss": 0.9191, "step": 1432 }, { "epoch": 1.2736362626374849, "grad_norm": 0.3768025040626526, "learning_rate": 0.0001, "loss": 0.9713, "step": 1433 }, { "epoch": 1.2745250527719143, "grad_norm": 0.3794255256652832, "learning_rate": 0.0001, "loss": 0.8944, "step": 1434 }, { "epoch": 1.2754138429063437, "grad_norm": 0.3847828805446625, "learning_rate": 0.0001, "loss": 0.9895, "step": 1435 }, { "epoch": 1.2763026330407732, "grad_norm": 0.3901936411857605, "learning_rate": 0.0001, "loss": 0.9604, "step": 1436 }, { "epoch": 1.2771914231752026, "grad_norm": 0.36127346754074097, "learning_rate": 0.0001, "loss": 0.9633, "step": 1437 }, { "epoch": 1.2780802133096323, "grad_norm": 0.3554041385650635, "learning_rate": 0.0001, "loss": 0.9282, "step": 1438 }, { "epoch": 1.2789690034440617, "grad_norm": 0.3366556465625763, "learning_rate": 0.0001, "loss": 0.9398, "step": 1439 }, { "epoch": 1.2798577935784912, "grad_norm": 0.38084515929222107, "learning_rate": 0.0001, "loss": 1.0101, "step": 1440 }, { "epoch": 1.2807465837129208, "grad_norm": 0.35212430357933044, "learning_rate": 0.0001, "loss": 0.921, "step": 1441 }, { "epoch": 1.2816353738473503, "grad_norm": 0.37851765751838684, "learning_rate": 0.0001, "loss": 0.8989, "step": 1442 }, { "epoch": 1.2825241639817797, "grad_norm": 0.31411126255989075, "learning_rate": 0.0001, "loss": 0.8476, "step": 1443 }, { "epoch": 1.2834129541162094, "grad_norm": 0.35545769333839417, "learning_rate": 0.0001, "loss": 0.9857, "step": 1444 }, { "epoch": 1.2843017442506388, "grad_norm": 0.36752983927726746, "learning_rate": 0.0001, "loss": 0.8876, "step": 1445 }, { "epoch": 1.2851905343850683, "grad_norm": 0.3653068542480469, "learning_rate": 0.0001, "loss": 0.8651, "step": 1446 }, { "epoch": 1.286079324519498, "grad_norm": 0.3388923704624176, "learning_rate": 0.0001, "loss": 0.8922, "step": 1447 }, { "epoch": 1.2869681146539274, "grad_norm": 0.3574695587158203, "learning_rate": 0.0001, "loss": 0.903, "step": 1448 }, { "epoch": 1.2878569047883568, "grad_norm": 0.36422044038772583, "learning_rate": 0.0001, "loss": 0.9122, "step": 1449 }, { "epoch": 1.2887456949227865, "grad_norm": 0.3757425844669342, "learning_rate": 0.0001, "loss": 0.9014, "step": 1450 }, { "epoch": 1.289634485057216, "grad_norm": 0.3608701229095459, "learning_rate": 0.0001, "loss": 0.9089, "step": 1451 }, { "epoch": 1.2905232751916453, "grad_norm": 0.3886134922504425, "learning_rate": 0.0001, "loss": 0.817, "step": 1452 }, { "epoch": 1.291412065326075, "grad_norm": 0.33738628029823303, "learning_rate": 0.0001, "loss": 0.8872, "step": 1453 }, { "epoch": 1.2923008554605044, "grad_norm": 0.3938889801502228, "learning_rate": 0.0001, "loss": 0.8523, "step": 1454 }, { "epoch": 1.2931896455949339, "grad_norm": 0.37151628732681274, "learning_rate": 0.0001, "loss": 0.9448, "step": 1455 }, { "epoch": 1.2940784357293635, "grad_norm": 0.36321815848350525, "learning_rate": 0.0001, "loss": 0.9511, "step": 1456 }, { "epoch": 1.294967225863793, "grad_norm": 0.35245969891548157, "learning_rate": 0.0001, "loss": 1.0141, "step": 1457 }, { "epoch": 1.2958560159982224, "grad_norm": 0.3494570851325989, "learning_rate": 0.0001, "loss": 0.9411, "step": 1458 }, { "epoch": 1.2967448061326519, "grad_norm": 0.3875609338283539, "learning_rate": 0.0001, "loss": 0.9318, "step": 1459 }, { "epoch": 1.2976335962670813, "grad_norm": 0.3761698305606842, "learning_rate": 0.0001, "loss": 0.8563, "step": 1460 }, { "epoch": 1.298522386401511, "grad_norm": 0.3185148239135742, "learning_rate": 0.0001, "loss": 0.9486, "step": 1461 }, { "epoch": 1.2994111765359404, "grad_norm": 0.3480708599090576, "learning_rate": 0.0001, "loss": 0.8733, "step": 1462 }, { "epoch": 1.3002999666703698, "grad_norm": 0.3743402659893036, "learning_rate": 0.0001, "loss": 0.9778, "step": 1463 }, { "epoch": 1.3011887568047995, "grad_norm": 0.4024488031864166, "learning_rate": 0.0001, "loss": 0.8907, "step": 1464 }, { "epoch": 1.302077546939229, "grad_norm": 0.366472452878952, "learning_rate": 0.0001, "loss": 0.9898, "step": 1465 }, { "epoch": 1.3029663370736584, "grad_norm": 0.36721619963645935, "learning_rate": 0.0001, "loss": 0.8577, "step": 1466 }, { "epoch": 1.303855127208088, "grad_norm": 0.36933907866477966, "learning_rate": 0.0001, "loss": 0.9365, "step": 1467 }, { "epoch": 1.3047439173425175, "grad_norm": 0.36210325360298157, "learning_rate": 0.0001, "loss": 0.8787, "step": 1468 }, { "epoch": 1.305632707476947, "grad_norm": 0.3773205876350403, "learning_rate": 0.0001, "loss": 0.9396, "step": 1469 }, { "epoch": 1.3065214976113766, "grad_norm": 0.3904916048049927, "learning_rate": 0.0001, "loss": 0.9872, "step": 1470 }, { "epoch": 1.307410287745806, "grad_norm": 0.38779687881469727, "learning_rate": 0.0001, "loss": 0.9682, "step": 1471 }, { "epoch": 1.3082990778802355, "grad_norm": 0.3612334728240967, "learning_rate": 0.0001, "loss": 0.8867, "step": 1472 }, { "epoch": 1.3091878680146651, "grad_norm": 0.4181813597679138, "learning_rate": 0.0001, "loss": 0.9666, "step": 1473 }, { "epoch": 1.3100766581490946, "grad_norm": 0.4051264524459839, "learning_rate": 0.0001, "loss": 0.9088, "step": 1474 }, { "epoch": 1.310965448283524, "grad_norm": 0.3271094262599945, "learning_rate": 0.0001, "loss": 0.9018, "step": 1475 }, { "epoch": 1.3118542384179537, "grad_norm": 0.35540738701820374, "learning_rate": 0.0001, "loss": 1.0157, "step": 1476 }, { "epoch": 1.3127430285523831, "grad_norm": 0.36050257086753845, "learning_rate": 0.0001, "loss": 0.9347, "step": 1477 }, { "epoch": 1.3136318186868126, "grad_norm": 0.39505940675735474, "learning_rate": 0.0001, "loss": 0.9321, "step": 1478 }, { "epoch": 1.3145206088212422, "grad_norm": 0.3984764516353607, "learning_rate": 0.0001, "loss": 0.9472, "step": 1479 }, { "epoch": 1.3154093989556717, "grad_norm": 0.3784257769584656, "learning_rate": 0.0001, "loss": 0.9075, "step": 1480 }, { "epoch": 1.316298189090101, "grad_norm": 0.35195082426071167, "learning_rate": 0.0001, "loss": 0.9514, "step": 1481 }, { "epoch": 1.3171869792245305, "grad_norm": 0.33862170577049255, "learning_rate": 0.0001, "loss": 0.9411, "step": 1482 }, { "epoch": 1.31807576935896, "grad_norm": 0.3316287696361542, "learning_rate": 0.0001, "loss": 0.9368, "step": 1483 }, { "epoch": 1.3189645594933896, "grad_norm": 0.3842187225818634, "learning_rate": 0.0001, "loss": 0.9396, "step": 1484 }, { "epoch": 1.319853349627819, "grad_norm": 0.39101001620292664, "learning_rate": 0.0001, "loss": 0.9329, "step": 1485 }, { "epoch": 1.3207421397622485, "grad_norm": 0.3348684012889862, "learning_rate": 0.0001, "loss": 0.8891, "step": 1486 }, { "epoch": 1.3216309298966782, "grad_norm": 0.35855552554130554, "learning_rate": 0.0001, "loss": 0.9148, "step": 1487 }, { "epoch": 1.3225197200311076, "grad_norm": 0.39019593596458435, "learning_rate": 0.0001, "loss": 0.9043, "step": 1488 }, { "epoch": 1.323408510165537, "grad_norm": 0.3681492805480957, "learning_rate": 0.0001, "loss": 0.9222, "step": 1489 }, { "epoch": 1.3242973002999667, "grad_norm": 0.3557593524456024, "learning_rate": 0.0001, "loss": 0.8972, "step": 1490 }, { "epoch": 1.3251860904343962, "grad_norm": 0.3503437638282776, "learning_rate": 0.0001, "loss": 0.9307, "step": 1491 }, { "epoch": 1.3260748805688256, "grad_norm": 0.31487593054771423, "learning_rate": 0.0001, "loss": 0.899, "step": 1492 }, { "epoch": 1.3269636707032553, "grad_norm": 0.3774222433567047, "learning_rate": 0.0001, "loss": 0.8246, "step": 1493 }, { "epoch": 1.3278524608376847, "grad_norm": 0.3760510981082916, "learning_rate": 0.0001, "loss": 1.0001, "step": 1494 }, { "epoch": 1.3287412509721142, "grad_norm": 0.3963356912136078, "learning_rate": 0.0001, "loss": 0.9093, "step": 1495 }, { "epoch": 1.3296300411065438, "grad_norm": 0.3808361887931824, "learning_rate": 0.0001, "loss": 0.9258, "step": 1496 }, { "epoch": 1.3305188312409733, "grad_norm": 0.39797237515449524, "learning_rate": 0.0001, "loss": 0.9168, "step": 1497 }, { "epoch": 1.3314076213754027, "grad_norm": 0.36626359820365906, "learning_rate": 0.0001, "loss": 0.9289, "step": 1498 }, { "epoch": 1.3322964115098324, "grad_norm": 0.38699278235435486, "learning_rate": 0.0001, "loss": 0.8827, "step": 1499 }, { "epoch": 1.3331852016442618, "grad_norm": 0.37499698996543884, "learning_rate": 0.0001, "loss": 0.9254, "step": 1500 }, { "epoch": 1.3340739917786912, "grad_norm": 0.34208056330680847, "learning_rate": 0.0001, "loss": 0.8792, "step": 1501 }, { "epoch": 1.334962781913121, "grad_norm": 0.3909793496131897, "learning_rate": 0.0001, "loss": 0.9986, "step": 1502 }, { "epoch": 1.3358515720475503, "grad_norm": 0.35674503445625305, "learning_rate": 0.0001, "loss": 0.8902, "step": 1503 }, { "epoch": 1.3367403621819798, "grad_norm": 0.35735827684402466, "learning_rate": 0.0001, "loss": 0.9898, "step": 1504 }, { "epoch": 1.3376291523164092, "grad_norm": 0.39872074127197266, "learning_rate": 0.0001, "loss": 1.048, "step": 1505 }, { "epoch": 1.3385179424508387, "grad_norm": 0.4395754039287567, "learning_rate": 0.0001, "loss": 0.8621, "step": 1506 }, { "epoch": 1.3394067325852683, "grad_norm": 0.3650192320346832, "learning_rate": 0.0001, "loss": 0.8788, "step": 1507 }, { "epoch": 1.3402955227196978, "grad_norm": 0.35158872604370117, "learning_rate": 0.0001, "loss": 0.8228, "step": 1508 }, { "epoch": 1.3411843128541272, "grad_norm": 0.3553421199321747, "learning_rate": 0.0001, "loss": 0.9102, "step": 1509 }, { "epoch": 1.3420731029885569, "grad_norm": 0.32123276591300964, "learning_rate": 0.0001, "loss": 0.8808, "step": 1510 }, { "epoch": 1.3429618931229863, "grad_norm": 0.35363566875457764, "learning_rate": 0.0001, "loss": 0.8895, "step": 1511 }, { "epoch": 1.3438506832574157, "grad_norm": 0.38487884402275085, "learning_rate": 0.0001, "loss": 0.8713, "step": 1512 }, { "epoch": 1.3447394733918454, "grad_norm": 0.3799397051334381, "learning_rate": 0.0001, "loss": 0.8982, "step": 1513 }, { "epoch": 1.3456282635262748, "grad_norm": 0.3352080285549164, "learning_rate": 0.0001, "loss": 0.914, "step": 1514 }, { "epoch": 1.3465170536607043, "grad_norm": 0.6567979454994202, "learning_rate": 0.0001, "loss": 0.978, "step": 1515 }, { "epoch": 1.347405843795134, "grad_norm": 0.4132377505302429, "learning_rate": 0.0001, "loss": 1.0033, "step": 1516 }, { "epoch": 1.3482946339295634, "grad_norm": 0.4519813060760498, "learning_rate": 0.0001, "loss": 0.9213, "step": 1517 }, { "epoch": 1.3491834240639928, "grad_norm": 0.35851332545280457, "learning_rate": 0.0001, "loss": 0.9634, "step": 1518 }, { "epoch": 1.3500722141984225, "grad_norm": 0.39293810725212097, "learning_rate": 0.0001, "loss": 0.9069, "step": 1519 }, { "epoch": 1.350961004332852, "grad_norm": 0.36240848898887634, "learning_rate": 0.0001, "loss": 0.8621, "step": 1520 }, { "epoch": 1.3518497944672814, "grad_norm": 0.3404124975204468, "learning_rate": 0.0001, "loss": 0.8964, "step": 1521 }, { "epoch": 1.352738584601711, "grad_norm": 0.31596043705940247, "learning_rate": 0.0001, "loss": 0.9091, "step": 1522 }, { "epoch": 1.3536273747361405, "grad_norm": 0.3596150279045105, "learning_rate": 0.0001, "loss": 0.9275, "step": 1523 }, { "epoch": 1.35451616487057, "grad_norm": 0.35948145389556885, "learning_rate": 0.0001, "loss": 0.8821, "step": 1524 }, { "epoch": 1.3554049550049996, "grad_norm": 0.3627282381057739, "learning_rate": 0.0001, "loss": 0.9501, "step": 1525 }, { "epoch": 1.356293745139429, "grad_norm": 0.38337674736976624, "learning_rate": 0.0001, "loss": 0.8677, "step": 1526 }, { "epoch": 1.3571825352738585, "grad_norm": 0.38964757323265076, "learning_rate": 0.0001, "loss": 0.9327, "step": 1527 }, { "epoch": 1.358071325408288, "grad_norm": 0.3563820421695709, "learning_rate": 0.0001, "loss": 0.836, "step": 1528 }, { "epoch": 1.3589601155427173, "grad_norm": 0.3765336573123932, "learning_rate": 0.0001, "loss": 0.9198, "step": 1529 }, { "epoch": 1.359848905677147, "grad_norm": 0.3981674611568451, "learning_rate": 0.0001, "loss": 0.9539, "step": 1530 }, { "epoch": 1.3607376958115764, "grad_norm": 0.4140075147151947, "learning_rate": 0.0001, "loss": 0.9104, "step": 1531 }, { "epoch": 1.3616264859460059, "grad_norm": 0.4137401282787323, "learning_rate": 0.0001, "loss": 1.0583, "step": 1532 }, { "epoch": 1.3625152760804355, "grad_norm": 0.3649982511997223, "learning_rate": 0.0001, "loss": 0.8334, "step": 1533 }, { "epoch": 1.363404066214865, "grad_norm": 0.3175910711288452, "learning_rate": 0.0001, "loss": 0.8834, "step": 1534 }, { "epoch": 1.3642928563492944, "grad_norm": 0.3742975890636444, "learning_rate": 0.0001, "loss": 0.9664, "step": 1535 }, { "epoch": 1.365181646483724, "grad_norm": 0.369791179895401, "learning_rate": 0.0001, "loss": 0.8947, "step": 1536 }, { "epoch": 1.3660704366181535, "grad_norm": 0.33241912722587585, "learning_rate": 0.0001, "loss": 0.9392, "step": 1537 }, { "epoch": 1.366959226752583, "grad_norm": 0.345084011554718, "learning_rate": 0.0001, "loss": 0.8655, "step": 1538 }, { "epoch": 1.3678480168870126, "grad_norm": 0.33112481236457825, "learning_rate": 0.0001, "loss": 0.9325, "step": 1539 }, { "epoch": 1.368736807021442, "grad_norm": 0.3624488413333893, "learning_rate": 0.0001, "loss": 0.9253, "step": 1540 }, { "epoch": 1.3696255971558715, "grad_norm": 0.3580976724624634, "learning_rate": 0.0001, "loss": 0.8796, "step": 1541 }, { "epoch": 1.3705143872903012, "grad_norm": 0.30134427547454834, "learning_rate": 0.0001, "loss": 0.8049, "step": 1542 }, { "epoch": 1.3714031774247306, "grad_norm": 0.3331443965435028, "learning_rate": 0.0001, "loss": 0.9014, "step": 1543 }, { "epoch": 1.37229196755916, "grad_norm": 0.33350786566734314, "learning_rate": 0.0001, "loss": 0.9826, "step": 1544 }, { "epoch": 1.3731807576935897, "grad_norm": 0.3651019036769867, "learning_rate": 0.0001, "loss": 0.9215, "step": 1545 }, { "epoch": 1.3740695478280192, "grad_norm": 0.3742315173149109, "learning_rate": 0.0001, "loss": 0.9774, "step": 1546 }, { "epoch": 1.3749583379624486, "grad_norm": 0.32286152243614197, "learning_rate": 0.0001, "loss": 0.8499, "step": 1547 }, { "epoch": 1.3758471280968783, "grad_norm": 0.33028700947761536, "learning_rate": 0.0001, "loss": 0.8511, "step": 1548 }, { "epoch": 1.3767359182313077, "grad_norm": 0.3529914617538452, "learning_rate": 0.0001, "loss": 0.8478, "step": 1549 }, { "epoch": 1.3776247083657371, "grad_norm": 0.34889042377471924, "learning_rate": 0.0001, "loss": 0.8968, "step": 1550 }, { "epoch": 1.3785134985001666, "grad_norm": 0.35926511883735657, "learning_rate": 0.0001, "loss": 0.9326, "step": 1551 }, { "epoch": 1.379402288634596, "grad_norm": 0.3612864315509796, "learning_rate": 0.0001, "loss": 0.9212, "step": 1552 }, { "epoch": 1.3802910787690257, "grad_norm": 0.342424601316452, "learning_rate": 0.0001, "loss": 0.8462, "step": 1553 }, { "epoch": 1.3811798689034551, "grad_norm": 0.36990538239479065, "learning_rate": 0.0001, "loss": 0.8569, "step": 1554 }, { "epoch": 1.3820686590378846, "grad_norm": 0.3396192491054535, "learning_rate": 0.0001, "loss": 0.8649, "step": 1555 }, { "epoch": 1.3829574491723142, "grad_norm": 0.3837670087814331, "learning_rate": 0.0001, "loss": 0.8626, "step": 1556 }, { "epoch": 1.3838462393067437, "grad_norm": 0.3661665618419647, "learning_rate": 0.0001, "loss": 0.9781, "step": 1557 }, { "epoch": 1.384735029441173, "grad_norm": 0.41726961731910706, "learning_rate": 0.0001, "loss": 0.9522, "step": 1558 }, { "epoch": 1.3856238195756028, "grad_norm": 0.4061615467071533, "learning_rate": 0.0001, "loss": 0.9127, "step": 1559 }, { "epoch": 1.3865126097100322, "grad_norm": 0.32869914174079895, "learning_rate": 0.0001, "loss": 0.8947, "step": 1560 }, { "epoch": 1.3874013998444616, "grad_norm": 0.6287943124771118, "learning_rate": 0.0001, "loss": 0.8759, "step": 1561 }, { "epoch": 1.3882901899788913, "grad_norm": 0.562345027923584, "learning_rate": 0.0001, "loss": 0.9109, "step": 1562 }, { "epoch": 1.3891789801133207, "grad_norm": 0.39289891719818115, "learning_rate": 0.0001, "loss": 0.9831, "step": 1563 }, { "epoch": 1.3900677702477502, "grad_norm": 0.4826609194278717, "learning_rate": 0.0001, "loss": 0.8846, "step": 1564 }, { "epoch": 1.3909565603821799, "grad_norm": 0.4759630262851715, "learning_rate": 0.0001, "loss": 0.7826, "step": 1565 }, { "epoch": 1.3918453505166093, "grad_norm": 0.8236848711967468, "learning_rate": 0.0001, "loss": 0.9145, "step": 1566 }, { "epoch": 1.3927341406510387, "grad_norm": 0.39659222960472107, "learning_rate": 0.0001, "loss": 0.9594, "step": 1567 }, { "epoch": 1.3936229307854684, "grad_norm": 0.32191386818885803, "learning_rate": 0.0001, "loss": 0.8515, "step": 1568 }, { "epoch": 1.3945117209198978, "grad_norm": 0.397210955619812, "learning_rate": 0.0001, "loss": 0.9207, "step": 1569 }, { "epoch": 1.3954005110543273, "grad_norm": 0.36070799827575684, "learning_rate": 0.0001, "loss": 0.932, "step": 1570 }, { "epoch": 1.396289301188757, "grad_norm": 0.35527974367141724, "learning_rate": 0.0001, "loss": 0.9069, "step": 1571 }, { "epoch": 1.3971780913231864, "grad_norm": 0.39062851667404175, "learning_rate": 0.0001, "loss": 0.9763, "step": 1572 }, { "epoch": 1.3980668814576158, "grad_norm": 0.3772708475589752, "learning_rate": 0.0001, "loss": 0.9299, "step": 1573 }, { "epoch": 1.3989556715920453, "grad_norm": 0.3413572609424591, "learning_rate": 0.0001, "loss": 0.9314, "step": 1574 }, { "epoch": 1.3998444617264747, "grad_norm": 0.4356358051300049, "learning_rate": 0.0001, "loss": 0.8741, "step": 1575 }, { "epoch": 1.4007332518609044, "grad_norm": 0.3393174409866333, "learning_rate": 0.0001, "loss": 0.8657, "step": 1576 }, { "epoch": 1.4016220419953338, "grad_norm": 0.35734716057777405, "learning_rate": 0.0001, "loss": 0.9329, "step": 1577 }, { "epoch": 1.4025108321297632, "grad_norm": 0.3565090298652649, "learning_rate": 0.0001, "loss": 0.9528, "step": 1578 }, { "epoch": 1.403399622264193, "grad_norm": 0.39361128211021423, "learning_rate": 0.0001, "loss": 0.8599, "step": 1579 }, { "epoch": 1.4042884123986223, "grad_norm": 0.4015718102455139, "learning_rate": 0.0001, "loss": 0.8092, "step": 1580 }, { "epoch": 1.4051772025330518, "grad_norm": 0.35963454842567444, "learning_rate": 0.0001, "loss": 0.9064, "step": 1581 }, { "epoch": 1.4060659926674814, "grad_norm": 0.3792876601219177, "learning_rate": 0.0001, "loss": 0.9415, "step": 1582 }, { "epoch": 1.4069547828019109, "grad_norm": 0.45550185441970825, "learning_rate": 0.0001, "loss": 0.9823, "step": 1583 }, { "epoch": 1.4078435729363403, "grad_norm": 0.3728649318218231, "learning_rate": 0.0001, "loss": 0.9192, "step": 1584 }, { "epoch": 1.40873236307077, "grad_norm": 0.35824286937713623, "learning_rate": 0.0001, "loss": 0.9029, "step": 1585 }, { "epoch": 1.4096211532051994, "grad_norm": 0.34785544872283936, "learning_rate": 0.0001, "loss": 0.8923, "step": 1586 }, { "epoch": 1.4105099433396289, "grad_norm": 0.3750496208667755, "learning_rate": 0.0001, "loss": 0.97, "step": 1587 }, { "epoch": 1.4113987334740585, "grad_norm": 0.365651398897171, "learning_rate": 0.0001, "loss": 0.9155, "step": 1588 }, { "epoch": 1.412287523608488, "grad_norm": 0.389883428812027, "learning_rate": 0.0001, "loss": 0.8792, "step": 1589 }, { "epoch": 1.4131763137429174, "grad_norm": 0.3404034674167633, "learning_rate": 0.0001, "loss": 0.8753, "step": 1590 }, { "epoch": 1.414065103877347, "grad_norm": 0.3574478328227997, "learning_rate": 0.0001, "loss": 0.884, "step": 1591 }, { "epoch": 1.4149538940117765, "grad_norm": 0.3543257415294647, "learning_rate": 0.0001, "loss": 0.9347, "step": 1592 }, { "epoch": 1.415842684146206, "grad_norm": 0.37373191118240356, "learning_rate": 0.0001, "loss": 0.9138, "step": 1593 }, { "epoch": 1.4167314742806356, "grad_norm": 0.7103110551834106, "learning_rate": 0.0001, "loss": 0.918, "step": 1594 }, { "epoch": 1.417620264415065, "grad_norm": 0.377218097448349, "learning_rate": 0.0001, "loss": 1.0207, "step": 1595 }, { "epoch": 1.4185090545494945, "grad_norm": 0.3593274652957916, "learning_rate": 0.0001, "loss": 0.8988, "step": 1596 }, { "epoch": 1.419397844683924, "grad_norm": 0.3128172755241394, "learning_rate": 0.0001, "loss": 0.9314, "step": 1597 }, { "epoch": 1.4202866348183536, "grad_norm": 0.39537543058395386, "learning_rate": 0.0001, "loss": 0.9583, "step": 1598 }, { "epoch": 1.421175424952783, "grad_norm": 0.38807258009910583, "learning_rate": 0.0001, "loss": 0.9978, "step": 1599 }, { "epoch": 1.4220642150872125, "grad_norm": 0.32216835021972656, "learning_rate": 0.0001, "loss": 0.8949, "step": 1600 }, { "epoch": 1.422953005221642, "grad_norm": 0.3530212938785553, "learning_rate": 0.0001, "loss": 0.9434, "step": 1601 }, { "epoch": 1.4238417953560716, "grad_norm": 0.38593819737434387, "learning_rate": 0.0001, "loss": 0.8588, "step": 1602 }, { "epoch": 1.424730585490501, "grad_norm": 0.319916307926178, "learning_rate": 0.0001, "loss": 0.9491, "step": 1603 }, { "epoch": 1.4256193756249305, "grad_norm": 0.46837642788887024, "learning_rate": 0.0001, "loss": 0.8415, "step": 1604 }, { "epoch": 1.4265081657593601, "grad_norm": 0.3597429394721985, "learning_rate": 0.0001, "loss": 0.9981, "step": 1605 }, { "epoch": 1.4273969558937896, "grad_norm": 0.33240172266960144, "learning_rate": 0.0001, "loss": 0.9084, "step": 1606 }, { "epoch": 1.428285746028219, "grad_norm": 0.3380354046821594, "learning_rate": 0.0001, "loss": 0.9233, "step": 1607 }, { "epoch": 1.4291745361626487, "grad_norm": 0.503455400466919, "learning_rate": 0.0001, "loss": 0.9825, "step": 1608 }, { "epoch": 1.430063326297078, "grad_norm": 0.3610774576663971, "learning_rate": 0.0001, "loss": 1.0047, "step": 1609 }, { "epoch": 1.4309521164315075, "grad_norm": 0.44202589988708496, "learning_rate": 0.0001, "loss": 0.9203, "step": 1610 }, { "epoch": 1.4318409065659372, "grad_norm": 0.3535199761390686, "learning_rate": 0.0001, "loss": 0.9925, "step": 1611 }, { "epoch": 1.4327296967003667, "grad_norm": 0.3379201292991638, "learning_rate": 0.0001, "loss": 0.9715, "step": 1612 }, { "epoch": 1.433618486834796, "grad_norm": 0.34348997473716736, "learning_rate": 0.0001, "loss": 0.9106, "step": 1613 }, { "epoch": 1.4345072769692258, "grad_norm": 0.341406911611557, "learning_rate": 0.0001, "loss": 0.9345, "step": 1614 }, { "epoch": 1.4353960671036552, "grad_norm": 0.2867315411567688, "learning_rate": 0.0001, "loss": 0.8473, "step": 1615 }, { "epoch": 1.4362848572380846, "grad_norm": 0.3688201308250427, "learning_rate": 0.0001, "loss": 0.887, "step": 1616 }, { "epoch": 1.4371736473725143, "grad_norm": 0.3555888235569, "learning_rate": 0.0001, "loss": 0.8905, "step": 1617 }, { "epoch": 1.4380624375069437, "grad_norm": 0.3761601448059082, "learning_rate": 0.0001, "loss": 0.8659, "step": 1618 }, { "epoch": 1.4389512276413732, "grad_norm": 0.3437979519367218, "learning_rate": 0.0001, "loss": 0.9541, "step": 1619 }, { "epoch": 1.4398400177758026, "grad_norm": 0.33345940709114075, "learning_rate": 0.0001, "loss": 0.9721, "step": 1620 }, { "epoch": 1.4407288079102323, "grad_norm": 0.356842577457428, "learning_rate": 0.0001, "loss": 0.9055, "step": 1621 }, { "epoch": 1.4416175980446617, "grad_norm": 0.35060185194015503, "learning_rate": 0.0001, "loss": 0.8517, "step": 1622 }, { "epoch": 1.4425063881790912, "grad_norm": 0.3705711364746094, "learning_rate": 0.0001, "loss": 1.0321, "step": 1623 }, { "epoch": 1.4433951783135206, "grad_norm": 0.38974469900131226, "learning_rate": 0.0001, "loss": 0.8974, "step": 1624 }, { "epoch": 1.4442839684479503, "grad_norm": 0.40210771560668945, "learning_rate": 0.0001, "loss": 0.9183, "step": 1625 }, { "epoch": 1.4451727585823797, "grad_norm": 0.3603565990924835, "learning_rate": 0.0001, "loss": 0.8622, "step": 1626 }, { "epoch": 1.4460615487168091, "grad_norm": 0.3595213294029236, "learning_rate": 0.0001, "loss": 0.931, "step": 1627 }, { "epoch": 1.4469503388512388, "grad_norm": 0.34834766387939453, "learning_rate": 0.0001, "loss": 0.9076, "step": 1628 }, { "epoch": 1.4478391289856682, "grad_norm": 0.3451787531375885, "learning_rate": 0.0001, "loss": 0.8818, "step": 1629 }, { "epoch": 1.4487279191200977, "grad_norm": 0.3447902202606201, "learning_rate": 0.0001, "loss": 0.8725, "step": 1630 }, { "epoch": 1.4496167092545273, "grad_norm": 0.3713982403278351, "learning_rate": 0.0001, "loss": 0.9179, "step": 1631 }, { "epoch": 1.4505054993889568, "grad_norm": 0.41470077633857727, "learning_rate": 0.0001, "loss": 0.8618, "step": 1632 }, { "epoch": 1.4513942895233862, "grad_norm": 0.35781964659690857, "learning_rate": 0.0001, "loss": 0.8232, "step": 1633 }, { "epoch": 1.452283079657816, "grad_norm": 0.32825222611427307, "learning_rate": 0.0001, "loss": 0.8527, "step": 1634 }, { "epoch": 1.4531718697922453, "grad_norm": 0.3815636932849884, "learning_rate": 0.0001, "loss": 0.9824, "step": 1635 }, { "epoch": 1.4540606599266748, "grad_norm": 0.3856026828289032, "learning_rate": 0.0001, "loss": 0.9522, "step": 1636 }, { "epoch": 1.4549494500611044, "grad_norm": 0.3923175036907196, "learning_rate": 0.0001, "loss": 0.8057, "step": 1637 }, { "epoch": 1.4558382401955339, "grad_norm": 0.3747645914554596, "learning_rate": 0.0001, "loss": 0.9438, "step": 1638 }, { "epoch": 1.4567270303299633, "grad_norm": 0.3561338782310486, "learning_rate": 0.0001, "loss": 0.8827, "step": 1639 }, { "epoch": 1.457615820464393, "grad_norm": 0.32757651805877686, "learning_rate": 0.0001, "loss": 0.8562, "step": 1640 }, { "epoch": 1.4585046105988224, "grad_norm": 0.378717839717865, "learning_rate": 0.0001, "loss": 0.9642, "step": 1641 }, { "epoch": 1.4593934007332519, "grad_norm": 0.3806663751602173, "learning_rate": 0.0001, "loss": 0.9111, "step": 1642 }, { "epoch": 1.4602821908676813, "grad_norm": 0.34055235981941223, "learning_rate": 0.0001, "loss": 0.9138, "step": 1643 }, { "epoch": 1.461170981002111, "grad_norm": 0.3508027493953705, "learning_rate": 0.0001, "loss": 0.8867, "step": 1644 }, { "epoch": 1.4620597711365404, "grad_norm": 0.3853304386138916, "learning_rate": 0.0001, "loss": 0.895, "step": 1645 }, { "epoch": 1.4629485612709698, "grad_norm": 0.39283403754234314, "learning_rate": 0.0001, "loss": 0.8977, "step": 1646 }, { "epoch": 1.4638373514053993, "grad_norm": 0.3606354892253876, "learning_rate": 0.0001, "loss": 0.8564, "step": 1647 }, { "epoch": 1.464726141539829, "grad_norm": 0.3763819932937622, "learning_rate": 0.0001, "loss": 0.8714, "step": 1648 }, { "epoch": 1.4656149316742584, "grad_norm": 0.3701646625995636, "learning_rate": 0.0001, "loss": 0.9512, "step": 1649 }, { "epoch": 1.4665037218086878, "grad_norm": 0.383543998003006, "learning_rate": 0.0001, "loss": 0.9052, "step": 1650 }, { "epoch": 1.4673925119431175, "grad_norm": 0.3642030954360962, "learning_rate": 0.0001, "loss": 0.8511, "step": 1651 }, { "epoch": 1.468281302077547, "grad_norm": 0.38401633501052856, "learning_rate": 0.0001, "loss": 0.9076, "step": 1652 }, { "epoch": 1.4691700922119764, "grad_norm": 0.3193959891796112, "learning_rate": 0.0001, "loss": 0.8841, "step": 1653 }, { "epoch": 1.470058882346406, "grad_norm": 0.35109949111938477, "learning_rate": 0.0001, "loss": 0.9746, "step": 1654 }, { "epoch": 1.4709476724808355, "grad_norm": 0.38246193528175354, "learning_rate": 0.0001, "loss": 0.9658, "step": 1655 }, { "epoch": 1.471836462615265, "grad_norm": 0.3813583552837372, "learning_rate": 0.0001, "loss": 0.9618, "step": 1656 }, { "epoch": 1.4727252527496946, "grad_norm": 0.345525860786438, "learning_rate": 0.0001, "loss": 1.0142, "step": 1657 }, { "epoch": 1.473614042884124, "grad_norm": 0.3886154592037201, "learning_rate": 0.0001, "loss": 0.9028, "step": 1658 }, { "epoch": 1.4745028330185534, "grad_norm": 0.31922218203544617, "learning_rate": 0.0001, "loss": 0.9339, "step": 1659 }, { "epoch": 1.4753916231529831, "grad_norm": 0.38563981652259827, "learning_rate": 0.0001, "loss": 1.0242, "step": 1660 }, { "epoch": 1.4762804132874126, "grad_norm": 0.3879290223121643, "learning_rate": 0.0001, "loss": 0.9703, "step": 1661 }, { "epoch": 1.477169203421842, "grad_norm": 0.36282819509506226, "learning_rate": 0.0001, "loss": 0.9179, "step": 1662 }, { "epoch": 1.4780579935562717, "grad_norm": 0.39247390627861023, "learning_rate": 0.0001, "loss": 0.8528, "step": 1663 }, { "epoch": 1.478946783690701, "grad_norm": 0.37190213799476624, "learning_rate": 0.0001, "loss": 0.8894, "step": 1664 }, { "epoch": 1.4798355738251305, "grad_norm": 0.35375040769577026, "learning_rate": 0.0001, "loss": 0.862, "step": 1665 }, { "epoch": 1.48072436395956, "grad_norm": 0.38695651292800903, "learning_rate": 0.0001, "loss": 0.9118, "step": 1666 }, { "epoch": 1.4816131540939896, "grad_norm": 0.34383878111839294, "learning_rate": 0.0001, "loss": 0.874, "step": 1667 }, { "epoch": 1.482501944228419, "grad_norm": 0.3391941785812378, "learning_rate": 0.0001, "loss": 0.9457, "step": 1668 }, { "epoch": 1.4833907343628485, "grad_norm": 0.3345963954925537, "learning_rate": 0.0001, "loss": 0.8521, "step": 1669 }, { "epoch": 1.484279524497278, "grad_norm": 0.3772295117378235, "learning_rate": 0.0001, "loss": 0.8987, "step": 1670 }, { "epoch": 1.4851683146317076, "grad_norm": 0.3609481751918793, "learning_rate": 0.0001, "loss": 0.8841, "step": 1671 }, { "epoch": 1.486057104766137, "grad_norm": 0.3498031795024872, "learning_rate": 0.0001, "loss": 0.9713, "step": 1672 }, { "epoch": 1.4869458949005665, "grad_norm": 0.33337390422821045, "learning_rate": 0.0001, "loss": 0.9458, "step": 1673 }, { "epoch": 1.4878346850349962, "grad_norm": 0.3364051282405853, "learning_rate": 0.0001, "loss": 0.8227, "step": 1674 }, { "epoch": 1.4887234751694256, "grad_norm": 0.3339247405529022, "learning_rate": 0.0001, "loss": 0.9191, "step": 1675 }, { "epoch": 1.489612265303855, "grad_norm": 0.3879411816596985, "learning_rate": 0.0001, "loss": 0.9857, "step": 1676 }, { "epoch": 1.4905010554382847, "grad_norm": 0.3417137563228607, "learning_rate": 0.0001, "loss": 0.8781, "step": 1677 }, { "epoch": 1.4913898455727141, "grad_norm": 0.3149627447128296, "learning_rate": 0.0001, "loss": 0.9536, "step": 1678 }, { "epoch": 1.4922786357071436, "grad_norm": 0.3448125720024109, "learning_rate": 0.0001, "loss": 0.8991, "step": 1679 }, { "epoch": 1.4931674258415732, "grad_norm": 0.3406059741973877, "learning_rate": 0.0001, "loss": 0.9704, "step": 1680 }, { "epoch": 1.4940562159760027, "grad_norm": 0.3938051164150238, "learning_rate": 0.0001, "loss": 0.9602, "step": 1681 }, { "epoch": 1.4949450061104321, "grad_norm": 0.35217610001564026, "learning_rate": 0.0001, "loss": 0.8743, "step": 1682 }, { "epoch": 1.4958337962448618, "grad_norm": 0.3443836271762848, "learning_rate": 0.0001, "loss": 0.8809, "step": 1683 }, { "epoch": 1.4967225863792912, "grad_norm": 0.36990824341773987, "learning_rate": 0.0001, "loss": 0.9191, "step": 1684 }, { "epoch": 1.4976113765137207, "grad_norm": 0.3850703239440918, "learning_rate": 0.0001, "loss": 0.9439, "step": 1685 }, { "epoch": 1.4985001666481503, "grad_norm": 0.336357444524765, "learning_rate": 0.0001, "loss": 0.9015, "step": 1686 }, { "epoch": 1.4993889567825798, "grad_norm": 0.33322861790657043, "learning_rate": 0.0001, "loss": 0.9134, "step": 1687 }, { "epoch": 1.5002777469170092, "grad_norm": 0.3278595507144928, "learning_rate": 0.0001, "loss": 0.902, "step": 1688 }, { "epoch": 1.5011665370514389, "grad_norm": 0.36542853713035583, "learning_rate": 0.0001, "loss": 0.9095, "step": 1689 }, { "epoch": 1.502055327185868, "grad_norm": 0.3316444456577301, "learning_rate": 0.0001, "loss": 0.8739, "step": 1690 }, { "epoch": 1.5029441173202978, "grad_norm": 0.34426775574684143, "learning_rate": 0.0001, "loss": 0.8909, "step": 1691 }, { "epoch": 1.5038329074547274, "grad_norm": 0.3481121063232422, "learning_rate": 0.0001, "loss": 0.9043, "step": 1692 }, { "epoch": 1.5047216975891566, "grad_norm": 0.3318212628364563, "learning_rate": 0.0001, "loss": 0.9219, "step": 1693 }, { "epoch": 1.5056104877235863, "grad_norm": 0.3894112706184387, "learning_rate": 0.0001, "loss": 0.9349, "step": 1694 }, { "epoch": 1.5064992778580157, "grad_norm": 0.3638782501220703, "learning_rate": 0.0001, "loss": 0.8549, "step": 1695 }, { "epoch": 1.5073880679924452, "grad_norm": 0.3686305284500122, "learning_rate": 0.0001, "loss": 0.8293, "step": 1696 }, { "epoch": 1.5082768581268748, "grad_norm": 0.3255722224712372, "learning_rate": 0.0001, "loss": 0.9002, "step": 1697 }, { "epoch": 1.5091656482613043, "grad_norm": 0.3039862811565399, "learning_rate": 0.0001, "loss": 0.838, "step": 1698 }, { "epoch": 1.5100544383957337, "grad_norm": 0.372408926486969, "learning_rate": 0.0001, "loss": 0.9254, "step": 1699 }, { "epoch": 1.5109432285301634, "grad_norm": 0.3547128438949585, "learning_rate": 0.0001, "loss": 0.9622, "step": 1700 }, { "epoch": 1.5118320186645928, "grad_norm": 0.3619207441806793, "learning_rate": 0.0001, "loss": 0.9454, "step": 1701 }, { "epoch": 1.5127208087990223, "grad_norm": 0.347741961479187, "learning_rate": 0.0001, "loss": 0.8835, "step": 1702 }, { "epoch": 1.513609598933452, "grad_norm": 0.3651529848575592, "learning_rate": 0.0001, "loss": 0.9481, "step": 1703 }, { "epoch": 1.5144983890678814, "grad_norm": 0.3714975416660309, "learning_rate": 0.0001, "loss": 0.8664, "step": 1704 }, { "epoch": 1.5153871792023108, "grad_norm": 0.38629162311553955, "learning_rate": 0.0001, "loss": 0.911, "step": 1705 }, { "epoch": 1.5162759693367405, "grad_norm": 0.31858816742897034, "learning_rate": 0.0001, "loss": 0.8828, "step": 1706 }, { "epoch": 1.51716475947117, "grad_norm": 0.34497538208961487, "learning_rate": 0.0001, "loss": 0.8967, "step": 1707 }, { "epoch": 1.5180535496055994, "grad_norm": 0.34334251284599304, "learning_rate": 0.0001, "loss": 0.9161, "step": 1708 }, { "epoch": 1.518942339740029, "grad_norm": 0.35556355118751526, "learning_rate": 0.0001, "loss": 0.9303, "step": 1709 }, { "epoch": 1.5198311298744582, "grad_norm": 0.2841368019580841, "learning_rate": 0.0001, "loss": 0.9444, "step": 1710 }, { "epoch": 1.520719920008888, "grad_norm": 0.36247727274894714, "learning_rate": 0.0001, "loss": 0.9561, "step": 1711 }, { "epoch": 1.5216087101433176, "grad_norm": 0.3494194447994232, "learning_rate": 0.0001, "loss": 0.9227, "step": 1712 }, { "epoch": 1.5224975002777468, "grad_norm": 0.34062543511390686, "learning_rate": 0.0001, "loss": 0.9002, "step": 1713 }, { "epoch": 1.5233862904121764, "grad_norm": 0.32053127884864807, "learning_rate": 0.0001, "loss": 0.8472, "step": 1714 }, { "epoch": 1.524275080546606, "grad_norm": 0.34881147742271423, "learning_rate": 0.0001, "loss": 0.996, "step": 1715 }, { "epoch": 1.5251638706810353, "grad_norm": 0.31298136711120605, "learning_rate": 0.0001, "loss": 0.9664, "step": 1716 }, { "epoch": 1.526052660815465, "grad_norm": 0.35817843675613403, "learning_rate": 0.0001, "loss": 0.8581, "step": 1717 }, { "epoch": 1.5269414509498944, "grad_norm": 0.36280620098114014, "learning_rate": 0.0001, "loss": 0.9752, "step": 1718 }, { "epoch": 1.5278302410843239, "grad_norm": 0.3819250464439392, "learning_rate": 0.0001, "loss": 0.8537, "step": 1719 }, { "epoch": 1.5287190312187535, "grad_norm": 0.319489061832428, "learning_rate": 0.0001, "loss": 0.8939, "step": 1720 }, { "epoch": 1.529607821353183, "grad_norm": 0.3549860417842865, "learning_rate": 0.0001, "loss": 0.9703, "step": 1721 }, { "epoch": 1.5304966114876124, "grad_norm": 0.3653222620487213, "learning_rate": 0.0001, "loss": 0.9454, "step": 1722 }, { "epoch": 1.531385401622042, "grad_norm": 1.377524495124817, "learning_rate": 0.0001, "loss": 0.9496, "step": 1723 }, { "epoch": 1.5322741917564715, "grad_norm": 0.37237972021102905, "learning_rate": 0.0001, "loss": 0.9882, "step": 1724 }, { "epoch": 1.533162981890901, "grad_norm": 0.3513152003288269, "learning_rate": 0.0001, "loss": 0.9387, "step": 1725 }, { "epoch": 1.5340517720253306, "grad_norm": 1.2794930934906006, "learning_rate": 0.0001, "loss": 0.9713, "step": 1726 }, { "epoch": 1.53494056215976, "grad_norm": 0.37315309047698975, "learning_rate": 0.0001, "loss": 0.9837, "step": 1727 }, { "epoch": 1.5358293522941895, "grad_norm": 0.34950658679008484, "learning_rate": 0.0001, "loss": 0.9261, "step": 1728 }, { "epoch": 1.5367181424286191, "grad_norm": 0.3580825924873352, "learning_rate": 0.0001, "loss": 0.9007, "step": 1729 }, { "epoch": 1.5376069325630486, "grad_norm": 0.6059324145317078, "learning_rate": 0.0001, "loss": 0.9215, "step": 1730 }, { "epoch": 1.538495722697478, "grad_norm": 2.688345432281494, "learning_rate": 0.0001, "loss": 0.9347, "step": 1731 }, { "epoch": 1.5393845128319077, "grad_norm": 1.058393955230713, "learning_rate": 0.0001, "loss": 0.8553, "step": 1732 }, { "epoch": 1.540273302966337, "grad_norm": 0.5399541854858398, "learning_rate": 0.0001, "loss": 0.9347, "step": 1733 }, { "epoch": 1.5411620931007666, "grad_norm": 0.38701578974723816, "learning_rate": 0.0001, "loss": 0.9532, "step": 1734 }, { "epoch": 1.5420508832351962, "grad_norm": 0.3606896698474884, "learning_rate": 0.0001, "loss": 0.9679, "step": 1735 }, { "epoch": 1.5429396733696255, "grad_norm": 0.33948925137519836, "learning_rate": 0.0001, "loss": 0.8631, "step": 1736 }, { "epoch": 1.5438284635040551, "grad_norm": 0.3091464638710022, "learning_rate": 0.0001, "loss": 0.8451, "step": 1737 }, { "epoch": 1.5447172536384848, "grad_norm": 0.3580194413661957, "learning_rate": 0.0001, "loss": 0.9166, "step": 1738 }, { "epoch": 1.545606043772914, "grad_norm": 0.32534483075141907, "learning_rate": 0.0001, "loss": 0.8588, "step": 1739 }, { "epoch": 1.5464948339073437, "grad_norm": 0.37172192335128784, "learning_rate": 0.0001, "loss": 0.8881, "step": 1740 }, { "epoch": 1.547383624041773, "grad_norm": 0.3773120045661926, "learning_rate": 0.0001, "loss": 0.9553, "step": 1741 }, { "epoch": 1.5482724141762025, "grad_norm": 0.37788835167884827, "learning_rate": 0.0001, "loss": 0.9444, "step": 1742 }, { "epoch": 1.5491612043106322, "grad_norm": 0.33638015389442444, "learning_rate": 0.0001, "loss": 0.8482, "step": 1743 }, { "epoch": 1.5500499944450616, "grad_norm": 0.38720908761024475, "learning_rate": 0.0001, "loss": 0.8876, "step": 1744 }, { "epoch": 1.550938784579491, "grad_norm": 0.3406068980693817, "learning_rate": 0.0001, "loss": 0.9519, "step": 1745 }, { "epoch": 1.5518275747139207, "grad_norm": 0.3615241050720215, "learning_rate": 0.0001, "loss": 0.9574, "step": 1746 }, { "epoch": 1.5527163648483502, "grad_norm": 0.34514304995536804, "learning_rate": 0.0001, "loss": 0.9059, "step": 1747 }, { "epoch": 1.5536051549827796, "grad_norm": 0.3243461549282074, "learning_rate": 0.0001, "loss": 0.888, "step": 1748 }, { "epoch": 1.5544939451172093, "grad_norm": 0.37375468015670776, "learning_rate": 0.0001, "loss": 0.7914, "step": 1749 }, { "epoch": 1.5553827352516387, "grad_norm": 0.37953221797943115, "learning_rate": 0.0001, "loss": 0.8981, "step": 1750 }, { "epoch": 1.5562715253860682, "grad_norm": 0.36913594603538513, "learning_rate": 0.0001, "loss": 0.9042, "step": 1751 }, { "epoch": 1.5571603155204978, "grad_norm": 0.40167394280433655, "learning_rate": 0.0001, "loss": 0.9929, "step": 1752 }, { "epoch": 1.5580491056549273, "grad_norm": 0.3700322210788727, "learning_rate": 0.0001, "loss": 0.9464, "step": 1753 }, { "epoch": 1.5589378957893567, "grad_norm": 0.4139519929885864, "learning_rate": 0.0001, "loss": 0.8966, "step": 1754 }, { "epoch": 1.5598266859237864, "grad_norm": 1.0801198482513428, "learning_rate": 0.0001, "loss": 0.8788, "step": 1755 }, { "epoch": 1.5607154760582156, "grad_norm": 0.8275560140609741, "learning_rate": 0.0001, "loss": 0.8335, "step": 1756 }, { "epoch": 1.5616042661926453, "grad_norm": 0.5179623961448669, "learning_rate": 0.0001, "loss": 0.9282, "step": 1757 }, { "epoch": 1.562493056327075, "grad_norm": 0.3609626889228821, "learning_rate": 0.0001, "loss": 0.8669, "step": 1758 }, { "epoch": 1.5633818464615041, "grad_norm": 0.35782667994499207, "learning_rate": 0.0001, "loss": 0.8826, "step": 1759 }, { "epoch": 1.5642706365959338, "grad_norm": 0.5242655873298645, "learning_rate": 0.0001, "loss": 0.8888, "step": 1760 }, { "epoch": 1.5651594267303635, "grad_norm": 0.3657304346561432, "learning_rate": 0.0001, "loss": 0.9309, "step": 1761 }, { "epoch": 1.5660482168647927, "grad_norm": 0.5481138825416565, "learning_rate": 0.0001, "loss": 0.8967, "step": 1762 }, { "epoch": 1.5669370069992223, "grad_norm": 0.39052364230155945, "learning_rate": 0.0001, "loss": 0.9363, "step": 1763 }, { "epoch": 1.5678257971336518, "grad_norm": 0.5495015978813171, "learning_rate": 0.0001, "loss": 0.9439, "step": 1764 }, { "epoch": 1.5687145872680812, "grad_norm": 0.3862490653991699, "learning_rate": 0.0001, "loss": 0.9504, "step": 1765 }, { "epoch": 1.5696033774025109, "grad_norm": 0.4059329330921173, "learning_rate": 0.0001, "loss": 0.9468, "step": 1766 }, { "epoch": 1.5704921675369403, "grad_norm": 0.3757980465888977, "learning_rate": 0.0001, "loss": 0.9091, "step": 1767 }, { "epoch": 1.5713809576713698, "grad_norm": 0.32866010069847107, "learning_rate": 0.0001, "loss": 0.9177, "step": 1768 }, { "epoch": 1.5722697478057994, "grad_norm": 0.3823925852775574, "learning_rate": 0.0001, "loss": 0.8151, "step": 1769 }, { "epoch": 1.5731585379402289, "grad_norm": 0.3286689519882202, "learning_rate": 0.0001, "loss": 0.9327, "step": 1770 }, { "epoch": 1.5740473280746583, "grad_norm": 0.3756827712059021, "learning_rate": 0.0001, "loss": 0.9451, "step": 1771 }, { "epoch": 1.574936118209088, "grad_norm": 0.34540703892707825, "learning_rate": 0.0001, "loss": 0.906, "step": 1772 }, { "epoch": 1.5758249083435174, "grad_norm": 0.3327772617340088, "learning_rate": 0.0001, "loss": 0.8418, "step": 1773 }, { "epoch": 1.5767136984779468, "grad_norm": 0.3816230893135071, "learning_rate": 0.0001, "loss": 0.9186, "step": 1774 }, { "epoch": 1.5776024886123765, "grad_norm": 0.3620496094226837, "learning_rate": 0.0001, "loss": 0.9283, "step": 1775 }, { "epoch": 1.578491278746806, "grad_norm": 0.3261551558971405, "learning_rate": 0.0001, "loss": 0.8566, "step": 1776 }, { "epoch": 1.5793800688812354, "grad_norm": 0.3376888334751129, "learning_rate": 0.0001, "loss": 0.8816, "step": 1777 }, { "epoch": 1.580268859015665, "grad_norm": 0.34106653928756714, "learning_rate": 0.0001, "loss": 0.8366, "step": 1778 }, { "epoch": 1.5811576491500943, "grad_norm": 0.39439812302589417, "learning_rate": 0.0001, "loss": 0.9562, "step": 1779 }, { "epoch": 1.582046439284524, "grad_norm": 0.4464170038700104, "learning_rate": 0.0001, "loss": 0.9191, "step": 1780 }, { "epoch": 1.5829352294189536, "grad_norm": 0.3573848605155945, "learning_rate": 0.0001, "loss": 0.8999, "step": 1781 }, { "epoch": 1.5838240195533828, "grad_norm": 0.3367520272731781, "learning_rate": 0.0001, "loss": 0.8198, "step": 1782 }, { "epoch": 1.5847128096878125, "grad_norm": 0.7480552196502686, "learning_rate": 0.0001, "loss": 0.9561, "step": 1783 }, { "epoch": 1.5856015998222421, "grad_norm": 0.3286367952823639, "learning_rate": 0.0001, "loss": 0.9986, "step": 1784 }, { "epoch": 1.5864903899566714, "grad_norm": 0.32625967264175415, "learning_rate": 0.0001, "loss": 0.9228, "step": 1785 }, { "epoch": 1.587379180091101, "grad_norm": 0.3715958893299103, "learning_rate": 0.0001, "loss": 0.9178, "step": 1786 }, { "epoch": 1.5882679702255305, "grad_norm": 0.34337377548217773, "learning_rate": 0.0001, "loss": 0.9656, "step": 1787 }, { "epoch": 1.58915676035996, "grad_norm": 0.36245790123939514, "learning_rate": 0.0001, "loss": 0.9587, "step": 1788 }, { "epoch": 1.5900455504943896, "grad_norm": 0.4087197780609131, "learning_rate": 0.0001, "loss": 0.9179, "step": 1789 }, { "epoch": 1.590934340628819, "grad_norm": 0.4008493423461914, "learning_rate": 0.0001, "loss": 0.8691, "step": 1790 }, { "epoch": 1.5918231307632484, "grad_norm": 0.8266111016273499, "learning_rate": 0.0001, "loss": 1.0221, "step": 1791 }, { "epoch": 1.592711920897678, "grad_norm": 0.3512238562107086, "learning_rate": 0.0001, "loss": 0.8909, "step": 1792 }, { "epoch": 1.5936007110321075, "grad_norm": 0.3301697075366974, "learning_rate": 0.0001, "loss": 0.9072, "step": 1793 }, { "epoch": 1.594489501166537, "grad_norm": 0.3027113974094391, "learning_rate": 0.0001, "loss": 0.8956, "step": 1794 }, { "epoch": 1.5953782913009666, "grad_norm": 0.34008100628852844, "learning_rate": 0.0001, "loss": 0.922, "step": 1795 }, { "epoch": 1.596267081435396, "grad_norm": 0.37972933053970337, "learning_rate": 0.0001, "loss": 0.8718, "step": 1796 }, { "epoch": 1.5971558715698255, "grad_norm": 0.38649454712867737, "learning_rate": 0.0001, "loss": 0.8555, "step": 1797 }, { "epoch": 1.5980446617042552, "grad_norm": 0.3708219826221466, "learning_rate": 0.0001, "loss": 0.8959, "step": 1798 }, { "epoch": 1.5989334518386846, "grad_norm": 0.36001938581466675, "learning_rate": 0.0001, "loss": 0.8835, "step": 1799 }, { "epoch": 1.599822241973114, "grad_norm": 0.36445868015289307, "learning_rate": 0.0001, "loss": 0.9237, "step": 1800 }, { "epoch": 1.6007110321075437, "grad_norm": 0.41779571771621704, "learning_rate": 0.0001, "loss": 0.9475, "step": 1801 }, { "epoch": 1.601599822241973, "grad_norm": 0.48116981983184814, "learning_rate": 0.0001, "loss": 0.9844, "step": 1802 }, { "epoch": 1.6024886123764026, "grad_norm": 0.3373110592365265, "learning_rate": 0.0001, "loss": 0.8992, "step": 1803 }, { "epoch": 1.6033774025108323, "grad_norm": 0.33223381638526917, "learning_rate": 0.0001, "loss": 0.8746, "step": 1804 }, { "epoch": 1.6042661926452615, "grad_norm": 0.3680526912212372, "learning_rate": 0.0001, "loss": 0.9382, "step": 1805 }, { "epoch": 1.6051549827796912, "grad_norm": 0.5222595930099487, "learning_rate": 0.0001, "loss": 0.9586, "step": 1806 }, { "epoch": 1.6060437729141208, "grad_norm": 0.35774093866348267, "learning_rate": 0.0001, "loss": 0.9331, "step": 1807 }, { "epoch": 1.60693256304855, "grad_norm": 0.528674840927124, "learning_rate": 0.0001, "loss": 0.9237, "step": 1808 }, { "epoch": 1.6078213531829797, "grad_norm": 0.41820868849754333, "learning_rate": 0.0001, "loss": 0.9443, "step": 1809 }, { "epoch": 1.6087101433174091, "grad_norm": 0.3307277262210846, "learning_rate": 0.0001, "loss": 0.8068, "step": 1810 }, { "epoch": 1.6095989334518386, "grad_norm": 0.4219682812690735, "learning_rate": 0.0001, "loss": 0.9318, "step": 1811 }, { "epoch": 1.6104877235862682, "grad_norm": 0.42429181933403015, "learning_rate": 0.0001, "loss": 0.8943, "step": 1812 }, { "epoch": 1.6113765137206977, "grad_norm": 1.4591997861862183, "learning_rate": 0.0001, "loss": 0.8419, "step": 1813 }, { "epoch": 1.6122653038551271, "grad_norm": 0.49615946412086487, "learning_rate": 0.0001, "loss": 0.9466, "step": 1814 }, { "epoch": 1.6131540939895568, "grad_norm": 0.5319680571556091, "learning_rate": 0.0001, "loss": 0.8491, "step": 1815 }, { "epoch": 1.6140428841239862, "grad_norm": 1.1099143028259277, "learning_rate": 0.0001, "loss": 0.9024, "step": 1816 }, { "epoch": 1.6149316742584157, "grad_norm": 0.3784678280353546, "learning_rate": 0.0001, "loss": 0.9001, "step": 1817 }, { "epoch": 1.6158204643928453, "grad_norm": 0.42704182863235474, "learning_rate": 0.0001, "loss": 0.9171, "step": 1818 }, { "epoch": 1.6167092545272748, "grad_norm": 0.3222212791442871, "learning_rate": 0.0001, "loss": 0.8957, "step": 1819 }, { "epoch": 1.6175980446617042, "grad_norm": 0.3985123038291931, "learning_rate": 0.0001, "loss": 0.803, "step": 1820 }, { "epoch": 1.6184868347961339, "grad_norm": 0.3731878697872162, "learning_rate": 0.0001, "loss": 0.9434, "step": 1821 }, { "epoch": 1.6193756249305633, "grad_norm": 0.35805362462997437, "learning_rate": 0.0001, "loss": 0.9385, "step": 1822 }, { "epoch": 1.6202644150649927, "grad_norm": 0.353607177734375, "learning_rate": 0.0001, "loss": 0.9146, "step": 1823 }, { "epoch": 1.6211532051994224, "grad_norm": 0.3861144185066223, "learning_rate": 0.0001, "loss": 0.9313, "step": 1824 }, { "epoch": 1.6220419953338518, "grad_norm": 0.3477698564529419, "learning_rate": 0.0001, "loss": 1.0114, "step": 1825 }, { "epoch": 1.6229307854682813, "grad_norm": 0.3571338355541229, "learning_rate": 0.0001, "loss": 0.9542, "step": 1826 }, { "epoch": 1.623819575602711, "grad_norm": 0.3161649703979492, "learning_rate": 0.0001, "loss": 0.8867, "step": 1827 }, { "epoch": 1.6247083657371402, "grad_norm": 0.34788355231285095, "learning_rate": 0.0001, "loss": 0.9498, "step": 1828 }, { "epoch": 1.6255971558715698, "grad_norm": 0.3480173647403717, "learning_rate": 0.0001, "loss": 0.9012, "step": 1829 }, { "epoch": 1.6264859460059995, "grad_norm": 0.3352920413017273, "learning_rate": 0.0001, "loss": 0.9405, "step": 1830 }, { "epoch": 1.6273747361404287, "grad_norm": 0.3569203019142151, "learning_rate": 0.0001, "loss": 0.8903, "step": 1831 }, { "epoch": 1.6282635262748584, "grad_norm": 0.36906954646110535, "learning_rate": 0.0001, "loss": 0.9598, "step": 1832 }, { "epoch": 1.6291523164092878, "grad_norm": 0.3525664508342743, "learning_rate": 0.0001, "loss": 0.9319, "step": 1833 }, { "epoch": 1.6300411065437173, "grad_norm": 0.3371136784553528, "learning_rate": 0.0001, "loss": 0.9193, "step": 1834 }, { "epoch": 1.630929896678147, "grad_norm": 0.3484685719013214, "learning_rate": 0.0001, "loss": 0.9035, "step": 1835 }, { "epoch": 1.6318186868125764, "grad_norm": 0.3327842354774475, "learning_rate": 0.0001, "loss": 0.7831, "step": 1836 }, { "epoch": 1.6327074769470058, "grad_norm": 0.290935754776001, "learning_rate": 0.0001, "loss": 0.8634, "step": 1837 }, { "epoch": 1.6335962670814355, "grad_norm": 0.35313087701797485, "learning_rate": 0.0001, "loss": 0.8898, "step": 1838 }, { "epoch": 1.634485057215865, "grad_norm": 0.35210633277893066, "learning_rate": 0.0001, "loss": 0.9164, "step": 1839 }, { "epoch": 1.6353738473502943, "grad_norm": 0.36426299810409546, "learning_rate": 0.0001, "loss": 0.8369, "step": 1840 }, { "epoch": 1.636262637484724, "grad_norm": 0.36496469378471375, "learning_rate": 0.0001, "loss": 0.94, "step": 1841 }, { "epoch": 1.6371514276191534, "grad_norm": 0.3476478159427643, "learning_rate": 0.0001, "loss": 0.8577, "step": 1842 }, { "epoch": 1.6380402177535829, "grad_norm": 0.3203316926956177, "learning_rate": 0.0001, "loss": 0.861, "step": 1843 }, { "epoch": 1.6389290078880125, "grad_norm": 0.3230277895927429, "learning_rate": 0.0001, "loss": 0.8456, "step": 1844 }, { "epoch": 1.639817798022442, "grad_norm": 0.3015528917312622, "learning_rate": 0.0001, "loss": 0.8744, "step": 1845 }, { "epoch": 1.6407065881568714, "grad_norm": 0.35518017411231995, "learning_rate": 0.0001, "loss": 0.8698, "step": 1846 }, { "epoch": 1.641595378291301, "grad_norm": 0.35839059948921204, "learning_rate": 0.0001, "loss": 0.8935, "step": 1847 }, { "epoch": 1.6424841684257305, "grad_norm": 0.3377850651741028, "learning_rate": 0.0001, "loss": 0.9225, "step": 1848 }, { "epoch": 1.64337295856016, "grad_norm": 0.3225672245025635, "learning_rate": 0.0001, "loss": 0.9355, "step": 1849 }, { "epoch": 1.6442617486945896, "grad_norm": 0.3357776999473572, "learning_rate": 0.0001, "loss": 0.8404, "step": 1850 }, { "epoch": 1.6451505388290188, "grad_norm": 0.3443959653377533, "learning_rate": 0.0001, "loss": 0.9009, "step": 1851 }, { "epoch": 1.6460393289634485, "grad_norm": 0.3641587495803833, "learning_rate": 0.0001, "loss": 0.9419, "step": 1852 }, { "epoch": 1.6469281190978782, "grad_norm": 0.3614901304244995, "learning_rate": 0.0001, "loss": 0.8922, "step": 1853 }, { "epoch": 1.6478169092323074, "grad_norm": 0.3756221532821655, "learning_rate": 0.0001, "loss": 0.8531, "step": 1854 }, { "epoch": 1.648705699366737, "grad_norm": 0.32705435156822205, "learning_rate": 0.0001, "loss": 0.9559, "step": 1855 }, { "epoch": 1.6495944895011665, "grad_norm": 0.354168564081192, "learning_rate": 0.0001, "loss": 0.8389, "step": 1856 }, { "epoch": 1.650483279635596, "grad_norm": 0.3950870633125305, "learning_rate": 0.0001, "loss": 0.8642, "step": 1857 }, { "epoch": 1.6513720697700256, "grad_norm": 0.3480079174041748, "learning_rate": 0.0001, "loss": 0.9008, "step": 1858 }, { "epoch": 1.652260859904455, "grad_norm": 0.37953078746795654, "learning_rate": 0.0001, "loss": 0.9134, "step": 1859 }, { "epoch": 1.6531496500388845, "grad_norm": 0.9250193238258362, "learning_rate": 0.0001, "loss": 0.8301, "step": 1860 }, { "epoch": 1.6540384401733141, "grad_norm": 0.33579471707344055, "learning_rate": 0.0001, "loss": 0.8996, "step": 1861 }, { "epoch": 1.6549272303077436, "grad_norm": 0.37209945917129517, "learning_rate": 0.0001, "loss": 0.8403, "step": 1862 }, { "epoch": 1.655816020442173, "grad_norm": 0.3377666473388672, "learning_rate": 0.0001, "loss": 0.871, "step": 1863 }, { "epoch": 1.6567048105766027, "grad_norm": 0.37800464034080505, "learning_rate": 0.0001, "loss": 0.9146, "step": 1864 }, { "epoch": 1.6575936007110321, "grad_norm": 0.4948398172855377, "learning_rate": 0.0001, "loss": 0.9485, "step": 1865 }, { "epoch": 1.6584823908454616, "grad_norm": 0.4751080274581909, "learning_rate": 0.0001, "loss": 0.8765, "step": 1866 }, { "epoch": 1.6593711809798912, "grad_norm": 0.375337690114975, "learning_rate": 0.0001, "loss": 0.9074, "step": 1867 }, { "epoch": 1.6602599711143207, "grad_norm": 1.2624320983886719, "learning_rate": 0.0001, "loss": 0.9032, "step": 1868 }, { "epoch": 1.66114876124875, "grad_norm": 0.7726836800575256, "learning_rate": 0.0001, "loss": 0.9992, "step": 1869 }, { "epoch": 1.6620375513831798, "grad_norm": 0.38178691267967224, "learning_rate": 0.0001, "loss": 0.9621, "step": 1870 }, { "epoch": 1.6629263415176092, "grad_norm": 0.39392024278640747, "learning_rate": 0.0001, "loss": 0.9873, "step": 1871 }, { "epoch": 1.6638151316520386, "grad_norm": 1.0342029333114624, "learning_rate": 0.0001, "loss": 0.9192, "step": 1872 }, { "epoch": 1.6647039217864683, "grad_norm": 0.3234097361564636, "learning_rate": 0.0001, "loss": 0.9132, "step": 1873 }, { "epoch": 1.6655927119208975, "grad_norm": 2.331127405166626, "learning_rate": 0.0001, "loss": 0.9651, "step": 1874 }, { "epoch": 1.6664815020553272, "grad_norm": 0.3696269989013672, "learning_rate": 0.0001, "loss": 0.8714, "step": 1875 }, { "epoch": 1.6673702921897569, "grad_norm": 0.3183539807796478, "learning_rate": 0.0001, "loss": 0.8484, "step": 1876 }, { "epoch": 1.668259082324186, "grad_norm": 0.3290097713470459, "learning_rate": 0.0001, "loss": 0.9776, "step": 1877 }, { "epoch": 1.6691478724586157, "grad_norm": 0.3337092101573944, "learning_rate": 0.0001, "loss": 0.9359, "step": 1878 }, { "epoch": 1.6700366625930452, "grad_norm": 0.3510020971298218, "learning_rate": 0.0001, "loss": 0.9407, "step": 1879 }, { "epoch": 1.6709254527274746, "grad_norm": 0.3333737254142761, "learning_rate": 0.0001, "loss": 0.8822, "step": 1880 }, { "epoch": 1.6718142428619043, "grad_norm": 0.3105640709400177, "learning_rate": 0.0001, "loss": 0.8645, "step": 1881 }, { "epoch": 1.6727030329963337, "grad_norm": 0.29750367999076843, "learning_rate": 0.0001, "loss": 0.9338, "step": 1882 }, { "epoch": 1.6735918231307632, "grad_norm": 0.34713029861450195, "learning_rate": 0.0001, "loss": 0.9299, "step": 1883 }, { "epoch": 1.6744806132651928, "grad_norm": 0.3725203275680542, "learning_rate": 0.0001, "loss": 0.9744, "step": 1884 }, { "epoch": 1.6753694033996223, "grad_norm": 0.3104175925254822, "learning_rate": 0.0001, "loss": 0.9057, "step": 1885 }, { "epoch": 1.6762581935340517, "grad_norm": 0.37686604261398315, "learning_rate": 0.0001, "loss": 1.0123, "step": 1886 }, { "epoch": 1.6771469836684814, "grad_norm": 0.29763513803482056, "learning_rate": 0.0001, "loss": 0.8857, "step": 1887 }, { "epoch": 1.6780357738029108, "grad_norm": 0.3398146629333496, "learning_rate": 0.0001, "loss": 0.8808, "step": 1888 }, { "epoch": 1.6789245639373402, "grad_norm": 0.33802369236946106, "learning_rate": 0.0001, "loss": 0.8505, "step": 1889 }, { "epoch": 1.67981335407177, "grad_norm": 0.28601887822151184, "learning_rate": 0.0001, "loss": 0.8208, "step": 1890 }, { "epoch": 1.6807021442061993, "grad_norm": 0.31592151522636414, "learning_rate": 0.0001, "loss": 0.864, "step": 1891 }, { "epoch": 1.6815909343406288, "grad_norm": 0.3350915014743805, "learning_rate": 0.0001, "loss": 0.8605, "step": 1892 }, { "epoch": 1.6824797244750584, "grad_norm": 0.37870457768440247, "learning_rate": 0.0001, "loss": 0.9751, "step": 1893 }, { "epoch": 1.6833685146094879, "grad_norm": 0.3566136956214905, "learning_rate": 0.0001, "loss": 0.9273, "step": 1894 }, { "epoch": 1.6842573047439173, "grad_norm": 0.3385891318321228, "learning_rate": 0.0001, "loss": 0.8724, "step": 1895 }, { "epoch": 1.685146094878347, "grad_norm": 0.3746001720428467, "learning_rate": 0.0001, "loss": 0.9185, "step": 1896 }, { "epoch": 1.6860348850127762, "grad_norm": 0.3757399320602417, "learning_rate": 0.0001, "loss": 0.8831, "step": 1897 }, { "epoch": 1.6869236751472059, "grad_norm": 0.33278876543045044, "learning_rate": 0.0001, "loss": 0.8558, "step": 1898 }, { "epoch": 1.6878124652816355, "grad_norm": 0.3170175552368164, "learning_rate": 0.0001, "loss": 0.8932, "step": 1899 }, { "epoch": 1.6887012554160648, "grad_norm": 0.32355326414108276, "learning_rate": 0.0001, "loss": 0.8787, "step": 1900 }, { "epoch": 1.6895900455504944, "grad_norm": 0.31958630681037903, "learning_rate": 0.0001, "loss": 0.9199, "step": 1901 }, { "epoch": 1.6904788356849239, "grad_norm": 0.3621491491794586, "learning_rate": 0.0001, "loss": 0.8973, "step": 1902 }, { "epoch": 1.6913676258193533, "grad_norm": 0.48310527205467224, "learning_rate": 0.0001, "loss": 0.8819, "step": 1903 }, { "epoch": 1.692256415953783, "grad_norm": 0.3930363059043884, "learning_rate": 0.0001, "loss": 0.9095, "step": 1904 }, { "epoch": 1.6931452060882124, "grad_norm": 0.5452322363853455, "learning_rate": 0.0001, "loss": 0.8147, "step": 1905 }, { "epoch": 1.6940339962226418, "grad_norm": 0.3676657974720001, "learning_rate": 0.0001, "loss": 0.8653, "step": 1906 }, { "epoch": 1.6949227863570715, "grad_norm": 0.5217211246490479, "learning_rate": 0.0001, "loss": 0.8173, "step": 1907 }, { "epoch": 1.695811576491501, "grad_norm": 0.4746188223361969, "learning_rate": 0.0001, "loss": 0.9034, "step": 1908 }, { "epoch": 1.6967003666259304, "grad_norm": 0.6145462989807129, "learning_rate": 0.0001, "loss": 0.9044, "step": 1909 }, { "epoch": 1.69758915676036, "grad_norm": 0.43200716376304626, "learning_rate": 0.0001, "loss": 0.9742, "step": 1910 }, { "epoch": 1.6984779468947895, "grad_norm": 0.3674427568912506, "learning_rate": 0.0001, "loss": 0.9574, "step": 1911 }, { "epoch": 1.699366737029219, "grad_norm": 0.3205876052379608, "learning_rate": 0.0001, "loss": 0.83, "step": 1912 }, { "epoch": 1.7002555271636486, "grad_norm": 0.41618865728378296, "learning_rate": 0.0001, "loss": 0.9021, "step": 1913 }, { "epoch": 1.701144317298078, "grad_norm": 0.40217745304107666, "learning_rate": 0.0001, "loss": 0.8975, "step": 1914 }, { "epoch": 1.7020331074325075, "grad_norm": 0.37823233008384705, "learning_rate": 0.0001, "loss": 0.9554, "step": 1915 }, { "epoch": 1.7029218975669371, "grad_norm": 0.37665289640426636, "learning_rate": 0.0001, "loss": 0.9769, "step": 1916 }, { "epoch": 1.7038106877013666, "grad_norm": 0.3486294746398926, "learning_rate": 0.0001, "loss": 0.9428, "step": 1917 }, { "epoch": 1.704699477835796, "grad_norm": 0.40671101212501526, "learning_rate": 0.0001, "loss": 0.9127, "step": 1918 }, { "epoch": 1.7055882679702257, "grad_norm": 0.35006022453308105, "learning_rate": 0.0001, "loss": 0.9617, "step": 1919 }, { "epoch": 1.7064770581046549, "grad_norm": 0.36815375089645386, "learning_rate": 0.0001, "loss": 0.9224, "step": 1920 }, { "epoch": 1.7073658482390845, "grad_norm": 0.34530341625213623, "learning_rate": 0.0001, "loss": 0.8884, "step": 1921 }, { "epoch": 1.7082546383735142, "grad_norm": 0.3507446348667145, "learning_rate": 0.0001, "loss": 0.9667, "step": 1922 }, { "epoch": 1.7091434285079434, "grad_norm": 0.34675371646881104, "learning_rate": 0.0001, "loss": 0.8347, "step": 1923 }, { "epoch": 1.710032218642373, "grad_norm": 0.34880420565605164, "learning_rate": 0.0001, "loss": 0.8989, "step": 1924 }, { "epoch": 1.7109210087768025, "grad_norm": 0.35518354177474976, "learning_rate": 0.0001, "loss": 0.8713, "step": 1925 }, { "epoch": 1.711809798911232, "grad_norm": 0.34916481375694275, "learning_rate": 0.0001, "loss": 0.8733, "step": 1926 }, { "epoch": 1.7126985890456616, "grad_norm": 0.3095919191837311, "learning_rate": 0.0001, "loss": 0.9293, "step": 1927 }, { "epoch": 1.713587379180091, "grad_norm": 0.36070945858955383, "learning_rate": 0.0001, "loss": 0.9314, "step": 1928 }, { "epoch": 1.7144761693145205, "grad_norm": 0.36071017384529114, "learning_rate": 0.0001, "loss": 1.0066, "step": 1929 }, { "epoch": 1.7153649594489502, "grad_norm": 0.3244760036468506, "learning_rate": 0.0001, "loss": 0.8946, "step": 1930 }, { "epoch": 1.7162537495833796, "grad_norm": 0.34101998805999756, "learning_rate": 0.0001, "loss": 0.8418, "step": 1931 }, { "epoch": 1.717142539717809, "grad_norm": 0.38111263513565063, "learning_rate": 0.0001, "loss": 0.855, "step": 1932 }, { "epoch": 1.7180313298522387, "grad_norm": 0.3378105163574219, "learning_rate": 0.0001, "loss": 0.8144, "step": 1933 }, { "epoch": 1.7189201199866682, "grad_norm": 0.3542186915874481, "learning_rate": 0.0001, "loss": 0.8838, "step": 1934 }, { "epoch": 1.7198089101210976, "grad_norm": 0.3762187957763672, "learning_rate": 0.0001, "loss": 0.8893, "step": 1935 }, { "epoch": 1.7206977002555273, "grad_norm": 0.3594928979873657, "learning_rate": 0.0001, "loss": 0.9449, "step": 1936 }, { "epoch": 1.7215864903899567, "grad_norm": 0.3138940632343292, "learning_rate": 0.0001, "loss": 0.9581, "step": 1937 }, { "epoch": 1.7224752805243861, "grad_norm": 0.3370933532714844, "learning_rate": 0.0001, "loss": 0.903, "step": 1938 }, { "epoch": 1.7233640706588158, "grad_norm": 0.3399626612663269, "learning_rate": 0.0001, "loss": 0.8864, "step": 1939 }, { "epoch": 1.7242528607932452, "grad_norm": 0.329193115234375, "learning_rate": 0.0001, "loss": 0.9119, "step": 1940 }, { "epoch": 1.7251416509276747, "grad_norm": 0.3717508316040039, "learning_rate": 0.0001, "loss": 0.8974, "step": 1941 }, { "epoch": 1.7260304410621043, "grad_norm": 0.3529798090457916, "learning_rate": 0.0001, "loss": 0.9278, "step": 1942 }, { "epoch": 1.7269192311965336, "grad_norm": 0.38855284452438354, "learning_rate": 0.0001, "loss": 0.8737, "step": 1943 }, { "epoch": 1.7278080213309632, "grad_norm": 0.3287023901939392, "learning_rate": 0.0001, "loss": 0.9743, "step": 1944 }, { "epoch": 1.728696811465393, "grad_norm": 0.3897152841091156, "learning_rate": 0.0001, "loss": 0.8451, "step": 1945 }, { "epoch": 1.729585601599822, "grad_norm": 0.3124273419380188, "learning_rate": 0.0001, "loss": 0.8386, "step": 1946 }, { "epoch": 1.7304743917342518, "grad_norm": 0.2802036702632904, "learning_rate": 0.0001, "loss": 0.8524, "step": 1947 }, { "epoch": 1.7313631818686812, "grad_norm": 0.33942630887031555, "learning_rate": 0.0001, "loss": 0.8643, "step": 1948 }, { "epoch": 1.7322519720031107, "grad_norm": 0.3702329695224762, "learning_rate": 0.0001, "loss": 0.9303, "step": 1949 }, { "epoch": 1.7331407621375403, "grad_norm": 0.31775155663490295, "learning_rate": 0.0001, "loss": 0.8537, "step": 1950 }, { "epoch": 1.7340295522719698, "grad_norm": 0.3505903482437134, "learning_rate": 0.0001, "loss": 0.8882, "step": 1951 }, { "epoch": 1.7349183424063992, "grad_norm": 0.3102082908153534, "learning_rate": 0.0001, "loss": 0.8552, "step": 1952 }, { "epoch": 1.7358071325408289, "grad_norm": 0.40489935874938965, "learning_rate": 0.0001, "loss": 0.945, "step": 1953 }, { "epoch": 1.7366959226752583, "grad_norm": 0.35709646344184875, "learning_rate": 0.0001, "loss": 0.8496, "step": 1954 }, { "epoch": 1.7375847128096877, "grad_norm": 0.33537212014198303, "learning_rate": 0.0001, "loss": 0.9205, "step": 1955 }, { "epoch": 1.7384735029441174, "grad_norm": 0.3619324564933777, "learning_rate": 0.0001, "loss": 0.9342, "step": 1956 }, { "epoch": 1.7393622930785468, "grad_norm": 0.31395861506462097, "learning_rate": 0.0001, "loss": 0.8663, "step": 1957 }, { "epoch": 1.7402510832129763, "grad_norm": 0.3466954827308655, "learning_rate": 0.0001, "loss": 0.8824, "step": 1958 }, { "epoch": 1.741139873347406, "grad_norm": 0.34001612663269043, "learning_rate": 0.0001, "loss": 0.9166, "step": 1959 }, { "epoch": 1.7420286634818354, "grad_norm": 0.31668078899383545, "learning_rate": 0.0001, "loss": 0.8834, "step": 1960 }, { "epoch": 1.7429174536162648, "grad_norm": 0.33864402770996094, "learning_rate": 0.0001, "loss": 0.9826, "step": 1961 }, { "epoch": 1.7438062437506945, "grad_norm": 0.34717845916748047, "learning_rate": 0.0001, "loss": 0.9625, "step": 1962 }, { "epoch": 1.744695033885124, "grad_norm": 0.3487595021724701, "learning_rate": 0.0001, "loss": 0.9137, "step": 1963 }, { "epoch": 1.7455838240195534, "grad_norm": 0.28423944115638733, "learning_rate": 0.0001, "loss": 0.8175, "step": 1964 }, { "epoch": 1.746472614153983, "grad_norm": 0.4026448428630829, "learning_rate": 0.0001, "loss": 0.9801, "step": 1965 }, { "epoch": 1.7473614042884122, "grad_norm": 0.34367966651916504, "learning_rate": 0.0001, "loss": 0.908, "step": 1966 }, { "epoch": 1.748250194422842, "grad_norm": 0.3341876268386841, "learning_rate": 0.0001, "loss": 0.9105, "step": 1967 }, { "epoch": 1.7491389845572716, "grad_norm": 0.3579274117946625, "learning_rate": 0.0001, "loss": 0.8702, "step": 1968 }, { "epoch": 1.7500277746917008, "grad_norm": 0.30886539816856384, "learning_rate": 0.0001, "loss": 0.9024, "step": 1969 }, { "epoch": 1.7509165648261305, "grad_norm": 0.33523109555244446, "learning_rate": 0.0001, "loss": 0.8876, "step": 1970 }, { "epoch": 1.75180535496056, "grad_norm": 0.32511594891548157, "learning_rate": 0.0001, "loss": 0.9099, "step": 1971 }, { "epoch": 1.7526941450949893, "grad_norm": 0.3819718658924103, "learning_rate": 0.0001, "loss": 0.9776, "step": 1972 }, { "epoch": 1.753582935229419, "grad_norm": 0.32984215021133423, "learning_rate": 0.0001, "loss": 0.9106, "step": 1973 }, { "epoch": 1.7544717253638484, "grad_norm": 0.4052921235561371, "learning_rate": 0.0001, "loss": 0.9176, "step": 1974 }, { "epoch": 1.7553605154982779, "grad_norm": 0.3490274250507355, "learning_rate": 0.0001, "loss": 0.9902, "step": 1975 }, { "epoch": 1.7562493056327075, "grad_norm": 0.37016966938972473, "learning_rate": 0.0001, "loss": 0.9497, "step": 1976 }, { "epoch": 1.757138095767137, "grad_norm": 0.3575468957424164, "learning_rate": 0.0001, "loss": 0.864, "step": 1977 }, { "epoch": 1.7580268859015664, "grad_norm": 0.34194570779800415, "learning_rate": 0.0001, "loss": 0.9862, "step": 1978 }, { "epoch": 1.758915676035996, "grad_norm": 0.361395001411438, "learning_rate": 0.0001, "loss": 0.8745, "step": 1979 }, { "epoch": 1.7598044661704255, "grad_norm": 0.3402308225631714, "learning_rate": 0.0001, "loss": 0.9091, "step": 1980 }, { "epoch": 1.760693256304855, "grad_norm": 0.3822394013404846, "learning_rate": 0.0001, "loss": 0.9479, "step": 1981 }, { "epoch": 1.7615820464392846, "grad_norm": 0.3259856700897217, "learning_rate": 0.0001, "loss": 0.912, "step": 1982 }, { "epoch": 1.762470836573714, "grad_norm": 0.40468284487724304, "learning_rate": 0.0001, "loss": 0.8525, "step": 1983 }, { "epoch": 1.7633596267081435, "grad_norm": 0.4105701446533203, "learning_rate": 0.0001, "loss": 0.8759, "step": 1984 }, { "epoch": 1.7642484168425732, "grad_norm": 0.5805673003196716, "learning_rate": 0.0001, "loss": 1.0095, "step": 1985 }, { "epoch": 1.7651372069770026, "grad_norm": 0.6856684684753418, "learning_rate": 0.0001, "loss": 0.9672, "step": 1986 }, { "epoch": 1.766025997111432, "grad_norm": 0.3889956474304199, "learning_rate": 0.0001, "loss": 0.8923, "step": 1987 }, { "epoch": 1.7669147872458617, "grad_norm": 0.37936434149742126, "learning_rate": 0.0001, "loss": 0.9341, "step": 1988 }, { "epoch": 1.767803577380291, "grad_norm": 0.34393537044525146, "learning_rate": 0.0001, "loss": 0.8836, "step": 1989 }, { "epoch": 1.7686923675147206, "grad_norm": 0.34456318616867065, "learning_rate": 0.0001, "loss": 0.9741, "step": 1990 }, { "epoch": 1.7695811576491502, "grad_norm": 0.32842886447906494, "learning_rate": 0.0001, "loss": 0.9137, "step": 1991 }, { "epoch": 1.7704699477835795, "grad_norm": 0.35680335760116577, "learning_rate": 0.0001, "loss": 0.8332, "step": 1992 }, { "epoch": 1.7713587379180091, "grad_norm": 0.3687342703342438, "learning_rate": 0.0001, "loss": 0.9065, "step": 1993 }, { "epoch": 1.7722475280524386, "grad_norm": 0.39165356755256653, "learning_rate": 0.0001, "loss": 0.933, "step": 1994 }, { "epoch": 1.773136318186868, "grad_norm": 0.36600562930107117, "learning_rate": 0.0001, "loss": 0.8694, "step": 1995 }, { "epoch": 1.7740251083212977, "grad_norm": 0.3371865749359131, "learning_rate": 0.0001, "loss": 0.9622, "step": 1996 }, { "epoch": 1.7749138984557271, "grad_norm": 0.3732564449310303, "learning_rate": 0.0001, "loss": 1.0046, "step": 1997 }, { "epoch": 1.7758026885901566, "grad_norm": 0.33094069361686707, "learning_rate": 0.0001, "loss": 0.8466, "step": 1998 }, { "epoch": 1.7766914787245862, "grad_norm": 0.3384708762168884, "learning_rate": 0.0001, "loss": 0.855, "step": 1999 }, { "epoch": 1.7775802688590157, "grad_norm": 0.3246244788169861, "learning_rate": 0.0001, "loss": 0.9129, "step": 2000 }, { "epoch": 1.778469058993445, "grad_norm": 0.3717402517795563, "learning_rate": 0.0001, "loss": 0.9298, "step": 2001 }, { "epoch": 1.7793578491278748, "grad_norm": 0.4107438325881958, "learning_rate": 0.0001, "loss": 0.9007, "step": 2002 }, { "epoch": 1.7802466392623042, "grad_norm": 0.355356901884079, "learning_rate": 0.0001, "loss": 0.8573, "step": 2003 }, { "epoch": 1.7811354293967336, "grad_norm": 0.34428539872169495, "learning_rate": 0.0001, "loss": 0.9481, "step": 2004 }, { "epoch": 1.7820242195311633, "grad_norm": 0.3407171666622162, "learning_rate": 0.0001, "loss": 0.9211, "step": 2005 }, { "epoch": 1.7829130096655927, "grad_norm": 0.3513103425502777, "learning_rate": 0.0001, "loss": 0.9179, "step": 2006 }, { "epoch": 1.7838017998000222, "grad_norm": 0.32477250695228577, "learning_rate": 0.0001, "loss": 0.8792, "step": 2007 }, { "epoch": 1.7846905899344518, "grad_norm": 0.3490789234638214, "learning_rate": 0.0001, "loss": 0.9442, "step": 2008 }, { "epoch": 1.7855793800688813, "grad_norm": 0.3724386692047119, "learning_rate": 0.0001, "loss": 0.9503, "step": 2009 }, { "epoch": 1.7864681702033107, "grad_norm": 0.3316227197647095, "learning_rate": 0.0001, "loss": 0.9598, "step": 2010 }, { "epoch": 1.7873569603377404, "grad_norm": 0.35122936964035034, "learning_rate": 0.0001, "loss": 0.8149, "step": 2011 }, { "epoch": 1.7882457504721696, "grad_norm": 0.4029070734977722, "learning_rate": 0.0001, "loss": 0.9305, "step": 2012 }, { "epoch": 1.7891345406065993, "grad_norm": 0.31067872047424316, "learning_rate": 0.0001, "loss": 0.8878, "step": 2013 }, { "epoch": 1.790023330741029, "grad_norm": 0.34684422612190247, "learning_rate": 0.0001, "loss": 0.9143, "step": 2014 }, { "epoch": 1.7909121208754581, "grad_norm": 0.32135358452796936, "learning_rate": 0.0001, "loss": 0.926, "step": 2015 }, { "epoch": 1.7918009110098878, "grad_norm": 0.32894784212112427, "learning_rate": 0.0001, "loss": 0.9249, "step": 2016 }, { "epoch": 1.7926897011443172, "grad_norm": 0.35362952947616577, "learning_rate": 0.0001, "loss": 0.9004, "step": 2017 }, { "epoch": 1.7935784912787467, "grad_norm": 0.3030708432197571, "learning_rate": 0.0001, "loss": 1.0155, "step": 2018 }, { "epoch": 1.7944672814131764, "grad_norm": 0.3144596219062805, "learning_rate": 0.0001, "loss": 0.9367, "step": 2019 }, { "epoch": 1.7953560715476058, "grad_norm": 0.34851983189582825, "learning_rate": 0.0001, "loss": 0.8684, "step": 2020 }, { "epoch": 1.7962448616820352, "grad_norm": 0.32248032093048096, "learning_rate": 0.0001, "loss": 0.8977, "step": 2021 }, { "epoch": 1.797133651816465, "grad_norm": 0.3193919062614441, "learning_rate": 0.0001, "loss": 0.9297, "step": 2022 }, { "epoch": 1.7980224419508943, "grad_norm": 0.3607577085494995, "learning_rate": 0.0001, "loss": 0.9774, "step": 2023 }, { "epoch": 1.7989112320853238, "grad_norm": 0.30439522862434387, "learning_rate": 0.0001, "loss": 0.9313, "step": 2024 }, { "epoch": 1.7998000222197534, "grad_norm": 0.31033584475517273, "learning_rate": 0.0001, "loss": 0.9535, "step": 2025 }, { "epoch": 1.8006888123541829, "grad_norm": 0.34924376010894775, "learning_rate": 0.0001, "loss": 1.0392, "step": 2026 }, { "epoch": 1.8015776024886123, "grad_norm": 0.3580887019634247, "learning_rate": 0.0001, "loss": 0.8772, "step": 2027 }, { "epoch": 1.802466392623042, "grad_norm": 0.30029070377349854, "learning_rate": 0.0001, "loss": 0.904, "step": 2028 }, { "epoch": 1.8033551827574714, "grad_norm": 0.36260783672332764, "learning_rate": 0.0001, "loss": 0.8598, "step": 2029 }, { "epoch": 1.8042439728919009, "grad_norm": 0.3398006558418274, "learning_rate": 0.0001, "loss": 0.926, "step": 2030 }, { "epoch": 1.8051327630263305, "grad_norm": 0.31382572650909424, "learning_rate": 0.0001, "loss": 0.871, "step": 2031 }, { "epoch": 1.80602155316076, "grad_norm": 0.33854883909225464, "learning_rate": 0.0001, "loss": 0.9493, "step": 2032 }, { "epoch": 1.8069103432951894, "grad_norm": 0.334824800491333, "learning_rate": 0.0001, "loss": 0.8451, "step": 2033 }, { "epoch": 1.807799133429619, "grad_norm": 0.31903523206710815, "learning_rate": 0.0001, "loss": 0.8204, "step": 2034 }, { "epoch": 1.8086879235640483, "grad_norm": 0.351361483335495, "learning_rate": 0.0001, "loss": 0.8484, "step": 2035 }, { "epoch": 1.809576713698478, "grad_norm": 0.3341825604438782, "learning_rate": 0.0001, "loss": 0.9075, "step": 2036 }, { "epoch": 1.8104655038329076, "grad_norm": 0.3398033678531647, "learning_rate": 0.0001, "loss": 0.9491, "step": 2037 }, { "epoch": 1.8113542939673368, "grad_norm": 0.3391786813735962, "learning_rate": 0.0001, "loss": 0.9576, "step": 2038 }, { "epoch": 1.8122430841017665, "grad_norm": 0.31477200984954834, "learning_rate": 0.0001, "loss": 0.8863, "step": 2039 }, { "epoch": 1.813131874236196, "grad_norm": 0.31994014978408813, "learning_rate": 0.0001, "loss": 0.903, "step": 2040 }, { "epoch": 1.8140206643706254, "grad_norm": 0.38486775755882263, "learning_rate": 0.0001, "loss": 0.9182, "step": 2041 }, { "epoch": 1.814909454505055, "grad_norm": 0.3258659839630127, "learning_rate": 0.0001, "loss": 0.8561, "step": 2042 }, { "epoch": 1.8157982446394845, "grad_norm": 0.36081910133361816, "learning_rate": 0.0001, "loss": 0.9326, "step": 2043 }, { "epoch": 1.816687034773914, "grad_norm": 0.33326515555381775, "learning_rate": 0.0001, "loss": 0.941, "step": 2044 }, { "epoch": 1.8175758249083436, "grad_norm": 0.3134666383266449, "learning_rate": 0.0001, "loss": 0.7828, "step": 2045 }, { "epoch": 1.818464615042773, "grad_norm": 0.318588525056839, "learning_rate": 0.0001, "loss": 0.974, "step": 2046 }, { "epoch": 1.8193534051772025, "grad_norm": 0.3159397840499878, "learning_rate": 0.0001, "loss": 0.8277, "step": 2047 }, { "epoch": 1.8202421953116321, "grad_norm": 0.3912515938282013, "learning_rate": 0.0001, "loss": 0.9024, "step": 2048 }, { "epoch": 1.8211309854460616, "grad_norm": 0.3366560935974121, "learning_rate": 0.0001, "loss": 0.9652, "step": 2049 }, { "epoch": 1.822019775580491, "grad_norm": 0.33880704641342163, "learning_rate": 0.0001, "loss": 0.9178, "step": 2050 }, { "epoch": 1.8229085657149207, "grad_norm": 0.3547055721282959, "learning_rate": 0.0001, "loss": 0.8965, "step": 2051 }, { "epoch": 1.82379735584935, "grad_norm": 0.3428584337234497, "learning_rate": 0.0001, "loss": 0.8787, "step": 2052 }, { "epoch": 1.8246861459837795, "grad_norm": 0.3745492398738861, "learning_rate": 0.0001, "loss": 0.9322, "step": 2053 }, { "epoch": 1.8255749361182092, "grad_norm": 0.32607874274253845, "learning_rate": 0.0001, "loss": 0.8768, "step": 2054 }, { "epoch": 1.8264637262526386, "grad_norm": 0.5843383073806763, "learning_rate": 0.0001, "loss": 0.9578, "step": 2055 }, { "epoch": 1.827352516387068, "grad_norm": 0.3096837103366852, "learning_rate": 0.0001, "loss": 0.8705, "step": 2056 }, { "epoch": 1.8282413065214977, "grad_norm": 0.32685980200767517, "learning_rate": 0.0001, "loss": 0.907, "step": 2057 }, { "epoch": 1.829130096655927, "grad_norm": 0.3481847643852234, "learning_rate": 0.0001, "loss": 0.9312, "step": 2058 }, { "epoch": 1.8300188867903566, "grad_norm": 0.3510027229785919, "learning_rate": 0.0001, "loss": 0.9272, "step": 2059 }, { "epoch": 1.8309076769247863, "grad_norm": 0.3125600814819336, "learning_rate": 0.0001, "loss": 0.8919, "step": 2060 }, { "epoch": 1.8317964670592155, "grad_norm": 0.343363881111145, "learning_rate": 0.0001, "loss": 0.9294, "step": 2061 }, { "epoch": 1.8326852571936452, "grad_norm": 0.30843785405158997, "learning_rate": 0.0001, "loss": 0.9, "step": 2062 }, { "epoch": 1.8335740473280746, "grad_norm": 0.38246282935142517, "learning_rate": 0.0001, "loss": 0.9301, "step": 2063 }, { "epoch": 1.834462837462504, "grad_norm": 0.30538472533226013, "learning_rate": 0.0001, "loss": 1.0181, "step": 2064 }, { "epoch": 1.8353516275969337, "grad_norm": 0.3176038861274719, "learning_rate": 0.0001, "loss": 0.8301, "step": 2065 }, { "epoch": 1.8362404177313632, "grad_norm": 0.3275960683822632, "learning_rate": 0.0001, "loss": 0.8787, "step": 2066 }, { "epoch": 1.8371292078657926, "grad_norm": 0.3405584394931793, "learning_rate": 0.0001, "loss": 0.8562, "step": 2067 }, { "epoch": 1.8380179980002223, "grad_norm": 0.3240833580493927, "learning_rate": 0.0001, "loss": 0.8733, "step": 2068 }, { "epoch": 1.8389067881346517, "grad_norm": 0.3166552484035492, "learning_rate": 0.0001, "loss": 0.9458, "step": 2069 }, { "epoch": 1.8397955782690811, "grad_norm": 0.3474705219268799, "learning_rate": 0.0001, "loss": 0.8703, "step": 2070 }, { "epoch": 1.8406843684035108, "grad_norm": 0.3441227376461029, "learning_rate": 0.0001, "loss": 0.9149, "step": 2071 }, { "epoch": 1.8415731585379402, "grad_norm": 0.3015057146549225, "learning_rate": 0.0001, "loss": 0.8754, "step": 2072 }, { "epoch": 1.8424619486723697, "grad_norm": 0.33986207842826843, "learning_rate": 0.0001, "loss": 0.9179, "step": 2073 }, { "epoch": 1.8433507388067993, "grad_norm": 0.33093711733818054, "learning_rate": 0.0001, "loss": 0.9072, "step": 2074 }, { "epoch": 1.8442395289412288, "grad_norm": 0.338056743144989, "learning_rate": 0.0001, "loss": 0.8816, "step": 2075 }, { "epoch": 1.8451283190756582, "grad_norm": 0.32622647285461426, "learning_rate": 0.0001, "loss": 0.9017, "step": 2076 }, { "epoch": 1.8460171092100879, "grad_norm": 0.33805835247039795, "learning_rate": 0.0001, "loss": 0.9135, "step": 2077 }, { "epoch": 1.8469058993445173, "grad_norm": 0.3360745906829834, "learning_rate": 0.0001, "loss": 0.8238, "step": 2078 }, { "epoch": 1.8477946894789468, "grad_norm": 0.3394176959991455, "learning_rate": 0.0001, "loss": 0.9539, "step": 2079 }, { "epoch": 1.8486834796133764, "grad_norm": 0.3259996175765991, "learning_rate": 0.0001, "loss": 0.8874, "step": 2080 }, { "epoch": 1.8495722697478056, "grad_norm": 0.30118152499198914, "learning_rate": 0.0001, "loss": 0.8875, "step": 2081 }, { "epoch": 1.8504610598822353, "grad_norm": 0.3274901211261749, "learning_rate": 0.0001, "loss": 0.9579, "step": 2082 }, { "epoch": 1.851349850016665, "grad_norm": 0.34811556339263916, "learning_rate": 0.0001, "loss": 0.9363, "step": 2083 }, { "epoch": 1.8522386401510942, "grad_norm": 0.35961511731147766, "learning_rate": 0.0001, "loss": 0.8803, "step": 2084 }, { "epoch": 1.8531274302855238, "grad_norm": 0.3167450726032257, "learning_rate": 0.0001, "loss": 0.8289, "step": 2085 }, { "epoch": 1.8540162204199533, "grad_norm": 0.32763344049453735, "learning_rate": 0.0001, "loss": 0.8965, "step": 2086 }, { "epoch": 1.8549050105543827, "grad_norm": 0.32265403866767883, "learning_rate": 0.0001, "loss": 0.9522, "step": 2087 }, { "epoch": 1.8557938006888124, "grad_norm": 0.3623351752758026, "learning_rate": 0.0001, "loss": 0.9497, "step": 2088 }, { "epoch": 1.8566825908232418, "grad_norm": 0.34022286534309387, "learning_rate": 0.0001, "loss": 0.9482, "step": 2089 }, { "epoch": 1.8575713809576713, "grad_norm": 0.34971117973327637, "learning_rate": 0.0001, "loss": 0.9088, "step": 2090 }, { "epoch": 1.858460171092101, "grad_norm": 0.35238999128341675, "learning_rate": 0.0001, "loss": 0.893, "step": 2091 }, { "epoch": 1.8593489612265304, "grad_norm": 0.3588760197162628, "learning_rate": 0.0001, "loss": 0.9416, "step": 2092 }, { "epoch": 1.8602377513609598, "grad_norm": 0.3537149131298065, "learning_rate": 0.0001, "loss": 0.9008, "step": 2093 }, { "epoch": 1.8611265414953895, "grad_norm": 0.35885384678840637, "learning_rate": 0.0001, "loss": 0.9227, "step": 2094 }, { "epoch": 1.862015331629819, "grad_norm": 0.44651269912719727, "learning_rate": 0.0001, "loss": 0.8945, "step": 2095 }, { "epoch": 1.8629041217642484, "grad_norm": 0.8166044354438782, "learning_rate": 0.0001, "loss": 0.9453, "step": 2096 }, { "epoch": 1.863792911898678, "grad_norm": 0.4329817593097687, "learning_rate": 0.0001, "loss": 0.9554, "step": 2097 }, { "epoch": 1.8646817020331075, "grad_norm": 0.3563162684440613, "learning_rate": 0.0001, "loss": 0.9519, "step": 2098 }, { "epoch": 1.865570492167537, "grad_norm": 0.3440234065055847, "learning_rate": 0.0001, "loss": 0.9086, "step": 2099 }, { "epoch": 1.8664592823019666, "grad_norm": 0.5276257991790771, "learning_rate": 0.0001, "loss": 0.9137, "step": 2100 }, { "epoch": 1.867348072436396, "grad_norm": 0.39348065853118896, "learning_rate": 0.0001, "loss": 0.9041, "step": 2101 }, { "epoch": 1.8682368625708254, "grad_norm": 0.3502536416053772, "learning_rate": 0.0001, "loss": 0.9357, "step": 2102 }, { "epoch": 1.869125652705255, "grad_norm": 0.3747837245464325, "learning_rate": 0.0001, "loss": 0.9268, "step": 2103 }, { "epoch": 1.8700144428396843, "grad_norm": 0.3654274344444275, "learning_rate": 0.0001, "loss": 0.9001, "step": 2104 }, { "epoch": 1.870903232974114, "grad_norm": 0.34981828927993774, "learning_rate": 0.0001, "loss": 0.8879, "step": 2105 }, { "epoch": 1.8717920231085436, "grad_norm": 0.34973669052124023, "learning_rate": 0.0001, "loss": 0.9614, "step": 2106 }, { "epoch": 1.8726808132429729, "grad_norm": 0.35739874839782715, "learning_rate": 0.0001, "loss": 0.9628, "step": 2107 }, { "epoch": 1.8735696033774025, "grad_norm": 0.33049049973487854, "learning_rate": 0.0001, "loss": 0.8869, "step": 2108 }, { "epoch": 1.874458393511832, "grad_norm": 0.3104158639907837, "learning_rate": 0.0001, "loss": 0.8905, "step": 2109 }, { "epoch": 1.8753471836462614, "grad_norm": 0.3537830114364624, "learning_rate": 0.0001, "loss": 0.9178, "step": 2110 }, { "epoch": 1.876235973780691, "grad_norm": 0.36299172043800354, "learning_rate": 0.0001, "loss": 0.8937, "step": 2111 }, { "epoch": 1.8771247639151205, "grad_norm": 0.30172330141067505, "learning_rate": 0.0001, "loss": 0.8999, "step": 2112 }, { "epoch": 1.87801355404955, "grad_norm": 0.34734198451042175, "learning_rate": 0.0001, "loss": 0.8742, "step": 2113 }, { "epoch": 1.8789023441839796, "grad_norm": 0.3040066659450531, "learning_rate": 0.0001, "loss": 0.8276, "step": 2114 }, { "epoch": 1.879791134318409, "grad_norm": 0.38280028104782104, "learning_rate": 0.0001, "loss": 0.8841, "step": 2115 }, { "epoch": 1.8806799244528385, "grad_norm": 0.3382204473018646, "learning_rate": 0.0001, "loss": 0.9274, "step": 2116 }, { "epoch": 1.8815687145872682, "grad_norm": 0.3138622045516968, "learning_rate": 0.0001, "loss": 0.8872, "step": 2117 }, { "epoch": 1.8824575047216976, "grad_norm": 0.37905508279800415, "learning_rate": 0.0001, "loss": 0.9759, "step": 2118 }, { "epoch": 1.883346294856127, "grad_norm": 0.36490878462791443, "learning_rate": 0.0001, "loss": 0.935, "step": 2119 }, { "epoch": 1.8842350849905567, "grad_norm": 0.30611562728881836, "learning_rate": 0.0001, "loss": 0.8862, "step": 2120 }, { "epoch": 1.8851238751249861, "grad_norm": 0.30281272530555725, "learning_rate": 0.0001, "loss": 0.9486, "step": 2121 }, { "epoch": 1.8860126652594156, "grad_norm": 0.3309618830680847, "learning_rate": 0.0001, "loss": 0.9686, "step": 2122 }, { "epoch": 1.8869014553938452, "grad_norm": 0.36201414465904236, "learning_rate": 0.0001, "loss": 0.8934, "step": 2123 }, { "epoch": 1.8877902455282747, "grad_norm": 0.3028503656387329, "learning_rate": 0.0001, "loss": 0.916, "step": 2124 }, { "epoch": 1.8886790356627041, "grad_norm": 0.3417774736881256, "learning_rate": 0.0001, "loss": 0.96, "step": 2125 }, { "epoch": 1.8895678257971338, "grad_norm": 0.3560897707939148, "learning_rate": 0.0001, "loss": 0.9264, "step": 2126 }, { "epoch": 1.890456615931563, "grad_norm": 0.33282026648521423, "learning_rate": 0.0001, "loss": 0.9066, "step": 2127 }, { "epoch": 1.8913454060659927, "grad_norm": 0.3309515416622162, "learning_rate": 0.0001, "loss": 0.8867, "step": 2128 }, { "epoch": 1.8922341962004223, "grad_norm": 0.32819992303848267, "learning_rate": 0.0001, "loss": 0.9102, "step": 2129 }, { "epoch": 1.8931229863348515, "grad_norm": 0.3443058133125305, "learning_rate": 0.0001, "loss": 0.9559, "step": 2130 }, { "epoch": 1.8940117764692812, "grad_norm": 0.3516992926597595, "learning_rate": 0.0001, "loss": 0.917, "step": 2131 }, { "epoch": 1.8949005666037106, "grad_norm": 0.32620400190353394, "learning_rate": 0.0001, "loss": 0.8944, "step": 2132 }, { "epoch": 1.89578935673814, "grad_norm": 0.34699490666389465, "learning_rate": 0.0001, "loss": 0.87, "step": 2133 }, { "epoch": 1.8966781468725697, "grad_norm": 0.32767391204833984, "learning_rate": 0.0001, "loss": 0.9523, "step": 2134 }, { "epoch": 1.8975669370069992, "grad_norm": 0.34350037574768066, "learning_rate": 0.0001, "loss": 0.9449, "step": 2135 }, { "epoch": 1.8984557271414286, "grad_norm": 0.36585548520088196, "learning_rate": 0.0001, "loss": 0.9338, "step": 2136 }, { "epoch": 1.8993445172758583, "grad_norm": 0.3513524830341339, "learning_rate": 0.0001, "loss": 0.9429, "step": 2137 }, { "epoch": 1.9002333074102877, "grad_norm": 0.3541501462459564, "learning_rate": 0.0001, "loss": 0.8976, "step": 2138 }, { "epoch": 1.9011220975447172, "grad_norm": 0.3008817434310913, "learning_rate": 0.0001, "loss": 0.8689, "step": 2139 }, { "epoch": 1.9020108876791468, "grad_norm": 0.3268721401691437, "learning_rate": 0.0001, "loss": 0.8712, "step": 2140 }, { "epoch": 1.9028996778135763, "grad_norm": 0.3459062874317169, "learning_rate": 0.0001, "loss": 0.9399, "step": 2141 }, { "epoch": 1.9037884679480057, "grad_norm": 0.3577088713645935, "learning_rate": 0.0001, "loss": 0.9029, "step": 2142 }, { "epoch": 1.9046772580824354, "grad_norm": 0.34948304295539856, "learning_rate": 0.0001, "loss": 0.8714, "step": 2143 }, { "epoch": 1.9055660482168648, "grad_norm": 0.34985479712486267, "learning_rate": 0.0001, "loss": 0.902, "step": 2144 }, { "epoch": 1.9064548383512943, "grad_norm": 0.32033130526542664, "learning_rate": 0.0001, "loss": 0.8456, "step": 2145 }, { "epoch": 1.907343628485724, "grad_norm": 0.32239830493927, "learning_rate": 0.0001, "loss": 0.9245, "step": 2146 }, { "epoch": 1.9082324186201534, "grad_norm": 0.35271769762039185, "learning_rate": 0.0001, "loss": 0.9427, "step": 2147 }, { "epoch": 1.9091212087545828, "grad_norm": 0.34471169114112854, "learning_rate": 0.0001, "loss": 0.9369, "step": 2148 }, { "epoch": 1.9100099988890125, "grad_norm": 0.2854446470737457, "learning_rate": 0.0001, "loss": 0.8902, "step": 2149 }, { "epoch": 1.9108987890234417, "grad_norm": 0.32251161336898804, "learning_rate": 0.0001, "loss": 0.8536, "step": 2150 }, { "epoch": 1.9117875791578713, "grad_norm": 0.33492353558540344, "learning_rate": 0.0001, "loss": 0.8761, "step": 2151 }, { "epoch": 1.912676369292301, "grad_norm": 0.3147629201412201, "learning_rate": 0.0001, "loss": 0.8802, "step": 2152 }, { "epoch": 1.9135651594267302, "grad_norm": 0.32801344990730286, "learning_rate": 0.0001, "loss": 0.8123, "step": 2153 }, { "epoch": 1.9144539495611599, "grad_norm": 0.2836940586566925, "learning_rate": 0.0001, "loss": 0.9012, "step": 2154 }, { "epoch": 1.9153427396955893, "grad_norm": 0.3101726770401001, "learning_rate": 0.0001, "loss": 0.8274, "step": 2155 }, { "epoch": 1.9162315298300188, "grad_norm": 0.3067977726459503, "learning_rate": 0.0001, "loss": 0.8986, "step": 2156 }, { "epoch": 1.9171203199644484, "grad_norm": 0.27377212047576904, "learning_rate": 0.0001, "loss": 0.8844, "step": 2157 }, { "epoch": 1.9180091100988779, "grad_norm": 0.36923712491989136, "learning_rate": 0.0001, "loss": 0.9264, "step": 2158 }, { "epoch": 1.9188979002333073, "grad_norm": 0.3664684295654297, "learning_rate": 0.0001, "loss": 0.953, "step": 2159 }, { "epoch": 1.919786690367737, "grad_norm": 0.3705950081348419, "learning_rate": 0.0001, "loss": 0.873, "step": 2160 }, { "epoch": 1.9206754805021664, "grad_norm": 0.33334994316101074, "learning_rate": 0.0001, "loss": 0.8896, "step": 2161 }, { "epoch": 1.9215642706365959, "grad_norm": 0.32042577862739563, "learning_rate": 0.0001, "loss": 0.832, "step": 2162 }, { "epoch": 1.9224530607710255, "grad_norm": 0.2969491183757782, "learning_rate": 0.0001, "loss": 0.8963, "step": 2163 }, { "epoch": 1.923341850905455, "grad_norm": 0.3007069528102875, "learning_rate": 0.0001, "loss": 0.8812, "step": 2164 }, { "epoch": 1.9242306410398844, "grad_norm": 0.34228307008743286, "learning_rate": 0.0001, "loss": 0.9252, "step": 2165 }, { "epoch": 1.925119431174314, "grad_norm": 0.34786513447761536, "learning_rate": 0.0001, "loss": 0.8791, "step": 2166 }, { "epoch": 1.9260082213087435, "grad_norm": 0.30037039518356323, "learning_rate": 0.0001, "loss": 0.8397, "step": 2167 }, { "epoch": 1.926897011443173, "grad_norm": 0.29265427589416504, "learning_rate": 0.0001, "loss": 0.8792, "step": 2168 }, { "epoch": 1.9277858015776026, "grad_norm": 0.35532552003860474, "learning_rate": 0.0001, "loss": 0.9429, "step": 2169 }, { "epoch": 1.928674591712032, "grad_norm": 0.3534153997898102, "learning_rate": 0.0001, "loss": 0.8774, "step": 2170 }, { "epoch": 1.9295633818464615, "grad_norm": 0.29537761211395264, "learning_rate": 0.0001, "loss": 0.9146, "step": 2171 }, { "epoch": 1.9304521719808911, "grad_norm": 0.34193140268325806, "learning_rate": 0.0001, "loss": 0.9506, "step": 2172 }, { "epoch": 1.9313409621153204, "grad_norm": 0.33322104811668396, "learning_rate": 0.0001, "loss": 0.965, "step": 2173 }, { "epoch": 1.93222975224975, "grad_norm": 0.34304308891296387, "learning_rate": 0.0001, "loss": 0.9704, "step": 2174 }, { "epoch": 1.9331185423841797, "grad_norm": 0.3284062445163727, "learning_rate": 0.0001, "loss": 0.7752, "step": 2175 }, { "epoch": 1.934007332518609, "grad_norm": 0.34800034761428833, "learning_rate": 0.0001, "loss": 0.9484, "step": 2176 }, { "epoch": 1.9348961226530386, "grad_norm": 0.34290215373039246, "learning_rate": 0.0001, "loss": 0.9032, "step": 2177 }, { "epoch": 1.935784912787468, "grad_norm": 0.34737300872802734, "learning_rate": 0.0001, "loss": 0.9158, "step": 2178 }, { "epoch": 1.9366737029218974, "grad_norm": 0.34877312183380127, "learning_rate": 0.0001, "loss": 0.8934, "step": 2179 }, { "epoch": 1.937562493056327, "grad_norm": 0.3398269712924957, "learning_rate": 0.0001, "loss": 0.9713, "step": 2180 }, { "epoch": 1.9384512831907565, "grad_norm": 0.33079251646995544, "learning_rate": 0.0001, "loss": 1.01, "step": 2181 }, { "epoch": 1.939340073325186, "grad_norm": 0.3067607581615448, "learning_rate": 0.0001, "loss": 0.8631, "step": 2182 }, { "epoch": 1.9402288634596156, "grad_norm": 0.34091418981552124, "learning_rate": 0.0001, "loss": 0.9544, "step": 2183 }, { "epoch": 1.941117653594045, "grad_norm": 0.30287304520606995, "learning_rate": 0.0001, "loss": 0.941, "step": 2184 }, { "epoch": 1.9420064437284745, "grad_norm": 0.2976100742816925, "learning_rate": 0.0001, "loss": 0.9028, "step": 2185 }, { "epoch": 1.9428952338629042, "grad_norm": 0.3236287236213684, "learning_rate": 0.0001, "loss": 0.9633, "step": 2186 }, { "epoch": 1.9437840239973336, "grad_norm": 0.35301899909973145, "learning_rate": 0.0001, "loss": 0.9464, "step": 2187 }, { "epoch": 1.944672814131763, "grad_norm": 0.35564854741096497, "learning_rate": 0.0001, "loss": 1.0202, "step": 2188 }, { "epoch": 1.9455616042661927, "grad_norm": 0.31409159302711487, "learning_rate": 0.0001, "loss": 0.965, "step": 2189 }, { "epoch": 1.9464503944006222, "grad_norm": 0.3617132306098938, "learning_rate": 0.0001, "loss": 0.9124, "step": 2190 }, { "epoch": 1.9473391845350516, "grad_norm": 0.2927257716655731, "learning_rate": 0.0001, "loss": 0.9333, "step": 2191 }, { "epoch": 1.9482279746694813, "grad_norm": 0.3173227906227112, "learning_rate": 0.0001, "loss": 0.8952, "step": 2192 }, { "epoch": 1.9491167648039107, "grad_norm": 0.29953786730766296, "learning_rate": 0.0001, "loss": 0.8728, "step": 2193 }, { "epoch": 1.9500055549383402, "grad_norm": 0.36522650718688965, "learning_rate": 0.0001, "loss": 0.8915, "step": 2194 }, { "epoch": 1.9508943450727698, "grad_norm": 0.36083757877349854, "learning_rate": 0.0001, "loss": 0.9012, "step": 2195 }, { "epoch": 1.951783135207199, "grad_norm": 0.3177006244659424, "learning_rate": 0.0001, "loss": 0.8705, "step": 2196 }, { "epoch": 1.9526719253416287, "grad_norm": 0.33240652084350586, "learning_rate": 0.0001, "loss": 0.9201, "step": 2197 }, { "epoch": 1.9535607154760584, "grad_norm": 0.33079612255096436, "learning_rate": 0.0001, "loss": 0.8434, "step": 2198 }, { "epoch": 1.9544495056104876, "grad_norm": 0.3984503149986267, "learning_rate": 0.0001, "loss": 0.9963, "step": 2199 }, { "epoch": 1.9553382957449172, "grad_norm": 0.33371084928512573, "learning_rate": 0.0001, "loss": 0.9055, "step": 2200 }, { "epoch": 1.956227085879347, "grad_norm": 0.3662305772304535, "learning_rate": 0.0001, "loss": 0.8911, "step": 2201 }, { "epoch": 1.9571158760137761, "grad_norm": 0.3294123411178589, "learning_rate": 0.0001, "loss": 0.9329, "step": 2202 }, { "epoch": 1.9580046661482058, "grad_norm": 0.32030513882637024, "learning_rate": 0.0001, "loss": 0.9697, "step": 2203 }, { "epoch": 1.9588934562826352, "grad_norm": 0.3227706551551819, "learning_rate": 0.0001, "loss": 0.8552, "step": 2204 }, { "epoch": 1.9597822464170647, "grad_norm": 0.3211442828178406, "learning_rate": 0.0001, "loss": 0.8235, "step": 2205 }, { "epoch": 1.9606710365514943, "grad_norm": 0.3925526440143585, "learning_rate": 0.0001, "loss": 0.8938, "step": 2206 }, { "epoch": 1.9615598266859238, "grad_norm": 0.2847107946872711, "learning_rate": 0.0001, "loss": 0.8749, "step": 2207 }, { "epoch": 1.9624486168203532, "grad_norm": 0.35088127851486206, "learning_rate": 0.0001, "loss": 0.9328, "step": 2208 }, { "epoch": 1.9633374069547829, "grad_norm": 0.3825710713863373, "learning_rate": 0.0001, "loss": 0.8982, "step": 2209 }, { "epoch": 1.9642261970892123, "grad_norm": 0.3291476368904114, "learning_rate": 0.0001, "loss": 0.8782, "step": 2210 }, { "epoch": 1.9651149872236418, "grad_norm": 0.3212360441684723, "learning_rate": 0.0001, "loss": 0.9278, "step": 2211 }, { "epoch": 1.9660037773580714, "grad_norm": 0.3502598702907562, "learning_rate": 0.0001, "loss": 0.8873, "step": 2212 }, { "epoch": 1.9668925674925009, "grad_norm": 0.3515768051147461, "learning_rate": 0.0001, "loss": 0.8633, "step": 2213 }, { "epoch": 1.9677813576269303, "grad_norm": 0.340701162815094, "learning_rate": 0.0001, "loss": 0.9178, "step": 2214 }, { "epoch": 1.96867014776136, "grad_norm": 0.3128719925880432, "learning_rate": 0.0001, "loss": 0.8933, "step": 2215 }, { "epoch": 1.9695589378957894, "grad_norm": 0.34365659952163696, "learning_rate": 0.0001, "loss": 0.9546, "step": 2216 }, { "epoch": 1.9704477280302188, "grad_norm": 0.31693172454833984, "learning_rate": 0.0001, "loss": 0.8905, "step": 2217 }, { "epoch": 1.9713365181646485, "grad_norm": 0.36978334188461304, "learning_rate": 0.0001, "loss": 0.8712, "step": 2218 }, { "epoch": 1.9722253082990777, "grad_norm": 0.31684744358062744, "learning_rate": 0.0001, "loss": 0.9069, "step": 2219 }, { "epoch": 1.9731140984335074, "grad_norm": 0.3603816330432892, "learning_rate": 0.0001, "loss": 0.9562, "step": 2220 }, { "epoch": 1.974002888567937, "grad_norm": 0.3792661428451538, "learning_rate": 0.0001, "loss": 0.9382, "step": 2221 }, { "epoch": 1.9748916787023663, "grad_norm": 0.4025600254535675, "learning_rate": 0.0001, "loss": 0.9571, "step": 2222 }, { "epoch": 1.975780468836796, "grad_norm": 0.3250136077404022, "learning_rate": 0.0001, "loss": 0.849, "step": 2223 }, { "epoch": 1.9766692589712256, "grad_norm": 0.3059399425983429, "learning_rate": 0.0001, "loss": 0.9364, "step": 2224 }, { "epoch": 1.9775580491056548, "grad_norm": 0.2995017468929291, "learning_rate": 0.0001, "loss": 0.8323, "step": 2225 }, { "epoch": 1.9784468392400845, "grad_norm": 0.33151498436927795, "learning_rate": 0.0001, "loss": 0.9296, "step": 2226 }, { "epoch": 1.979335629374514, "grad_norm": 0.36364221572875977, "learning_rate": 0.0001, "loss": 0.9274, "step": 2227 }, { "epoch": 1.9802244195089433, "grad_norm": 0.3598885238170624, "learning_rate": 0.0001, "loss": 0.9747, "step": 2228 }, { "epoch": 1.981113209643373, "grad_norm": 0.3541521430015564, "learning_rate": 0.0001, "loss": 0.9024, "step": 2229 }, { "epoch": 1.9820019997778024, "grad_norm": 0.312953382730484, "learning_rate": 0.0001, "loss": 0.8793, "step": 2230 }, { "epoch": 1.9828907899122319, "grad_norm": 0.3080136477947235, "learning_rate": 0.0001, "loss": 0.8732, "step": 2231 }, { "epoch": 1.9837795800466616, "grad_norm": 0.32917580008506775, "learning_rate": 0.0001, "loss": 0.8911, "step": 2232 }, { "epoch": 1.984668370181091, "grad_norm": 0.34910324215888977, "learning_rate": 0.0001, "loss": 0.9626, "step": 2233 }, { "epoch": 1.9855571603155204, "grad_norm": 0.38250458240509033, "learning_rate": 0.0001, "loss": 0.9059, "step": 2234 }, { "epoch": 1.98644595044995, "grad_norm": 0.34895434975624084, "learning_rate": 0.0001, "loss": 0.9963, "step": 2235 }, { "epoch": 1.9873347405843795, "grad_norm": 0.3334400951862335, "learning_rate": 0.0001, "loss": 0.8768, "step": 2236 }, { "epoch": 1.988223530718809, "grad_norm": 0.33048558235168457, "learning_rate": 0.0001, "loss": 0.9229, "step": 2237 }, { "epoch": 1.9891123208532386, "grad_norm": 0.3428661823272705, "learning_rate": 0.0001, "loss": 0.9175, "step": 2238 }, { "epoch": 1.990001110987668, "grad_norm": 0.3587013781070709, "learning_rate": 0.0001, "loss": 0.9277, "step": 2239 }, { "epoch": 1.9908899011220975, "grad_norm": 0.3457068204879761, "learning_rate": 0.0001, "loss": 0.8732, "step": 2240 }, { "epoch": 1.9917786912565272, "grad_norm": 0.3348812162876129, "learning_rate": 0.0001, "loss": 0.903, "step": 2241 }, { "epoch": 1.9926674813909564, "grad_norm": 0.3357104957103729, "learning_rate": 0.0001, "loss": 0.898, "step": 2242 }, { "epoch": 1.993556271525386, "grad_norm": 0.346883088350296, "learning_rate": 0.0001, "loss": 0.9087, "step": 2243 }, { "epoch": 1.9944450616598157, "grad_norm": 0.30482059717178345, "learning_rate": 0.0001, "loss": 0.9312, "step": 2244 }, { "epoch": 1.995333851794245, "grad_norm": 0.34484198689460754, "learning_rate": 0.0001, "loss": 0.8785, "step": 2245 }, { "epoch": 1.9962226419286746, "grad_norm": 0.2776843309402466, "learning_rate": 0.0001, "loss": 0.8985, "step": 2246 }, { "epoch": 1.9971114320631043, "grad_norm": 0.33060091733932495, "learning_rate": 0.0001, "loss": 0.9854, "step": 2247 }, { "epoch": 1.9980002221975335, "grad_norm": 0.31561362743377686, "learning_rate": 0.0001, "loss": 0.8927, "step": 2248 }, { "epoch": 1.9988890123319631, "grad_norm": 0.29034098982810974, "learning_rate": 0.0001, "loss": 0.868, "step": 2249 }, { "epoch": 1.9997778024663926, "grad_norm": 0.3135145604610443, "learning_rate": 0.0001, "loss": 0.7991, "step": 2250 }, { "epoch": 2.000666592600822, "grad_norm": 0.36708906292915344, "learning_rate": 0.0001, "loss": 0.8307, "step": 2251 }, { "epoch": 2.0015553827352517, "grad_norm": 0.3554810881614685, "learning_rate": 0.0001, "loss": 0.9073, "step": 2252 }, { "epoch": 2.0024441728696813, "grad_norm": 0.3140956461429596, "learning_rate": 0.0001, "loss": 0.8779, "step": 2253 }, { "epoch": 2.0033329630041106, "grad_norm": 0.3324379622936249, "learning_rate": 0.0001, "loss": 0.8612, "step": 2254 }, { "epoch": 2.0042217531385402, "grad_norm": 0.3427555561065674, "learning_rate": 0.0001, "loss": 0.8843, "step": 2255 }, { "epoch": 2.00511054327297, "grad_norm": 0.298017293214798, "learning_rate": 0.0001, "loss": 0.8393, "step": 2256 }, { "epoch": 2.005999333407399, "grad_norm": 0.3473068177700043, "learning_rate": 0.0001, "loss": 0.9666, "step": 2257 }, { "epoch": 2.0068881235418288, "grad_norm": 0.32553350925445557, "learning_rate": 0.0001, "loss": 0.9294, "step": 2258 }, { "epoch": 2.007776913676258, "grad_norm": 0.29851260781288147, "learning_rate": 0.0001, "loss": 0.9186, "step": 2259 }, { "epoch": 2.0086657038106877, "grad_norm": 0.3403533101081848, "learning_rate": 0.0001, "loss": 0.8777, "step": 2260 }, { "epoch": 2.0095544939451173, "grad_norm": 0.31564658880233765, "learning_rate": 0.0001, "loss": 0.8203, "step": 2261 }, { "epoch": 2.0104432840795465, "grad_norm": 0.34301063418388367, "learning_rate": 0.0001, "loss": 0.8448, "step": 2262 }, { "epoch": 2.011332074213976, "grad_norm": 0.34970736503601074, "learning_rate": 0.0001, "loss": 0.8925, "step": 2263 }, { "epoch": 2.012220864348406, "grad_norm": 0.3418786823749542, "learning_rate": 0.0001, "loss": 0.9322, "step": 2264 }, { "epoch": 2.013109654482835, "grad_norm": 0.3406841456890106, "learning_rate": 0.0001, "loss": 0.8289, "step": 2265 }, { "epoch": 2.0139984446172647, "grad_norm": 0.35538503527641296, "learning_rate": 0.0001, "loss": 0.9544, "step": 2266 }, { "epoch": 2.0148872347516944, "grad_norm": 0.35400450229644775, "learning_rate": 0.0001, "loss": 0.9106, "step": 2267 }, { "epoch": 2.0157760248861236, "grad_norm": 0.3589840829372406, "learning_rate": 0.0001, "loss": 0.9033, "step": 2268 }, { "epoch": 2.0166648150205533, "grad_norm": 0.3830343782901764, "learning_rate": 0.0001, "loss": 0.9254, "step": 2269 }, { "epoch": 2.017553605154983, "grad_norm": 0.3211663067340851, "learning_rate": 0.0001, "loss": 0.8952, "step": 2270 }, { "epoch": 2.018442395289412, "grad_norm": 0.37262219190597534, "learning_rate": 0.0001, "loss": 0.9151, "step": 2271 }, { "epoch": 2.019331185423842, "grad_norm": 0.34682542085647583, "learning_rate": 0.0001, "loss": 0.8174, "step": 2272 }, { "epoch": 2.0202199755582715, "grad_norm": 0.3054455816745758, "learning_rate": 0.0001, "loss": 0.8983, "step": 2273 }, { "epoch": 2.0211087656927007, "grad_norm": 0.2940855622291565, "learning_rate": 0.0001, "loss": 0.8573, "step": 2274 }, { "epoch": 2.0219975558271304, "grad_norm": 0.37542182207107544, "learning_rate": 0.0001, "loss": 0.9686, "step": 2275 }, { "epoch": 2.02288634596156, "grad_norm": 0.29696211218833923, "learning_rate": 0.0001, "loss": 0.8948, "step": 2276 }, { "epoch": 2.0237751360959892, "grad_norm": 0.35127562284469604, "learning_rate": 0.0001, "loss": 0.9528, "step": 2277 }, { "epoch": 2.024663926230419, "grad_norm": 0.3054238259792328, "learning_rate": 0.0001, "loss": 0.8935, "step": 2278 }, { "epoch": 2.0255527163648486, "grad_norm": 0.33190521597862244, "learning_rate": 0.0001, "loss": 0.8616, "step": 2279 }, { "epoch": 2.026441506499278, "grad_norm": 0.3622536361217499, "learning_rate": 0.0001, "loss": 0.9324, "step": 2280 }, { "epoch": 2.0273302966337075, "grad_norm": 0.32910165190696716, "learning_rate": 0.0001, "loss": 0.8391, "step": 2281 }, { "epoch": 2.0282190867681367, "grad_norm": 0.31704333424568176, "learning_rate": 0.0001, "loss": 0.8393, "step": 2282 }, { "epoch": 2.0291078769025663, "grad_norm": 0.39209648966789246, "learning_rate": 0.0001, "loss": 0.8637, "step": 2283 }, { "epoch": 2.029996667036996, "grad_norm": 0.3536166846752167, "learning_rate": 0.0001, "loss": 0.8957, "step": 2284 }, { "epoch": 2.030885457171425, "grad_norm": 0.33245381712913513, "learning_rate": 0.0001, "loss": 0.8915, "step": 2285 }, { "epoch": 2.031774247305855, "grad_norm": 0.3466053307056427, "learning_rate": 0.0001, "loss": 0.8671, "step": 2286 }, { "epoch": 2.0326630374402845, "grad_norm": 0.32629281282424927, "learning_rate": 0.0001, "loss": 0.93, "step": 2287 }, { "epoch": 2.0335518275747138, "grad_norm": 0.321918785572052, "learning_rate": 0.0001, "loss": 0.8497, "step": 2288 }, { "epoch": 2.0344406177091434, "grad_norm": 0.3338482081890106, "learning_rate": 0.0001, "loss": 0.8984, "step": 2289 }, { "epoch": 2.035329407843573, "grad_norm": 0.32754746079444885, "learning_rate": 0.0001, "loss": 0.8787, "step": 2290 }, { "epoch": 2.0362181979780023, "grad_norm": 0.34894460439682007, "learning_rate": 0.0001, "loss": 0.8761, "step": 2291 }, { "epoch": 2.037106988112432, "grad_norm": 0.34283894300460815, "learning_rate": 0.0001, "loss": 0.9411, "step": 2292 }, { "epoch": 2.0379957782468616, "grad_norm": 0.3812194764614105, "learning_rate": 0.0001, "loss": 0.9111, "step": 2293 }, { "epoch": 2.038884568381291, "grad_norm": 0.323310911655426, "learning_rate": 0.0001, "loss": 0.8984, "step": 2294 }, { "epoch": 2.0397733585157205, "grad_norm": 0.376645565032959, "learning_rate": 0.0001, "loss": 0.8604, "step": 2295 }, { "epoch": 2.04066214865015, "grad_norm": 0.3399569094181061, "learning_rate": 0.0001, "loss": 0.8495, "step": 2296 }, { "epoch": 2.0415509387845794, "grad_norm": 0.3920535147190094, "learning_rate": 0.0001, "loss": 0.8972, "step": 2297 }, { "epoch": 2.042439728919009, "grad_norm": 0.3221544623374939, "learning_rate": 0.0001, "loss": 0.9215, "step": 2298 }, { "epoch": 2.0433285190534387, "grad_norm": 0.32029077410697937, "learning_rate": 0.0001, "loss": 0.8555, "step": 2299 }, { "epoch": 2.044217309187868, "grad_norm": 0.3429443836212158, "learning_rate": 0.0001, "loss": 0.8572, "step": 2300 }, { "epoch": 2.0451060993222976, "grad_norm": 0.32114291191101074, "learning_rate": 0.0001, "loss": 0.8466, "step": 2301 }, { "epoch": 2.0459948894567273, "grad_norm": 0.35281145572662354, "learning_rate": 0.0001, "loss": 0.9114, "step": 2302 }, { "epoch": 2.0468836795911565, "grad_norm": 0.32281407713890076, "learning_rate": 0.0001, "loss": 0.9293, "step": 2303 }, { "epoch": 2.047772469725586, "grad_norm": 0.3905143439769745, "learning_rate": 0.0001, "loss": 0.9392, "step": 2304 }, { "epoch": 2.0486612598600153, "grad_norm": 0.34570732712745667, "learning_rate": 0.0001, "loss": 0.8764, "step": 2305 }, { "epoch": 2.049550049994445, "grad_norm": 0.34093308448791504, "learning_rate": 0.0001, "loss": 0.9282, "step": 2306 }, { "epoch": 2.0504388401288747, "grad_norm": 0.3642049729824066, "learning_rate": 0.0001, "loss": 0.8468, "step": 2307 }, { "epoch": 2.051327630263304, "grad_norm": 0.3626710772514343, "learning_rate": 0.0001, "loss": 0.8742, "step": 2308 }, { "epoch": 2.0522164203977336, "grad_norm": 0.3760134279727936, "learning_rate": 0.0001, "loss": 0.8199, "step": 2309 }, { "epoch": 2.053105210532163, "grad_norm": 0.3483445346355438, "learning_rate": 0.0001, "loss": 0.9071, "step": 2310 }, { "epoch": 2.0539940006665924, "grad_norm": 0.3375813663005829, "learning_rate": 0.0001, "loss": 0.8971, "step": 2311 }, { "epoch": 2.054882790801022, "grad_norm": 0.3339730501174927, "learning_rate": 0.0001, "loss": 0.8204, "step": 2312 }, { "epoch": 2.0557715809354518, "grad_norm": 0.36098751425743103, "learning_rate": 0.0001, "loss": 0.8845, "step": 2313 }, { "epoch": 2.056660371069881, "grad_norm": 0.34348028898239136, "learning_rate": 0.0001, "loss": 0.8818, "step": 2314 }, { "epoch": 2.0575491612043106, "grad_norm": 0.36092281341552734, "learning_rate": 0.0001, "loss": 0.9322, "step": 2315 }, { "epoch": 2.0584379513387403, "grad_norm": 0.4044939875602722, "learning_rate": 0.0001, "loss": 0.868, "step": 2316 }, { "epoch": 2.0593267414731695, "grad_norm": 0.3279127776622772, "learning_rate": 0.0001, "loss": 0.886, "step": 2317 }, { "epoch": 2.060215531607599, "grad_norm": 0.3734513819217682, "learning_rate": 0.0001, "loss": 0.8934, "step": 2318 }, { "epoch": 2.061104321742029, "grad_norm": 0.3154412508010864, "learning_rate": 0.0001, "loss": 0.8556, "step": 2319 }, { "epoch": 2.061993111876458, "grad_norm": 0.35183727741241455, "learning_rate": 0.0001, "loss": 0.9645, "step": 2320 }, { "epoch": 2.0628819020108877, "grad_norm": 0.35789453983306885, "learning_rate": 0.0001, "loss": 0.8836, "step": 2321 }, { "epoch": 2.0637706921453174, "grad_norm": 0.3455105423927307, "learning_rate": 0.0001, "loss": 0.9554, "step": 2322 }, { "epoch": 2.0646594822797466, "grad_norm": 0.35150641202926636, "learning_rate": 0.0001, "loss": 0.8408, "step": 2323 }, { "epoch": 2.0655482724141763, "grad_norm": 0.3445546627044678, "learning_rate": 0.0001, "loss": 0.9087, "step": 2324 }, { "epoch": 2.066437062548606, "grad_norm": 0.3782254755496979, "learning_rate": 0.0001, "loss": 0.9708, "step": 2325 }, { "epoch": 2.067325852683035, "grad_norm": 0.30935153365135193, "learning_rate": 0.0001, "loss": 0.8883, "step": 2326 }, { "epoch": 2.068214642817465, "grad_norm": 0.34302398562431335, "learning_rate": 0.0001, "loss": 0.9016, "step": 2327 }, { "epoch": 2.069103432951894, "grad_norm": 0.38530564308166504, "learning_rate": 0.0001, "loss": 0.8869, "step": 2328 }, { "epoch": 2.0699922230863237, "grad_norm": 0.34200990200042725, "learning_rate": 0.0001, "loss": 0.8758, "step": 2329 }, { "epoch": 2.0708810132207534, "grad_norm": 0.36103734374046326, "learning_rate": 0.0001, "loss": 0.9055, "step": 2330 }, { "epoch": 2.0717698033551826, "grad_norm": 0.36420273780822754, "learning_rate": 0.0001, "loss": 0.9257, "step": 2331 }, { "epoch": 2.0726585934896122, "grad_norm": 0.3854941725730896, "learning_rate": 0.0001, "loss": 0.9891, "step": 2332 }, { "epoch": 2.073547383624042, "grad_norm": 0.33720192313194275, "learning_rate": 0.0001, "loss": 0.8197, "step": 2333 }, { "epoch": 2.074436173758471, "grad_norm": 0.3435489237308502, "learning_rate": 0.0001, "loss": 0.8943, "step": 2334 }, { "epoch": 2.0753249638929008, "grad_norm": 0.33030179142951965, "learning_rate": 0.0001, "loss": 0.8799, "step": 2335 }, { "epoch": 2.0762137540273304, "grad_norm": 0.48584482073783875, "learning_rate": 0.0001, "loss": 0.8402, "step": 2336 }, { "epoch": 2.0771025441617597, "grad_norm": 0.39869070053100586, "learning_rate": 0.0001, "loss": 0.8668, "step": 2337 }, { "epoch": 2.0779913342961893, "grad_norm": 0.3875821530818939, "learning_rate": 0.0001, "loss": 0.9544, "step": 2338 }, { "epoch": 2.078880124430619, "grad_norm": 0.3594411611557007, "learning_rate": 0.0001, "loss": 0.9415, "step": 2339 }, { "epoch": 2.079768914565048, "grad_norm": 0.34104394912719727, "learning_rate": 0.0001, "loss": 0.8698, "step": 2340 }, { "epoch": 2.080657704699478, "grad_norm": 0.3437696695327759, "learning_rate": 0.0001, "loss": 0.8795, "step": 2341 }, { "epoch": 2.0815464948339075, "grad_norm": 0.3021574318408966, "learning_rate": 0.0001, "loss": 0.8896, "step": 2342 }, { "epoch": 2.0824352849683367, "grad_norm": 0.306111216545105, "learning_rate": 0.0001, "loss": 0.8579, "step": 2343 }, { "epoch": 2.0833240751027664, "grad_norm": 0.3199199140071869, "learning_rate": 0.0001, "loss": 0.8107, "step": 2344 }, { "epoch": 2.084212865237196, "grad_norm": 0.34308746457099915, "learning_rate": 0.0001, "loss": 0.9077, "step": 2345 }, { "epoch": 2.0851016553716253, "grad_norm": 0.366335928440094, "learning_rate": 0.0001, "loss": 0.9323, "step": 2346 }, { "epoch": 2.085990445506055, "grad_norm": 0.3313388228416443, "learning_rate": 0.0001, "loss": 0.8682, "step": 2347 }, { "epoch": 2.0868792356404846, "grad_norm": 0.31360548734664917, "learning_rate": 0.0001, "loss": 0.8854, "step": 2348 }, { "epoch": 2.087768025774914, "grad_norm": 0.3268912434577942, "learning_rate": 0.0001, "loss": 0.867, "step": 2349 }, { "epoch": 2.0886568159093435, "grad_norm": 0.33978188037872314, "learning_rate": 0.0001, "loss": 0.9178, "step": 2350 }, { "epoch": 2.0895456060437727, "grad_norm": 0.3470538258552551, "learning_rate": 0.0001, "loss": 0.9443, "step": 2351 }, { "epoch": 2.0904343961782024, "grad_norm": 0.35913875699043274, "learning_rate": 0.0001, "loss": 0.943, "step": 2352 }, { "epoch": 2.091323186312632, "grad_norm": 0.37776345014572144, "learning_rate": 0.0001, "loss": 0.8927, "step": 2353 }, { "epoch": 2.0922119764470613, "grad_norm": 0.39539778232574463, "learning_rate": 0.0001, "loss": 0.8981, "step": 2354 }, { "epoch": 2.093100766581491, "grad_norm": 0.352658748626709, "learning_rate": 0.0001, "loss": 0.8996, "step": 2355 }, { "epoch": 2.0939895567159206, "grad_norm": 0.31923985481262207, "learning_rate": 0.0001, "loss": 0.9008, "step": 2356 }, { "epoch": 2.09487834685035, "grad_norm": 0.36446240544319153, "learning_rate": 0.0001, "loss": 0.9393, "step": 2357 }, { "epoch": 2.0957671369847795, "grad_norm": 0.3254162073135376, "learning_rate": 0.0001, "loss": 0.7878, "step": 2358 }, { "epoch": 2.096655927119209, "grad_norm": 0.3216734826564789, "learning_rate": 0.0001, "loss": 0.8606, "step": 2359 }, { "epoch": 2.0975447172536383, "grad_norm": 0.3242090344429016, "learning_rate": 0.0001, "loss": 0.8695, "step": 2360 }, { "epoch": 2.098433507388068, "grad_norm": 0.29055359959602356, "learning_rate": 0.0001, "loss": 0.8997, "step": 2361 }, { "epoch": 2.0993222975224977, "grad_norm": 0.3655925691127777, "learning_rate": 0.0001, "loss": 0.9162, "step": 2362 }, { "epoch": 2.100211087656927, "grad_norm": 0.3490038514137268, "learning_rate": 0.0001, "loss": 0.9202, "step": 2363 }, { "epoch": 2.1010998777913565, "grad_norm": 0.3455352485179901, "learning_rate": 0.0001, "loss": 0.8759, "step": 2364 }, { "epoch": 2.101988667925786, "grad_norm": 0.2887704074382782, "learning_rate": 0.0001, "loss": 0.8869, "step": 2365 }, { "epoch": 2.1028774580602154, "grad_norm": 0.32117751240730286, "learning_rate": 0.0001, "loss": 0.8057, "step": 2366 }, { "epoch": 2.103766248194645, "grad_norm": 0.33989858627319336, "learning_rate": 0.0001, "loss": 1.0061, "step": 2367 }, { "epoch": 2.1046550383290747, "grad_norm": 0.3154880106449127, "learning_rate": 0.0001, "loss": 0.8518, "step": 2368 }, { "epoch": 2.105543828463504, "grad_norm": 0.2925361692905426, "learning_rate": 0.0001, "loss": 0.8935, "step": 2369 }, { "epoch": 2.1064326185979336, "grad_norm": 0.3639398217201233, "learning_rate": 0.0001, "loss": 0.8115, "step": 2370 }, { "epoch": 2.1073214087323633, "grad_norm": 0.3522791266441345, "learning_rate": 0.0001, "loss": 0.8857, "step": 2371 }, { "epoch": 2.1082101988667925, "grad_norm": 0.3359151780605316, "learning_rate": 0.0001, "loss": 0.8931, "step": 2372 }, { "epoch": 2.109098989001222, "grad_norm": 0.33028051257133484, "learning_rate": 0.0001, "loss": 0.8142, "step": 2373 }, { "epoch": 2.1099877791356514, "grad_norm": 0.3378293514251709, "learning_rate": 0.0001, "loss": 0.9244, "step": 2374 }, { "epoch": 2.110876569270081, "grad_norm": 0.36423346400260925, "learning_rate": 0.0001, "loss": 0.8713, "step": 2375 }, { "epoch": 2.1117653594045107, "grad_norm": 0.32155025005340576, "learning_rate": 0.0001, "loss": 0.8168, "step": 2376 }, { "epoch": 2.11265414953894, "grad_norm": 0.627085268497467, "learning_rate": 0.0001, "loss": 0.8542, "step": 2377 }, { "epoch": 2.1135429396733696, "grad_norm": 0.32278314232826233, "learning_rate": 0.0001, "loss": 0.8938, "step": 2378 }, { "epoch": 2.1144317298077993, "grad_norm": 0.3442274332046509, "learning_rate": 0.0001, "loss": 0.9269, "step": 2379 }, { "epoch": 2.1153205199422285, "grad_norm": 0.3448584973812103, "learning_rate": 0.0001, "loss": 0.8243, "step": 2380 }, { "epoch": 2.116209310076658, "grad_norm": 0.3396780490875244, "learning_rate": 0.0001, "loss": 0.8122, "step": 2381 }, { "epoch": 2.117098100211088, "grad_norm": 0.3797731101512909, "learning_rate": 0.0001, "loss": 0.9614, "step": 2382 }, { "epoch": 2.117986890345517, "grad_norm": 0.41133296489715576, "learning_rate": 0.0001, "loss": 0.9597, "step": 2383 }, { "epoch": 2.1188756804799467, "grad_norm": 0.4031218886375427, "learning_rate": 0.0001, "loss": 0.9289, "step": 2384 }, { "epoch": 2.1197644706143763, "grad_norm": 0.3527853488922119, "learning_rate": 0.0001, "loss": 0.9029, "step": 2385 }, { "epoch": 2.1206532607488056, "grad_norm": 0.3235529363155365, "learning_rate": 0.0001, "loss": 0.9195, "step": 2386 }, { "epoch": 2.121542050883235, "grad_norm": 0.31495201587677, "learning_rate": 0.0001, "loss": 0.8524, "step": 2387 }, { "epoch": 2.122430841017665, "grad_norm": 0.378337562084198, "learning_rate": 0.0001, "loss": 0.8896, "step": 2388 }, { "epoch": 2.123319631152094, "grad_norm": 0.34486186504364014, "learning_rate": 0.0001, "loss": 0.9178, "step": 2389 }, { "epoch": 2.1242084212865238, "grad_norm": 0.36222806572914124, "learning_rate": 0.0001, "loss": 0.8327, "step": 2390 }, { "epoch": 2.1250972114209534, "grad_norm": 0.3327309191226959, "learning_rate": 0.0001, "loss": 0.8055, "step": 2391 }, { "epoch": 2.1259860015553826, "grad_norm": 0.3839544951915741, "learning_rate": 0.0001, "loss": 0.9453, "step": 2392 }, { "epoch": 2.1268747916898123, "grad_norm": 0.33836036920547485, "learning_rate": 0.0001, "loss": 0.8824, "step": 2393 }, { "epoch": 2.127763581824242, "grad_norm": 0.3374810218811035, "learning_rate": 0.0001, "loss": 0.8261, "step": 2394 }, { "epoch": 2.128652371958671, "grad_norm": 0.3791443109512329, "learning_rate": 0.0001, "loss": 0.9758, "step": 2395 }, { "epoch": 2.129541162093101, "grad_norm": 0.3412923216819763, "learning_rate": 0.0001, "loss": 0.8655, "step": 2396 }, { "epoch": 2.13042995222753, "grad_norm": 0.3597002625465393, "learning_rate": 0.0001, "loss": 0.8728, "step": 2397 }, { "epoch": 2.1313187423619597, "grad_norm": 0.35584625601768494, "learning_rate": 0.0001, "loss": 0.8583, "step": 2398 }, { "epoch": 2.1322075324963894, "grad_norm": 0.3240017890930176, "learning_rate": 0.0001, "loss": 0.9065, "step": 2399 }, { "epoch": 2.1330963226308186, "grad_norm": 0.31349146366119385, "learning_rate": 0.0001, "loss": 0.8752, "step": 2400 }, { "epoch": 2.1339851127652483, "grad_norm": 0.3268539011478424, "learning_rate": 0.0001, "loss": 0.867, "step": 2401 }, { "epoch": 2.134873902899678, "grad_norm": 1.1880651712417603, "learning_rate": 0.0001, "loss": 0.9108, "step": 2402 }, { "epoch": 2.135762693034107, "grad_norm": 0.32068613171577454, "learning_rate": 0.0001, "loss": 0.865, "step": 2403 }, { "epoch": 2.136651483168537, "grad_norm": 0.3671189248561859, "learning_rate": 0.0001, "loss": 0.922, "step": 2404 }, { "epoch": 2.1375402733029665, "grad_norm": 0.3628632724285126, "learning_rate": 0.0001, "loss": 0.8714, "step": 2405 }, { "epoch": 2.1384290634373957, "grad_norm": 0.38792744278907776, "learning_rate": 0.0001, "loss": 0.9345, "step": 2406 }, { "epoch": 2.1393178535718254, "grad_norm": 0.38119903206825256, "learning_rate": 0.0001, "loss": 0.8692, "step": 2407 }, { "epoch": 2.140206643706255, "grad_norm": 0.431945264339447, "learning_rate": 0.0001, "loss": 0.9405, "step": 2408 }, { "epoch": 2.1410954338406842, "grad_norm": 0.3509438633918762, "learning_rate": 0.0001, "loss": 0.9128, "step": 2409 }, { "epoch": 2.141984223975114, "grad_norm": 0.35793623328208923, "learning_rate": 0.0001, "loss": 0.8824, "step": 2410 }, { "epoch": 2.1428730141095436, "grad_norm": 0.6159213185310364, "learning_rate": 0.0001, "loss": 0.8774, "step": 2411 }, { "epoch": 2.143761804243973, "grad_norm": 0.36963677406311035, "learning_rate": 0.0001, "loss": 0.8521, "step": 2412 }, { "epoch": 2.1446505943784024, "grad_norm": 0.36160334944725037, "learning_rate": 0.0001, "loss": 0.8255, "step": 2413 }, { "epoch": 2.145539384512832, "grad_norm": 0.35096341371536255, "learning_rate": 0.0001, "loss": 0.955, "step": 2414 }, { "epoch": 2.1464281746472613, "grad_norm": 0.5263632535934448, "learning_rate": 0.0001, "loss": 0.9105, "step": 2415 }, { "epoch": 2.147316964781691, "grad_norm": 0.37115469574928284, "learning_rate": 0.0001, "loss": 0.8369, "step": 2416 }, { "epoch": 2.1482057549161206, "grad_norm": 0.3329596519470215, "learning_rate": 0.0001, "loss": 0.8283, "step": 2417 }, { "epoch": 2.14909454505055, "grad_norm": 0.5003162026405334, "learning_rate": 0.0001, "loss": 0.9056, "step": 2418 }, { "epoch": 2.1499833351849795, "grad_norm": 0.3137914538383484, "learning_rate": 0.0001, "loss": 0.8761, "step": 2419 }, { "epoch": 2.1508721253194087, "grad_norm": 0.35285452008247375, "learning_rate": 0.0001, "loss": 0.8635, "step": 2420 }, { "epoch": 2.1517609154538384, "grad_norm": 0.35029417276382446, "learning_rate": 0.0001, "loss": 0.8181, "step": 2421 }, { "epoch": 2.152649705588268, "grad_norm": 0.36478570103645325, "learning_rate": 0.0001, "loss": 0.8536, "step": 2422 }, { "epoch": 2.1535384957226973, "grad_norm": 0.43607574701309204, "learning_rate": 0.0001, "loss": 0.8965, "step": 2423 }, { "epoch": 2.154427285857127, "grad_norm": 0.36862248182296753, "learning_rate": 0.0001, "loss": 0.8031, "step": 2424 }, { "epoch": 2.1553160759915566, "grad_norm": 0.49188151955604553, "learning_rate": 0.0001, "loss": 0.9334, "step": 2425 }, { "epoch": 2.156204866125986, "grad_norm": 0.7494956254959106, "learning_rate": 0.0001, "loss": 0.9039, "step": 2426 }, { "epoch": 2.1570936562604155, "grad_norm": 0.9286481738090515, "learning_rate": 0.0001, "loss": 0.9374, "step": 2427 }, { "epoch": 2.157982446394845, "grad_norm": 0.4706733822822571, "learning_rate": 0.0001, "loss": 0.9537, "step": 2428 }, { "epoch": 2.1588712365292744, "grad_norm": 0.4074293076992035, "learning_rate": 0.0001, "loss": 0.8819, "step": 2429 }, { "epoch": 2.159760026663704, "grad_norm": 0.3723183870315552, "learning_rate": 0.0001, "loss": 0.8569, "step": 2430 }, { "epoch": 2.1606488167981337, "grad_norm": 0.5987865924835205, "learning_rate": 0.0001, "loss": 0.8495, "step": 2431 }, { "epoch": 2.161537606932563, "grad_norm": 0.3815048038959503, "learning_rate": 0.0001, "loss": 0.8312, "step": 2432 }, { "epoch": 2.1624263970669926, "grad_norm": 0.33688196539878845, "learning_rate": 0.0001, "loss": 0.8509, "step": 2433 }, { "epoch": 2.1633151872014222, "grad_norm": 0.3331957161426544, "learning_rate": 0.0001, "loss": 0.8446, "step": 2434 }, { "epoch": 2.1642039773358515, "grad_norm": 0.8897263407707214, "learning_rate": 0.0001, "loss": 0.7964, "step": 2435 }, { "epoch": 2.165092767470281, "grad_norm": 0.3500574231147766, "learning_rate": 0.0001, "loss": 0.8289, "step": 2436 }, { "epoch": 2.165981557604711, "grad_norm": 0.45759961009025574, "learning_rate": 0.0001, "loss": 0.8902, "step": 2437 }, { "epoch": 2.16687034773914, "grad_norm": 0.34242385625839233, "learning_rate": 0.0001, "loss": 0.8415, "step": 2438 }, { "epoch": 2.1677591378735697, "grad_norm": 0.4210834205150604, "learning_rate": 0.0001, "loss": 0.9238, "step": 2439 }, { "epoch": 2.1686479280079993, "grad_norm": 0.6454192399978638, "learning_rate": 0.0001, "loss": 0.8155, "step": 2440 }, { "epoch": 2.1695367181424285, "grad_norm": 0.34665006399154663, "learning_rate": 0.0001, "loss": 0.8507, "step": 2441 }, { "epoch": 2.170425508276858, "grad_norm": 0.3612930178642273, "learning_rate": 0.0001, "loss": 0.8899, "step": 2442 }, { "epoch": 2.1713142984112874, "grad_norm": 0.36009481549263, "learning_rate": 0.0001, "loss": 0.8985, "step": 2443 }, { "epoch": 2.172203088545717, "grad_norm": 0.392411470413208, "learning_rate": 0.0001, "loss": 0.947, "step": 2444 }, { "epoch": 2.1730918786801467, "grad_norm": 0.346246600151062, "learning_rate": 0.0001, "loss": 0.8688, "step": 2445 }, { "epoch": 2.173980668814576, "grad_norm": 0.3527586758136749, "learning_rate": 0.0001, "loss": 0.8966, "step": 2446 }, { "epoch": 2.1748694589490056, "grad_norm": 0.3666391968727112, "learning_rate": 0.0001, "loss": 0.945, "step": 2447 }, { "epoch": 2.1757582490834353, "grad_norm": 0.36609384417533875, "learning_rate": 0.0001, "loss": 0.844, "step": 2448 }, { "epoch": 2.1766470392178645, "grad_norm": 0.33666878938674927, "learning_rate": 0.0001, "loss": 0.7822, "step": 2449 }, { "epoch": 2.177535829352294, "grad_norm": 0.41050276160240173, "learning_rate": 0.0001, "loss": 0.9306, "step": 2450 }, { "epoch": 2.178424619486724, "grad_norm": 0.33751270174980164, "learning_rate": 0.0001, "loss": 0.8642, "step": 2451 }, { "epoch": 2.179313409621153, "grad_norm": 0.32639411091804504, "learning_rate": 0.0001, "loss": 0.8057, "step": 2452 }, { "epoch": 2.1802021997555827, "grad_norm": 0.34592291712760925, "learning_rate": 0.0001, "loss": 0.9396, "step": 2453 }, { "epoch": 2.1810909898900124, "grad_norm": 0.32606473565101624, "learning_rate": 0.0001, "loss": 0.8235, "step": 2454 }, { "epoch": 2.1819797800244416, "grad_norm": 0.33683574199676514, "learning_rate": 0.0001, "loss": 0.8805, "step": 2455 }, { "epoch": 2.1828685701588713, "grad_norm": 0.36452654004096985, "learning_rate": 0.0001, "loss": 0.8843, "step": 2456 }, { "epoch": 2.183757360293301, "grad_norm": 0.3444773256778717, "learning_rate": 0.0001, "loss": 0.9527, "step": 2457 }, { "epoch": 2.18464615042773, "grad_norm": 0.32133087515830994, "learning_rate": 0.0001, "loss": 0.8259, "step": 2458 }, { "epoch": 2.18553494056216, "grad_norm": 0.3569784462451935, "learning_rate": 0.0001, "loss": 0.9475, "step": 2459 }, { "epoch": 2.1864237306965895, "grad_norm": 0.3464709222316742, "learning_rate": 0.0001, "loss": 0.8372, "step": 2460 }, { "epoch": 2.1873125208310187, "grad_norm": 0.30103108286857605, "learning_rate": 0.0001, "loss": 0.843, "step": 2461 }, { "epoch": 2.1882013109654483, "grad_norm": 0.31629863381385803, "learning_rate": 0.0001, "loss": 0.8342, "step": 2462 }, { "epoch": 2.189090101099878, "grad_norm": 0.31571292877197266, "learning_rate": 0.0001, "loss": 0.8531, "step": 2463 }, { "epoch": 2.1899788912343072, "grad_norm": 0.36305055022239685, "learning_rate": 0.0001, "loss": 0.8899, "step": 2464 }, { "epoch": 2.190867681368737, "grad_norm": 0.31181925535202026, "learning_rate": 0.0001, "loss": 0.8869, "step": 2465 }, { "epoch": 2.191756471503166, "grad_norm": 0.33491456508636475, "learning_rate": 0.0001, "loss": 0.911, "step": 2466 }, { "epoch": 2.1926452616375958, "grad_norm": 0.3482362926006317, "learning_rate": 0.0001, "loss": 0.8501, "step": 2467 }, { "epoch": 2.1935340517720254, "grad_norm": 0.3489706516265869, "learning_rate": 0.0001, "loss": 0.854, "step": 2468 }, { "epoch": 2.1944228419064546, "grad_norm": 0.33438587188720703, "learning_rate": 0.0001, "loss": 0.8383, "step": 2469 }, { "epoch": 2.1953116320408843, "grad_norm": 0.3124372363090515, "learning_rate": 0.0001, "loss": 0.8203, "step": 2470 }, { "epoch": 2.196200422175314, "grad_norm": 0.349856972694397, "learning_rate": 0.0001, "loss": 0.9404, "step": 2471 }, { "epoch": 2.197089212309743, "grad_norm": 0.3826901316642761, "learning_rate": 0.0001, "loss": 0.9455, "step": 2472 }, { "epoch": 2.197978002444173, "grad_norm": 0.34957224130630493, "learning_rate": 0.0001, "loss": 0.938, "step": 2473 }, { "epoch": 2.1988667925786025, "grad_norm": 0.28016459941864014, "learning_rate": 0.0001, "loss": 0.9001, "step": 2474 }, { "epoch": 2.1997555827130317, "grad_norm": 0.39419126510620117, "learning_rate": 0.0001, "loss": 0.9849, "step": 2475 }, { "epoch": 2.2006443728474614, "grad_norm": 0.3508966267108917, "learning_rate": 0.0001, "loss": 0.8562, "step": 2476 }, { "epoch": 2.201533162981891, "grad_norm": 0.32400640845298767, "learning_rate": 0.0001, "loss": 0.8503, "step": 2477 }, { "epoch": 2.2024219531163203, "grad_norm": 0.31029975414276123, "learning_rate": 0.0001, "loss": 0.9593, "step": 2478 }, { "epoch": 2.20331074325075, "grad_norm": 0.34035494923591614, "learning_rate": 0.0001, "loss": 1.0073, "step": 2479 }, { "epoch": 2.2041995333851796, "grad_norm": 0.34316977858543396, "learning_rate": 0.0001, "loss": 0.8212, "step": 2480 }, { "epoch": 2.205088323519609, "grad_norm": 0.3366788625717163, "learning_rate": 0.0001, "loss": 0.8808, "step": 2481 }, { "epoch": 2.2059771136540385, "grad_norm": 0.36008214950561523, "learning_rate": 0.0001, "loss": 0.8823, "step": 2482 }, { "epoch": 2.206865903788468, "grad_norm": 0.3107230067253113, "learning_rate": 0.0001, "loss": 0.9265, "step": 2483 }, { "epoch": 2.2077546939228974, "grad_norm": 0.328205406665802, "learning_rate": 0.0001, "loss": 0.8796, "step": 2484 }, { "epoch": 2.208643484057327, "grad_norm": 0.33115193247795105, "learning_rate": 0.0001, "loss": 0.9549, "step": 2485 }, { "epoch": 2.2095322741917567, "grad_norm": 0.31935736536979675, "learning_rate": 0.0001, "loss": 0.9173, "step": 2486 }, { "epoch": 2.210421064326186, "grad_norm": 0.3087455630302429, "learning_rate": 0.0001, "loss": 0.8084, "step": 2487 }, { "epoch": 2.2113098544606156, "grad_norm": 0.38220590353012085, "learning_rate": 0.0001, "loss": 1.0343, "step": 2488 }, { "epoch": 2.212198644595045, "grad_norm": 0.3595533072948456, "learning_rate": 0.0001, "loss": 0.9296, "step": 2489 }, { "epoch": 2.2130874347294744, "grad_norm": 0.35236823558807373, "learning_rate": 0.0001, "loss": 0.9115, "step": 2490 }, { "epoch": 2.213976224863904, "grad_norm": 0.2661668360233307, "learning_rate": 0.0001, "loss": 0.829, "step": 2491 }, { "epoch": 2.2148650149983333, "grad_norm": 0.3272991478443146, "learning_rate": 0.0001, "loss": 0.8619, "step": 2492 }, { "epoch": 2.215753805132763, "grad_norm": 0.3380112648010254, "learning_rate": 0.0001, "loss": 0.8567, "step": 2493 }, { "epoch": 2.2166425952671927, "grad_norm": 0.36716368794441223, "learning_rate": 0.0001, "loss": 0.9482, "step": 2494 }, { "epoch": 2.217531385401622, "grad_norm": 0.32290229201316833, "learning_rate": 0.0001, "loss": 0.8206, "step": 2495 }, { "epoch": 2.2184201755360515, "grad_norm": 0.3434554934501648, "learning_rate": 0.0001, "loss": 0.829, "step": 2496 }, { "epoch": 2.219308965670481, "grad_norm": 0.3272440433502197, "learning_rate": 0.0001, "loss": 0.9398, "step": 2497 }, { "epoch": 2.2201977558049104, "grad_norm": 0.35004308819770813, "learning_rate": 0.0001, "loss": 0.9388, "step": 2498 }, { "epoch": 2.22108654593934, "grad_norm": 0.38155871629714966, "learning_rate": 0.0001, "loss": 0.8243, "step": 2499 }, { "epoch": 2.2219753360737697, "grad_norm": 0.36860787868499756, "learning_rate": 0.0001, "loss": 0.9036, "step": 2500 }, { "epoch": 2.2219753360737697, "step": 2500, "total_flos": 1.3367420220730245e+18, "train_loss": 0.9573205997228622, "train_runtime": 29224.5457, "train_samples_per_second": 1.369, "train_steps_per_second": 0.086 } ], "logging_steps": 1.0, "max_steps": 2500, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 1.3367420220730245e+18, "train_batch_size": 2, "trial_name": null, "trial_params": null }