{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.5959253603486163, "eval_steps": 500, "global_step": 2000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0002979626801743082, "grad_norm": 3.1244261264801025, "learning_rate": 0.0, "loss": 1.0791, "step": 1 }, { "epoch": 0.0005959253603486164, "grad_norm": 3.44380259513855, "learning_rate": 2.9761904761904764e-08, "loss": 1.1166, "step": 2 }, { "epoch": 0.0008938880405229246, "grad_norm": 3.0464375019073486, "learning_rate": 5.952380952380953e-08, "loss": 1.0773, "step": 3 }, { "epoch": 0.0011918507206972327, "grad_norm": 3.4371163845062256, "learning_rate": 8.928571428571429e-08, "loss": 1.1197, "step": 4 }, { "epoch": 0.0014898134008715408, "grad_norm": 3.463773727416992, "learning_rate": 1.1904761904761906e-07, "loss": 1.0671, "step": 5 }, { "epoch": 0.001787776081045849, "grad_norm": 3.2587881088256836, "learning_rate": 1.4880952380952382e-07, "loss": 1.044, "step": 6 }, { "epoch": 0.002085738761220157, "grad_norm": 3.1563422679901123, "learning_rate": 1.7857142857142858e-07, "loss": 1.067, "step": 7 }, { "epoch": 0.0023837014413944655, "grad_norm": 3.1642303466796875, "learning_rate": 2.0833333333333333e-07, "loss": 1.1304, "step": 8 }, { "epoch": 0.0026816641215687735, "grad_norm": 3.3693394660949707, "learning_rate": 2.3809523809523811e-07, "loss": 1.1305, "step": 9 }, { "epoch": 0.0029796268017430816, "grad_norm": 3.046869993209839, "learning_rate": 2.6785714285714284e-07, "loss": 1.0495, "step": 10 }, { "epoch": 0.0032775894819173897, "grad_norm": 3.0591135025024414, "learning_rate": 2.9761904761904765e-07, "loss": 1.079, "step": 11 }, { "epoch": 0.003575552162091698, "grad_norm": 3.081220865249634, "learning_rate": 3.273809523809524e-07, "loss": 1.0133, "step": 12 }, { "epoch": 0.0038735148422660063, "grad_norm": 2.966681718826294, "learning_rate": 3.5714285714285716e-07, "loss": 1.0237, "step": 13 }, { "epoch": 0.004171477522440314, "grad_norm": 3.099379777908325, "learning_rate": 3.8690476190476196e-07, "loss": 1.1378, "step": 14 }, { "epoch": 0.004469440202614623, "grad_norm": 3.090646505355835, "learning_rate": 4.1666666666666667e-07, "loss": 1.1272, "step": 15 }, { "epoch": 0.004767402882788931, "grad_norm": 2.8938851356506348, "learning_rate": 4.4642857142857147e-07, "loss": 1.0334, "step": 16 }, { "epoch": 0.005065365562963239, "grad_norm": 3.3159046173095703, "learning_rate": 4.7619047619047623e-07, "loss": 1.1159, "step": 17 }, { "epoch": 0.005363328243137547, "grad_norm": 2.672910451889038, "learning_rate": 5.05952380952381e-07, "loss": 1.044, "step": 18 }, { "epoch": 0.005661290923311855, "grad_norm": 2.416795015335083, "learning_rate": 5.357142857142857e-07, "loss": 1.0055, "step": 19 }, { "epoch": 0.005959253603486163, "grad_norm": 2.769660472869873, "learning_rate": 5.654761904761905e-07, "loss": 1.0578, "step": 20 }, { "epoch": 0.006257216283660471, "grad_norm": 3.1771910190582275, "learning_rate": 5.952380952380953e-07, "loss": 1.0242, "step": 21 }, { "epoch": 0.006555178963834779, "grad_norm": 2.6926465034484863, "learning_rate": 6.25e-07, "loss": 1.0106, "step": 22 }, { "epoch": 0.0068531416440090875, "grad_norm": 2.533829689025879, "learning_rate": 6.547619047619048e-07, "loss": 1.0589, "step": 23 }, { "epoch": 0.007151104324183396, "grad_norm": 2.9328370094299316, "learning_rate": 6.845238095238097e-07, "loss": 1.0738, "step": 24 }, { "epoch": 0.0074490670043577045, "grad_norm": 2.1759138107299805, "learning_rate": 7.142857142857143e-07, "loss": 1.0317, "step": 25 }, { "epoch": 0.0077470296845320126, "grad_norm": 1.8581618070602417, "learning_rate": 7.440476190476191e-07, "loss": 1.0521, "step": 26 }, { "epoch": 0.00804499236470632, "grad_norm": 1.785060167312622, "learning_rate": 7.738095238095239e-07, "loss": 1.0483, "step": 27 }, { "epoch": 0.008342955044880628, "grad_norm": 1.8366209268569946, "learning_rate": 8.035714285714287e-07, "loss": 1.0467, "step": 28 }, { "epoch": 0.008640917725054938, "grad_norm": 1.7094985246658325, "learning_rate": 8.333333333333333e-07, "loss": 1.08, "step": 29 }, { "epoch": 0.008938880405229246, "grad_norm": 1.4586992263793945, "learning_rate": 8.630952380952382e-07, "loss": 0.9621, "step": 30 }, { "epoch": 0.009236843085403554, "grad_norm": 1.654887080192566, "learning_rate": 8.928571428571429e-07, "loss": 1.0169, "step": 31 }, { "epoch": 0.009534805765577862, "grad_norm": 1.4168285131454468, "learning_rate": 9.226190476190477e-07, "loss": 0.9862, "step": 32 }, { "epoch": 0.00983276844575217, "grad_norm": 1.4141634702682495, "learning_rate": 9.523809523809525e-07, "loss": 0.9741, "step": 33 }, { "epoch": 0.010130731125926478, "grad_norm": 1.186896562576294, "learning_rate": 9.821428571428572e-07, "loss": 0.9686, "step": 34 }, { "epoch": 0.010428693806100786, "grad_norm": 1.0775161981582642, "learning_rate": 1.011904761904762e-06, "loss": 0.9944, "step": 35 }, { "epoch": 0.010726656486275094, "grad_norm": 1.0778316259384155, "learning_rate": 1.0416666666666667e-06, "loss": 0.9942, "step": 36 }, { "epoch": 0.011024619166449402, "grad_norm": 1.0128413438796997, "learning_rate": 1.0714285714285714e-06, "loss": 0.9727, "step": 37 }, { "epoch": 0.01132258184662371, "grad_norm": 1.043727159500122, "learning_rate": 1.1011904761904762e-06, "loss": 1.0207, "step": 38 }, { "epoch": 0.011620544526798018, "grad_norm": 1.0442272424697876, "learning_rate": 1.130952380952381e-06, "loss": 1.0429, "step": 39 }, { "epoch": 0.011918507206972326, "grad_norm": 0.8829795718193054, "learning_rate": 1.160714285714286e-06, "loss": 0.938, "step": 40 }, { "epoch": 0.012216469887146635, "grad_norm": 0.8920165300369263, "learning_rate": 1.1904761904761906e-06, "loss": 0.9499, "step": 41 }, { "epoch": 0.012514432567320943, "grad_norm": 0.8303807973861694, "learning_rate": 1.2202380952380952e-06, "loss": 1.0088, "step": 42 }, { "epoch": 0.01281239524749525, "grad_norm": 0.7994042038917542, "learning_rate": 1.25e-06, "loss": 0.9187, "step": 43 }, { "epoch": 0.013110357927669559, "grad_norm": 0.7477161884307861, "learning_rate": 1.2797619047619048e-06, "loss": 0.9482, "step": 44 }, { "epoch": 0.013408320607843867, "grad_norm": 0.75196373462677, "learning_rate": 1.3095238095238096e-06, "loss": 1.0033, "step": 45 }, { "epoch": 0.013706283288018175, "grad_norm": 0.6549458503723145, "learning_rate": 1.3392857142857143e-06, "loss": 0.9267, "step": 46 }, { "epoch": 0.014004245968192485, "grad_norm": 0.6505201458930969, "learning_rate": 1.3690476190476193e-06, "loss": 0.9653, "step": 47 }, { "epoch": 0.014302208648366793, "grad_norm": 0.6912044286727905, "learning_rate": 1.398809523809524e-06, "loss": 0.9638, "step": 48 }, { "epoch": 0.014600171328541101, "grad_norm": 0.7041192650794983, "learning_rate": 1.4285714285714286e-06, "loss": 0.95, "step": 49 }, { "epoch": 0.014898134008715409, "grad_norm": 0.7128690481185913, "learning_rate": 1.4583333333333335e-06, "loss": 0.9374, "step": 50 }, { "epoch": 0.015196096688889717, "grad_norm": 0.7611543536186218, "learning_rate": 1.4880952380952381e-06, "loss": 0.9481, "step": 51 }, { "epoch": 0.015494059369064025, "grad_norm": 0.7015216946601868, "learning_rate": 1.5178571428571428e-06, "loss": 0.9783, "step": 52 }, { "epoch": 0.01579202204923833, "grad_norm": 0.6188274621963501, "learning_rate": 1.5476190476190479e-06, "loss": 0.9205, "step": 53 }, { "epoch": 0.01608998472941264, "grad_norm": 0.5838772058486938, "learning_rate": 1.5773809523809525e-06, "loss": 0.8994, "step": 54 }, { "epoch": 0.016387947409586948, "grad_norm": 0.5457233786582947, "learning_rate": 1.6071428571428574e-06, "loss": 0.9036, "step": 55 }, { "epoch": 0.016685910089761256, "grad_norm": 0.5556498765945435, "learning_rate": 1.636904761904762e-06, "loss": 0.9701, "step": 56 }, { "epoch": 0.016983872769935567, "grad_norm": 0.5316476821899414, "learning_rate": 1.6666666666666667e-06, "loss": 0.9581, "step": 57 }, { "epoch": 0.017281835450109875, "grad_norm": 0.5063617825508118, "learning_rate": 1.6964285714285717e-06, "loss": 0.925, "step": 58 }, { "epoch": 0.017579798130284183, "grad_norm": 0.5297480225563049, "learning_rate": 1.7261904761904764e-06, "loss": 0.9422, "step": 59 }, { "epoch": 0.01787776081045849, "grad_norm": 0.6286386251449585, "learning_rate": 1.755952380952381e-06, "loss": 0.9976, "step": 60 }, { "epoch": 0.0181757234906328, "grad_norm": 0.6573434472084045, "learning_rate": 1.7857142857142859e-06, "loss": 0.9702, "step": 61 }, { "epoch": 0.018473686170807108, "grad_norm": 0.5362038016319275, "learning_rate": 1.8154761904761905e-06, "loss": 0.9038, "step": 62 }, { "epoch": 0.018771648850981416, "grad_norm": 0.5523419976234436, "learning_rate": 1.8452380952380954e-06, "loss": 0.9804, "step": 63 }, { "epoch": 0.019069611531155724, "grad_norm": 0.5619953870773315, "learning_rate": 1.8750000000000003e-06, "loss": 0.8545, "step": 64 }, { "epoch": 0.019367574211330032, "grad_norm": 0.5033801794052124, "learning_rate": 1.904761904761905e-06, "loss": 0.9159, "step": 65 }, { "epoch": 0.01966553689150434, "grad_norm": 0.512747585773468, "learning_rate": 1.9345238095238096e-06, "loss": 0.9277, "step": 66 }, { "epoch": 0.019963499571678648, "grad_norm": 0.4727635681629181, "learning_rate": 1.9642857142857144e-06, "loss": 0.9122, "step": 67 }, { "epoch": 0.020261462251852956, "grad_norm": 0.4830302596092224, "learning_rate": 1.9940476190476193e-06, "loss": 0.898, "step": 68 }, { "epoch": 0.020559424932027264, "grad_norm": 0.487798273563385, "learning_rate": 2.023809523809524e-06, "loss": 0.9453, "step": 69 }, { "epoch": 0.020857387612201572, "grad_norm": 0.4499029219150543, "learning_rate": 2.0535714285714286e-06, "loss": 0.8866, "step": 70 }, { "epoch": 0.02115535029237588, "grad_norm": 0.4831499755382538, "learning_rate": 2.0833333333333334e-06, "loss": 0.9475, "step": 71 }, { "epoch": 0.02145331297255019, "grad_norm": 0.47613057494163513, "learning_rate": 2.1130952380952383e-06, "loss": 0.9084, "step": 72 }, { "epoch": 0.021751275652724496, "grad_norm": 0.4876677393913269, "learning_rate": 2.1428571428571427e-06, "loss": 0.9322, "step": 73 }, { "epoch": 0.022049238332898805, "grad_norm": 0.46994251012802124, "learning_rate": 2.172619047619048e-06, "loss": 0.8968, "step": 74 }, { "epoch": 0.022347201013073113, "grad_norm": 0.470572292804718, "learning_rate": 2.2023809523809525e-06, "loss": 0.934, "step": 75 }, { "epoch": 0.02264516369324742, "grad_norm": 0.4928075969219208, "learning_rate": 2.2321428571428573e-06, "loss": 0.9155, "step": 76 }, { "epoch": 0.02294312637342173, "grad_norm": 0.47621583938598633, "learning_rate": 2.261904761904762e-06, "loss": 0.9037, "step": 77 }, { "epoch": 0.023241089053596037, "grad_norm": 0.47395530343055725, "learning_rate": 2.2916666666666666e-06, "loss": 0.9576, "step": 78 }, { "epoch": 0.023539051733770345, "grad_norm": 0.47508686780929565, "learning_rate": 2.321428571428572e-06, "loss": 0.9182, "step": 79 }, { "epoch": 0.023837014413944653, "grad_norm": 0.45137453079223633, "learning_rate": 2.3511904761904763e-06, "loss": 0.8932, "step": 80 }, { "epoch": 0.02413497709411896, "grad_norm": 0.43199121952056885, "learning_rate": 2.380952380952381e-06, "loss": 0.8907, "step": 81 }, { "epoch": 0.02443293977429327, "grad_norm": 0.4940590262413025, "learning_rate": 2.410714285714286e-06, "loss": 0.9687, "step": 82 }, { "epoch": 0.024730902454467577, "grad_norm": 0.4669753611087799, "learning_rate": 2.4404761904761905e-06, "loss": 0.9435, "step": 83 }, { "epoch": 0.025028865134641885, "grad_norm": 0.45019495487213135, "learning_rate": 2.4702380952380953e-06, "loss": 0.8945, "step": 84 }, { "epoch": 0.025326827814816193, "grad_norm": 0.45232030749320984, "learning_rate": 2.5e-06, "loss": 0.9446, "step": 85 }, { "epoch": 0.0256247904949905, "grad_norm": 0.42479407787323, "learning_rate": 2.529761904761905e-06, "loss": 0.832, "step": 86 }, { "epoch": 0.02592275317516481, "grad_norm": 0.47908833622932434, "learning_rate": 2.5595238095238095e-06, "loss": 0.9384, "step": 87 }, { "epoch": 0.026220715855339118, "grad_norm": 0.49192699790000916, "learning_rate": 2.5892857142857148e-06, "loss": 0.9669, "step": 88 }, { "epoch": 0.026518678535513426, "grad_norm": 0.43540194630622864, "learning_rate": 2.6190476190476192e-06, "loss": 0.9107, "step": 89 }, { "epoch": 0.026816641215687734, "grad_norm": 0.4820593595504761, "learning_rate": 2.648809523809524e-06, "loss": 0.9847, "step": 90 }, { "epoch": 0.027114603895862042, "grad_norm": 0.46626460552215576, "learning_rate": 2.6785714285714285e-06, "loss": 0.9219, "step": 91 }, { "epoch": 0.02741256657603635, "grad_norm": 0.47896644473075867, "learning_rate": 2.7083333333333334e-06, "loss": 0.9815, "step": 92 }, { "epoch": 0.027710529256210658, "grad_norm": 0.46742215752601624, "learning_rate": 2.7380952380952387e-06, "loss": 0.8771, "step": 93 }, { "epoch": 0.02800849193638497, "grad_norm": 0.44341200590133667, "learning_rate": 2.767857142857143e-06, "loss": 0.9431, "step": 94 }, { "epoch": 0.028306454616559278, "grad_norm": 0.463361918926239, "learning_rate": 2.797619047619048e-06, "loss": 0.9401, "step": 95 }, { "epoch": 0.028604417296733586, "grad_norm": 0.453514039516449, "learning_rate": 2.8273809523809524e-06, "loss": 0.8765, "step": 96 }, { "epoch": 0.028902379976907894, "grad_norm": 0.4397393465042114, "learning_rate": 2.8571428571428573e-06, "loss": 0.8968, "step": 97 }, { "epoch": 0.029200342657082202, "grad_norm": 0.4268377423286438, "learning_rate": 2.8869047619047617e-06, "loss": 0.8797, "step": 98 }, { "epoch": 0.02949830533725651, "grad_norm": 0.47780126333236694, "learning_rate": 2.916666666666667e-06, "loss": 0.9006, "step": 99 }, { "epoch": 0.029796268017430818, "grad_norm": 0.4212696850299835, "learning_rate": 2.946428571428572e-06, "loss": 0.9118, "step": 100 }, { "epoch": 0.030094230697605126, "grad_norm": 0.4879840016365051, "learning_rate": 2.9761904761904763e-06, "loss": 0.9467, "step": 101 }, { "epoch": 0.030392193377779434, "grad_norm": 0.4617227613925934, "learning_rate": 3.005952380952381e-06, "loss": 0.8902, "step": 102 }, { "epoch": 0.030690156057953742, "grad_norm": 0.44039371609687805, "learning_rate": 3.0357142857142856e-06, "loss": 0.9228, "step": 103 }, { "epoch": 0.03098811873812805, "grad_norm": 0.4614720344543457, "learning_rate": 3.065476190476191e-06, "loss": 0.971, "step": 104 }, { "epoch": 0.03128608141830236, "grad_norm": 0.4567113220691681, "learning_rate": 3.0952380952380957e-06, "loss": 0.9025, "step": 105 }, { "epoch": 0.03158404409847666, "grad_norm": 0.4666142761707306, "learning_rate": 3.125e-06, "loss": 0.9523, "step": 106 }, { "epoch": 0.031882006778650974, "grad_norm": 0.429106742143631, "learning_rate": 3.154761904761905e-06, "loss": 0.9029, "step": 107 }, { "epoch": 0.03217996945882528, "grad_norm": 0.421153724193573, "learning_rate": 3.1845238095238094e-06, "loss": 0.8562, "step": 108 }, { "epoch": 0.03247793213899959, "grad_norm": 0.428710013628006, "learning_rate": 3.2142857142857147e-06, "loss": 0.8686, "step": 109 }, { "epoch": 0.032775894819173895, "grad_norm": 0.4266265630722046, "learning_rate": 3.2440476190476196e-06, "loss": 0.9008, "step": 110 }, { "epoch": 0.03307385749934821, "grad_norm": 0.4408479928970337, "learning_rate": 3.273809523809524e-06, "loss": 0.8632, "step": 111 }, { "epoch": 0.03337182017952251, "grad_norm": 0.43926018476486206, "learning_rate": 3.303571428571429e-06, "loss": 0.8777, "step": 112 }, { "epoch": 0.03366978285969682, "grad_norm": 0.44635215401649475, "learning_rate": 3.3333333333333333e-06, "loss": 0.9039, "step": 113 }, { "epoch": 0.033967745539871134, "grad_norm": 0.4410278797149658, "learning_rate": 3.3630952380952386e-06, "loss": 0.9095, "step": 114 }, { "epoch": 0.03426570822004544, "grad_norm": 0.4349464476108551, "learning_rate": 3.3928571428571435e-06, "loss": 0.9068, "step": 115 }, { "epoch": 0.03456367090021975, "grad_norm": 0.4464796185493469, "learning_rate": 3.422619047619048e-06, "loss": 0.8984, "step": 116 }, { "epoch": 0.034861633580394055, "grad_norm": 0.440498024225235, "learning_rate": 3.4523809523809528e-06, "loss": 0.8895, "step": 117 }, { "epoch": 0.03515959626056837, "grad_norm": 0.4387143552303314, "learning_rate": 3.482142857142857e-06, "loss": 0.8553, "step": 118 }, { "epoch": 0.03545755894074267, "grad_norm": 0.4271283745765686, "learning_rate": 3.511904761904762e-06, "loss": 0.9052, "step": 119 }, { "epoch": 0.03575552162091698, "grad_norm": 0.45256221294403076, "learning_rate": 3.5416666666666673e-06, "loss": 0.9058, "step": 120 }, { "epoch": 0.03605348430109129, "grad_norm": 0.47053638100624084, "learning_rate": 3.5714285714285718e-06, "loss": 0.9059, "step": 121 }, { "epoch": 0.0363514469812656, "grad_norm": 0.45863860845565796, "learning_rate": 3.6011904761904766e-06, "loss": 0.9446, "step": 122 }, { "epoch": 0.036649409661439904, "grad_norm": 0.41932588815689087, "learning_rate": 3.630952380952381e-06, "loss": 0.8577, "step": 123 }, { "epoch": 0.036947372341614215, "grad_norm": 0.45671793818473816, "learning_rate": 3.660714285714286e-06, "loss": 0.8999, "step": 124 }, { "epoch": 0.03724533502178852, "grad_norm": 0.46893569827079773, "learning_rate": 3.690476190476191e-06, "loss": 0.9324, "step": 125 }, { "epoch": 0.03754329770196283, "grad_norm": 0.4448380470275879, "learning_rate": 3.7202380952380957e-06, "loss": 0.9114, "step": 126 }, { "epoch": 0.037841260382137136, "grad_norm": 0.4434012174606323, "learning_rate": 3.7500000000000005e-06, "loss": 0.8897, "step": 127 }, { "epoch": 0.03813922306231145, "grad_norm": 0.4325112998485565, "learning_rate": 3.779761904761905e-06, "loss": 0.8819, "step": 128 }, { "epoch": 0.03843718574248575, "grad_norm": 0.43405723571777344, "learning_rate": 3.80952380952381e-06, "loss": 0.8718, "step": 129 }, { "epoch": 0.038735148422660064, "grad_norm": 0.4202880859375, "learning_rate": 3.839285714285715e-06, "loss": 0.9105, "step": 130 }, { "epoch": 0.03903311110283437, "grad_norm": 0.4502437710762024, "learning_rate": 3.869047619047619e-06, "loss": 0.8891, "step": 131 }, { "epoch": 0.03933107378300868, "grad_norm": 0.45120131969451904, "learning_rate": 3.898809523809524e-06, "loss": 0.8806, "step": 132 }, { "epoch": 0.039629036463182984, "grad_norm": 0.45555320382118225, "learning_rate": 3.928571428571429e-06, "loss": 0.9092, "step": 133 }, { "epoch": 0.039926999143357296, "grad_norm": 0.47341638803482056, "learning_rate": 3.958333333333333e-06, "loss": 0.89, "step": 134 }, { "epoch": 0.0402249618235316, "grad_norm": 0.44647476077079773, "learning_rate": 3.9880952380952386e-06, "loss": 0.8783, "step": 135 }, { "epoch": 0.04052292450370591, "grad_norm": 0.44241106510162354, "learning_rate": 4.017857142857143e-06, "loss": 0.9057, "step": 136 }, { "epoch": 0.04082088718388022, "grad_norm": 0.44842639565467834, "learning_rate": 4.047619047619048e-06, "loss": 0.8812, "step": 137 }, { "epoch": 0.04111884986405453, "grad_norm": 0.4141370952129364, "learning_rate": 4.077380952380953e-06, "loss": 0.8659, "step": 138 }, { "epoch": 0.04141681254422883, "grad_norm": 0.4200192093849182, "learning_rate": 4.107142857142857e-06, "loss": 0.8981, "step": 139 }, { "epoch": 0.041714775224403144, "grad_norm": 0.42679327726364136, "learning_rate": 4.136904761904762e-06, "loss": 0.8819, "step": 140 }, { "epoch": 0.04201273790457745, "grad_norm": 0.4665552079677582, "learning_rate": 4.166666666666667e-06, "loss": 0.905, "step": 141 }, { "epoch": 0.04231070058475176, "grad_norm": 0.44266387820243835, "learning_rate": 4.196428571428572e-06, "loss": 0.9064, "step": 142 }, { "epoch": 0.042608663264926065, "grad_norm": 0.47972339391708374, "learning_rate": 4.226190476190477e-06, "loss": 0.9095, "step": 143 }, { "epoch": 0.04290662594510038, "grad_norm": 0.45169147849082947, "learning_rate": 4.255952380952381e-06, "loss": 0.9141, "step": 144 }, { "epoch": 0.04320458862527468, "grad_norm": 0.42670682072639465, "learning_rate": 4.2857142857142855e-06, "loss": 0.8293, "step": 145 }, { "epoch": 0.04350255130544899, "grad_norm": 0.44024792313575745, "learning_rate": 4.315476190476191e-06, "loss": 0.8798, "step": 146 }, { "epoch": 0.0438005139856233, "grad_norm": 0.4199204444885254, "learning_rate": 4.345238095238096e-06, "loss": 0.8516, "step": 147 }, { "epoch": 0.04409847666579761, "grad_norm": 0.4429978132247925, "learning_rate": 4.3750000000000005e-06, "loss": 0.909, "step": 148 }, { "epoch": 0.044396439345971914, "grad_norm": 0.44615527987480164, "learning_rate": 4.404761904761905e-06, "loss": 0.8753, "step": 149 }, { "epoch": 0.044694402026146225, "grad_norm": 0.4358450472354889, "learning_rate": 4.434523809523809e-06, "loss": 0.8705, "step": 150 }, { "epoch": 0.04499236470632054, "grad_norm": 0.4348714351654053, "learning_rate": 4.464285714285715e-06, "loss": 0.9207, "step": 151 }, { "epoch": 0.04529032738649484, "grad_norm": 0.4443501830101013, "learning_rate": 4.49404761904762e-06, "loss": 0.8558, "step": 152 }, { "epoch": 0.04558829006666915, "grad_norm": 0.4495562016963959, "learning_rate": 4.523809523809524e-06, "loss": 0.9077, "step": 153 }, { "epoch": 0.04588625274684346, "grad_norm": 0.4733486771583557, "learning_rate": 4.553571428571429e-06, "loss": 0.9129, "step": 154 }, { "epoch": 0.04618421542701777, "grad_norm": 0.44653400778770447, "learning_rate": 4.583333333333333e-06, "loss": 0.9177, "step": 155 }, { "epoch": 0.046482178107192074, "grad_norm": 0.4165500998497009, "learning_rate": 4.6130952380952385e-06, "loss": 0.8254, "step": 156 }, { "epoch": 0.046780140787366385, "grad_norm": 0.43224039673805237, "learning_rate": 4.642857142857144e-06, "loss": 0.8474, "step": 157 }, { "epoch": 0.04707810346754069, "grad_norm": 0.41379058361053467, "learning_rate": 4.672619047619048e-06, "loss": 0.8491, "step": 158 }, { "epoch": 0.047376066147715, "grad_norm": 0.46250954270362854, "learning_rate": 4.702380952380953e-06, "loss": 0.8944, "step": 159 }, { "epoch": 0.047674028827889306, "grad_norm": 0.4260331392288208, "learning_rate": 4.732142857142857e-06, "loss": 0.8812, "step": 160 }, { "epoch": 0.04797199150806362, "grad_norm": 0.7473785281181335, "learning_rate": 4.761904761904762e-06, "loss": 0.8824, "step": 161 }, { "epoch": 0.04826995418823792, "grad_norm": 0.4553568959236145, "learning_rate": 4.791666666666668e-06, "loss": 0.8871, "step": 162 }, { "epoch": 0.048567916868412234, "grad_norm": 0.44239333271980286, "learning_rate": 4.821428571428572e-06, "loss": 0.8757, "step": 163 }, { "epoch": 0.04886587954858654, "grad_norm": 0.5126583576202393, "learning_rate": 4.8511904761904765e-06, "loss": 0.9648, "step": 164 }, { "epoch": 0.04916384222876085, "grad_norm": 0.4421823024749756, "learning_rate": 4.880952380952381e-06, "loss": 0.8868, "step": 165 }, { "epoch": 0.049461804908935154, "grad_norm": 0.4590631425380707, "learning_rate": 4.910714285714286e-06, "loss": 0.8645, "step": 166 }, { "epoch": 0.049759767589109466, "grad_norm": 0.43286260962486267, "learning_rate": 4.940476190476191e-06, "loss": 0.9016, "step": 167 }, { "epoch": 0.05005773026928377, "grad_norm": 0.44442832469940186, "learning_rate": 4.970238095238096e-06, "loss": 0.884, "step": 168 }, { "epoch": 0.05035569294945808, "grad_norm": 0.4276241958141327, "learning_rate": 5e-06, "loss": 0.8364, "step": 169 }, { "epoch": 0.05065365562963239, "grad_norm": 0.41683584451675415, "learning_rate": 5.029761904761905e-06, "loss": 0.8192, "step": 170 }, { "epoch": 0.0509516183098067, "grad_norm": 0.4456952214241028, "learning_rate": 5.05952380952381e-06, "loss": 0.9499, "step": 171 }, { "epoch": 0.051249580989981, "grad_norm": 0.44173622131347656, "learning_rate": 5.0892857142857146e-06, "loss": 0.8524, "step": 172 }, { "epoch": 0.051547543670155314, "grad_norm": 0.4558558762073517, "learning_rate": 5.119047619047619e-06, "loss": 0.9077, "step": 173 }, { "epoch": 0.05184550635032962, "grad_norm": 0.42778849601745605, "learning_rate": 5.1488095238095234e-06, "loss": 0.8791, "step": 174 }, { "epoch": 0.05214346903050393, "grad_norm": 0.44972923398017883, "learning_rate": 5.1785714285714296e-06, "loss": 0.8228, "step": 175 }, { "epoch": 0.052441431710678235, "grad_norm": 0.4365461766719818, "learning_rate": 5.208333333333334e-06, "loss": 0.8482, "step": 176 }, { "epoch": 0.05273939439085255, "grad_norm": 0.44673120975494385, "learning_rate": 5.2380952380952384e-06, "loss": 0.8984, "step": 177 }, { "epoch": 0.05303735707102685, "grad_norm": 0.4192390441894531, "learning_rate": 5.267857142857144e-06, "loss": 0.8376, "step": 178 }, { "epoch": 0.05333531975120116, "grad_norm": 0.4381980895996094, "learning_rate": 5.297619047619048e-06, "loss": 0.8781, "step": 179 }, { "epoch": 0.05363328243137547, "grad_norm": 0.4356313645839691, "learning_rate": 5.327380952380953e-06, "loss": 0.8816, "step": 180 }, { "epoch": 0.05393124511154978, "grad_norm": 0.44520509243011475, "learning_rate": 5.357142857142857e-06, "loss": 0.8691, "step": 181 }, { "epoch": 0.054229207791724084, "grad_norm": 0.44160404801368713, "learning_rate": 5.386904761904762e-06, "loss": 0.8315, "step": 182 }, { "epoch": 0.054527170471898395, "grad_norm": 0.4544958472251892, "learning_rate": 5.416666666666667e-06, "loss": 0.8707, "step": 183 }, { "epoch": 0.0548251331520727, "grad_norm": 0.44830402731895447, "learning_rate": 5.446428571428571e-06, "loss": 0.8906, "step": 184 }, { "epoch": 0.05512309583224701, "grad_norm": 0.4348909258842468, "learning_rate": 5.476190476190477e-06, "loss": 0.8344, "step": 185 }, { "epoch": 0.055421058512421316, "grad_norm": 0.43994957208633423, "learning_rate": 5.505952380952382e-06, "loss": 0.8617, "step": 186 }, { "epoch": 0.05571902119259563, "grad_norm": 0.42906102538108826, "learning_rate": 5.535714285714286e-06, "loss": 0.8581, "step": 187 }, { "epoch": 0.05601698387276994, "grad_norm": 0.43206915259361267, "learning_rate": 5.5654761904761915e-06, "loss": 0.8104, "step": 188 }, { "epoch": 0.056314946552944244, "grad_norm": 0.4183652698993683, "learning_rate": 5.595238095238096e-06, "loss": 0.845, "step": 189 }, { "epoch": 0.056612909233118555, "grad_norm": 0.4581888020038605, "learning_rate": 5.625e-06, "loss": 0.8133, "step": 190 }, { "epoch": 0.05691087191329286, "grad_norm": 0.4345604479312897, "learning_rate": 5.654761904761905e-06, "loss": 0.8912, "step": 191 }, { "epoch": 0.05720883459346717, "grad_norm": 0.4140756130218506, "learning_rate": 5.68452380952381e-06, "loss": 0.8372, "step": 192 }, { "epoch": 0.057506797273641476, "grad_norm": 0.4593667685985565, "learning_rate": 5.7142857142857145e-06, "loss": 0.9017, "step": 193 }, { "epoch": 0.05780475995381579, "grad_norm": 0.45131516456604004, "learning_rate": 5.744047619047619e-06, "loss": 0.8617, "step": 194 }, { "epoch": 0.05810272263399009, "grad_norm": 0.46563607454299927, "learning_rate": 5.773809523809523e-06, "loss": 0.8698, "step": 195 }, { "epoch": 0.058400685314164404, "grad_norm": 0.4295847415924072, "learning_rate": 5.8035714285714295e-06, "loss": 0.8934, "step": 196 }, { "epoch": 0.05869864799433871, "grad_norm": 0.4323939085006714, "learning_rate": 5.833333333333334e-06, "loss": 0.8464, "step": 197 }, { "epoch": 0.05899661067451302, "grad_norm": 0.43190956115722656, "learning_rate": 5.863095238095239e-06, "loss": 0.8173, "step": 198 }, { "epoch": 0.059294573354687324, "grad_norm": 0.4357495605945587, "learning_rate": 5.892857142857144e-06, "loss": 0.8204, "step": 199 }, { "epoch": 0.059592536034861636, "grad_norm": 0.4913332164287567, "learning_rate": 5.922619047619048e-06, "loss": 0.8507, "step": 200 }, { "epoch": 0.05989049871503594, "grad_norm": 0.44041842222213745, "learning_rate": 5.9523809523809525e-06, "loss": 0.8634, "step": 201 }, { "epoch": 0.06018846139521025, "grad_norm": 0.4505321681499481, "learning_rate": 5.982142857142858e-06, "loss": 0.8607, "step": 202 }, { "epoch": 0.06048642407538456, "grad_norm": 0.43640005588531494, "learning_rate": 6.011904761904762e-06, "loss": 0.8371, "step": 203 }, { "epoch": 0.06078438675555887, "grad_norm": 0.4439584016799927, "learning_rate": 6.041666666666667e-06, "loss": 0.8969, "step": 204 }, { "epoch": 0.06108234943573317, "grad_norm": 0.4225601553916931, "learning_rate": 6.071428571428571e-06, "loss": 0.8679, "step": 205 }, { "epoch": 0.061380312115907484, "grad_norm": 0.4633278250694275, "learning_rate": 6.101190476190477e-06, "loss": 0.8758, "step": 206 }, { "epoch": 0.06167827479608179, "grad_norm": 0.45114865899086, "learning_rate": 6.130952380952382e-06, "loss": 0.8672, "step": 207 }, { "epoch": 0.0619762374762561, "grad_norm": 0.43823346495628357, "learning_rate": 6.160714285714286e-06, "loss": 0.8154, "step": 208 }, { "epoch": 0.062274200156430405, "grad_norm": 0.43499240279197693, "learning_rate": 6.1904761904761914e-06, "loss": 0.8114, "step": 209 }, { "epoch": 0.06257216283660472, "grad_norm": 0.4600598216056824, "learning_rate": 6.220238095238096e-06, "loss": 0.8233, "step": 210 }, { "epoch": 0.06287012551677902, "grad_norm": 0.45372816920280457, "learning_rate": 6.25e-06, "loss": 0.892, "step": 211 }, { "epoch": 0.06316808819695333, "grad_norm": 0.4123630225658417, "learning_rate": 6.279761904761906e-06, "loss": 0.8195, "step": 212 }, { "epoch": 0.06346605087712764, "grad_norm": 0.425900936126709, "learning_rate": 6.30952380952381e-06, "loss": 0.8401, "step": 213 }, { "epoch": 0.06376401355730195, "grad_norm": 0.4495628774166107, "learning_rate": 6.3392857142857145e-06, "loss": 0.8707, "step": 214 }, { "epoch": 0.06406197623747625, "grad_norm": 0.45770758390426636, "learning_rate": 6.369047619047619e-06, "loss": 0.884, "step": 215 }, { "epoch": 0.06435993891765056, "grad_norm": 0.43433791399002075, "learning_rate": 6.398809523809524e-06, "loss": 0.8851, "step": 216 }, { "epoch": 0.06465790159782488, "grad_norm": 0.45469143986701965, "learning_rate": 6.4285714285714295e-06, "loss": 0.8428, "step": 217 }, { "epoch": 0.06495586427799918, "grad_norm": 0.4268864095211029, "learning_rate": 6.458333333333334e-06, "loss": 0.8159, "step": 218 }, { "epoch": 0.06525382695817349, "grad_norm": 0.45345690846443176, "learning_rate": 6.488095238095239e-06, "loss": 0.897, "step": 219 }, { "epoch": 0.06555178963834779, "grad_norm": 0.4494103193283081, "learning_rate": 6.517857142857144e-06, "loss": 0.8642, "step": 220 }, { "epoch": 0.06584975231852211, "grad_norm": 0.44220754504203796, "learning_rate": 6.547619047619048e-06, "loss": 0.8561, "step": 221 }, { "epoch": 0.06614771499869641, "grad_norm": 0.4213166832923889, "learning_rate": 6.5773809523809525e-06, "loss": 0.8176, "step": 222 }, { "epoch": 0.06644567767887072, "grad_norm": 0.41464102268218994, "learning_rate": 6.607142857142858e-06, "loss": 0.7852, "step": 223 }, { "epoch": 0.06674364035904502, "grad_norm": 0.4477230906486511, "learning_rate": 6.636904761904762e-06, "loss": 0.8522, "step": 224 }, { "epoch": 0.06704160303921934, "grad_norm": 0.4465818405151367, "learning_rate": 6.666666666666667e-06, "loss": 0.8233, "step": 225 }, { "epoch": 0.06733956571939365, "grad_norm": 0.44877326488494873, "learning_rate": 6.696428571428571e-06, "loss": 0.8299, "step": 226 }, { "epoch": 0.06763752839956795, "grad_norm": 0.4402409791946411, "learning_rate": 6.726190476190477e-06, "loss": 0.8232, "step": 227 }, { "epoch": 0.06793549107974227, "grad_norm": 0.42664897441864014, "learning_rate": 6.755952380952382e-06, "loss": 0.8381, "step": 228 }, { "epoch": 0.06823345375991657, "grad_norm": 0.4469190835952759, "learning_rate": 6.785714285714287e-06, "loss": 0.8382, "step": 229 }, { "epoch": 0.06853141644009088, "grad_norm": 0.4511435329914093, "learning_rate": 6.815476190476191e-06, "loss": 0.8829, "step": 230 }, { "epoch": 0.06882937912026518, "grad_norm": 0.43204864859580994, "learning_rate": 6.845238095238096e-06, "loss": 0.8522, "step": 231 }, { "epoch": 0.0691273418004395, "grad_norm": 0.4274302124977112, "learning_rate": 6.875e-06, "loss": 0.8728, "step": 232 }, { "epoch": 0.0694253044806138, "grad_norm": 0.44673213362693787, "learning_rate": 6.9047619047619055e-06, "loss": 0.8657, "step": 233 }, { "epoch": 0.06972326716078811, "grad_norm": 0.4676547944545746, "learning_rate": 6.93452380952381e-06, "loss": 0.9529, "step": 234 }, { "epoch": 0.07002122984096242, "grad_norm": 0.4478471279144287, "learning_rate": 6.964285714285714e-06, "loss": 0.8192, "step": 235 }, { "epoch": 0.07031919252113673, "grad_norm": 0.4769634008407593, "learning_rate": 6.994047619047619e-06, "loss": 0.8546, "step": 236 }, { "epoch": 0.07061715520131104, "grad_norm": 0.4771283268928528, "learning_rate": 7.023809523809524e-06, "loss": 0.9309, "step": 237 }, { "epoch": 0.07091511788148534, "grad_norm": 0.4384443163871765, "learning_rate": 7.053571428571429e-06, "loss": 0.8646, "step": 238 }, { "epoch": 0.07121308056165965, "grad_norm": 0.44072097539901733, "learning_rate": 7.083333333333335e-06, "loss": 0.8803, "step": 239 }, { "epoch": 0.07151104324183397, "grad_norm": 0.45785021781921387, "learning_rate": 7.113095238095239e-06, "loss": 0.8373, "step": 240 }, { "epoch": 0.07180900592200827, "grad_norm": 0.431471586227417, "learning_rate": 7.1428571428571436e-06, "loss": 0.8369, "step": 241 }, { "epoch": 0.07210696860218258, "grad_norm": 0.41161608695983887, "learning_rate": 7.172619047619048e-06, "loss": 0.8022, "step": 242 }, { "epoch": 0.07240493128235688, "grad_norm": 0.46845054626464844, "learning_rate": 7.202380952380953e-06, "loss": 0.8721, "step": 243 }, { "epoch": 0.0727028939625312, "grad_norm": 0.43750834465026855, "learning_rate": 7.232142857142858e-06, "loss": 0.8554, "step": 244 }, { "epoch": 0.0730008566427055, "grad_norm": 0.4418376088142395, "learning_rate": 7.261904761904762e-06, "loss": 0.7971, "step": 245 }, { "epoch": 0.07329881932287981, "grad_norm": 0.467026948928833, "learning_rate": 7.291666666666667e-06, "loss": 0.8905, "step": 246 }, { "epoch": 0.07359678200305411, "grad_norm": 0.4285699129104614, "learning_rate": 7.321428571428572e-06, "loss": 0.8548, "step": 247 }, { "epoch": 0.07389474468322843, "grad_norm": 0.4519418478012085, "learning_rate": 7.351190476190477e-06, "loss": 0.8449, "step": 248 }, { "epoch": 0.07419270736340274, "grad_norm": 0.4394064247608185, "learning_rate": 7.380952380952382e-06, "loss": 0.8235, "step": 249 }, { "epoch": 0.07449067004357704, "grad_norm": 0.4810203015804291, "learning_rate": 7.410714285714287e-06, "loss": 0.901, "step": 250 }, { "epoch": 0.07478863272375134, "grad_norm": 0.43417516350746155, "learning_rate": 7.440476190476191e-06, "loss": 0.8469, "step": 251 }, { "epoch": 0.07508659540392566, "grad_norm": 0.44011834263801575, "learning_rate": 7.470238095238096e-06, "loss": 0.8403, "step": 252 }, { "epoch": 0.07538455808409997, "grad_norm": 0.4219614863395691, "learning_rate": 7.500000000000001e-06, "loss": 0.7862, "step": 253 }, { "epoch": 0.07568252076427427, "grad_norm": 0.4581497311592102, "learning_rate": 7.5297619047619055e-06, "loss": 0.8715, "step": 254 }, { "epoch": 0.07598048344444858, "grad_norm": 0.42541468143463135, "learning_rate": 7.55952380952381e-06, "loss": 0.8086, "step": 255 }, { "epoch": 0.0762784461246229, "grad_norm": 0.4385511875152588, "learning_rate": 7.589285714285714e-06, "loss": 0.8344, "step": 256 }, { "epoch": 0.0765764088047972, "grad_norm": 0.4721318185329437, "learning_rate": 7.61904761904762e-06, "loss": 0.9129, "step": 257 }, { "epoch": 0.0768743714849715, "grad_norm": 0.4345463514328003, "learning_rate": 7.648809523809523e-06, "loss": 0.8777, "step": 258 }, { "epoch": 0.07717233416514581, "grad_norm": 0.47514408826828003, "learning_rate": 7.67857142857143e-06, "loss": 0.8465, "step": 259 }, { "epoch": 0.07747029684532013, "grad_norm": 0.4220348596572876, "learning_rate": 7.708333333333334e-06, "loss": 0.8236, "step": 260 }, { "epoch": 0.07776825952549443, "grad_norm": 0.4870074689388275, "learning_rate": 7.738095238095238e-06, "loss": 0.8636, "step": 261 }, { "epoch": 0.07806622220566874, "grad_norm": 0.44031956791877747, "learning_rate": 7.767857142857144e-06, "loss": 0.8036, "step": 262 }, { "epoch": 0.07836418488584306, "grad_norm": 0.46993595361709595, "learning_rate": 7.797619047619049e-06, "loss": 0.8744, "step": 263 }, { "epoch": 0.07866214756601736, "grad_norm": 0.4338163137435913, "learning_rate": 7.827380952380953e-06, "loss": 0.8596, "step": 264 }, { "epoch": 0.07896011024619166, "grad_norm": 0.4179791510105133, "learning_rate": 7.857142857142858e-06, "loss": 0.8182, "step": 265 }, { "epoch": 0.07925807292636597, "grad_norm": 0.434553861618042, "learning_rate": 7.886904761904762e-06, "loss": 0.8126, "step": 266 }, { "epoch": 0.07955603560654029, "grad_norm": 0.44742393493652344, "learning_rate": 7.916666666666667e-06, "loss": 0.9022, "step": 267 }, { "epoch": 0.07985399828671459, "grad_norm": 0.4458036720752716, "learning_rate": 7.946428571428571e-06, "loss": 0.8876, "step": 268 }, { "epoch": 0.0801519609668889, "grad_norm": 0.43985581398010254, "learning_rate": 7.976190476190477e-06, "loss": 0.8213, "step": 269 }, { "epoch": 0.0804499236470632, "grad_norm": 0.46211549639701843, "learning_rate": 8.005952380952382e-06, "loss": 0.8251, "step": 270 }, { "epoch": 0.08074788632723752, "grad_norm": 0.44748368859291077, "learning_rate": 8.035714285714286e-06, "loss": 0.8527, "step": 271 }, { "epoch": 0.08104584900741182, "grad_norm": 0.4547558128833771, "learning_rate": 8.065476190476192e-06, "loss": 0.8539, "step": 272 }, { "epoch": 0.08134381168758613, "grad_norm": 0.4298979938030243, "learning_rate": 8.095238095238097e-06, "loss": 0.8308, "step": 273 }, { "epoch": 0.08164177436776043, "grad_norm": 0.4201894700527191, "learning_rate": 8.125000000000001e-06, "loss": 0.7705, "step": 274 }, { "epoch": 0.08193973704793475, "grad_norm": 0.4482576549053192, "learning_rate": 8.154761904761905e-06, "loss": 0.7786, "step": 275 }, { "epoch": 0.08223769972810906, "grad_norm": 0.4452459216117859, "learning_rate": 8.18452380952381e-06, "loss": 0.8253, "step": 276 }, { "epoch": 0.08253566240828336, "grad_norm": 0.46517354249954224, "learning_rate": 8.214285714285714e-06, "loss": 0.8713, "step": 277 }, { "epoch": 0.08283362508845767, "grad_norm": 0.43335846066474915, "learning_rate": 8.244047619047619e-06, "loss": 0.8498, "step": 278 }, { "epoch": 0.08313158776863198, "grad_norm": 0.44680821895599365, "learning_rate": 8.273809523809523e-06, "loss": 0.8586, "step": 279 }, { "epoch": 0.08342955044880629, "grad_norm": 0.4653940796852112, "learning_rate": 8.30357142857143e-06, "loss": 0.8745, "step": 280 }, { "epoch": 0.0837275131289806, "grad_norm": 0.4642374515533447, "learning_rate": 8.333333333333334e-06, "loss": 0.8634, "step": 281 }, { "epoch": 0.0840254758091549, "grad_norm": 0.41972967982292175, "learning_rate": 8.36309523809524e-06, "loss": 0.8017, "step": 282 }, { "epoch": 0.08432343848932922, "grad_norm": 0.4606432020664215, "learning_rate": 8.392857142857144e-06, "loss": 0.8669, "step": 283 }, { "epoch": 0.08462140116950352, "grad_norm": 0.4177170693874359, "learning_rate": 8.422619047619049e-06, "loss": 0.7702, "step": 284 }, { "epoch": 0.08491936384967783, "grad_norm": 0.4431699812412262, "learning_rate": 8.452380952380953e-06, "loss": 0.8088, "step": 285 }, { "epoch": 0.08521732652985213, "grad_norm": 0.42295727133750916, "learning_rate": 8.482142857142858e-06, "loss": 0.7878, "step": 286 }, { "epoch": 0.08551528921002645, "grad_norm": 0.47301793098449707, "learning_rate": 8.511904761904762e-06, "loss": 0.9065, "step": 287 }, { "epoch": 0.08581325189020075, "grad_norm": 0.45198720693588257, "learning_rate": 8.541666666666666e-06, "loss": 0.8367, "step": 288 }, { "epoch": 0.08611121457037506, "grad_norm": 0.4279087483882904, "learning_rate": 8.571428571428571e-06, "loss": 0.8425, "step": 289 }, { "epoch": 0.08640917725054936, "grad_norm": 0.46242713928222656, "learning_rate": 8.601190476190477e-06, "loss": 0.863, "step": 290 }, { "epoch": 0.08670713993072368, "grad_norm": 0.4571658968925476, "learning_rate": 8.630952380952381e-06, "loss": 0.8399, "step": 291 }, { "epoch": 0.08700510261089799, "grad_norm": 0.4324505627155304, "learning_rate": 8.660714285714286e-06, "loss": 0.8386, "step": 292 }, { "epoch": 0.08730306529107229, "grad_norm": 0.4449789822101593, "learning_rate": 8.690476190476192e-06, "loss": 0.8195, "step": 293 }, { "epoch": 0.0876010279712466, "grad_norm": 0.44687506556510925, "learning_rate": 8.720238095238096e-06, "loss": 0.8626, "step": 294 }, { "epoch": 0.08789899065142091, "grad_norm": 0.4689542055130005, "learning_rate": 8.750000000000001e-06, "loss": 0.8919, "step": 295 }, { "epoch": 0.08819695333159522, "grad_norm": 0.45117467641830444, "learning_rate": 8.779761904761905e-06, "loss": 0.8111, "step": 296 }, { "epoch": 0.08849491601176952, "grad_norm": 0.4455942213535309, "learning_rate": 8.80952380952381e-06, "loss": 0.8325, "step": 297 }, { "epoch": 0.08879287869194383, "grad_norm": 0.4475480318069458, "learning_rate": 8.839285714285714e-06, "loss": 0.8813, "step": 298 }, { "epoch": 0.08909084137211815, "grad_norm": 0.4561273455619812, "learning_rate": 8.869047619047619e-06, "loss": 0.8432, "step": 299 }, { "epoch": 0.08938880405229245, "grad_norm": 0.4637228548526764, "learning_rate": 8.898809523809525e-06, "loss": 0.8244, "step": 300 }, { "epoch": 0.08968676673246676, "grad_norm": 0.4699828624725342, "learning_rate": 8.92857142857143e-06, "loss": 0.8929, "step": 301 }, { "epoch": 0.08998472941264107, "grad_norm": 0.41704750061035156, "learning_rate": 8.958333333333334e-06, "loss": 0.8017, "step": 302 }, { "epoch": 0.09028269209281538, "grad_norm": 0.46975040435791016, "learning_rate": 8.98809523809524e-06, "loss": 0.8518, "step": 303 }, { "epoch": 0.09058065477298968, "grad_norm": 0.4363666772842407, "learning_rate": 9.017857142857144e-06, "loss": 0.7656, "step": 304 }, { "epoch": 0.09087861745316399, "grad_norm": 0.4625988006591797, "learning_rate": 9.047619047619049e-06, "loss": 0.9311, "step": 305 }, { "epoch": 0.0911765801333383, "grad_norm": 0.45612746477127075, "learning_rate": 9.077380952380953e-06, "loss": 0.8161, "step": 306 }, { "epoch": 0.09147454281351261, "grad_norm": 0.44814276695251465, "learning_rate": 9.107142857142858e-06, "loss": 0.808, "step": 307 }, { "epoch": 0.09177250549368692, "grad_norm": 0.45561328530311584, "learning_rate": 9.136904761904762e-06, "loss": 0.8347, "step": 308 }, { "epoch": 0.09207046817386122, "grad_norm": 0.4581974148750305, "learning_rate": 9.166666666666666e-06, "loss": 0.8393, "step": 309 }, { "epoch": 0.09236843085403554, "grad_norm": 0.4595828354358673, "learning_rate": 9.196428571428571e-06, "loss": 0.8555, "step": 310 }, { "epoch": 0.09266639353420984, "grad_norm": 0.4495246708393097, "learning_rate": 9.226190476190477e-06, "loss": 0.8571, "step": 311 }, { "epoch": 0.09296435621438415, "grad_norm": 0.430372416973114, "learning_rate": 9.255952380952381e-06, "loss": 0.811, "step": 312 }, { "epoch": 0.09326231889455845, "grad_norm": 0.4460920989513397, "learning_rate": 9.285714285714288e-06, "loss": 0.8714, "step": 313 }, { "epoch": 0.09356028157473277, "grad_norm": 0.4535922706127167, "learning_rate": 9.315476190476192e-06, "loss": 0.8481, "step": 314 }, { "epoch": 0.09385824425490708, "grad_norm": 0.4503169655799866, "learning_rate": 9.345238095238096e-06, "loss": 0.8073, "step": 315 }, { "epoch": 0.09415620693508138, "grad_norm": 0.4372020363807678, "learning_rate": 9.375000000000001e-06, "loss": 0.8383, "step": 316 }, { "epoch": 0.09445416961525568, "grad_norm": 0.4314536154270172, "learning_rate": 9.404761904761905e-06, "loss": 0.8458, "step": 317 }, { "epoch": 0.09475213229543, "grad_norm": 0.46163350343704224, "learning_rate": 9.43452380952381e-06, "loss": 0.7861, "step": 318 }, { "epoch": 0.09505009497560431, "grad_norm": 0.4365135729312897, "learning_rate": 9.464285714285714e-06, "loss": 0.8201, "step": 319 }, { "epoch": 0.09534805765577861, "grad_norm": 0.44700679183006287, "learning_rate": 9.494047619047619e-06, "loss": 0.8428, "step": 320 }, { "epoch": 0.09564602033595292, "grad_norm": 0.4734896719455719, "learning_rate": 9.523809523809525e-06, "loss": 0.843, "step": 321 }, { "epoch": 0.09594398301612724, "grad_norm": 0.4385888874530792, "learning_rate": 9.55357142857143e-06, "loss": 0.8571, "step": 322 }, { "epoch": 0.09624194569630154, "grad_norm": 0.435771644115448, "learning_rate": 9.583333333333335e-06, "loss": 0.8199, "step": 323 }, { "epoch": 0.09653990837647584, "grad_norm": 0.4613741636276245, "learning_rate": 9.61309523809524e-06, "loss": 0.8007, "step": 324 }, { "epoch": 0.09683787105665015, "grad_norm": 0.44617676734924316, "learning_rate": 9.642857142857144e-06, "loss": 0.8375, "step": 325 }, { "epoch": 0.09713583373682447, "grad_norm": 0.47633469104766846, "learning_rate": 9.672619047619049e-06, "loss": 0.8671, "step": 326 }, { "epoch": 0.09743379641699877, "grad_norm": 0.4520438313484192, "learning_rate": 9.702380952380953e-06, "loss": 0.8374, "step": 327 }, { "epoch": 0.09773175909717308, "grad_norm": 0.45184773206710815, "learning_rate": 9.732142857142858e-06, "loss": 0.8357, "step": 328 }, { "epoch": 0.09802972177734738, "grad_norm": 0.44382697343826294, "learning_rate": 9.761904761904762e-06, "loss": 0.8052, "step": 329 }, { "epoch": 0.0983276844575217, "grad_norm": 0.4614221453666687, "learning_rate": 9.791666666666666e-06, "loss": 0.8192, "step": 330 }, { "epoch": 0.098625647137696, "grad_norm": 0.4119229316711426, "learning_rate": 9.821428571428573e-06, "loss": 0.8091, "step": 331 }, { "epoch": 0.09892360981787031, "grad_norm": 0.435222327709198, "learning_rate": 9.851190476190477e-06, "loss": 0.8437, "step": 332 }, { "epoch": 0.09922157249804461, "grad_norm": 0.43353399634361267, "learning_rate": 9.880952380952381e-06, "loss": 0.8329, "step": 333 }, { "epoch": 0.09951953517821893, "grad_norm": 0.462616890668869, "learning_rate": 9.910714285714288e-06, "loss": 0.8742, "step": 334 }, { "epoch": 0.09981749785839324, "grad_norm": 0.4411180019378662, "learning_rate": 9.940476190476192e-06, "loss": 0.7696, "step": 335 }, { "epoch": 0.10011546053856754, "grad_norm": 0.461515873670578, "learning_rate": 9.970238095238096e-06, "loss": 0.8183, "step": 336 }, { "epoch": 0.10041342321874185, "grad_norm": 0.4494532644748688, "learning_rate": 1e-05, "loss": 0.8568, "step": 337 }, { "epoch": 0.10071138589891616, "grad_norm": 0.4770655632019043, "learning_rate": 9.99999729642598e-06, "loss": 0.8497, "step": 338 }, { "epoch": 0.10100934857909047, "grad_norm": 0.464408278465271, "learning_rate": 9.999989185706846e-06, "loss": 0.8591, "step": 339 }, { "epoch": 0.10130731125926477, "grad_norm": 0.45243039727211, "learning_rate": 9.999975667851366e-06, "loss": 0.8139, "step": 340 }, { "epoch": 0.10160527393943909, "grad_norm": 0.4484204947948456, "learning_rate": 9.999956742874162e-06, "loss": 0.8169, "step": 341 }, { "epoch": 0.1019032366196134, "grad_norm": 0.44056135416030884, "learning_rate": 9.999932410795697e-06, "loss": 0.8453, "step": 342 }, { "epoch": 0.1022011992997877, "grad_norm": 0.4422775208950043, "learning_rate": 9.999902671642285e-06, "loss": 0.8621, "step": 343 }, { "epoch": 0.102499161979962, "grad_norm": 0.43026235699653625, "learning_rate": 9.99986752544609e-06, "loss": 0.7996, "step": 344 }, { "epoch": 0.10279712466013632, "grad_norm": 0.4360259473323822, "learning_rate": 9.999826972245115e-06, "loss": 0.8296, "step": 345 }, { "epoch": 0.10309508734031063, "grad_norm": 0.46168243885040283, "learning_rate": 9.99978101208322e-06, "loss": 0.875, "step": 346 }, { "epoch": 0.10339305002048493, "grad_norm": 0.44053173065185547, "learning_rate": 9.999729645010105e-06, "loss": 0.8376, "step": 347 }, { "epoch": 0.10369101270065924, "grad_norm": 0.4546683728694916, "learning_rate": 9.99967287108132e-06, "loss": 0.8286, "step": 348 }, { "epoch": 0.10398897538083356, "grad_norm": 0.43043702840805054, "learning_rate": 9.999610690358263e-06, "loss": 0.8071, "step": 349 }, { "epoch": 0.10428693806100786, "grad_norm": 0.42279356718063354, "learning_rate": 9.999543102908178e-06, "loss": 0.8177, "step": 350 }, { "epoch": 0.10458490074118217, "grad_norm": 0.4474245607852936, "learning_rate": 9.999470108804156e-06, "loss": 0.806, "step": 351 }, { "epoch": 0.10488286342135647, "grad_norm": 0.4298347532749176, "learning_rate": 9.999391708125134e-06, "loss": 0.8269, "step": 352 }, { "epoch": 0.10518082610153079, "grad_norm": 0.45362961292266846, "learning_rate": 9.999307900955898e-06, "loss": 0.8196, "step": 353 }, { "epoch": 0.1054787887817051, "grad_norm": 0.47332948446273804, "learning_rate": 9.999218687387081e-06, "loss": 0.8233, "step": 354 }, { "epoch": 0.1057767514618794, "grad_norm": 0.4430951774120331, "learning_rate": 9.999124067515158e-06, "loss": 0.7823, "step": 355 }, { "epoch": 0.1060747141420537, "grad_norm": 0.45825430750846863, "learning_rate": 9.999024041442455e-06, "loss": 0.806, "step": 356 }, { "epoch": 0.10637267682222802, "grad_norm": 0.44608044624328613, "learning_rate": 9.998918609277144e-06, "loss": 0.7771, "step": 357 }, { "epoch": 0.10667063950240233, "grad_norm": 0.4534968435764313, "learning_rate": 9.998807771133241e-06, "loss": 0.844, "step": 358 }, { "epoch": 0.10696860218257663, "grad_norm": 0.49156180024147034, "learning_rate": 9.998691527130609e-06, "loss": 0.8696, "step": 359 }, { "epoch": 0.10726656486275093, "grad_norm": 0.4962518513202667, "learning_rate": 9.998569877394961e-06, "loss": 0.8413, "step": 360 }, { "epoch": 0.10756452754292525, "grad_norm": 0.4482788145542145, "learning_rate": 9.99844282205785e-06, "loss": 0.7665, "step": 361 }, { "epoch": 0.10786249022309956, "grad_norm": 0.4612559378147125, "learning_rate": 9.998310361256678e-06, "loss": 0.856, "step": 362 }, { "epoch": 0.10816045290327386, "grad_norm": 0.4336289167404175, "learning_rate": 9.998172495134692e-06, "loss": 0.7627, "step": 363 }, { "epoch": 0.10845841558344817, "grad_norm": 0.46035903692245483, "learning_rate": 9.998029223840986e-06, "loss": 0.7778, "step": 364 }, { "epoch": 0.10875637826362249, "grad_norm": 0.4648987948894501, "learning_rate": 9.997880547530494e-06, "loss": 0.88, "step": 365 }, { "epoch": 0.10905434094379679, "grad_norm": 0.4816872775554657, "learning_rate": 9.997726466364003e-06, "loss": 0.8894, "step": 366 }, { "epoch": 0.1093523036239711, "grad_norm": 0.445965051651001, "learning_rate": 9.99756698050814e-06, "loss": 0.7752, "step": 367 }, { "epoch": 0.1096502663041454, "grad_norm": 0.4469074308872223, "learning_rate": 9.997402090135377e-06, "loss": 0.7908, "step": 368 }, { "epoch": 0.10994822898431972, "grad_norm": 0.45743241906166077, "learning_rate": 9.99723179542403e-06, "loss": 0.8262, "step": 369 }, { "epoch": 0.11024619166449402, "grad_norm": 0.4579828679561615, "learning_rate": 9.997056096558264e-06, "loss": 0.8464, "step": 370 }, { "epoch": 0.11054415434466833, "grad_norm": 0.42541900277137756, "learning_rate": 9.996874993728083e-06, "loss": 0.773, "step": 371 }, { "epoch": 0.11084211702484263, "grad_norm": 0.47497400641441345, "learning_rate": 9.996688487129335e-06, "loss": 0.8227, "step": 372 }, { "epoch": 0.11114007970501695, "grad_norm": 0.4370492994785309, "learning_rate": 9.996496576963716e-06, "loss": 0.8387, "step": 373 }, { "epoch": 0.11143804238519125, "grad_norm": 0.43119439482688904, "learning_rate": 9.996299263438765e-06, "loss": 0.7698, "step": 374 }, { "epoch": 0.11173600506536556, "grad_norm": 0.43310031294822693, "learning_rate": 9.99609654676786e-06, "loss": 0.7699, "step": 375 }, { "epoch": 0.11203396774553988, "grad_norm": 0.47822192311286926, "learning_rate": 9.995888427170226e-06, "loss": 0.8429, "step": 376 }, { "epoch": 0.11233193042571418, "grad_norm": 0.45406052470207214, "learning_rate": 9.995674904870929e-06, "loss": 0.8128, "step": 377 }, { "epoch": 0.11262989310588849, "grad_norm": 0.4138409495353699, "learning_rate": 9.99545598010088e-06, "loss": 0.7859, "step": 378 }, { "epoch": 0.11292785578606279, "grad_norm": 0.43571943044662476, "learning_rate": 9.995231653096826e-06, "loss": 0.7812, "step": 379 }, { "epoch": 0.11322581846623711, "grad_norm": 0.44058656692504883, "learning_rate": 9.995001924101368e-06, "loss": 0.7777, "step": 380 }, { "epoch": 0.11352378114641141, "grad_norm": 0.4526267349720001, "learning_rate": 9.994766793362936e-06, "loss": 0.8221, "step": 381 }, { "epoch": 0.11382174382658572, "grad_norm": 0.4531581997871399, "learning_rate": 9.99452626113581e-06, "loss": 0.8714, "step": 382 }, { "epoch": 0.11411970650676002, "grad_norm": 0.4537730813026428, "learning_rate": 9.994280327680109e-06, "loss": 0.8046, "step": 383 }, { "epoch": 0.11441766918693434, "grad_norm": 0.4391308128833771, "learning_rate": 9.994028993261789e-06, "loss": 0.8068, "step": 384 }, { "epoch": 0.11471563186710865, "grad_norm": 0.47409799695014954, "learning_rate": 9.993772258152656e-06, "loss": 0.818, "step": 385 }, { "epoch": 0.11501359454728295, "grad_norm": 0.4408448338508606, "learning_rate": 9.993510122630346e-06, "loss": 0.8178, "step": 386 }, { "epoch": 0.11531155722745726, "grad_norm": 0.44910043478012085, "learning_rate": 9.993242586978345e-06, "loss": 0.8454, "step": 387 }, { "epoch": 0.11560951990763157, "grad_norm": 0.4543640911579132, "learning_rate": 9.992969651485968e-06, "loss": 0.8315, "step": 388 }, { "epoch": 0.11590748258780588, "grad_norm": 0.41494789719581604, "learning_rate": 9.992691316448382e-06, "loss": 0.7609, "step": 389 }, { "epoch": 0.11620544526798018, "grad_norm": 0.48728737235069275, "learning_rate": 9.992407582166582e-06, "loss": 0.8313, "step": 390 }, { "epoch": 0.11650340794815449, "grad_norm": 0.4488702416419983, "learning_rate": 9.992118448947408e-06, "loss": 0.8095, "step": 391 }, { "epoch": 0.11680137062832881, "grad_norm": 0.42593449354171753, "learning_rate": 9.991823917103539e-06, "loss": 0.83, "step": 392 }, { "epoch": 0.11709933330850311, "grad_norm": 0.45822906494140625, "learning_rate": 9.991523986953487e-06, "loss": 0.815, "step": 393 }, { "epoch": 0.11739729598867742, "grad_norm": 0.42860767245292664, "learning_rate": 9.991218658821609e-06, "loss": 0.7676, "step": 394 }, { "epoch": 0.11769525866885172, "grad_norm": 0.4358139634132385, "learning_rate": 9.990907933038091e-06, "loss": 0.7755, "step": 395 }, { "epoch": 0.11799322134902604, "grad_norm": 0.4387064278125763, "learning_rate": 9.990591809938968e-06, "loss": 0.8135, "step": 396 }, { "epoch": 0.11829118402920034, "grad_norm": 0.442857563495636, "learning_rate": 9.990270289866099e-06, "loss": 0.8091, "step": 397 }, { "epoch": 0.11858914670937465, "grad_norm": 0.419209748506546, "learning_rate": 9.989943373167189e-06, "loss": 0.8249, "step": 398 }, { "epoch": 0.11888710938954895, "grad_norm": 0.4570047855377197, "learning_rate": 9.98961106019577e-06, "loss": 0.8933, "step": 399 }, { "epoch": 0.11918507206972327, "grad_norm": 0.4366416037082672, "learning_rate": 9.989273351311222e-06, "loss": 0.8162, "step": 400 }, { "epoch": 0.11948303474989758, "grad_norm": 0.4550735354423523, "learning_rate": 9.98893024687875e-06, "loss": 0.8217, "step": 401 }, { "epoch": 0.11978099743007188, "grad_norm": 0.43284013867378235, "learning_rate": 9.988581747269397e-06, "loss": 0.8131, "step": 402 }, { "epoch": 0.12007896011024619, "grad_norm": 0.4315564036369324, "learning_rate": 9.988227852860042e-06, "loss": 0.8065, "step": 403 }, { "epoch": 0.1203769227904205, "grad_norm": 0.4316180646419525, "learning_rate": 9.987868564033396e-06, "loss": 0.7969, "step": 404 }, { "epoch": 0.12067488547059481, "grad_norm": 0.45488640666007996, "learning_rate": 9.987503881178004e-06, "loss": 0.8231, "step": 405 }, { "epoch": 0.12097284815076911, "grad_norm": 0.4185773432254791, "learning_rate": 9.987133804688247e-06, "loss": 0.7916, "step": 406 }, { "epoch": 0.12127081083094342, "grad_norm": 0.4486062824726105, "learning_rate": 9.986758334964333e-06, "loss": 0.8391, "step": 407 }, { "epoch": 0.12156877351111774, "grad_norm": 0.4506734311580658, "learning_rate": 9.986377472412311e-06, "loss": 0.8327, "step": 408 }, { "epoch": 0.12186673619129204, "grad_norm": 0.43692904710769653, "learning_rate": 9.985991217444053e-06, "loss": 0.7939, "step": 409 }, { "epoch": 0.12216469887146635, "grad_norm": 0.4649176001548767, "learning_rate": 9.98559957047727e-06, "loss": 0.8608, "step": 410 }, { "epoch": 0.12246266155164065, "grad_norm": 0.43806663155555725, "learning_rate": 9.985202531935496e-06, "loss": 0.7629, "step": 411 }, { "epoch": 0.12276062423181497, "grad_norm": 0.4385770559310913, "learning_rate": 9.984800102248105e-06, "loss": 0.8069, "step": 412 }, { "epoch": 0.12305858691198927, "grad_norm": 0.41685423254966736, "learning_rate": 9.984392281850293e-06, "loss": 0.8103, "step": 413 }, { "epoch": 0.12335654959216358, "grad_norm": 0.43887874484062195, "learning_rate": 9.98397907118309e-06, "loss": 0.7902, "step": 414 }, { "epoch": 0.1236545122723379, "grad_norm": 0.4662448465824127, "learning_rate": 9.983560470693354e-06, "loss": 0.8686, "step": 415 }, { "epoch": 0.1239524749525122, "grad_norm": 0.4399791359901428, "learning_rate": 9.983136480833773e-06, "loss": 0.8166, "step": 416 }, { "epoch": 0.1242504376326865, "grad_norm": 0.41617995500564575, "learning_rate": 9.982707102062863e-06, "loss": 0.8155, "step": 417 }, { "epoch": 0.12454840031286081, "grad_norm": 0.4192359447479248, "learning_rate": 9.982272334844964e-06, "loss": 0.795, "step": 418 }, { "epoch": 0.12484636299303513, "grad_norm": 0.4392685294151306, "learning_rate": 9.981832179650251e-06, "loss": 0.8034, "step": 419 }, { "epoch": 0.12514432567320943, "grad_norm": 0.4487253427505493, "learning_rate": 9.981386636954713e-06, "loss": 0.837, "step": 420 }, { "epoch": 0.12544228835338375, "grad_norm": 0.4271675646305084, "learning_rate": 9.98093570724018e-06, "loss": 0.7799, "step": 421 }, { "epoch": 0.12574025103355804, "grad_norm": 0.44783201813697815, "learning_rate": 9.9804793909943e-06, "loss": 0.8396, "step": 422 }, { "epoch": 0.12603821371373236, "grad_norm": 0.4500182271003723, "learning_rate": 9.980017688710542e-06, "loss": 0.8214, "step": 423 }, { "epoch": 0.12633617639390665, "grad_norm": 0.45347052812576294, "learning_rate": 9.97955060088821e-06, "loss": 0.8384, "step": 424 }, { "epoch": 0.12663413907408097, "grad_norm": 0.4413428008556366, "learning_rate": 9.979078128032424e-06, "loss": 0.8109, "step": 425 }, { "epoch": 0.1269321017542553, "grad_norm": 0.4482523798942566, "learning_rate": 9.978600270654131e-06, "loss": 0.8163, "step": 426 }, { "epoch": 0.12723006443442958, "grad_norm": 0.4338665306568146, "learning_rate": 9.978117029270098e-06, "loss": 0.8161, "step": 427 }, { "epoch": 0.1275280271146039, "grad_norm": 0.43441489338874817, "learning_rate": 9.977628404402918e-06, "loss": 0.8336, "step": 428 }, { "epoch": 0.12782598979477822, "grad_norm": 0.4508003890514374, "learning_rate": 9.977134396581008e-06, "loss": 0.7987, "step": 429 }, { "epoch": 0.1281239524749525, "grad_norm": 0.42367222905158997, "learning_rate": 9.976635006338598e-06, "loss": 0.7722, "step": 430 }, { "epoch": 0.12842191515512683, "grad_norm": 0.42868027091026306, "learning_rate": 9.976130234215743e-06, "loss": 0.7623, "step": 431 }, { "epoch": 0.12871987783530112, "grad_norm": 0.4461642801761627, "learning_rate": 9.975620080758321e-06, "loss": 0.815, "step": 432 }, { "epoch": 0.12901784051547543, "grad_norm": 0.4450134038925171, "learning_rate": 9.975104546518026e-06, "loss": 0.7492, "step": 433 }, { "epoch": 0.12931580319564975, "grad_norm": 0.44301509857177734, "learning_rate": 9.974583632052373e-06, "loss": 0.8026, "step": 434 }, { "epoch": 0.12961376587582404, "grad_norm": 0.4301476776599884, "learning_rate": 9.974057337924695e-06, "loss": 0.8118, "step": 435 }, { "epoch": 0.12991172855599836, "grad_norm": 0.46609169244766235, "learning_rate": 9.973525664704137e-06, "loss": 0.8148, "step": 436 }, { "epoch": 0.13020969123617268, "grad_norm": 0.4507199823856354, "learning_rate": 9.972988612965673e-06, "loss": 0.8282, "step": 437 }, { "epoch": 0.13050765391634697, "grad_norm": 0.4439927935600281, "learning_rate": 9.972446183290082e-06, "loss": 0.8319, "step": 438 }, { "epoch": 0.1308056165965213, "grad_norm": 0.4421440064907074, "learning_rate": 9.971898376263966e-06, "loss": 0.7802, "step": 439 }, { "epoch": 0.13110357927669558, "grad_norm": 0.43711912631988525, "learning_rate": 9.971345192479738e-06, "loss": 0.7945, "step": 440 }, { "epoch": 0.1314015419568699, "grad_norm": 0.43256676197052, "learning_rate": 9.970786632535627e-06, "loss": 0.8015, "step": 441 }, { "epoch": 0.13169950463704422, "grad_norm": 0.44375714659690857, "learning_rate": 9.970222697035679e-06, "loss": 0.8725, "step": 442 }, { "epoch": 0.1319974673172185, "grad_norm": 0.4452434182167053, "learning_rate": 9.969653386589749e-06, "loss": 0.813, "step": 443 }, { "epoch": 0.13229542999739283, "grad_norm": 0.4254850447177887, "learning_rate": 9.969078701813505e-06, "loss": 0.7976, "step": 444 }, { "epoch": 0.13259339267756715, "grad_norm": 0.434270441532135, "learning_rate": 9.968498643328427e-06, "loss": 0.881, "step": 445 }, { "epoch": 0.13289135535774144, "grad_norm": 0.4523266851902008, "learning_rate": 9.967913211761813e-06, "loss": 0.8292, "step": 446 }, { "epoch": 0.13318931803791575, "grad_norm": 0.48667654395103455, "learning_rate": 9.967322407746762e-06, "loss": 0.8422, "step": 447 }, { "epoch": 0.13348728071809005, "grad_norm": 0.42540228366851807, "learning_rate": 9.966726231922188e-06, "loss": 0.7824, "step": 448 }, { "epoch": 0.13378524339826436, "grad_norm": 0.4267347455024719, "learning_rate": 9.966124684932811e-06, "loss": 0.8036, "step": 449 }, { "epoch": 0.13408320607843868, "grad_norm": 0.41667813062667847, "learning_rate": 9.965517767429165e-06, "loss": 0.7872, "step": 450 }, { "epoch": 0.13438116875861297, "grad_norm": 0.46928688883781433, "learning_rate": 9.964905480067585e-06, "loss": 0.8074, "step": 451 }, { "epoch": 0.1346791314387873, "grad_norm": 0.46049174666404724, "learning_rate": 9.964287823510222e-06, "loss": 0.8378, "step": 452 }, { "epoch": 0.1349770941189616, "grad_norm": 0.43071913719177246, "learning_rate": 9.96366479842502e-06, "loss": 0.8032, "step": 453 }, { "epoch": 0.1352750567991359, "grad_norm": 0.43020790815353394, "learning_rate": 9.963036405485747e-06, "loss": 0.8458, "step": 454 }, { "epoch": 0.13557301947931022, "grad_norm": 0.42616328597068787, "learning_rate": 9.962402645371957e-06, "loss": 0.8036, "step": 455 }, { "epoch": 0.13587098215948454, "grad_norm": 0.4324115514755249, "learning_rate": 9.96176351876902e-06, "loss": 0.7969, "step": 456 }, { "epoch": 0.13616894483965883, "grad_norm": 0.4453105032444, "learning_rate": 9.961119026368107e-06, "loss": 0.8329, "step": 457 }, { "epoch": 0.13646690751983315, "grad_norm": 0.46170443296432495, "learning_rate": 9.960469168866192e-06, "loss": 0.8129, "step": 458 }, { "epoch": 0.13676487020000744, "grad_norm": 0.448223739862442, "learning_rate": 9.959813946966048e-06, "loss": 0.8331, "step": 459 }, { "epoch": 0.13706283288018176, "grad_norm": 0.421773761510849, "learning_rate": 9.959153361376254e-06, "loss": 0.7798, "step": 460 }, { "epoch": 0.13736079556035607, "grad_norm": 0.4486232399940491, "learning_rate": 9.958487412811184e-06, "loss": 0.8891, "step": 461 }, { "epoch": 0.13765875824053037, "grad_norm": 0.42223700881004333, "learning_rate": 9.957816101991015e-06, "loss": 0.7495, "step": 462 }, { "epoch": 0.13795672092070468, "grad_norm": 0.4491265118122101, "learning_rate": 9.957139429641723e-06, "loss": 0.8457, "step": 463 }, { "epoch": 0.138254683600879, "grad_norm": 0.44002753496170044, "learning_rate": 9.956457396495083e-06, "loss": 0.8126, "step": 464 }, { "epoch": 0.1385526462810533, "grad_norm": 0.4285680651664734, "learning_rate": 9.955770003288663e-06, "loss": 0.7692, "step": 465 }, { "epoch": 0.1388506089612276, "grad_norm": 0.4280463755130768, "learning_rate": 9.955077250765833e-06, "loss": 0.8255, "step": 466 }, { "epoch": 0.1391485716414019, "grad_norm": 0.4269079864025116, "learning_rate": 9.954379139675753e-06, "loss": 0.782, "step": 467 }, { "epoch": 0.13944653432157622, "grad_norm": 0.4445587992668152, "learning_rate": 9.953675670773384e-06, "loss": 0.8237, "step": 468 }, { "epoch": 0.13974449700175054, "grad_norm": 0.43859168887138367, "learning_rate": 9.952966844819479e-06, "loss": 0.8206, "step": 469 }, { "epoch": 0.14004245968192483, "grad_norm": 0.42063581943511963, "learning_rate": 9.95225266258058e-06, "loss": 0.7909, "step": 470 }, { "epoch": 0.14034042236209915, "grad_norm": 0.4376140236854553, "learning_rate": 9.951533124829024e-06, "loss": 0.7956, "step": 471 }, { "epoch": 0.14063838504227347, "grad_norm": 0.4519414007663727, "learning_rate": 9.950808232342945e-06, "loss": 0.8011, "step": 472 }, { "epoch": 0.14093634772244776, "grad_norm": 0.47149237990379333, "learning_rate": 9.950077985906259e-06, "loss": 0.8267, "step": 473 }, { "epoch": 0.14123431040262208, "grad_norm": 0.418052613735199, "learning_rate": 9.949342386308679e-06, "loss": 0.786, "step": 474 }, { "epoch": 0.14153227308279637, "grad_norm": 0.4265899360179901, "learning_rate": 9.948601434345704e-06, "loss": 0.8061, "step": 475 }, { "epoch": 0.14183023576297069, "grad_norm": 0.4369049668312073, "learning_rate": 9.947855130818618e-06, "loss": 0.7718, "step": 476 }, { "epoch": 0.142128198443145, "grad_norm": 0.4536924958229065, "learning_rate": 9.9471034765345e-06, "loss": 0.8304, "step": 477 }, { "epoch": 0.1424261611233193, "grad_norm": 0.44105738401412964, "learning_rate": 9.94634647230621e-06, "loss": 0.8509, "step": 478 }, { "epoch": 0.1427241238034936, "grad_norm": 0.44205665588378906, "learning_rate": 9.945584118952392e-06, "loss": 0.8142, "step": 479 }, { "epoch": 0.14302208648366793, "grad_norm": 0.4375988841056824, "learning_rate": 9.944816417297482e-06, "loss": 0.7874, "step": 480 }, { "epoch": 0.14332004916384222, "grad_norm": 0.44625169038772583, "learning_rate": 9.944043368171692e-06, "loss": 0.7945, "step": 481 }, { "epoch": 0.14361801184401654, "grad_norm": 0.43976661562919617, "learning_rate": 9.94326497241102e-06, "loss": 0.8225, "step": 482 }, { "epoch": 0.14391597452419083, "grad_norm": 0.4386625587940216, "learning_rate": 9.942481230857249e-06, "loss": 0.7961, "step": 483 }, { "epoch": 0.14421393720436515, "grad_norm": 0.4405531883239746, "learning_rate": 9.941692144357938e-06, "loss": 0.7938, "step": 484 }, { "epoch": 0.14451189988453947, "grad_norm": 0.44884464144706726, "learning_rate": 9.940897713766428e-06, "loss": 0.8093, "step": 485 }, { "epoch": 0.14480986256471376, "grad_norm": 0.4623242914676666, "learning_rate": 9.940097939941843e-06, "loss": 0.8567, "step": 486 }, { "epoch": 0.14510782524488808, "grad_norm": 0.47886964678764343, "learning_rate": 9.93929282374908e-06, "loss": 0.832, "step": 487 }, { "epoch": 0.1454057879250624, "grad_norm": 0.43132802844047546, "learning_rate": 9.938482366058814e-06, "loss": 0.7834, "step": 488 }, { "epoch": 0.1457037506052367, "grad_norm": 0.46928003430366516, "learning_rate": 9.9376665677475e-06, "loss": 0.8418, "step": 489 }, { "epoch": 0.146001713285411, "grad_norm": 0.4581755995750427, "learning_rate": 9.936845429697369e-06, "loss": 0.8114, "step": 490 }, { "epoch": 0.14629967596558532, "grad_norm": 0.4166138172149658, "learning_rate": 9.936018952796417e-06, "loss": 0.7967, "step": 491 }, { "epoch": 0.14659763864575961, "grad_norm": 0.43533584475517273, "learning_rate": 9.935187137938427e-06, "loss": 0.8353, "step": 492 }, { "epoch": 0.14689560132593393, "grad_norm": 0.44667741656303406, "learning_rate": 9.934349986022946e-06, "loss": 0.8317, "step": 493 }, { "epoch": 0.14719356400610822, "grad_norm": 0.44888830184936523, "learning_rate": 9.933507497955292e-06, "loss": 0.8174, "step": 494 }, { "epoch": 0.14749152668628254, "grad_norm": 0.46241915225982666, "learning_rate": 9.93265967464656e-06, "loss": 0.8514, "step": 495 }, { "epoch": 0.14778948936645686, "grad_norm": 0.4498129189014435, "learning_rate": 9.931806517013612e-06, "loss": 0.7824, "step": 496 }, { "epoch": 0.14808745204663115, "grad_norm": 0.43418675661087036, "learning_rate": 9.930948025979076e-06, "loss": 0.8287, "step": 497 }, { "epoch": 0.14838541472680547, "grad_norm": 0.4767434298992157, "learning_rate": 9.93008420247135e-06, "loss": 0.7997, "step": 498 }, { "epoch": 0.1486833774069798, "grad_norm": 0.4500294029712677, "learning_rate": 9.929215047424598e-06, "loss": 0.8001, "step": 499 }, { "epoch": 0.14898134008715408, "grad_norm": 0.47594842314720154, "learning_rate": 9.928340561778748e-06, "loss": 0.8219, "step": 500 }, { "epoch": 0.1492793027673284, "grad_norm": 0.43770483136177063, "learning_rate": 9.927460746479501e-06, "loss": 0.7888, "step": 501 }, { "epoch": 0.1495772654475027, "grad_norm": 0.42728111147880554, "learning_rate": 9.926575602478309e-06, "loss": 0.8404, "step": 502 }, { "epoch": 0.149875228127677, "grad_norm": 0.4243437647819519, "learning_rate": 9.925685130732396e-06, "loss": 0.7973, "step": 503 }, { "epoch": 0.15017319080785133, "grad_norm": 0.42418187856674194, "learning_rate": 9.924789332204743e-06, "loss": 0.7519, "step": 504 }, { "epoch": 0.15047115348802562, "grad_norm": 0.45427075028419495, "learning_rate": 9.923888207864093e-06, "loss": 0.8269, "step": 505 }, { "epoch": 0.15076911616819993, "grad_norm": 0.4241939187049866, "learning_rate": 9.92298175868495e-06, "loss": 0.7208, "step": 506 }, { "epoch": 0.15106707884837425, "grad_norm": 0.4593588411808014, "learning_rate": 9.922069985647576e-06, "loss": 0.7882, "step": 507 }, { "epoch": 0.15136504152854854, "grad_norm": 0.449123352766037, "learning_rate": 9.921152889737985e-06, "loss": 0.8168, "step": 508 }, { "epoch": 0.15166300420872286, "grad_norm": 0.4293762743473053, "learning_rate": 9.920230471947957e-06, "loss": 0.7966, "step": 509 }, { "epoch": 0.15196096688889715, "grad_norm": 0.43696144223213196, "learning_rate": 9.919302733275015e-06, "loss": 0.8051, "step": 510 }, { "epoch": 0.15225892956907147, "grad_norm": 0.46048763394355774, "learning_rate": 9.91836967472245e-06, "loss": 0.8789, "step": 511 }, { "epoch": 0.1525568922492458, "grad_norm": 0.45033976435661316, "learning_rate": 9.917431297299297e-06, "loss": 0.8209, "step": 512 }, { "epoch": 0.15285485492942008, "grad_norm": 0.45003750920295715, "learning_rate": 9.916487602020344e-06, "loss": 0.7816, "step": 513 }, { "epoch": 0.1531528176095944, "grad_norm": 0.44182878732681274, "learning_rate": 9.91553858990613e-06, "loss": 0.8211, "step": 514 }, { "epoch": 0.15345078028976872, "grad_norm": 0.44483861327171326, "learning_rate": 9.91458426198295e-06, "loss": 0.7971, "step": 515 }, { "epoch": 0.153748742969943, "grad_norm": 0.43113207817077637, "learning_rate": 9.913624619282835e-06, "loss": 0.7627, "step": 516 }, { "epoch": 0.15404670565011733, "grad_norm": 0.4577217698097229, "learning_rate": 9.912659662843578e-06, "loss": 0.8413, "step": 517 }, { "epoch": 0.15434466833029162, "grad_norm": 0.4342012107372284, "learning_rate": 9.911689393708707e-06, "loss": 0.7875, "step": 518 }, { "epoch": 0.15464263101046594, "grad_norm": 0.4530002176761627, "learning_rate": 9.9107138129275e-06, "loss": 0.8166, "step": 519 }, { "epoch": 0.15494059369064025, "grad_norm": 0.4360707998275757, "learning_rate": 9.909732921554982e-06, "loss": 0.8236, "step": 520 }, { "epoch": 0.15523855637081455, "grad_norm": 0.44726189970970154, "learning_rate": 9.908746720651914e-06, "loss": 0.7925, "step": 521 }, { "epoch": 0.15553651905098886, "grad_norm": 0.43377047777175903, "learning_rate": 9.907755211284807e-06, "loss": 0.8127, "step": 522 }, { "epoch": 0.15583448173116318, "grad_norm": 0.41775742173194885, "learning_rate": 9.906758394525905e-06, "loss": 0.7722, "step": 523 }, { "epoch": 0.15613244441133747, "grad_norm": 0.4309692978858948, "learning_rate": 9.905756271453198e-06, "loss": 0.799, "step": 524 }, { "epoch": 0.1564304070915118, "grad_norm": 0.4536021053791046, "learning_rate": 9.904748843150407e-06, "loss": 0.7884, "step": 525 }, { "epoch": 0.1567283697716861, "grad_norm": 0.4355040490627289, "learning_rate": 9.903736110707001e-06, "loss": 0.7667, "step": 526 }, { "epoch": 0.1570263324518604, "grad_norm": 0.427120566368103, "learning_rate": 9.902718075218176e-06, "loss": 0.8023, "step": 527 }, { "epoch": 0.15732429513203472, "grad_norm": 0.4440107047557831, "learning_rate": 9.901694737784864e-06, "loss": 0.8138, "step": 528 }, { "epoch": 0.157622257812209, "grad_norm": 0.43493953347206116, "learning_rate": 9.900666099513734e-06, "loss": 0.8013, "step": 529 }, { "epoch": 0.15792022049238333, "grad_norm": 0.4667569398880005, "learning_rate": 9.899632161517187e-06, "loss": 0.7905, "step": 530 }, { "epoch": 0.15821818317255765, "grad_norm": 0.4225075840950012, "learning_rate": 9.898592924913353e-06, "loss": 0.7552, "step": 531 }, { "epoch": 0.15851614585273194, "grad_norm": 0.4328601062297821, "learning_rate": 9.897548390826092e-06, "loss": 0.7987, "step": 532 }, { "epoch": 0.15881410853290626, "grad_norm": 0.4600571393966675, "learning_rate": 9.896498560384996e-06, "loss": 0.8294, "step": 533 }, { "epoch": 0.15911207121308057, "grad_norm": 0.4488067328929901, "learning_rate": 9.895443434725382e-06, "loss": 0.7989, "step": 534 }, { "epoch": 0.15941003389325487, "grad_norm": 0.42931830883026123, "learning_rate": 9.894383014988294e-06, "loss": 0.809, "step": 535 }, { "epoch": 0.15970799657342918, "grad_norm": 0.4342135787010193, "learning_rate": 9.893317302320501e-06, "loss": 0.7469, "step": 536 }, { "epoch": 0.16000595925360347, "grad_norm": 0.46612685918807983, "learning_rate": 9.892246297874497e-06, "loss": 0.8434, "step": 537 }, { "epoch": 0.1603039219337778, "grad_norm": 0.4532231092453003, "learning_rate": 9.891170002808498e-06, "loss": 0.8516, "step": 538 }, { "epoch": 0.1606018846139521, "grad_norm": 0.43740570545196533, "learning_rate": 9.89008841828644e-06, "loss": 0.8111, "step": 539 }, { "epoch": 0.1608998472941264, "grad_norm": 0.43567419052124023, "learning_rate": 9.889001545477984e-06, "loss": 0.7746, "step": 540 }, { "epoch": 0.16119780997430072, "grad_norm": 0.4601244032382965, "learning_rate": 9.8879093855585e-06, "loss": 0.8171, "step": 541 }, { "epoch": 0.16149577265447504, "grad_norm": 0.4863916039466858, "learning_rate": 9.886811939709089e-06, "loss": 0.7941, "step": 542 }, { "epoch": 0.16179373533464933, "grad_norm": 0.4400995969772339, "learning_rate": 9.885709209116557e-06, "loss": 0.8239, "step": 543 }, { "epoch": 0.16209169801482365, "grad_norm": 0.4158054292201996, "learning_rate": 9.884601194973432e-06, "loss": 0.6939, "step": 544 }, { "epoch": 0.16238966069499794, "grad_norm": 0.4609817862510681, "learning_rate": 9.883487898477951e-06, "loss": 0.8264, "step": 545 }, { "epoch": 0.16268762337517226, "grad_norm": 0.4383307099342346, "learning_rate": 9.882369320834068e-06, "loss": 0.7792, "step": 546 }, { "epoch": 0.16298558605534658, "grad_norm": 0.45086583495140076, "learning_rate": 9.881245463251446e-06, "loss": 0.8195, "step": 547 }, { "epoch": 0.16328354873552087, "grad_norm": 0.4327777326107025, "learning_rate": 9.880116326945455e-06, "loss": 0.7916, "step": 548 }, { "epoch": 0.16358151141569519, "grad_norm": 0.48214995861053467, "learning_rate": 9.878981913137178e-06, "loss": 0.8232, "step": 549 }, { "epoch": 0.1638794740958695, "grad_norm": 0.4498960077762604, "learning_rate": 9.877842223053406e-06, "loss": 0.8134, "step": 550 }, { "epoch": 0.1641774367760438, "grad_norm": 0.44083136320114136, "learning_rate": 9.876697257926632e-06, "loss": 0.7982, "step": 551 }, { "epoch": 0.1644753994562181, "grad_norm": 0.44026824831962585, "learning_rate": 9.875547018995052e-06, "loss": 0.7627, "step": 552 }, { "epoch": 0.1647733621363924, "grad_norm": 0.44473356008529663, "learning_rate": 9.874391507502572e-06, "loss": 0.8151, "step": 553 }, { "epoch": 0.16507132481656672, "grad_norm": 0.4847549498081207, "learning_rate": 9.873230724698797e-06, "loss": 0.8662, "step": 554 }, { "epoch": 0.16536928749674104, "grad_norm": 0.4360688626766205, "learning_rate": 9.872064671839029e-06, "loss": 0.8291, "step": 555 }, { "epoch": 0.16566725017691533, "grad_norm": 0.43601882457733154, "learning_rate": 9.870893350184274e-06, "loss": 0.8048, "step": 556 }, { "epoch": 0.16596521285708965, "grad_norm": 0.43026798963546753, "learning_rate": 9.869716761001234e-06, "loss": 0.7519, "step": 557 }, { "epoch": 0.16626317553726397, "grad_norm": 0.4556662440299988, "learning_rate": 9.868534905562306e-06, "loss": 0.8546, "step": 558 }, { "epoch": 0.16656113821743826, "grad_norm": 0.4316236674785614, "learning_rate": 9.867347785145584e-06, "loss": 0.8059, "step": 559 }, { "epoch": 0.16685910089761258, "grad_norm": 0.4355992078781128, "learning_rate": 9.866155401034856e-06, "loss": 0.8053, "step": 560 }, { "epoch": 0.16715706357778687, "grad_norm": 0.43576356768608093, "learning_rate": 9.864957754519602e-06, "loss": 0.7883, "step": 561 }, { "epoch": 0.1674550262579612, "grad_norm": 0.42826154828071594, "learning_rate": 9.86375484689499e-06, "loss": 0.7288, "step": 562 }, { "epoch": 0.1677529889381355, "grad_norm": 0.43339434266090393, "learning_rate": 9.862546679461882e-06, "loss": 0.7906, "step": 563 }, { "epoch": 0.1680509516183098, "grad_norm": 0.4407076835632324, "learning_rate": 9.861333253526826e-06, "loss": 0.8304, "step": 564 }, { "epoch": 0.16834891429848411, "grad_norm": 0.44356203079223633, "learning_rate": 9.860114570402055e-06, "loss": 0.7864, "step": 565 }, { "epoch": 0.16864687697865843, "grad_norm": 0.40894874930381775, "learning_rate": 9.85889063140549e-06, "loss": 0.7819, "step": 566 }, { "epoch": 0.16894483965883272, "grad_norm": 0.41408395767211914, "learning_rate": 9.857661437860735e-06, "loss": 0.7753, "step": 567 }, { "epoch": 0.16924280233900704, "grad_norm": 0.4399091303348541, "learning_rate": 9.856426991097077e-06, "loss": 0.81, "step": 568 }, { "epoch": 0.16954076501918136, "grad_norm": 0.44112658500671387, "learning_rate": 9.85518729244948e-06, "loss": 0.7672, "step": 569 }, { "epoch": 0.16983872769935565, "grad_norm": 0.4537188708782196, "learning_rate": 9.853942343258596e-06, "loss": 0.8056, "step": 570 }, { "epoch": 0.17013669037952997, "grad_norm": 0.43311551213264465, "learning_rate": 9.852692144870746e-06, "loss": 0.7554, "step": 571 }, { "epoch": 0.17043465305970426, "grad_norm": 0.43048954010009766, "learning_rate": 9.851436698637932e-06, "loss": 0.7763, "step": 572 }, { "epoch": 0.17073261573987858, "grad_norm": 0.41111141443252563, "learning_rate": 9.850176005917835e-06, "loss": 0.7883, "step": 573 }, { "epoch": 0.1710305784200529, "grad_norm": 0.4324644207954407, "learning_rate": 9.848910068073799e-06, "loss": 0.8108, "step": 574 }, { "epoch": 0.1713285411002272, "grad_norm": 0.44743847846984863, "learning_rate": 9.84763888647485e-06, "loss": 0.7932, "step": 575 }, { "epoch": 0.1716265037804015, "grad_norm": 0.4240942895412445, "learning_rate": 9.846362462495682e-06, "loss": 0.7787, "step": 576 }, { "epoch": 0.17192446646057583, "grad_norm": 0.4266510605812073, "learning_rate": 9.845080797516655e-06, "loss": 0.7735, "step": 577 }, { "epoch": 0.17222242914075012, "grad_norm": 0.4296509623527527, "learning_rate": 9.843793892923801e-06, "loss": 0.8095, "step": 578 }, { "epoch": 0.17252039182092443, "grad_norm": 0.4529971480369568, "learning_rate": 9.84250175010882e-06, "loss": 0.7629, "step": 579 }, { "epoch": 0.17281835450109873, "grad_norm": 0.4344656765460968, "learning_rate": 9.841204370469066e-06, "loss": 0.7899, "step": 580 }, { "epoch": 0.17311631718127304, "grad_norm": 0.42121410369873047, "learning_rate": 9.839901755407572e-06, "loss": 0.7554, "step": 581 }, { "epoch": 0.17341427986144736, "grad_norm": 0.43178629875183105, "learning_rate": 9.838593906333018e-06, "loss": 0.8102, "step": 582 }, { "epoch": 0.17371224254162165, "grad_norm": 0.4340752959251404, "learning_rate": 9.837280824659755e-06, "loss": 0.7796, "step": 583 }, { "epoch": 0.17401020522179597, "grad_norm": 0.4354706108570099, "learning_rate": 9.835962511807786e-06, "loss": 0.7993, "step": 584 }, { "epoch": 0.1743081679019703, "grad_norm": 0.4145393967628479, "learning_rate": 9.834638969202774e-06, "loss": 0.7862, "step": 585 }, { "epoch": 0.17460613058214458, "grad_norm": 0.4493045508861542, "learning_rate": 9.833310198276037e-06, "loss": 0.7941, "step": 586 }, { "epoch": 0.1749040932623189, "grad_norm": 0.42931944131851196, "learning_rate": 9.831976200464551e-06, "loss": 0.8223, "step": 587 }, { "epoch": 0.1752020559424932, "grad_norm": 0.4153611660003662, "learning_rate": 9.830636977210934e-06, "loss": 0.8077, "step": 588 }, { "epoch": 0.1755000186226675, "grad_norm": 0.4432365894317627, "learning_rate": 9.829292529963467e-06, "loss": 0.8329, "step": 589 }, { "epoch": 0.17579798130284183, "grad_norm": 0.4226314425468445, "learning_rate": 9.827942860176072e-06, "loss": 0.7617, "step": 590 }, { "epoch": 0.17609594398301612, "grad_norm": 0.4454766809940338, "learning_rate": 9.826587969308322e-06, "loss": 0.8462, "step": 591 }, { "epoch": 0.17639390666319044, "grad_norm": 0.4389556348323822, "learning_rate": 9.825227858825439e-06, "loss": 0.7955, "step": 592 }, { "epoch": 0.17669186934336475, "grad_norm": 0.4516342878341675, "learning_rate": 9.823862530198285e-06, "loss": 0.7961, "step": 593 }, { "epoch": 0.17698983202353905, "grad_norm": 0.43754610419273376, "learning_rate": 9.822491984903367e-06, "loss": 0.8458, "step": 594 }, { "epoch": 0.17728779470371336, "grad_norm": 0.42383718490600586, "learning_rate": 9.821116224422832e-06, "loss": 0.817, "step": 595 }, { "epoch": 0.17758575738388765, "grad_norm": 0.40285003185272217, "learning_rate": 9.819735250244469e-06, "loss": 0.8049, "step": 596 }, { "epoch": 0.17788372006406197, "grad_norm": 0.43438956141471863, "learning_rate": 9.818349063861703e-06, "loss": 0.7974, "step": 597 }, { "epoch": 0.1781816827442363, "grad_norm": 0.46335262060165405, "learning_rate": 9.816957666773601e-06, "loss": 0.7877, "step": 598 }, { "epoch": 0.17847964542441058, "grad_norm": 0.42925113439559937, "learning_rate": 9.815561060484857e-06, "loss": 0.7703, "step": 599 }, { "epoch": 0.1787776081045849, "grad_norm": 0.4416888952255249, "learning_rate": 9.814159246505803e-06, "loss": 0.7857, "step": 600 }, { "epoch": 0.17907557078475922, "grad_norm": 0.42968836426734924, "learning_rate": 9.812752226352405e-06, "loss": 0.8382, "step": 601 }, { "epoch": 0.1793735334649335, "grad_norm": 0.4462573230266571, "learning_rate": 9.811340001546252e-06, "loss": 0.7949, "step": 602 }, { "epoch": 0.17967149614510783, "grad_norm": 0.4179610311985016, "learning_rate": 9.80992257361457e-06, "loss": 0.7564, "step": 603 }, { "epoch": 0.17996945882528215, "grad_norm": 0.42330700159072876, "learning_rate": 9.808499944090204e-06, "loss": 0.7669, "step": 604 }, { "epoch": 0.18026742150545644, "grad_norm": 0.42339032888412476, "learning_rate": 9.80707211451163e-06, "loss": 0.7717, "step": 605 }, { "epoch": 0.18056538418563076, "grad_norm": 0.44835034012794495, "learning_rate": 9.805639086422944e-06, "loss": 0.8127, "step": 606 }, { "epoch": 0.18086334686580505, "grad_norm": 0.4259271025657654, "learning_rate": 9.804200861373866e-06, "loss": 0.7793, "step": 607 }, { "epoch": 0.18116130954597937, "grad_norm": 0.4456773102283478, "learning_rate": 9.802757440919734e-06, "loss": 0.7896, "step": 608 }, { "epoch": 0.18145927222615368, "grad_norm": 0.4379430413246155, "learning_rate": 9.801308826621505e-06, "loss": 0.7806, "step": 609 }, { "epoch": 0.18175723490632797, "grad_norm": 0.42454832792282104, "learning_rate": 9.799855020045756e-06, "loss": 0.8095, "step": 610 }, { "epoch": 0.1820551975865023, "grad_norm": 0.41606152057647705, "learning_rate": 9.798396022764673e-06, "loss": 0.7679, "step": 611 }, { "epoch": 0.1823531602666766, "grad_norm": 0.44875243306159973, "learning_rate": 9.796931836356062e-06, "loss": 0.8114, "step": 612 }, { "epoch": 0.1826511229468509, "grad_norm": 0.43045029044151306, "learning_rate": 9.795462462403339e-06, "loss": 0.8003, "step": 613 }, { "epoch": 0.18294908562702522, "grad_norm": 0.4321276843547821, "learning_rate": 9.793987902495522e-06, "loss": 0.8126, "step": 614 }, { "epoch": 0.1832470483071995, "grad_norm": 0.43846407532691956, "learning_rate": 9.79250815822725e-06, "loss": 0.8485, "step": 615 }, { "epoch": 0.18354501098737383, "grad_norm": 0.41620397567749023, "learning_rate": 9.791023231198757e-06, "loss": 0.7452, "step": 616 }, { "epoch": 0.18384297366754815, "grad_norm": 0.42242833971977234, "learning_rate": 9.789533123015893e-06, "loss": 0.7881, "step": 617 }, { "epoch": 0.18414093634772244, "grad_norm": 0.4209827482700348, "learning_rate": 9.7880378352901e-06, "loss": 0.8115, "step": 618 }, { "epoch": 0.18443889902789676, "grad_norm": 0.44570618867874146, "learning_rate": 9.786537369638429e-06, "loss": 0.7989, "step": 619 }, { "epoch": 0.18473686170807108, "grad_norm": 0.43412548303604126, "learning_rate": 9.785031727683528e-06, "loss": 0.7623, "step": 620 }, { "epoch": 0.18503482438824537, "grad_norm": 0.43142980337142944, "learning_rate": 9.783520911053642e-06, "loss": 0.7922, "step": 621 }, { "epoch": 0.18533278706841969, "grad_norm": 0.4264300763607025, "learning_rate": 9.782004921382612e-06, "loss": 0.8033, "step": 622 }, { "epoch": 0.18563074974859398, "grad_norm": 0.4410349726676941, "learning_rate": 9.780483760309876e-06, "loss": 0.7679, "step": 623 }, { "epoch": 0.1859287124287683, "grad_norm": 0.4315160810947418, "learning_rate": 9.778957429480463e-06, "loss": 0.7944, "step": 624 }, { "epoch": 0.1862266751089426, "grad_norm": 0.4013029634952545, "learning_rate": 9.77742593054499e-06, "loss": 0.7517, "step": 625 }, { "epoch": 0.1865246377891169, "grad_norm": 0.42481091618537903, "learning_rate": 9.775889265159667e-06, "loss": 0.7834, "step": 626 }, { "epoch": 0.18682260046929122, "grad_norm": 0.41402578353881836, "learning_rate": 9.774347434986287e-06, "loss": 0.805, "step": 627 }, { "epoch": 0.18712056314946554, "grad_norm": 0.42417481541633606, "learning_rate": 9.772800441692234e-06, "loss": 0.7971, "step": 628 }, { "epoch": 0.18741852582963983, "grad_norm": 0.435710072517395, "learning_rate": 9.771248286950472e-06, "loss": 0.7968, "step": 629 }, { "epoch": 0.18771648850981415, "grad_norm": 0.43094027042388916, "learning_rate": 9.769690972439545e-06, "loss": 0.8031, "step": 630 }, { "epoch": 0.18801445118998844, "grad_norm": 0.4466800391674042, "learning_rate": 9.768128499843579e-06, "loss": 0.7671, "step": 631 }, { "epoch": 0.18831241387016276, "grad_norm": 0.46381083130836487, "learning_rate": 9.76656087085228e-06, "loss": 0.8268, "step": 632 }, { "epoch": 0.18861037655033708, "grad_norm": 0.4458133578300476, "learning_rate": 9.76498808716093e-06, "loss": 0.8519, "step": 633 }, { "epoch": 0.18890833923051137, "grad_norm": 0.4384130537509918, "learning_rate": 9.763410150470378e-06, "loss": 0.8272, "step": 634 }, { "epoch": 0.1892063019106857, "grad_norm": 0.4333811104297638, "learning_rate": 9.761827062487056e-06, "loss": 0.7305, "step": 635 }, { "epoch": 0.18950426459086, "grad_norm": 0.4350837171077728, "learning_rate": 9.760238824922962e-06, "loss": 0.7394, "step": 636 }, { "epoch": 0.1898022272710343, "grad_norm": 0.4497371315956116, "learning_rate": 9.758645439495662e-06, "loss": 0.8362, "step": 637 }, { "epoch": 0.19010018995120861, "grad_norm": 0.4223061501979828, "learning_rate": 9.757046907928291e-06, "loss": 0.7831, "step": 638 }, { "epoch": 0.19039815263138293, "grad_norm": 0.42122185230255127, "learning_rate": 9.755443231949548e-06, "loss": 0.7959, "step": 639 }, { "epoch": 0.19069611531155722, "grad_norm": 0.44597384333610535, "learning_rate": 9.753834413293695e-06, "loss": 0.8639, "step": 640 }, { "epoch": 0.19099407799173154, "grad_norm": 0.4516172409057617, "learning_rate": 9.752220453700556e-06, "loss": 0.7922, "step": 641 }, { "epoch": 0.19129204067190583, "grad_norm": 0.42263251543045044, "learning_rate": 9.750601354915516e-06, "loss": 0.7685, "step": 642 }, { "epoch": 0.19159000335208015, "grad_norm": 0.4246070384979248, "learning_rate": 9.748977118689516e-06, "loss": 0.7938, "step": 643 }, { "epoch": 0.19188796603225447, "grad_norm": 0.4520643949508667, "learning_rate": 9.747347746779052e-06, "loss": 0.8233, "step": 644 }, { "epoch": 0.19218592871242876, "grad_norm": 0.4654163718223572, "learning_rate": 9.745713240946177e-06, "loss": 0.8011, "step": 645 }, { "epoch": 0.19248389139260308, "grad_norm": 0.43705472350120544, "learning_rate": 9.744073602958493e-06, "loss": 0.8239, "step": 646 }, { "epoch": 0.1927818540727774, "grad_norm": 0.4432106614112854, "learning_rate": 9.742428834589152e-06, "loss": 0.824, "step": 647 }, { "epoch": 0.1930798167529517, "grad_norm": 0.43141478300094604, "learning_rate": 9.740778937616858e-06, "loss": 0.7822, "step": 648 }, { "epoch": 0.193377779433126, "grad_norm": 0.41723155975341797, "learning_rate": 9.739123913825855e-06, "loss": 0.7457, "step": 649 }, { "epoch": 0.1936757421133003, "grad_norm": 0.44804713129997253, "learning_rate": 9.737463765005934e-06, "loss": 0.8064, "step": 650 }, { "epoch": 0.19397370479347462, "grad_norm": 0.4133176803588867, "learning_rate": 9.735798492952435e-06, "loss": 0.8116, "step": 651 }, { "epoch": 0.19427166747364893, "grad_norm": 0.4027285575866699, "learning_rate": 9.734128099466227e-06, "loss": 0.7512, "step": 652 }, { "epoch": 0.19456963015382323, "grad_norm": 0.43247896432876587, "learning_rate": 9.732452586353727e-06, "loss": 0.7555, "step": 653 }, { "epoch": 0.19486759283399754, "grad_norm": 0.4446199834346771, "learning_rate": 9.73077195542688e-06, "loss": 0.7942, "step": 654 }, { "epoch": 0.19516555551417186, "grad_norm": 0.45987439155578613, "learning_rate": 9.729086208503174e-06, "loss": 0.8086, "step": 655 }, { "epoch": 0.19546351819434615, "grad_norm": 0.4186185896396637, "learning_rate": 9.727395347405624e-06, "loss": 0.7624, "step": 656 }, { "epoch": 0.19576148087452047, "grad_norm": 0.4368334114551544, "learning_rate": 9.725699373962778e-06, "loss": 0.7926, "step": 657 }, { "epoch": 0.19605944355469476, "grad_norm": 0.42097970843315125, "learning_rate": 9.723998290008709e-06, "loss": 0.7777, "step": 658 }, { "epoch": 0.19635740623486908, "grad_norm": 0.41991859674453735, "learning_rate": 9.722292097383024e-06, "loss": 0.7675, "step": 659 }, { "epoch": 0.1966553689150434, "grad_norm": 0.40364593267440796, "learning_rate": 9.720580797930845e-06, "loss": 0.7554, "step": 660 }, { "epoch": 0.1969533315952177, "grad_norm": 0.44092264771461487, "learning_rate": 9.718864393502828e-06, "loss": 0.7927, "step": 661 }, { "epoch": 0.197251294275392, "grad_norm": 0.44391563534736633, "learning_rate": 9.71714288595514e-06, "loss": 0.7809, "step": 662 }, { "epoch": 0.19754925695556633, "grad_norm": 0.42969176173210144, "learning_rate": 9.715416277149469e-06, "loss": 0.7927, "step": 663 }, { "epoch": 0.19784721963574062, "grad_norm": 0.42373907566070557, "learning_rate": 9.713684568953023e-06, "loss": 0.8096, "step": 664 }, { "epoch": 0.19814518231591494, "grad_norm": 0.4161315858364105, "learning_rate": 9.711947763238523e-06, "loss": 0.7868, "step": 665 }, { "epoch": 0.19844314499608923, "grad_norm": 0.41807207465171814, "learning_rate": 9.7102058618842e-06, "loss": 0.7613, "step": 666 }, { "epoch": 0.19874110767626355, "grad_norm": 0.4127216339111328, "learning_rate": 9.708458866773803e-06, "loss": 0.7741, "step": 667 }, { "epoch": 0.19903907035643786, "grad_norm": 0.4264618158340454, "learning_rate": 9.706706779796576e-06, "loss": 0.7858, "step": 668 }, { "epoch": 0.19933703303661215, "grad_norm": 0.42385661602020264, "learning_rate": 9.704949602847282e-06, "loss": 0.7293, "step": 669 }, { "epoch": 0.19963499571678647, "grad_norm": 0.39673057198524475, "learning_rate": 9.703187337826186e-06, "loss": 0.7404, "step": 670 }, { "epoch": 0.1999329583969608, "grad_norm": 0.44886425137519836, "learning_rate": 9.70141998663905e-06, "loss": 0.7941, "step": 671 }, { "epoch": 0.20023092107713508, "grad_norm": 0.42239850759506226, "learning_rate": 9.699647551197142e-06, "loss": 0.7402, "step": 672 }, { "epoch": 0.2005288837573094, "grad_norm": 0.4410516321659088, "learning_rate": 9.697870033417226e-06, "loss": 0.811, "step": 673 }, { "epoch": 0.2008268464374837, "grad_norm": 0.4216253161430359, "learning_rate": 9.696087435221562e-06, "loss": 0.7885, "step": 674 }, { "epoch": 0.201124809117658, "grad_norm": 0.4702225625514984, "learning_rate": 9.694299758537905e-06, "loss": 0.7736, "step": 675 }, { "epoch": 0.20142277179783233, "grad_norm": 0.42728155851364136, "learning_rate": 9.692507005299499e-06, "loss": 0.7845, "step": 676 }, { "epoch": 0.20172073447800662, "grad_norm": 0.42187970876693726, "learning_rate": 9.690709177445084e-06, "loss": 0.7835, "step": 677 }, { "epoch": 0.20201869715818094, "grad_norm": 0.43492591381073, "learning_rate": 9.688906276918883e-06, "loss": 0.8551, "step": 678 }, { "epoch": 0.20231665983835526, "grad_norm": 0.4242953062057495, "learning_rate": 9.687098305670606e-06, "loss": 0.7807, "step": 679 }, { "epoch": 0.20261462251852955, "grad_norm": 0.432669073343277, "learning_rate": 9.685285265655444e-06, "loss": 0.7859, "step": 680 }, { "epoch": 0.20291258519870387, "grad_norm": 0.4527738094329834, "learning_rate": 9.683467158834076e-06, "loss": 0.8505, "step": 681 }, { "epoch": 0.20321054787887818, "grad_norm": 0.4490962624549866, "learning_rate": 9.681643987172656e-06, "loss": 0.7705, "step": 682 }, { "epoch": 0.20350851055905247, "grad_norm": 0.45207998156547546, "learning_rate": 9.679815752642814e-06, "loss": 0.8056, "step": 683 }, { "epoch": 0.2038064732392268, "grad_norm": 0.41610947251319885, "learning_rate": 9.677982457221658e-06, "loss": 0.7969, "step": 684 }, { "epoch": 0.20410443591940108, "grad_norm": 0.43091249465942383, "learning_rate": 9.67614410289177e-06, "loss": 0.7579, "step": 685 }, { "epoch": 0.2044023985995754, "grad_norm": 0.4388624429702759, "learning_rate": 9.674300691641194e-06, "loss": 0.8206, "step": 686 }, { "epoch": 0.20470036127974972, "grad_norm": 0.4264225661754608, "learning_rate": 9.672452225463458e-06, "loss": 0.7646, "step": 687 }, { "epoch": 0.204998323959924, "grad_norm": 0.4286390542984009, "learning_rate": 9.67059870635754e-06, "loss": 0.7895, "step": 688 }, { "epoch": 0.20529628664009833, "grad_norm": 0.4425601363182068, "learning_rate": 9.668740136327898e-06, "loss": 0.8329, "step": 689 }, { "epoch": 0.20559424932027265, "grad_norm": 0.43018388748168945, "learning_rate": 9.666876517384441e-06, "loss": 0.8013, "step": 690 }, { "epoch": 0.20589221200044694, "grad_norm": 0.43569415807724, "learning_rate": 9.665007851542541e-06, "loss": 0.7902, "step": 691 }, { "epoch": 0.20619017468062126, "grad_norm": 0.4561285376548767, "learning_rate": 9.663134140823031e-06, "loss": 0.7929, "step": 692 }, { "epoch": 0.20648813736079555, "grad_norm": 0.4289737045764923, "learning_rate": 9.661255387252195e-06, "loss": 0.7618, "step": 693 }, { "epoch": 0.20678610004096987, "grad_norm": 0.42727822065353394, "learning_rate": 9.659371592861772e-06, "loss": 0.78, "step": 694 }, { "epoch": 0.20708406272114419, "grad_norm": 0.4296926259994507, "learning_rate": 9.657482759688957e-06, "loss": 0.7892, "step": 695 }, { "epoch": 0.20738202540131848, "grad_norm": 0.41573357582092285, "learning_rate": 9.655588889776385e-06, "loss": 0.782, "step": 696 }, { "epoch": 0.2076799880814928, "grad_norm": 0.4244844913482666, "learning_rate": 9.653689985172148e-06, "loss": 0.7801, "step": 697 }, { "epoch": 0.2079779507616671, "grad_norm": 0.4274427890777588, "learning_rate": 9.651786047929772e-06, "loss": 0.8083, "step": 698 }, { "epoch": 0.2082759134418414, "grad_norm": 0.4298088848590851, "learning_rate": 9.649877080108239e-06, "loss": 0.7653, "step": 699 }, { "epoch": 0.20857387612201572, "grad_norm": 0.42512091994285583, "learning_rate": 9.647963083771957e-06, "loss": 0.7663, "step": 700 }, { "epoch": 0.20887183880219, "grad_norm": 0.43867695331573486, "learning_rate": 9.646044060990778e-06, "loss": 0.7577, "step": 701 }, { "epoch": 0.20916980148236433, "grad_norm": 0.41578346490859985, "learning_rate": 9.644120013839993e-06, "loss": 0.751, "step": 702 }, { "epoch": 0.20946776416253865, "grad_norm": 0.43413275480270386, "learning_rate": 9.642190944400323e-06, "loss": 0.7961, "step": 703 }, { "epoch": 0.20976572684271294, "grad_norm": 0.42895805835723877, "learning_rate": 9.640256854757921e-06, "loss": 0.7646, "step": 704 }, { "epoch": 0.21006368952288726, "grad_norm": 0.435161292552948, "learning_rate": 9.638317747004369e-06, "loss": 0.8106, "step": 705 }, { "epoch": 0.21036165220306158, "grad_norm": 0.44427958130836487, "learning_rate": 9.636373623236672e-06, "loss": 0.8242, "step": 706 }, { "epoch": 0.21065961488323587, "grad_norm": 0.4361114203929901, "learning_rate": 9.634424485557267e-06, "loss": 0.778, "step": 707 }, { "epoch": 0.2109575775634102, "grad_norm": 0.4281226396560669, "learning_rate": 9.632470336074009e-06, "loss": 0.8027, "step": 708 }, { "epoch": 0.21125554024358448, "grad_norm": 0.46293097734451294, "learning_rate": 9.630511176900172e-06, "loss": 0.776, "step": 709 }, { "epoch": 0.2115535029237588, "grad_norm": 0.4355347156524658, "learning_rate": 9.628547010154449e-06, "loss": 0.7818, "step": 710 }, { "epoch": 0.21185146560393311, "grad_norm": 0.4164382517337799, "learning_rate": 9.626577837960947e-06, "loss": 0.7261, "step": 711 }, { "epoch": 0.2121494282841074, "grad_norm": 0.41936761140823364, "learning_rate": 9.624603662449188e-06, "loss": 0.773, "step": 712 }, { "epoch": 0.21244739096428172, "grad_norm": 0.4399654269218445, "learning_rate": 9.622624485754104e-06, "loss": 0.8033, "step": 713 }, { "epoch": 0.21274535364445604, "grad_norm": 0.4517837464809418, "learning_rate": 9.620640310016036e-06, "loss": 0.7878, "step": 714 }, { "epoch": 0.21304331632463033, "grad_norm": 0.43587207794189453, "learning_rate": 9.618651137380729e-06, "loss": 0.7804, "step": 715 }, { "epoch": 0.21334127900480465, "grad_norm": 0.43398943543434143, "learning_rate": 9.616656969999334e-06, "loss": 0.8006, "step": 716 }, { "epoch": 0.21363924168497897, "grad_norm": 0.4055356979370117, "learning_rate": 9.614657810028402e-06, "loss": 0.7497, "step": 717 }, { "epoch": 0.21393720436515326, "grad_norm": 0.4483594000339508, "learning_rate": 9.612653659629884e-06, "loss": 0.767, "step": 718 }, { "epoch": 0.21423516704532758, "grad_norm": 0.4102337956428528, "learning_rate": 9.610644520971129e-06, "loss": 0.7865, "step": 719 }, { "epoch": 0.21453312972550187, "grad_norm": 0.4471098482608795, "learning_rate": 9.608630396224876e-06, "loss": 0.8297, "step": 720 }, { "epoch": 0.2148310924056762, "grad_norm": 0.43259644508361816, "learning_rate": 9.60661128756926e-06, "loss": 0.7794, "step": 721 }, { "epoch": 0.2151290550858505, "grad_norm": 0.42484912276268005, "learning_rate": 9.604587197187809e-06, "loss": 0.776, "step": 722 }, { "epoch": 0.2154270177660248, "grad_norm": 0.4235680401325226, "learning_rate": 9.60255812726943e-06, "loss": 0.7792, "step": 723 }, { "epoch": 0.21572498044619912, "grad_norm": 0.42460060119628906, "learning_rate": 9.60052408000842e-06, "loss": 0.803, "step": 724 }, { "epoch": 0.21602294312637343, "grad_norm": 0.4313143789768219, "learning_rate": 9.598485057604458e-06, "loss": 0.8066, "step": 725 }, { "epoch": 0.21632090580654773, "grad_norm": 0.4411279857158661, "learning_rate": 9.596441062262602e-06, "loss": 0.8148, "step": 726 }, { "epoch": 0.21661886848672204, "grad_norm": 0.4420432150363922, "learning_rate": 9.594392096193294e-06, "loss": 0.8216, "step": 727 }, { "epoch": 0.21691683116689633, "grad_norm": 0.416064977645874, "learning_rate": 9.59233816161234e-06, "loss": 0.7754, "step": 728 }, { "epoch": 0.21721479384707065, "grad_norm": 0.4199596345424652, "learning_rate": 9.590279260740932e-06, "loss": 0.7731, "step": 729 }, { "epoch": 0.21751275652724497, "grad_norm": 0.45451900362968445, "learning_rate": 9.58821539580562e-06, "loss": 0.8086, "step": 730 }, { "epoch": 0.21781071920741926, "grad_norm": 0.4234671890735626, "learning_rate": 9.586146569038332e-06, "loss": 0.7796, "step": 731 }, { "epoch": 0.21810868188759358, "grad_norm": 0.4232145845890045, "learning_rate": 9.58407278267636e-06, "loss": 0.7848, "step": 732 }, { "epoch": 0.2184066445677679, "grad_norm": 0.4316883683204651, "learning_rate": 9.581994038962356e-06, "loss": 0.856, "step": 733 }, { "epoch": 0.2187046072479422, "grad_norm": 0.418620228767395, "learning_rate": 9.579910340144335e-06, "loss": 0.7897, "step": 734 }, { "epoch": 0.2190025699281165, "grad_norm": 0.4266875386238098, "learning_rate": 9.57782168847567e-06, "loss": 0.8091, "step": 735 }, { "epoch": 0.2193005326082908, "grad_norm": 0.4251216650009155, "learning_rate": 9.575728086215093e-06, "loss": 0.7564, "step": 736 }, { "epoch": 0.21959849528846512, "grad_norm": 0.4177185893058777, "learning_rate": 9.573629535626685e-06, "loss": 0.7581, "step": 737 }, { "epoch": 0.21989645796863944, "grad_norm": 0.4171562194824219, "learning_rate": 9.571526038979883e-06, "loss": 0.7498, "step": 738 }, { "epoch": 0.22019442064881373, "grad_norm": 0.4368170201778412, "learning_rate": 9.56941759854947e-06, "loss": 0.7649, "step": 739 }, { "epoch": 0.22049238332898805, "grad_norm": 0.4155232310295105, "learning_rate": 9.567304216615574e-06, "loss": 0.7696, "step": 740 }, { "epoch": 0.22079034600916236, "grad_norm": 0.43209031224250793, "learning_rate": 9.565185895463669e-06, "loss": 0.7839, "step": 741 }, { "epoch": 0.22108830868933665, "grad_norm": 0.4308395981788635, "learning_rate": 9.563062637384574e-06, "loss": 0.8109, "step": 742 }, { "epoch": 0.22138627136951097, "grad_norm": 0.4467731714248657, "learning_rate": 9.560934444674438e-06, "loss": 0.762, "step": 743 }, { "epoch": 0.22168423404968526, "grad_norm": 0.4020759165287018, "learning_rate": 9.558801319634756e-06, "loss": 0.7837, "step": 744 }, { "epoch": 0.22198219672985958, "grad_norm": 0.44077128171920776, "learning_rate": 9.55666326457235e-06, "loss": 0.7307, "step": 745 }, { "epoch": 0.2222801594100339, "grad_norm": 0.41470280289649963, "learning_rate": 9.554520281799377e-06, "loss": 0.7745, "step": 746 }, { "epoch": 0.2225781220902082, "grad_norm": 0.45578980445861816, "learning_rate": 9.552372373633321e-06, "loss": 0.7452, "step": 747 }, { "epoch": 0.2228760847703825, "grad_norm": 0.4778141975402832, "learning_rate": 9.550219542396995e-06, "loss": 0.8343, "step": 748 }, { "epoch": 0.22317404745055683, "grad_norm": 0.4479955732822418, "learning_rate": 9.548061790418533e-06, "loss": 0.7938, "step": 749 }, { "epoch": 0.22347201013073112, "grad_norm": 0.4472423493862152, "learning_rate": 9.545899120031392e-06, "loss": 0.8185, "step": 750 }, { "epoch": 0.22376997281090544, "grad_norm": 0.43098342418670654, "learning_rate": 9.543731533574349e-06, "loss": 0.7369, "step": 751 }, { "epoch": 0.22406793549107976, "grad_norm": 0.4441945254802704, "learning_rate": 9.541559033391497e-06, "loss": 0.8144, "step": 752 }, { "epoch": 0.22436589817125405, "grad_norm": 0.4668506681919098, "learning_rate": 9.539381621832238e-06, "loss": 0.7816, "step": 753 }, { "epoch": 0.22466386085142837, "grad_norm": 0.4563528001308441, "learning_rate": 9.537199301251292e-06, "loss": 0.7993, "step": 754 }, { "epoch": 0.22496182353160266, "grad_norm": 0.46585187315940857, "learning_rate": 9.535012074008688e-06, "loss": 0.8185, "step": 755 }, { "epoch": 0.22525978621177697, "grad_norm": 0.4397961497306824, "learning_rate": 9.532819942469752e-06, "loss": 0.78, "step": 756 }, { "epoch": 0.2255577488919513, "grad_norm": 0.4402436912059784, "learning_rate": 9.530622909005125e-06, "loss": 0.7768, "step": 757 }, { "epoch": 0.22585571157212558, "grad_norm": 0.42605629563331604, "learning_rate": 9.52842097599074e-06, "loss": 0.7831, "step": 758 }, { "epoch": 0.2261536742522999, "grad_norm": 0.4295814335346222, "learning_rate": 9.526214145807837e-06, "loss": 0.7579, "step": 759 }, { "epoch": 0.22645163693247422, "grad_norm": 0.4337891936302185, "learning_rate": 9.524002420842944e-06, "loss": 0.7983, "step": 760 }, { "epoch": 0.2267495996126485, "grad_norm": 0.4422137141227722, "learning_rate": 9.521785803487888e-06, "loss": 0.7739, "step": 761 }, { "epoch": 0.22704756229282283, "grad_norm": 0.4624215364456177, "learning_rate": 9.519564296139784e-06, "loss": 0.7989, "step": 762 }, { "epoch": 0.22734552497299712, "grad_norm": 0.43990784883499146, "learning_rate": 9.517337901201035e-06, "loss": 0.7734, "step": 763 }, { "epoch": 0.22764348765317144, "grad_norm": 0.4662509262561798, "learning_rate": 9.51510662107933e-06, "loss": 0.7337, "step": 764 }, { "epoch": 0.22794145033334576, "grad_norm": 0.432779461145401, "learning_rate": 9.512870458187644e-06, "loss": 0.7488, "step": 765 }, { "epoch": 0.22823941301352005, "grad_norm": 0.43612611293792725, "learning_rate": 9.510629414944229e-06, "loss": 0.7623, "step": 766 }, { "epoch": 0.22853737569369437, "grad_norm": 0.43317270278930664, "learning_rate": 9.508383493772612e-06, "loss": 0.7859, "step": 767 }, { "epoch": 0.22883533837386869, "grad_norm": 0.4157412052154541, "learning_rate": 9.506132697101601e-06, "loss": 0.7705, "step": 768 }, { "epoch": 0.22913330105404298, "grad_norm": 0.4522346258163452, "learning_rate": 9.503877027365277e-06, "loss": 0.7669, "step": 769 }, { "epoch": 0.2294312637342173, "grad_norm": 0.421061247587204, "learning_rate": 9.501616487002985e-06, "loss": 0.7731, "step": 770 }, { "epoch": 0.22972922641439159, "grad_norm": 0.4325638711452484, "learning_rate": 9.49935107845934e-06, "loss": 0.7923, "step": 771 }, { "epoch": 0.2300271890945659, "grad_norm": 0.4344866871833801, "learning_rate": 9.497080804184225e-06, "loss": 0.7785, "step": 772 }, { "epoch": 0.23032515177474022, "grad_norm": 0.43878617882728577, "learning_rate": 9.494805666632776e-06, "loss": 0.8048, "step": 773 }, { "epoch": 0.2306231144549145, "grad_norm": 0.4464431703090668, "learning_rate": 9.4925256682654e-06, "loss": 0.8073, "step": 774 }, { "epoch": 0.23092107713508883, "grad_norm": 0.4230099022388458, "learning_rate": 9.490240811547751e-06, "loss": 0.7575, "step": 775 }, { "epoch": 0.23121903981526315, "grad_norm": 0.427174836397171, "learning_rate": 9.487951098950744e-06, "loss": 0.7523, "step": 776 }, { "epoch": 0.23151700249543744, "grad_norm": 0.4299585819244385, "learning_rate": 9.485656532950536e-06, "loss": 0.7831, "step": 777 }, { "epoch": 0.23181496517561176, "grad_norm": 0.4389987587928772, "learning_rate": 9.483357116028547e-06, "loss": 0.7697, "step": 778 }, { "epoch": 0.23211292785578605, "grad_norm": 0.42435556650161743, "learning_rate": 9.481052850671427e-06, "loss": 0.7929, "step": 779 }, { "epoch": 0.23241089053596037, "grad_norm": 0.4137531518936157, "learning_rate": 9.47874373937108e-06, "loss": 0.8013, "step": 780 }, { "epoch": 0.2327088532161347, "grad_norm": 0.44368651509284973, "learning_rate": 9.47642978462465e-06, "loss": 0.7884, "step": 781 }, { "epoch": 0.23300681589630898, "grad_norm": 0.44244545698165894, "learning_rate": 9.474110988934512e-06, "loss": 0.8247, "step": 782 }, { "epoch": 0.2333047785764833, "grad_norm": 0.44734129309654236, "learning_rate": 9.471787354808282e-06, "loss": 0.7893, "step": 783 }, { "epoch": 0.23360274125665761, "grad_norm": 0.4227079153060913, "learning_rate": 9.469458884758807e-06, "loss": 0.7755, "step": 784 }, { "epoch": 0.2339007039368319, "grad_norm": 0.41927456855773926, "learning_rate": 9.467125581304163e-06, "loss": 0.7707, "step": 785 }, { "epoch": 0.23419866661700622, "grad_norm": 0.425749272108078, "learning_rate": 9.464787446967652e-06, "loss": 0.8195, "step": 786 }, { "epoch": 0.23449662929718051, "grad_norm": 0.41660913825035095, "learning_rate": 9.462444484277804e-06, "loss": 0.7799, "step": 787 }, { "epoch": 0.23479459197735483, "grad_norm": 0.4416177570819855, "learning_rate": 9.460096695768367e-06, "loss": 0.8226, "step": 788 }, { "epoch": 0.23509255465752915, "grad_norm": 0.43158313632011414, "learning_rate": 9.45774408397831e-06, "loss": 0.805, "step": 789 }, { "epoch": 0.23539051733770344, "grad_norm": 0.4583197832107544, "learning_rate": 9.455386651451816e-06, "loss": 0.8215, "step": 790 }, { "epoch": 0.23568848001787776, "grad_norm": 0.45189374685287476, "learning_rate": 9.453024400738282e-06, "loss": 0.8024, "step": 791 }, { "epoch": 0.23598644269805208, "grad_norm": 0.44654136896133423, "learning_rate": 9.450657334392317e-06, "loss": 0.7515, "step": 792 }, { "epoch": 0.23628440537822637, "grad_norm": 0.43402862548828125, "learning_rate": 9.448285454973739e-06, "loss": 0.7531, "step": 793 }, { "epoch": 0.2365823680584007, "grad_norm": 0.4220675528049469, "learning_rate": 9.445908765047562e-06, "loss": 0.7749, "step": 794 }, { "epoch": 0.236880330738575, "grad_norm": 0.43355607986450195, "learning_rate": 9.443527267184015e-06, "loss": 0.8116, "step": 795 }, { "epoch": 0.2371782934187493, "grad_norm": 0.4237934648990631, "learning_rate": 9.441140963958515e-06, "loss": 0.7694, "step": 796 }, { "epoch": 0.23747625609892362, "grad_norm": 0.43690553307533264, "learning_rate": 9.438749857951687e-06, "loss": 0.8207, "step": 797 }, { "epoch": 0.2377742187790979, "grad_norm": 0.41167691349983215, "learning_rate": 9.43635395174934e-06, "loss": 0.7473, "step": 798 }, { "epoch": 0.23807218145927223, "grad_norm": 0.4328611493110657, "learning_rate": 9.433953247942478e-06, "loss": 0.8034, "step": 799 }, { "epoch": 0.23837014413944654, "grad_norm": 0.4350714087486267, "learning_rate": 9.431547749127295e-06, "loss": 0.7804, "step": 800 }, { "epoch": 0.23866810681962083, "grad_norm": 0.4180876314640045, "learning_rate": 9.429137457905166e-06, "loss": 0.7845, "step": 801 }, { "epoch": 0.23896606949979515, "grad_norm": 0.4411112368106842, "learning_rate": 9.426722376882654e-06, "loss": 0.7987, "step": 802 }, { "epoch": 0.23926403217996947, "grad_norm": 0.44115111231803894, "learning_rate": 9.424302508671497e-06, "loss": 0.7925, "step": 803 }, { "epoch": 0.23956199486014376, "grad_norm": 0.42536935210227966, "learning_rate": 9.421877855888615e-06, "loss": 0.838, "step": 804 }, { "epoch": 0.23985995754031808, "grad_norm": 0.4546675682067871, "learning_rate": 9.419448421156096e-06, "loss": 0.799, "step": 805 }, { "epoch": 0.24015792022049237, "grad_norm": 0.4298256039619446, "learning_rate": 9.417014207101202e-06, "loss": 0.7974, "step": 806 }, { "epoch": 0.2404558829006667, "grad_norm": 0.40863409638404846, "learning_rate": 9.41457521635637e-06, "loss": 0.7688, "step": 807 }, { "epoch": 0.240753845580841, "grad_norm": 0.412500262260437, "learning_rate": 9.41213145155919e-06, "loss": 0.7326, "step": 808 }, { "epoch": 0.2410518082610153, "grad_norm": 0.4555049538612366, "learning_rate": 9.409682915352427e-06, "loss": 0.8555, "step": 809 }, { "epoch": 0.24134977094118962, "grad_norm": 0.4487118422985077, "learning_rate": 9.407229610383996e-06, "loss": 0.7483, "step": 810 }, { "epoch": 0.24164773362136394, "grad_norm": 0.42939889430999756, "learning_rate": 9.404771539306978e-06, "loss": 0.7861, "step": 811 }, { "epoch": 0.24194569630153823, "grad_norm": 0.4451942443847656, "learning_rate": 9.4023087047796e-06, "loss": 0.7841, "step": 812 }, { "epoch": 0.24224365898171255, "grad_norm": 0.4249064326286316, "learning_rate": 9.399841109465246e-06, "loss": 0.7772, "step": 813 }, { "epoch": 0.24254162166188684, "grad_norm": 0.4324464797973633, "learning_rate": 9.397368756032445e-06, "loss": 0.7329, "step": 814 }, { "epoch": 0.24283958434206115, "grad_norm": 0.43788713216781616, "learning_rate": 9.394891647154879e-06, "loss": 0.7951, "step": 815 }, { "epoch": 0.24313754702223547, "grad_norm": 0.43921443819999695, "learning_rate": 9.392409785511358e-06, "loss": 0.7611, "step": 816 }, { "epoch": 0.24343550970240976, "grad_norm": 0.43168798089027405, "learning_rate": 9.389923173785847e-06, "loss": 0.7818, "step": 817 }, { "epoch": 0.24373347238258408, "grad_norm": 0.4548880159854889, "learning_rate": 9.38743181466744e-06, "loss": 0.7766, "step": 818 }, { "epoch": 0.2440314350627584, "grad_norm": 0.41462671756744385, "learning_rate": 9.384935710850364e-06, "loss": 0.7705, "step": 819 }, { "epoch": 0.2443293977429327, "grad_norm": 0.43719160556793213, "learning_rate": 9.382434865033985e-06, "loss": 0.8107, "step": 820 }, { "epoch": 0.244627360423107, "grad_norm": 0.4376414716243744, "learning_rate": 9.379929279922785e-06, "loss": 0.8114, "step": 821 }, { "epoch": 0.2449253231032813, "grad_norm": 0.4218818247318268, "learning_rate": 9.377418958226385e-06, "loss": 0.8041, "step": 822 }, { "epoch": 0.24522328578345562, "grad_norm": 0.4311424791812897, "learning_rate": 9.374903902659516e-06, "loss": 0.8029, "step": 823 }, { "epoch": 0.24552124846362994, "grad_norm": 0.4447738528251648, "learning_rate": 9.372384115942034e-06, "loss": 0.7575, "step": 824 }, { "epoch": 0.24581921114380423, "grad_norm": 0.4226698577404022, "learning_rate": 9.369859600798914e-06, "loss": 0.7984, "step": 825 }, { "epoch": 0.24611717382397855, "grad_norm": 0.4459143280982971, "learning_rate": 9.367330359960239e-06, "loss": 0.7921, "step": 826 }, { "epoch": 0.24641513650415287, "grad_norm": 0.4613502621650696, "learning_rate": 9.364796396161207e-06, "loss": 0.7864, "step": 827 }, { "epoch": 0.24671309918432716, "grad_norm": 0.4165995121002197, "learning_rate": 9.362257712142118e-06, "loss": 0.7657, "step": 828 }, { "epoch": 0.24701106186450147, "grad_norm": 0.43035662174224854, "learning_rate": 9.359714310648383e-06, "loss": 0.7977, "step": 829 }, { "epoch": 0.2473090245446758, "grad_norm": 0.4222116768360138, "learning_rate": 9.357166194430509e-06, "loss": 0.7441, "step": 830 }, { "epoch": 0.24760698722485008, "grad_norm": 0.4269767999649048, "learning_rate": 9.354613366244108e-06, "loss": 0.7722, "step": 831 }, { "epoch": 0.2479049499050244, "grad_norm": 0.42692193388938904, "learning_rate": 9.352055828849879e-06, "loss": 0.7902, "step": 832 }, { "epoch": 0.2482029125851987, "grad_norm": 0.4122476577758789, "learning_rate": 9.349493585013625e-06, "loss": 0.7554, "step": 833 }, { "epoch": 0.248500875265373, "grad_norm": 0.4117843210697174, "learning_rate": 9.346926637506229e-06, "loss": 0.7574, "step": 834 }, { "epoch": 0.24879883794554733, "grad_norm": 0.45276376605033875, "learning_rate": 9.344354989103662e-06, "loss": 0.8091, "step": 835 }, { "epoch": 0.24909680062572162, "grad_norm": 0.43268904089927673, "learning_rate": 9.341778642586984e-06, "loss": 0.7779, "step": 836 }, { "epoch": 0.24939476330589594, "grad_norm": 0.43194571137428284, "learning_rate": 9.339197600742331e-06, "loss": 0.7645, "step": 837 }, { "epoch": 0.24969272598607026, "grad_norm": 0.4139356017112732, "learning_rate": 9.33661186636092e-06, "loss": 0.7613, "step": 838 }, { "epoch": 0.24999068866624455, "grad_norm": 0.43096089363098145, "learning_rate": 9.334021442239036e-06, "loss": 0.7913, "step": 839 }, { "epoch": 0.25028865134641887, "grad_norm": 0.43842166662216187, "learning_rate": 9.331426331178044e-06, "loss": 0.8234, "step": 840 }, { "epoch": 0.25058661402659316, "grad_norm": 0.4145195186138153, "learning_rate": 9.328826535984374e-06, "loss": 0.7251, "step": 841 }, { "epoch": 0.2508845767067675, "grad_norm": 0.4230673611164093, "learning_rate": 9.32622205946952e-06, "loss": 0.776, "step": 842 }, { "epoch": 0.2511825393869418, "grad_norm": 0.43353500962257385, "learning_rate": 9.32361290445004e-06, "loss": 0.7712, "step": 843 }, { "epoch": 0.2514805020671161, "grad_norm": 0.43357017636299133, "learning_rate": 9.320999073747557e-06, "loss": 0.7598, "step": 844 }, { "epoch": 0.2517784647472904, "grad_norm": 0.4321240186691284, "learning_rate": 9.318380570188735e-06, "loss": 0.7898, "step": 845 }, { "epoch": 0.2520764274274647, "grad_norm": 0.4369968771934509, "learning_rate": 9.315757396605309e-06, "loss": 0.7819, "step": 846 }, { "epoch": 0.252374390107639, "grad_norm": 0.45207393169403076, "learning_rate": 9.313129555834053e-06, "loss": 0.8038, "step": 847 }, { "epoch": 0.2526723527878133, "grad_norm": 0.43222910165786743, "learning_rate": 9.310497050716794e-06, "loss": 0.7921, "step": 848 }, { "epoch": 0.25297031546798765, "grad_norm": 0.41990602016448975, "learning_rate": 9.307859884100399e-06, "loss": 0.7962, "step": 849 }, { "epoch": 0.25326827814816194, "grad_norm": 0.42566487193107605, "learning_rate": 9.305218058836778e-06, "loss": 0.7696, "step": 850 }, { "epoch": 0.25356624082833623, "grad_norm": 0.4148566424846649, "learning_rate": 9.302571577782881e-06, "loss": 0.7435, "step": 851 }, { "epoch": 0.2538642035085106, "grad_norm": 0.4349095821380615, "learning_rate": 9.29992044380069e-06, "loss": 0.7844, "step": 852 }, { "epoch": 0.25416216618868487, "grad_norm": 0.43746015429496765, "learning_rate": 9.297264659757218e-06, "loss": 0.7827, "step": 853 }, { "epoch": 0.25446012886885916, "grad_norm": 0.42558470368385315, "learning_rate": 9.294604228524514e-06, "loss": 0.7282, "step": 854 }, { "epoch": 0.2547580915490335, "grad_norm": 0.40940043330192566, "learning_rate": 9.29193915297964e-06, "loss": 0.7429, "step": 855 }, { "epoch": 0.2550560542292078, "grad_norm": 0.43153926730155945, "learning_rate": 9.289269436004692e-06, "loss": 0.7809, "step": 856 }, { "epoch": 0.2553540169093821, "grad_norm": 0.44316428899765015, "learning_rate": 9.28659508048678e-06, "loss": 0.7876, "step": 857 }, { "epoch": 0.25565197958955643, "grad_norm": 0.435019314289093, "learning_rate": 9.28391608931803e-06, "loss": 0.7773, "step": 858 }, { "epoch": 0.2559499422697307, "grad_norm": 0.45403623580932617, "learning_rate": 9.281232465395584e-06, "loss": 0.7903, "step": 859 }, { "epoch": 0.256247904949905, "grad_norm": 0.40584635734558105, "learning_rate": 9.278544211621593e-06, "loss": 0.7183, "step": 860 }, { "epoch": 0.2565458676300793, "grad_norm": 0.40424615144729614, "learning_rate": 9.275851330903212e-06, "loss": 0.7044, "step": 861 }, { "epoch": 0.25684383031025365, "grad_norm": 0.4259452223777771, "learning_rate": 9.273153826152604e-06, "loss": 0.8228, "step": 862 }, { "epoch": 0.25714179299042794, "grad_norm": 0.43459808826446533, "learning_rate": 9.270451700286928e-06, "loss": 0.7089, "step": 863 }, { "epoch": 0.25743975567060223, "grad_norm": 0.4281349182128906, "learning_rate": 9.267744956228347e-06, "loss": 0.7967, "step": 864 }, { "epoch": 0.2577377183507766, "grad_norm": 0.45212414860725403, "learning_rate": 9.26503359690401e-06, "loss": 0.8149, "step": 865 }, { "epoch": 0.25803568103095087, "grad_norm": 0.4262511730194092, "learning_rate": 9.262317625246061e-06, "loss": 0.7652, "step": 866 }, { "epoch": 0.25833364371112516, "grad_norm": 0.4217842221260071, "learning_rate": 9.259597044191635e-06, "loss": 0.7651, "step": 867 }, { "epoch": 0.2586316063912995, "grad_norm": 0.43619224429130554, "learning_rate": 9.25687185668285e-06, "loss": 0.7734, "step": 868 }, { "epoch": 0.2589295690714738, "grad_norm": 0.4308563768863678, "learning_rate": 9.254142065666802e-06, "loss": 0.8017, "step": 869 }, { "epoch": 0.2592275317516481, "grad_norm": 0.4390491545200348, "learning_rate": 9.251407674095565e-06, "loss": 0.7897, "step": 870 }, { "epoch": 0.25952549443182243, "grad_norm": 0.4305284023284912, "learning_rate": 9.248668684926199e-06, "loss": 0.7986, "step": 871 }, { "epoch": 0.2598234571119967, "grad_norm": 0.4587078392505646, "learning_rate": 9.24592510112072e-06, "loss": 0.7804, "step": 872 }, { "epoch": 0.260121419792171, "grad_norm": 0.3946491777896881, "learning_rate": 9.243176925646125e-06, "loss": 0.7586, "step": 873 }, { "epoch": 0.26041938247234536, "grad_norm": 0.42059147357940674, "learning_rate": 9.24042416147437e-06, "loss": 0.7567, "step": 874 }, { "epoch": 0.26071734515251965, "grad_norm": 0.43899062275886536, "learning_rate": 9.237666811582377e-06, "loss": 0.7887, "step": 875 }, { "epoch": 0.26101530783269394, "grad_norm": 0.4542399048805237, "learning_rate": 9.234904878952026e-06, "loss": 0.7744, "step": 876 }, { "epoch": 0.2613132705128683, "grad_norm": 0.4268796741962433, "learning_rate": 9.232138366570154e-06, "loss": 0.7336, "step": 877 }, { "epoch": 0.2616112331930426, "grad_norm": 0.4323793351650238, "learning_rate": 9.229367277428547e-06, "loss": 0.7321, "step": 878 }, { "epoch": 0.26190919587321687, "grad_norm": 0.424514502286911, "learning_rate": 9.226591614523944e-06, "loss": 0.8011, "step": 879 }, { "epoch": 0.26220715855339116, "grad_norm": 0.42081788182258606, "learning_rate": 9.223811380858029e-06, "loss": 0.8084, "step": 880 }, { "epoch": 0.2625051212335655, "grad_norm": 0.4073812663555145, "learning_rate": 9.22102657943743e-06, "loss": 0.7337, "step": 881 }, { "epoch": 0.2628030839137398, "grad_norm": 0.4527837932109833, "learning_rate": 9.218237213273708e-06, "loss": 0.7865, "step": 882 }, { "epoch": 0.2631010465939141, "grad_norm": 0.4368094801902771, "learning_rate": 9.215443285383375e-06, "loss": 0.79, "step": 883 }, { "epoch": 0.26339900927408844, "grad_norm": 0.4208792746067047, "learning_rate": 9.21264479878786e-06, "loss": 0.7982, "step": 884 }, { "epoch": 0.2636969719542627, "grad_norm": 0.4094206988811493, "learning_rate": 9.209841756513535e-06, "loss": 0.7542, "step": 885 }, { "epoch": 0.263994934634437, "grad_norm": 0.44005078077316284, "learning_rate": 9.207034161591689e-06, "loss": 0.7995, "step": 886 }, { "epoch": 0.26429289731461136, "grad_norm": 0.41605010628700256, "learning_rate": 9.20422201705854e-06, "loss": 0.7476, "step": 887 }, { "epoch": 0.26459085999478565, "grad_norm": 0.43873661756515503, "learning_rate": 9.201405325955222e-06, "loss": 0.7842, "step": 888 }, { "epoch": 0.26488882267495995, "grad_norm": 0.4268430769443512, "learning_rate": 9.198584091327792e-06, "loss": 0.8257, "step": 889 }, { "epoch": 0.2651867853551343, "grad_norm": 0.41748034954071045, "learning_rate": 9.195758316227212e-06, "loss": 0.7608, "step": 890 }, { "epoch": 0.2654847480353086, "grad_norm": 0.42829155921936035, "learning_rate": 9.192928003709365e-06, "loss": 0.7829, "step": 891 }, { "epoch": 0.2657827107154829, "grad_norm": 0.42252838611602783, "learning_rate": 9.19009315683503e-06, "loss": 0.7527, "step": 892 }, { "epoch": 0.2660806733956572, "grad_norm": 0.4455977976322174, "learning_rate": 9.187253778669893e-06, "loss": 0.8133, "step": 893 }, { "epoch": 0.2663786360758315, "grad_norm": 0.41617825627326965, "learning_rate": 9.184409872284547e-06, "loss": 0.8074, "step": 894 }, { "epoch": 0.2666765987560058, "grad_norm": 0.42229339480400085, "learning_rate": 9.181561440754474e-06, "loss": 0.759, "step": 895 }, { "epoch": 0.2669745614361801, "grad_norm": 0.4231303334236145, "learning_rate": 9.17870848716005e-06, "loss": 0.761, "step": 896 }, { "epoch": 0.26727252411635444, "grad_norm": 0.4557526111602783, "learning_rate": 9.175851014586545e-06, "loss": 0.7976, "step": 897 }, { "epoch": 0.26757048679652873, "grad_norm": 0.4390769302845001, "learning_rate": 9.172989026124117e-06, "loss": 0.7469, "step": 898 }, { "epoch": 0.267868449476703, "grad_norm": 0.4452618956565857, "learning_rate": 9.170122524867802e-06, "loss": 0.7984, "step": 899 }, { "epoch": 0.26816641215687737, "grad_norm": 0.43199318647384644, "learning_rate": 9.16725151391752e-06, "loss": 0.8091, "step": 900 }, { "epoch": 0.26846437483705166, "grad_norm": 0.43193402886390686, "learning_rate": 9.16437599637807e-06, "loss": 0.7346, "step": 901 }, { "epoch": 0.26876233751722595, "grad_norm": 0.4428356885910034, "learning_rate": 9.161495975359116e-06, "loss": 0.8055, "step": 902 }, { "epoch": 0.2690603001974003, "grad_norm": 0.41652411222457886, "learning_rate": 9.158611453975203e-06, "loss": 0.7706, "step": 903 }, { "epoch": 0.2693582628775746, "grad_norm": 0.398971825838089, "learning_rate": 9.155722435345736e-06, "loss": 0.7612, "step": 904 }, { "epoch": 0.2696562255577489, "grad_norm": 0.41312122344970703, "learning_rate": 9.152828922594984e-06, "loss": 0.7395, "step": 905 }, { "epoch": 0.2699541882379232, "grad_norm": 0.42090773582458496, "learning_rate": 9.149930918852079e-06, "loss": 0.7502, "step": 906 }, { "epoch": 0.2702521509180975, "grad_norm": 0.4145689010620117, "learning_rate": 9.14702842725101e-06, "loss": 0.7173, "step": 907 }, { "epoch": 0.2705501135982718, "grad_norm": 0.39954444766044617, "learning_rate": 9.144121450930614e-06, "loss": 0.7615, "step": 908 }, { "epoch": 0.27084807627844615, "grad_norm": 0.4271208941936493, "learning_rate": 9.141209993034583e-06, "loss": 0.8106, "step": 909 }, { "epoch": 0.27114603895862044, "grad_norm": 0.4281978905200958, "learning_rate": 9.138294056711452e-06, "loss": 0.822, "step": 910 }, { "epoch": 0.27144400163879473, "grad_norm": 0.424578458070755, "learning_rate": 9.135373645114603e-06, "loss": 0.7777, "step": 911 }, { "epoch": 0.2717419643189691, "grad_norm": 0.4227428436279297, "learning_rate": 9.132448761402254e-06, "loss": 0.7059, "step": 912 }, { "epoch": 0.27203992699914337, "grad_norm": 0.4126034379005432, "learning_rate": 9.129519408737461e-06, "loss": 0.7988, "step": 913 }, { "epoch": 0.27233788967931766, "grad_norm": 0.4289577603340149, "learning_rate": 9.126585590288115e-06, "loss": 0.7926, "step": 914 }, { "epoch": 0.27263585235949195, "grad_norm": 0.4278191924095154, "learning_rate": 9.123647309226932e-06, "loss": 0.7608, "step": 915 }, { "epoch": 0.2729338150396663, "grad_norm": 0.42958348989486694, "learning_rate": 9.120704568731455e-06, "loss": 0.7819, "step": 916 }, { "epoch": 0.2732317777198406, "grad_norm": 0.43810129165649414, "learning_rate": 9.117757371984053e-06, "loss": 0.7723, "step": 917 }, { "epoch": 0.2735297404000149, "grad_norm": 0.43591946363449097, "learning_rate": 9.114805722171912e-06, "loss": 0.786, "step": 918 }, { "epoch": 0.2738277030801892, "grad_norm": 0.41927027702331543, "learning_rate": 9.111849622487032e-06, "loss": 0.7662, "step": 919 }, { "epoch": 0.2741256657603635, "grad_norm": 0.412334680557251, "learning_rate": 9.108889076126226e-06, "loss": 0.7426, "step": 920 }, { "epoch": 0.2744236284405378, "grad_norm": 0.43852120637893677, "learning_rate": 9.105924086291118e-06, "loss": 0.8548, "step": 921 }, { "epoch": 0.27472159112071215, "grad_norm": 0.4445139467716217, "learning_rate": 9.102954656188138e-06, "loss": 0.7758, "step": 922 }, { "epoch": 0.27501955380088644, "grad_norm": 0.46849504113197327, "learning_rate": 9.09998078902851e-06, "loss": 0.8096, "step": 923 }, { "epoch": 0.27531751648106073, "grad_norm": 0.4167250096797943, "learning_rate": 9.097002488028268e-06, "loss": 0.8019, "step": 924 }, { "epoch": 0.2756154791612351, "grad_norm": 0.43559810519218445, "learning_rate": 9.09401975640823e-06, "loss": 0.7536, "step": 925 }, { "epoch": 0.27591344184140937, "grad_norm": 0.4368812143802643, "learning_rate": 9.091032597394012e-06, "loss": 0.7998, "step": 926 }, { "epoch": 0.27621140452158366, "grad_norm": 0.424889475107193, "learning_rate": 9.088041014216019e-06, "loss": 0.8002, "step": 927 }, { "epoch": 0.276509367201758, "grad_norm": 0.436631441116333, "learning_rate": 9.085045010109433e-06, "loss": 0.7772, "step": 928 }, { "epoch": 0.2768073298819323, "grad_norm": 0.4361373484134674, "learning_rate": 9.082044588314224e-06, "loss": 0.777, "step": 929 }, { "epoch": 0.2771052925621066, "grad_norm": 0.43156692385673523, "learning_rate": 9.079039752075137e-06, "loss": 0.7728, "step": 930 }, { "epoch": 0.2774032552422809, "grad_norm": 0.42506295442581177, "learning_rate": 9.07603050464169e-06, "loss": 0.7588, "step": 931 }, { "epoch": 0.2777012179224552, "grad_norm": 0.42249932885169983, "learning_rate": 9.073016849268172e-06, "loss": 0.7871, "step": 932 }, { "epoch": 0.2779991806026295, "grad_norm": 0.43025729060173035, "learning_rate": 9.069998789213644e-06, "loss": 0.7543, "step": 933 }, { "epoch": 0.2782971432828038, "grad_norm": 0.41047433018684387, "learning_rate": 9.066976327741917e-06, "loss": 0.7721, "step": 934 }, { "epoch": 0.27859510596297815, "grad_norm": 0.43525633215904236, "learning_rate": 9.063949468121576e-06, "loss": 0.8204, "step": 935 }, { "epoch": 0.27889306864315244, "grad_norm": 0.41752514243125916, "learning_rate": 9.060918213625957e-06, "loss": 0.7804, "step": 936 }, { "epoch": 0.27919103132332673, "grad_norm": 0.41439223289489746, "learning_rate": 9.057882567533145e-06, "loss": 0.8042, "step": 937 }, { "epoch": 0.2794889940035011, "grad_norm": 0.4178325831890106, "learning_rate": 9.054842533125981e-06, "loss": 0.7772, "step": 938 }, { "epoch": 0.27978695668367537, "grad_norm": 0.42388367652893066, "learning_rate": 9.051798113692043e-06, "loss": 0.7701, "step": 939 }, { "epoch": 0.28008491936384966, "grad_norm": 0.4107518792152405, "learning_rate": 9.048749312523664e-06, "loss": 0.7838, "step": 940 }, { "epoch": 0.280382882044024, "grad_norm": 0.4476031959056854, "learning_rate": 9.0456961329179e-06, "loss": 0.8054, "step": 941 }, { "epoch": 0.2806808447241983, "grad_norm": 0.412369042634964, "learning_rate": 9.042638578176558e-06, "loss": 0.7573, "step": 942 }, { "epoch": 0.2809788074043726, "grad_norm": 0.4082777500152588, "learning_rate": 9.03957665160616e-06, "loss": 0.7895, "step": 943 }, { "epoch": 0.28127677008454693, "grad_norm": 0.44257837533950806, "learning_rate": 9.03651035651797e-06, "loss": 0.7842, "step": 944 }, { "epoch": 0.2815747327647212, "grad_norm": 0.43410831689834595, "learning_rate": 9.033439696227966e-06, "loss": 0.7251, "step": 945 }, { "epoch": 0.2818726954448955, "grad_norm": 0.4205782413482666, "learning_rate": 9.030364674056853e-06, "loss": 0.7386, "step": 946 }, { "epoch": 0.28217065812506986, "grad_norm": 0.42461472749710083, "learning_rate": 9.027285293330052e-06, "loss": 0.7763, "step": 947 }, { "epoch": 0.28246862080524415, "grad_norm": 0.4243285655975342, "learning_rate": 9.024201557377697e-06, "loss": 0.7627, "step": 948 }, { "epoch": 0.28276658348541844, "grad_norm": 0.4105435907840729, "learning_rate": 9.021113469534628e-06, "loss": 0.74, "step": 949 }, { "epoch": 0.28306454616559273, "grad_norm": 0.41979560256004333, "learning_rate": 9.018021033140398e-06, "loss": 0.7814, "step": 950 }, { "epoch": 0.2833625088457671, "grad_norm": 0.4249080717563629, "learning_rate": 9.014924251539256e-06, "loss": 0.7588, "step": 951 }, { "epoch": 0.28366047152594137, "grad_norm": 0.40093064308166504, "learning_rate": 9.011823128080157e-06, "loss": 0.7548, "step": 952 }, { "epoch": 0.28395843420611566, "grad_norm": 0.42459458112716675, "learning_rate": 9.008717666116744e-06, "loss": 0.8018, "step": 953 }, { "epoch": 0.28425639688629, "grad_norm": 0.425436407327652, "learning_rate": 9.005607869007358e-06, "loss": 0.7903, "step": 954 }, { "epoch": 0.2845543595664643, "grad_norm": 0.44178900122642517, "learning_rate": 9.002493740115026e-06, "loss": 0.8043, "step": 955 }, { "epoch": 0.2848523222466386, "grad_norm": 0.4691467583179474, "learning_rate": 8.99937528280746e-06, "loss": 0.8411, "step": 956 }, { "epoch": 0.28515028492681294, "grad_norm": 0.43360698223114014, "learning_rate": 8.996252500457046e-06, "loss": 0.7763, "step": 957 }, { "epoch": 0.2854482476069872, "grad_norm": 0.43464329838752747, "learning_rate": 8.99312539644086e-06, "loss": 0.8087, "step": 958 }, { "epoch": 0.2857462102871615, "grad_norm": 0.47156545519828796, "learning_rate": 8.98999397414064e-06, "loss": 0.863, "step": 959 }, { "epoch": 0.28604417296733586, "grad_norm": 0.4277881681919098, "learning_rate": 8.986858236942804e-06, "loss": 0.8069, "step": 960 }, { "epoch": 0.28634213564751015, "grad_norm": 0.4219008684158325, "learning_rate": 8.983718188238428e-06, "loss": 0.7834, "step": 961 }, { "epoch": 0.28664009832768444, "grad_norm": 0.42039385437965393, "learning_rate": 8.980573831423253e-06, "loss": 0.7816, "step": 962 }, { "epoch": 0.2869380610078588, "grad_norm": 0.4653097987174988, "learning_rate": 8.97742516989768e-06, "loss": 0.7487, "step": 963 }, { "epoch": 0.2872360236880331, "grad_norm": 0.4438186585903168, "learning_rate": 8.974272207066767e-06, "loss": 0.7729, "step": 964 }, { "epoch": 0.2875339863682074, "grad_norm": 0.42774060368537903, "learning_rate": 8.97111494634022e-06, "loss": 0.8071, "step": 965 }, { "epoch": 0.28783194904838166, "grad_norm": 0.40935760736465454, "learning_rate": 8.96795339113239e-06, "loss": 0.7585, "step": 966 }, { "epoch": 0.288129911728556, "grad_norm": 0.44249454140663147, "learning_rate": 8.964787544862285e-06, "loss": 0.7787, "step": 967 }, { "epoch": 0.2884278744087303, "grad_norm": 0.42188560962677, "learning_rate": 8.961617410953537e-06, "loss": 0.7275, "step": 968 }, { "epoch": 0.2887258370889046, "grad_norm": 0.42883557081222534, "learning_rate": 8.958442992834428e-06, "loss": 0.7552, "step": 969 }, { "epoch": 0.28902379976907894, "grad_norm": 0.42095375061035156, "learning_rate": 8.955264293937865e-06, "loss": 0.7141, "step": 970 }, { "epoch": 0.28932176244925323, "grad_norm": 0.42243123054504395, "learning_rate": 8.952081317701386e-06, "loss": 0.7697, "step": 971 }, { "epoch": 0.2896197251294275, "grad_norm": 0.4220430552959442, "learning_rate": 8.94889406756716e-06, "loss": 0.7733, "step": 972 }, { "epoch": 0.28991768780960187, "grad_norm": 0.41057878732681274, "learning_rate": 8.94570254698197e-06, "loss": 0.771, "step": 973 }, { "epoch": 0.29021565048977616, "grad_norm": 0.41766127943992615, "learning_rate": 8.94250675939722e-06, "loss": 0.7669, "step": 974 }, { "epoch": 0.29051361316995045, "grad_norm": 0.43557193875312805, "learning_rate": 8.939306708268934e-06, "loss": 0.7552, "step": 975 }, { "epoch": 0.2908115758501248, "grad_norm": 0.4357999265193939, "learning_rate": 8.936102397057737e-06, "loss": 0.7948, "step": 976 }, { "epoch": 0.2911095385302991, "grad_norm": 0.4122772812843323, "learning_rate": 8.93289382922887e-06, "loss": 0.7368, "step": 977 }, { "epoch": 0.2914075012104734, "grad_norm": 0.4376829266548157, "learning_rate": 8.929681008252171e-06, "loss": 0.7996, "step": 978 }, { "epoch": 0.2917054638906477, "grad_norm": 0.4416571259498596, "learning_rate": 8.926463937602081e-06, "loss": 0.8093, "step": 979 }, { "epoch": 0.292003426570822, "grad_norm": 0.44324541091918945, "learning_rate": 8.923242620757634e-06, "loss": 0.758, "step": 980 }, { "epoch": 0.2923013892509963, "grad_norm": 0.4390997886657715, "learning_rate": 8.920017061202458e-06, "loss": 0.7548, "step": 981 }, { "epoch": 0.29259935193117065, "grad_norm": 0.4270910918712616, "learning_rate": 8.916787262424768e-06, "loss": 0.7457, "step": 982 }, { "epoch": 0.29289731461134494, "grad_norm": 0.436987966299057, "learning_rate": 8.913553227917366e-06, "loss": 0.8104, "step": 983 }, { "epoch": 0.29319527729151923, "grad_norm": 0.4338602125644684, "learning_rate": 8.910314961177633e-06, "loss": 0.7614, "step": 984 }, { "epoch": 0.2934932399716935, "grad_norm": 0.457612544298172, "learning_rate": 8.907072465707522e-06, "loss": 0.7978, "step": 985 }, { "epoch": 0.29379120265186787, "grad_norm": 0.42325571179389954, "learning_rate": 8.90382574501357e-06, "loss": 0.7897, "step": 986 }, { "epoch": 0.29408916533204216, "grad_norm": 0.4008309543132782, "learning_rate": 8.90057480260687e-06, "loss": 0.762, "step": 987 }, { "epoch": 0.29438712801221645, "grad_norm": 0.4499809145927429, "learning_rate": 8.897319642003092e-06, "loss": 0.7651, "step": 988 }, { "epoch": 0.2946850906923908, "grad_norm": 0.41584426164627075, "learning_rate": 8.894060266722461e-06, "loss": 0.7373, "step": 989 }, { "epoch": 0.2949830533725651, "grad_norm": 0.4230383336544037, "learning_rate": 8.890796680289767e-06, "loss": 0.7966, "step": 990 }, { "epoch": 0.2952810160527394, "grad_norm": 0.40172526240348816, "learning_rate": 8.88752888623434e-06, "loss": 0.7453, "step": 991 }, { "epoch": 0.2955789787329137, "grad_norm": 0.4164963960647583, "learning_rate": 8.884256888090076e-06, "loss": 0.7575, "step": 992 }, { "epoch": 0.295876941413088, "grad_norm": 0.43570059537887573, "learning_rate": 8.880980689395408e-06, "loss": 0.7848, "step": 993 }, { "epoch": 0.2961749040932623, "grad_norm": 0.4313972592353821, "learning_rate": 8.877700293693316e-06, "loss": 0.7565, "step": 994 }, { "epoch": 0.29647286677343665, "grad_norm": 0.43945807218551636, "learning_rate": 8.874415704531316e-06, "loss": 0.7986, "step": 995 }, { "epoch": 0.29677082945361094, "grad_norm": 0.4161829948425293, "learning_rate": 8.871126925461459e-06, "loss": 0.795, "step": 996 }, { "epoch": 0.29706879213378523, "grad_norm": 0.4219336211681366, "learning_rate": 8.867833960040331e-06, "loss": 0.8043, "step": 997 }, { "epoch": 0.2973667548139596, "grad_norm": 0.42536213994026184, "learning_rate": 8.864536811829038e-06, "loss": 0.7994, "step": 998 }, { "epoch": 0.29766471749413387, "grad_norm": 0.4447495639324188, "learning_rate": 8.861235484393218e-06, "loss": 0.7554, "step": 999 }, { "epoch": 0.29796268017430816, "grad_norm": 0.43489551544189453, "learning_rate": 8.857929981303022e-06, "loss": 0.8059, "step": 1000 }, { "epoch": 0.29826064285448245, "grad_norm": 0.43018990755081177, "learning_rate": 8.854620306133118e-06, "loss": 0.774, "step": 1001 }, { "epoch": 0.2985586055346568, "grad_norm": 0.4343620836734772, "learning_rate": 8.851306462462689e-06, "loss": 0.7731, "step": 1002 }, { "epoch": 0.2988565682148311, "grad_norm": 0.4382462203502655, "learning_rate": 8.847988453875423e-06, "loss": 0.7903, "step": 1003 }, { "epoch": 0.2991545308950054, "grad_norm": 0.4331133961677551, "learning_rate": 8.84466628395951e-06, "loss": 0.8021, "step": 1004 }, { "epoch": 0.2994524935751797, "grad_norm": 0.4116860330104828, "learning_rate": 8.841339956307647e-06, "loss": 0.7148, "step": 1005 }, { "epoch": 0.299750456255354, "grad_norm": 0.4209303557872772, "learning_rate": 8.838009474517022e-06, "loss": 0.7646, "step": 1006 }, { "epoch": 0.3000484189355283, "grad_norm": 0.4389602243900299, "learning_rate": 8.834674842189314e-06, "loss": 0.7725, "step": 1007 }, { "epoch": 0.30034638161570265, "grad_norm": 0.4069165587425232, "learning_rate": 8.831336062930697e-06, "loss": 0.7569, "step": 1008 }, { "epoch": 0.30064434429587694, "grad_norm": 0.4373205602169037, "learning_rate": 8.827993140351825e-06, "loss": 0.7413, "step": 1009 }, { "epoch": 0.30094230697605123, "grad_norm": 0.4289036989212036, "learning_rate": 8.824646078067831e-06, "loss": 0.7664, "step": 1010 }, { "epoch": 0.3012402696562256, "grad_norm": 0.43298622965812683, "learning_rate": 8.821294879698327e-06, "loss": 0.7754, "step": 1011 }, { "epoch": 0.30153823233639987, "grad_norm": 0.41397884488105774, "learning_rate": 8.817939548867403e-06, "loss": 0.7657, "step": 1012 }, { "epoch": 0.30183619501657416, "grad_norm": 0.43379777669906616, "learning_rate": 8.814580089203608e-06, "loss": 0.7813, "step": 1013 }, { "epoch": 0.3021341576967485, "grad_norm": 0.4450131356716156, "learning_rate": 8.811216504339963e-06, "loss": 0.7755, "step": 1014 }, { "epoch": 0.3024321203769228, "grad_norm": 0.43981751799583435, "learning_rate": 8.807848797913949e-06, "loss": 0.78, "step": 1015 }, { "epoch": 0.3027300830570971, "grad_norm": 0.41197219491004944, "learning_rate": 8.804476973567502e-06, "loss": 0.7442, "step": 1016 }, { "epoch": 0.30302804573727143, "grad_norm": 0.42295488715171814, "learning_rate": 8.801101034947015e-06, "loss": 0.7764, "step": 1017 }, { "epoch": 0.3033260084174457, "grad_norm": 0.44448795914649963, "learning_rate": 8.797720985703323e-06, "loss": 0.7724, "step": 1018 }, { "epoch": 0.30362397109762, "grad_norm": 0.4202626347541809, "learning_rate": 8.794336829491718e-06, "loss": 0.7451, "step": 1019 }, { "epoch": 0.3039219337777943, "grad_norm": 0.4201606214046478, "learning_rate": 8.790948569971921e-06, "loss": 0.785, "step": 1020 }, { "epoch": 0.30421989645796865, "grad_norm": 0.4151608347892761, "learning_rate": 8.787556210808101e-06, "loss": 0.762, "step": 1021 }, { "epoch": 0.30451785913814294, "grad_norm": 0.4044322371482849, "learning_rate": 8.784159755668852e-06, "loss": 0.7488, "step": 1022 }, { "epoch": 0.30481582181831723, "grad_norm": 0.41898661851882935, "learning_rate": 8.780759208227202e-06, "loss": 0.7408, "step": 1023 }, { "epoch": 0.3051137844984916, "grad_norm": 0.42571327090263367, "learning_rate": 8.777354572160606e-06, "loss": 0.7869, "step": 1024 }, { "epoch": 0.30541174717866587, "grad_norm": 0.44300898909568787, "learning_rate": 8.773945851150934e-06, "loss": 0.8209, "step": 1025 }, { "epoch": 0.30570970985884016, "grad_norm": 0.4257313013076782, "learning_rate": 8.770533048884483e-06, "loss": 0.778, "step": 1026 }, { "epoch": 0.3060076725390145, "grad_norm": 0.4134843647480011, "learning_rate": 8.767116169051952e-06, "loss": 0.7673, "step": 1027 }, { "epoch": 0.3063056352191888, "grad_norm": 0.43889355659484863, "learning_rate": 8.763695215348462e-06, "loss": 0.7609, "step": 1028 }, { "epoch": 0.3066035978993631, "grad_norm": 0.4281556308269501, "learning_rate": 8.760270191473532e-06, "loss": 0.7589, "step": 1029 }, { "epoch": 0.30690156057953744, "grad_norm": 0.4477680027484894, "learning_rate": 8.756841101131081e-06, "loss": 0.8458, "step": 1030 }, { "epoch": 0.3071995232597117, "grad_norm": 0.4279250204563141, "learning_rate": 8.753407948029433e-06, "loss": 0.8087, "step": 1031 }, { "epoch": 0.307497485939886, "grad_norm": 0.42505520582199097, "learning_rate": 8.749970735881298e-06, "loss": 0.7525, "step": 1032 }, { "epoch": 0.30779544862006036, "grad_norm": 0.42256543040275574, "learning_rate": 8.746529468403781e-06, "loss": 0.7384, "step": 1033 }, { "epoch": 0.30809341130023465, "grad_norm": 0.4340267777442932, "learning_rate": 8.743084149318372e-06, "loss": 0.7983, "step": 1034 }, { "epoch": 0.30839137398040894, "grad_norm": 0.4179762303829193, "learning_rate": 8.739634782350938e-06, "loss": 0.7531, "step": 1035 }, { "epoch": 0.30868933666058324, "grad_norm": 0.4406876266002655, "learning_rate": 8.736181371231728e-06, "loss": 0.7502, "step": 1036 }, { "epoch": 0.3089872993407576, "grad_norm": 0.4257606565952301, "learning_rate": 8.732723919695364e-06, "loss": 0.7323, "step": 1037 }, { "epoch": 0.3092852620209319, "grad_norm": 0.42342373728752136, "learning_rate": 8.729262431480832e-06, "loss": 0.7413, "step": 1038 }, { "epoch": 0.30958322470110616, "grad_norm": 0.41246771812438965, "learning_rate": 8.725796910331494e-06, "loss": 0.7008, "step": 1039 }, { "epoch": 0.3098811873812805, "grad_norm": 0.4482112526893616, "learning_rate": 8.722327359995064e-06, "loss": 0.7964, "step": 1040 }, { "epoch": 0.3101791500614548, "grad_norm": 0.42225462198257446, "learning_rate": 8.718853784223618e-06, "loss": 0.7816, "step": 1041 }, { "epoch": 0.3104771127416291, "grad_norm": 0.4308784306049347, "learning_rate": 8.71537618677358e-06, "loss": 0.7556, "step": 1042 }, { "epoch": 0.31077507542180344, "grad_norm": 0.42901504039764404, "learning_rate": 8.71189457140573e-06, "loss": 0.7398, "step": 1043 }, { "epoch": 0.31107303810197773, "grad_norm": 0.42239463329315186, "learning_rate": 8.708408941885189e-06, "loss": 0.7544, "step": 1044 }, { "epoch": 0.311371000782152, "grad_norm": 0.4180073142051697, "learning_rate": 8.704919301981422e-06, "loss": 0.7368, "step": 1045 }, { "epoch": 0.31166896346232636, "grad_norm": 0.4252670109272003, "learning_rate": 8.701425655468226e-06, "loss": 0.7761, "step": 1046 }, { "epoch": 0.31196692614250066, "grad_norm": 0.4541017711162567, "learning_rate": 8.697928006123735e-06, "loss": 0.8097, "step": 1047 }, { "epoch": 0.31226488882267495, "grad_norm": 0.42537379264831543, "learning_rate": 8.69442635773041e-06, "loss": 0.7039, "step": 1048 }, { "epoch": 0.3125628515028493, "grad_norm": 0.41526198387145996, "learning_rate": 8.690920714075039e-06, "loss": 0.7425, "step": 1049 }, { "epoch": 0.3128608141830236, "grad_norm": 0.4391304552555084, "learning_rate": 8.687411078948727e-06, "loss": 0.7795, "step": 1050 }, { "epoch": 0.3131587768631979, "grad_norm": 0.45744815468788147, "learning_rate": 8.683897456146897e-06, "loss": 0.7992, "step": 1051 }, { "epoch": 0.3134567395433722, "grad_norm": 0.4174099564552307, "learning_rate": 8.680379849469287e-06, "loss": 0.8134, "step": 1052 }, { "epoch": 0.3137547022235465, "grad_norm": 0.4404802620410919, "learning_rate": 8.676858262719939e-06, "loss": 0.8169, "step": 1053 }, { "epoch": 0.3140526649037208, "grad_norm": 0.44465020298957825, "learning_rate": 8.673332699707202e-06, "loss": 0.7445, "step": 1054 }, { "epoch": 0.3143506275838951, "grad_norm": 0.39243802428245544, "learning_rate": 8.669803164243725e-06, "loss": 0.7439, "step": 1055 }, { "epoch": 0.31464859026406944, "grad_norm": 0.42216596007347107, "learning_rate": 8.66626966014645e-06, "loss": 0.7361, "step": 1056 }, { "epoch": 0.31494655294424373, "grad_norm": 0.44119539856910706, "learning_rate": 8.662732191236614e-06, "loss": 0.7504, "step": 1057 }, { "epoch": 0.315244515624418, "grad_norm": 0.42714157700538635, "learning_rate": 8.659190761339741e-06, "loss": 0.7716, "step": 1058 }, { "epoch": 0.31554247830459237, "grad_norm": 0.425434947013855, "learning_rate": 8.655645374285637e-06, "loss": 0.8043, "step": 1059 }, { "epoch": 0.31584044098476666, "grad_norm": 0.4378584921360016, "learning_rate": 8.652096033908391e-06, "loss": 0.7834, "step": 1060 }, { "epoch": 0.31613840366494095, "grad_norm": 0.40135619044303894, "learning_rate": 8.648542744046364e-06, "loss": 0.736, "step": 1061 }, { "epoch": 0.3164363663451153, "grad_norm": 0.43657106161117554, "learning_rate": 8.644985508542186e-06, "loss": 0.7844, "step": 1062 }, { "epoch": 0.3167343290252896, "grad_norm": 0.4240058660507202, "learning_rate": 8.64142433124276e-06, "loss": 0.752, "step": 1063 }, { "epoch": 0.3170322917054639, "grad_norm": 0.4469079077243805, "learning_rate": 8.637859215999246e-06, "loss": 0.8066, "step": 1064 }, { "epoch": 0.3173302543856382, "grad_norm": 0.4204435348510742, "learning_rate": 8.63429016666707e-06, "loss": 0.8185, "step": 1065 }, { "epoch": 0.3176282170658125, "grad_norm": 0.42589515447616577, "learning_rate": 8.630717187105902e-06, "loss": 0.7787, "step": 1066 }, { "epoch": 0.3179261797459868, "grad_norm": 0.43811801075935364, "learning_rate": 8.62714028117967e-06, "loss": 0.7667, "step": 1067 }, { "epoch": 0.31822414242616115, "grad_norm": 0.4212261140346527, "learning_rate": 8.623559452756547e-06, "loss": 0.7984, "step": 1068 }, { "epoch": 0.31852210510633544, "grad_norm": 0.4261661767959595, "learning_rate": 8.619974705708945e-06, "loss": 0.7834, "step": 1069 }, { "epoch": 0.31882006778650973, "grad_norm": 0.4385144114494324, "learning_rate": 8.616386043913516e-06, "loss": 0.7839, "step": 1070 }, { "epoch": 0.319118030466684, "grad_norm": 0.427738755941391, "learning_rate": 8.612793471251148e-06, "loss": 0.7846, "step": 1071 }, { "epoch": 0.31941599314685837, "grad_norm": 0.426027774810791, "learning_rate": 8.609196991606951e-06, "loss": 0.8066, "step": 1072 }, { "epoch": 0.31971395582703266, "grad_norm": 0.41970694065093994, "learning_rate": 8.605596608870268e-06, "loss": 0.7786, "step": 1073 }, { "epoch": 0.32001191850720695, "grad_norm": 0.4090143144130707, "learning_rate": 8.601992326934658e-06, "loss": 0.7421, "step": 1074 }, { "epoch": 0.3203098811873813, "grad_norm": 0.4413098692893982, "learning_rate": 8.5983841496979e-06, "loss": 0.8006, "step": 1075 }, { "epoch": 0.3206078438675556, "grad_norm": 0.44104504585266113, "learning_rate": 8.59477208106198e-06, "loss": 0.78, "step": 1076 }, { "epoch": 0.3209058065477299, "grad_norm": 0.43282023072242737, "learning_rate": 8.591156124933097e-06, "loss": 0.7596, "step": 1077 }, { "epoch": 0.3212037692279042, "grad_norm": 0.42781636118888855, "learning_rate": 8.587536285221656e-06, "loss": 0.7919, "step": 1078 }, { "epoch": 0.3215017319080785, "grad_norm": 0.41227588057518005, "learning_rate": 8.583912565842258e-06, "loss": 0.7646, "step": 1079 }, { "epoch": 0.3217996945882528, "grad_norm": 0.41265419125556946, "learning_rate": 8.580284970713697e-06, "loss": 0.7113, "step": 1080 }, { "epoch": 0.32209765726842715, "grad_norm": 0.43087223172187805, "learning_rate": 8.576653503758964e-06, "loss": 0.7746, "step": 1081 }, { "epoch": 0.32239561994860144, "grad_norm": 0.4424741268157959, "learning_rate": 8.573018168905237e-06, "loss": 0.7646, "step": 1082 }, { "epoch": 0.32269358262877573, "grad_norm": 0.41031649708747864, "learning_rate": 8.569378970083873e-06, "loss": 0.7532, "step": 1083 }, { "epoch": 0.3229915453089501, "grad_norm": 0.4124182164669037, "learning_rate": 8.565735911230407e-06, "loss": 0.769, "step": 1084 }, { "epoch": 0.32328950798912437, "grad_norm": 0.436513751745224, "learning_rate": 8.562088996284555e-06, "loss": 0.7941, "step": 1085 }, { "epoch": 0.32358747066929866, "grad_norm": 0.4268746078014374, "learning_rate": 8.558438229190195e-06, "loss": 0.793, "step": 1086 }, { "epoch": 0.32388543334947295, "grad_norm": 0.430209219455719, "learning_rate": 8.554783613895377e-06, "loss": 0.7172, "step": 1087 }, { "epoch": 0.3241833960296473, "grad_norm": 0.44001534581184387, "learning_rate": 8.551125154352309e-06, "loss": 0.7422, "step": 1088 }, { "epoch": 0.3244813587098216, "grad_norm": 0.41053760051727295, "learning_rate": 8.54746285451736e-06, "loss": 0.7834, "step": 1089 }, { "epoch": 0.3247793213899959, "grad_norm": 0.42692187428474426, "learning_rate": 8.543796718351043e-06, "loss": 0.737, "step": 1090 }, { "epoch": 0.3250772840701702, "grad_norm": 0.4233838617801666, "learning_rate": 8.540126749818033e-06, "loss": 0.7313, "step": 1091 }, { "epoch": 0.3253752467503445, "grad_norm": 0.4325284957885742, "learning_rate": 8.536452952887142e-06, "loss": 0.8187, "step": 1092 }, { "epoch": 0.3256732094305188, "grad_norm": 0.42090630531311035, "learning_rate": 8.532775331531317e-06, "loss": 0.7263, "step": 1093 }, { "epoch": 0.32597117211069315, "grad_norm": 0.43220120668411255, "learning_rate": 8.529093889727655e-06, "loss": 0.7538, "step": 1094 }, { "epoch": 0.32626913479086744, "grad_norm": 0.42502284049987793, "learning_rate": 8.52540863145737e-06, "loss": 0.7794, "step": 1095 }, { "epoch": 0.32656709747104173, "grad_norm": 0.4070689380168915, "learning_rate": 8.52171956070581e-06, "loss": 0.7249, "step": 1096 }, { "epoch": 0.3268650601512161, "grad_norm": 0.42295122146606445, "learning_rate": 8.518026681462448e-06, "loss": 0.7516, "step": 1097 }, { "epoch": 0.32716302283139037, "grad_norm": 0.418568879365921, "learning_rate": 8.514329997720871e-06, "loss": 0.7951, "step": 1098 }, { "epoch": 0.32746098551156466, "grad_norm": 0.47218871116638184, "learning_rate": 8.510629513478783e-06, "loss": 0.7837, "step": 1099 }, { "epoch": 0.327758948191739, "grad_norm": 0.42859897017478943, "learning_rate": 8.506925232737998e-06, "loss": 0.7544, "step": 1100 }, { "epoch": 0.3280569108719133, "grad_norm": 0.4329354763031006, "learning_rate": 8.50321715950443e-06, "loss": 0.805, "step": 1101 }, { "epoch": 0.3283548735520876, "grad_norm": 0.43533289432525635, "learning_rate": 8.499505297788106e-06, "loss": 0.776, "step": 1102 }, { "epoch": 0.32865283623226194, "grad_norm": 0.4444495737552643, "learning_rate": 8.49578965160314e-06, "loss": 0.7492, "step": 1103 }, { "epoch": 0.3289507989124362, "grad_norm": 0.4216909408569336, "learning_rate": 8.492070224967742e-06, "loss": 0.7342, "step": 1104 }, { "epoch": 0.3292487615926105, "grad_norm": 0.4235077202320099, "learning_rate": 8.48834702190421e-06, "loss": 0.7462, "step": 1105 }, { "epoch": 0.3295467242727848, "grad_norm": 0.43822556734085083, "learning_rate": 8.484620046438925e-06, "loss": 0.8223, "step": 1106 }, { "epoch": 0.32984468695295915, "grad_norm": 0.4066564738750458, "learning_rate": 8.480889302602351e-06, "loss": 0.7321, "step": 1107 }, { "epoch": 0.33014264963313344, "grad_norm": 0.4117765426635742, "learning_rate": 8.477154794429021e-06, "loss": 0.7348, "step": 1108 }, { "epoch": 0.33044061231330774, "grad_norm": 0.4176870882511139, "learning_rate": 8.47341652595755e-06, "loss": 0.8148, "step": 1109 }, { "epoch": 0.3307385749934821, "grad_norm": 0.41546934843063354, "learning_rate": 8.469674501230603e-06, "loss": 0.7704, "step": 1110 }, { "epoch": 0.3310365376736564, "grad_norm": 0.44431930780410767, "learning_rate": 8.465928724294923e-06, "loss": 0.8094, "step": 1111 }, { "epoch": 0.33133450035383066, "grad_norm": 0.42732754349708557, "learning_rate": 8.462179199201301e-06, "loss": 0.7129, "step": 1112 }, { "epoch": 0.331632463034005, "grad_norm": 0.4333447515964508, "learning_rate": 8.458425930004585e-06, "loss": 0.8335, "step": 1113 }, { "epoch": 0.3319304257141793, "grad_norm": 0.4211162030696869, "learning_rate": 8.454668920763672e-06, "loss": 0.7637, "step": 1114 }, { "epoch": 0.3322283883943536, "grad_norm": 0.44972220063209534, "learning_rate": 8.450908175541503e-06, "loss": 0.8342, "step": 1115 }, { "epoch": 0.33252635107452794, "grad_norm": 0.4448044002056122, "learning_rate": 8.44714369840506e-06, "loss": 0.8266, "step": 1116 }, { "epoch": 0.33282431375470223, "grad_norm": 0.4465561509132385, "learning_rate": 8.443375493425358e-06, "loss": 0.7993, "step": 1117 }, { "epoch": 0.3331222764348765, "grad_norm": 0.4390435516834259, "learning_rate": 8.439603564677448e-06, "loss": 0.7782, "step": 1118 }, { "epoch": 0.33342023911505086, "grad_norm": 0.42651331424713135, "learning_rate": 8.435827916240403e-06, "loss": 0.7605, "step": 1119 }, { "epoch": 0.33371820179522516, "grad_norm": 0.4195694327354431, "learning_rate": 8.43204855219732e-06, "loss": 0.7726, "step": 1120 }, { "epoch": 0.33401616447539945, "grad_norm": 0.42806294560432434, "learning_rate": 8.42826547663532e-06, "loss": 0.735, "step": 1121 }, { "epoch": 0.33431412715557374, "grad_norm": 0.42807263135910034, "learning_rate": 8.424478693645528e-06, "loss": 0.7883, "step": 1122 }, { "epoch": 0.3346120898357481, "grad_norm": 0.43336206674575806, "learning_rate": 8.420688207323085e-06, "loss": 0.806, "step": 1123 }, { "epoch": 0.3349100525159224, "grad_norm": 0.4646878242492676, "learning_rate": 8.416894021767137e-06, "loss": 0.7781, "step": 1124 }, { "epoch": 0.33520801519609666, "grad_norm": 0.4356582462787628, "learning_rate": 8.413096141080827e-06, "loss": 0.7731, "step": 1125 }, { "epoch": 0.335505977876271, "grad_norm": 0.4233420491218567, "learning_rate": 8.409294569371293e-06, "loss": 0.7666, "step": 1126 }, { "epoch": 0.3358039405564453, "grad_norm": 0.4309924244880676, "learning_rate": 8.405489310749672e-06, "loss": 0.7458, "step": 1127 }, { "epoch": 0.3361019032366196, "grad_norm": 0.4494045078754425, "learning_rate": 8.401680369331083e-06, "loss": 0.7762, "step": 1128 }, { "epoch": 0.33639986591679394, "grad_norm": 0.4436827600002289, "learning_rate": 8.397867749234623e-06, "loss": 0.7755, "step": 1129 }, { "epoch": 0.33669782859696823, "grad_norm": 0.4168068468570709, "learning_rate": 8.394051454583376e-06, "loss": 0.7506, "step": 1130 }, { "epoch": 0.3369957912771425, "grad_norm": 0.42762142419815063, "learning_rate": 8.390231489504397e-06, "loss": 0.7819, "step": 1131 }, { "epoch": 0.33729375395731687, "grad_norm": 0.4372676908969879, "learning_rate": 8.386407858128707e-06, "loss": 0.8063, "step": 1132 }, { "epoch": 0.33759171663749116, "grad_norm": 0.45513224601745605, "learning_rate": 8.382580564591294e-06, "loss": 0.8075, "step": 1133 }, { "epoch": 0.33788967931766545, "grad_norm": 0.4320538640022278, "learning_rate": 8.378749613031108e-06, "loss": 0.7857, "step": 1134 }, { "epoch": 0.3381876419978398, "grad_norm": 0.41906487941741943, "learning_rate": 8.374915007591053e-06, "loss": 0.7429, "step": 1135 }, { "epoch": 0.3384856046780141, "grad_norm": 0.41150668263435364, "learning_rate": 8.371076752417986e-06, "loss": 0.713, "step": 1136 }, { "epoch": 0.3387835673581884, "grad_norm": 0.45930665731430054, "learning_rate": 8.367234851662707e-06, "loss": 0.7644, "step": 1137 }, { "epoch": 0.3390815300383627, "grad_norm": 0.43832212686538696, "learning_rate": 8.363389309479964e-06, "loss": 0.7738, "step": 1138 }, { "epoch": 0.339379492718537, "grad_norm": 0.42082133889198303, "learning_rate": 8.359540130028439e-06, "loss": 0.7319, "step": 1139 }, { "epoch": 0.3396774553987113, "grad_norm": 0.4095704257488251, "learning_rate": 8.355687317470749e-06, "loss": 0.7524, "step": 1140 }, { "epoch": 0.3399754180788856, "grad_norm": 0.41502609848976135, "learning_rate": 8.351830875973436e-06, "loss": 0.7406, "step": 1141 }, { "epoch": 0.34027338075905994, "grad_norm": 0.43125033378601074, "learning_rate": 8.347970809706977e-06, "loss": 0.7803, "step": 1142 }, { "epoch": 0.34057134343923423, "grad_norm": 0.45613449811935425, "learning_rate": 8.344107122845757e-06, "loss": 0.7607, "step": 1143 }, { "epoch": 0.3408693061194085, "grad_norm": 0.4124937951564789, "learning_rate": 8.340239819568082e-06, "loss": 0.7234, "step": 1144 }, { "epoch": 0.34116726879958287, "grad_norm": 0.426287442445755, "learning_rate": 8.336368904056169e-06, "loss": 0.756, "step": 1145 }, { "epoch": 0.34146523147975716, "grad_norm": 0.44698894023895264, "learning_rate": 8.332494380496142e-06, "loss": 0.7876, "step": 1146 }, { "epoch": 0.34176319415993145, "grad_norm": 0.41564255952835083, "learning_rate": 8.32861625307802e-06, "loss": 0.7673, "step": 1147 }, { "epoch": 0.3420611568401058, "grad_norm": 0.44098252058029175, "learning_rate": 8.324734525995732e-06, "loss": 0.7602, "step": 1148 }, { "epoch": 0.3423591195202801, "grad_norm": 0.41699904203414917, "learning_rate": 8.32084920344709e-06, "loss": 0.7877, "step": 1149 }, { "epoch": 0.3426570822004544, "grad_norm": 0.41518479585647583, "learning_rate": 8.316960289633795e-06, "loss": 0.7653, "step": 1150 }, { "epoch": 0.3429550448806287, "grad_norm": 0.3980076014995575, "learning_rate": 8.313067788761436e-06, "loss": 0.7658, "step": 1151 }, { "epoch": 0.343253007560803, "grad_norm": 0.4039032757282257, "learning_rate": 8.309171705039474e-06, "loss": 0.7153, "step": 1152 }, { "epoch": 0.3435509702409773, "grad_norm": 0.40779900550842285, "learning_rate": 8.305272042681257e-06, "loss": 0.7334, "step": 1153 }, { "epoch": 0.34384893292115165, "grad_norm": 0.4208314120769501, "learning_rate": 8.301368805903988e-06, "loss": 0.75, "step": 1154 }, { "epoch": 0.34414689560132594, "grad_norm": 0.4231463372707367, "learning_rate": 8.297461998928746e-06, "loss": 0.7798, "step": 1155 }, { "epoch": 0.34444485828150023, "grad_norm": 0.42949163913726807, "learning_rate": 8.293551625980468e-06, "loss": 0.7663, "step": 1156 }, { "epoch": 0.3447428209616745, "grad_norm": 0.4180258810520172, "learning_rate": 8.289637691287948e-06, "loss": 0.7535, "step": 1157 }, { "epoch": 0.34504078364184887, "grad_norm": 0.4052999019622803, "learning_rate": 8.28572019908383e-06, "loss": 0.7316, "step": 1158 }, { "epoch": 0.34533874632202316, "grad_norm": 0.4323941469192505, "learning_rate": 8.281799153604603e-06, "loss": 0.7624, "step": 1159 }, { "epoch": 0.34563670900219745, "grad_norm": 0.4146830439567566, "learning_rate": 8.277874559090605e-06, "loss": 0.7593, "step": 1160 }, { "epoch": 0.3459346716823718, "grad_norm": 0.420483261346817, "learning_rate": 8.273946419786008e-06, "loss": 0.7781, "step": 1161 }, { "epoch": 0.3462326343625461, "grad_norm": 0.41429367661476135, "learning_rate": 8.27001473993882e-06, "loss": 0.7429, "step": 1162 }, { "epoch": 0.3465305970427204, "grad_norm": 0.42921391129493713, "learning_rate": 8.266079523800873e-06, "loss": 0.7878, "step": 1163 }, { "epoch": 0.3468285597228947, "grad_norm": 0.4069218337535858, "learning_rate": 8.262140775627827e-06, "loss": 0.7468, "step": 1164 }, { "epoch": 0.347126522403069, "grad_norm": 0.42255890369415283, "learning_rate": 8.258198499679162e-06, "loss": 0.7485, "step": 1165 }, { "epoch": 0.3474244850832433, "grad_norm": 0.4330653250217438, "learning_rate": 8.25425270021817e-06, "loss": 0.7907, "step": 1166 }, { "epoch": 0.34772244776341765, "grad_norm": 0.4273219704627991, "learning_rate": 8.250303381511957e-06, "loss": 0.7894, "step": 1167 }, { "epoch": 0.34802041044359194, "grad_norm": 0.41967499256134033, "learning_rate": 8.246350547831433e-06, "loss": 0.7366, "step": 1168 }, { "epoch": 0.34831837312376623, "grad_norm": 0.4150393009185791, "learning_rate": 8.24239420345131e-06, "loss": 0.7823, "step": 1169 }, { "epoch": 0.3486163358039406, "grad_norm": 0.43301746249198914, "learning_rate": 8.238434352650094e-06, "loss": 0.7735, "step": 1170 }, { "epoch": 0.34891429848411487, "grad_norm": 0.4161169230937958, "learning_rate": 8.234470999710086e-06, "loss": 0.7563, "step": 1171 }, { "epoch": 0.34921226116428916, "grad_norm": 0.4297851324081421, "learning_rate": 8.230504148917374e-06, "loss": 0.7164, "step": 1172 }, { "epoch": 0.3495102238444635, "grad_norm": 0.42612648010253906, "learning_rate": 8.226533804561828e-06, "loss": 0.786, "step": 1173 }, { "epoch": 0.3498081865246378, "grad_norm": 0.4089588522911072, "learning_rate": 8.222559970937092e-06, "loss": 0.7427, "step": 1174 }, { "epoch": 0.3501061492048121, "grad_norm": 0.42445510625839233, "learning_rate": 8.218582652340592e-06, "loss": 0.7663, "step": 1175 }, { "epoch": 0.3504041118849864, "grad_norm": 0.41788461804389954, "learning_rate": 8.214601853073516e-06, "loss": 0.7726, "step": 1176 }, { "epoch": 0.3507020745651607, "grad_norm": 0.4169197380542755, "learning_rate": 8.21061757744082e-06, "loss": 0.774, "step": 1177 }, { "epoch": 0.351000037245335, "grad_norm": 0.40096545219421387, "learning_rate": 8.206629829751215e-06, "loss": 0.7321, "step": 1178 }, { "epoch": 0.3512979999255093, "grad_norm": 0.41959595680236816, "learning_rate": 8.202638614317171e-06, "loss": 0.7679, "step": 1179 }, { "epoch": 0.35159596260568365, "grad_norm": 0.41927823424339294, "learning_rate": 8.198643935454907e-06, "loss": 0.7578, "step": 1180 }, { "epoch": 0.35189392528585794, "grad_norm": 0.45546793937683105, "learning_rate": 8.194645797484385e-06, "loss": 0.7964, "step": 1181 }, { "epoch": 0.35219188796603224, "grad_norm": 0.4366207420825958, "learning_rate": 8.190644204729313e-06, "loss": 0.7855, "step": 1182 }, { "epoch": 0.3524898506462066, "grad_norm": 0.41425538063049316, "learning_rate": 8.186639161517127e-06, "loss": 0.7442, "step": 1183 }, { "epoch": 0.3527878133263809, "grad_norm": 0.4379921853542328, "learning_rate": 8.182630672179003e-06, "loss": 0.8418, "step": 1184 }, { "epoch": 0.35308577600655516, "grad_norm": 0.4329274296760559, "learning_rate": 8.178618741049841e-06, "loss": 0.7885, "step": 1185 }, { "epoch": 0.3533837386867295, "grad_norm": 0.4258681833744049, "learning_rate": 8.174603372468259e-06, "loss": 0.7588, "step": 1186 }, { "epoch": 0.3536817013669038, "grad_norm": 0.41804084181785583, "learning_rate": 8.170584570776598e-06, "loss": 0.8098, "step": 1187 }, { "epoch": 0.3539796640470781, "grad_norm": 0.4102548062801361, "learning_rate": 8.166562340320908e-06, "loss": 0.7863, "step": 1188 }, { "epoch": 0.35427762672725244, "grad_norm": 0.4187453091144562, "learning_rate": 8.162536685450945e-06, "loss": 0.7904, "step": 1189 }, { "epoch": 0.35457558940742673, "grad_norm": 0.43551990389823914, "learning_rate": 8.158507610520177e-06, "loss": 0.8015, "step": 1190 }, { "epoch": 0.354873552087601, "grad_norm": 0.41999849677085876, "learning_rate": 8.154475119885763e-06, "loss": 0.7717, "step": 1191 }, { "epoch": 0.3551715147677753, "grad_norm": 0.43046441674232483, "learning_rate": 8.150439217908557e-06, "loss": 0.7526, "step": 1192 }, { "epoch": 0.35546947744794966, "grad_norm": 0.4299757778644562, "learning_rate": 8.146399908953102e-06, "loss": 0.7774, "step": 1193 }, { "epoch": 0.35576744012812395, "grad_norm": 0.43132033944129944, "learning_rate": 8.142357197387627e-06, "loss": 0.7387, "step": 1194 }, { "epoch": 0.35606540280829824, "grad_norm": 0.43697425723075867, "learning_rate": 8.138311087584042e-06, "loss": 0.783, "step": 1195 }, { "epoch": 0.3563633654884726, "grad_norm": 0.42331957817077637, "learning_rate": 8.134261583917927e-06, "loss": 0.7536, "step": 1196 }, { "epoch": 0.3566613281686469, "grad_norm": 0.4190444350242615, "learning_rate": 8.130208690768536e-06, "loss": 0.7617, "step": 1197 }, { "epoch": 0.35695929084882116, "grad_norm": 0.4350127875804901, "learning_rate": 8.126152412518788e-06, "loss": 0.7954, "step": 1198 }, { "epoch": 0.3572572535289955, "grad_norm": 0.4178895652294159, "learning_rate": 8.122092753555265e-06, "loss": 0.7746, "step": 1199 }, { "epoch": 0.3575552162091698, "grad_norm": 0.4325284957885742, "learning_rate": 8.118029718268197e-06, "loss": 0.7596, "step": 1200 }, { "epoch": 0.3578531788893441, "grad_norm": 0.4090530574321747, "learning_rate": 8.113963311051474e-06, "loss": 0.754, "step": 1201 }, { "epoch": 0.35815114156951844, "grad_norm": 0.39271825551986694, "learning_rate": 8.10989353630263e-06, "loss": 0.739, "step": 1202 }, { "epoch": 0.35844910424969273, "grad_norm": 0.4179271459579468, "learning_rate": 8.105820398422837e-06, "loss": 0.767, "step": 1203 }, { "epoch": 0.358747066929867, "grad_norm": 0.43544018268585205, "learning_rate": 8.10174390181691e-06, "loss": 0.768, "step": 1204 }, { "epoch": 0.35904502961004137, "grad_norm": 0.41055986285209656, "learning_rate": 8.09766405089329e-06, "loss": 0.7455, "step": 1205 }, { "epoch": 0.35934299229021566, "grad_norm": 0.43460318446159363, "learning_rate": 8.093580850064053e-06, "loss": 0.7631, "step": 1206 }, { "epoch": 0.35964095497038995, "grad_norm": 0.4326554238796234, "learning_rate": 8.08949430374489e-06, "loss": 0.7593, "step": 1207 }, { "epoch": 0.3599389176505643, "grad_norm": 0.4217560291290283, "learning_rate": 8.085404416355111e-06, "loss": 0.7579, "step": 1208 }, { "epoch": 0.3602368803307386, "grad_norm": 0.44069936871528625, "learning_rate": 8.081311192317645e-06, "loss": 0.7789, "step": 1209 }, { "epoch": 0.3605348430109129, "grad_norm": 0.4214688539505005, "learning_rate": 8.077214636059025e-06, "loss": 0.7705, "step": 1210 }, { "epoch": 0.36083280569108717, "grad_norm": 0.41780489683151245, "learning_rate": 8.073114752009388e-06, "loss": 0.8051, "step": 1211 }, { "epoch": 0.3611307683712615, "grad_norm": 0.4128536581993103, "learning_rate": 8.06901154460247e-06, "loss": 0.7839, "step": 1212 }, { "epoch": 0.3614287310514358, "grad_norm": 0.4101253151893616, "learning_rate": 8.0649050182756e-06, "loss": 0.7666, "step": 1213 }, { "epoch": 0.3617266937316101, "grad_norm": 0.4235800504684448, "learning_rate": 8.060795177469698e-06, "loss": 0.7479, "step": 1214 }, { "epoch": 0.36202465641178444, "grad_norm": 0.4299834966659546, "learning_rate": 8.056682026629269e-06, "loss": 0.7552, "step": 1215 }, { "epoch": 0.36232261909195873, "grad_norm": 0.43004798889160156, "learning_rate": 8.052565570202394e-06, "loss": 0.7419, "step": 1216 }, { "epoch": 0.362620581772133, "grad_norm": 0.4240386486053467, "learning_rate": 8.04844581264073e-06, "loss": 0.7907, "step": 1217 }, { "epoch": 0.36291854445230737, "grad_norm": 0.4139268398284912, "learning_rate": 8.044322758399508e-06, "loss": 0.7389, "step": 1218 }, { "epoch": 0.36321650713248166, "grad_norm": 0.45361292362213135, "learning_rate": 8.04019641193752e-06, "loss": 0.8171, "step": 1219 }, { "epoch": 0.36351446981265595, "grad_norm": 0.42017659544944763, "learning_rate": 8.036066777717117e-06, "loss": 0.7835, "step": 1220 }, { "epoch": 0.3638124324928303, "grad_norm": 0.43891727924346924, "learning_rate": 8.031933860204208e-06, "loss": 0.8214, "step": 1221 }, { "epoch": 0.3641103951730046, "grad_norm": 0.41762715578079224, "learning_rate": 8.027797663868255e-06, "loss": 0.7513, "step": 1222 }, { "epoch": 0.3644083578531789, "grad_norm": 0.4542342722415924, "learning_rate": 8.023658193182261e-06, "loss": 0.767, "step": 1223 }, { "epoch": 0.3647063205333532, "grad_norm": 0.4249032139778137, "learning_rate": 8.019515452622775e-06, "loss": 0.7792, "step": 1224 }, { "epoch": 0.3650042832135275, "grad_norm": 0.4247143268585205, "learning_rate": 8.015369446669877e-06, "loss": 0.7667, "step": 1225 }, { "epoch": 0.3653022458937018, "grad_norm": 0.4281252920627594, "learning_rate": 8.011220179807178e-06, "loss": 0.7489, "step": 1226 }, { "epoch": 0.3656002085738761, "grad_norm": 0.411519855260849, "learning_rate": 8.007067656521823e-06, "loss": 0.7618, "step": 1227 }, { "epoch": 0.36589817125405044, "grad_norm": 0.4394058585166931, "learning_rate": 8.00291188130447e-06, "loss": 0.7529, "step": 1228 }, { "epoch": 0.36619613393422473, "grad_norm": 0.4067392945289612, "learning_rate": 7.9987528586493e-06, "loss": 0.7288, "step": 1229 }, { "epoch": 0.366494096614399, "grad_norm": 0.4320877194404602, "learning_rate": 7.994590593054001e-06, "loss": 0.7867, "step": 1230 }, { "epoch": 0.36679205929457337, "grad_norm": 0.42730119824409485, "learning_rate": 7.990425089019774e-06, "loss": 0.755, "step": 1231 }, { "epoch": 0.36709002197474766, "grad_norm": 0.4184194505214691, "learning_rate": 7.98625635105131e-06, "loss": 0.7375, "step": 1232 }, { "epoch": 0.36738798465492195, "grad_norm": 0.4290657341480255, "learning_rate": 7.982084383656818e-06, "loss": 0.773, "step": 1233 }, { "epoch": 0.3676859473350963, "grad_norm": 0.43041571974754333, "learning_rate": 7.977909191347977e-06, "loss": 0.8132, "step": 1234 }, { "epoch": 0.3679839100152706, "grad_norm": 0.40691784024238586, "learning_rate": 7.973730778639968e-06, "loss": 0.7787, "step": 1235 }, { "epoch": 0.3682818726954449, "grad_norm": 0.4414646625518799, "learning_rate": 7.969549150051447e-06, "loss": 0.7625, "step": 1236 }, { "epoch": 0.3685798353756192, "grad_norm": 0.4310557544231415, "learning_rate": 7.965364310104556e-06, "loss": 0.781, "step": 1237 }, { "epoch": 0.3688777980557935, "grad_norm": 0.42236313223838806, "learning_rate": 7.961176263324902e-06, "loss": 0.7593, "step": 1238 }, { "epoch": 0.3691757607359678, "grad_norm": 0.4452737867832184, "learning_rate": 7.95698501424156e-06, "loss": 0.7807, "step": 1239 }, { "epoch": 0.36947372341614215, "grad_norm": 0.4227520525455475, "learning_rate": 7.952790567387077e-06, "loss": 0.7445, "step": 1240 }, { "epoch": 0.36977168609631644, "grad_norm": 0.43495917320251465, "learning_rate": 7.948592927297446e-06, "loss": 0.7486, "step": 1241 }, { "epoch": 0.37006964877649073, "grad_norm": 0.4254031777381897, "learning_rate": 7.944392098512123e-06, "loss": 0.7607, "step": 1242 }, { "epoch": 0.3703676114566651, "grad_norm": 0.4352802634239197, "learning_rate": 7.940188085574007e-06, "loss": 0.7986, "step": 1243 }, { "epoch": 0.37066557413683937, "grad_norm": 0.43163731694221497, "learning_rate": 7.935980893029442e-06, "loss": 0.7769, "step": 1244 }, { "epoch": 0.37096353681701366, "grad_norm": 0.4114803969860077, "learning_rate": 7.931770525428212e-06, "loss": 0.7559, "step": 1245 }, { "epoch": 0.37126149949718795, "grad_norm": 0.4310123920440674, "learning_rate": 7.927556987323534e-06, "loss": 0.7836, "step": 1246 }, { "epoch": 0.3715594621773623, "grad_norm": 0.4124079644680023, "learning_rate": 7.92334028327205e-06, "loss": 0.7224, "step": 1247 }, { "epoch": 0.3718574248575366, "grad_norm": 0.4300309419631958, "learning_rate": 7.91912041783383e-06, "loss": 0.7505, "step": 1248 }, { "epoch": 0.3721553875377109, "grad_norm": 0.4144781827926636, "learning_rate": 7.914897395572362e-06, "loss": 0.7697, "step": 1249 }, { "epoch": 0.3724533502178852, "grad_norm": 0.4345705509185791, "learning_rate": 7.910671221054545e-06, "loss": 0.7951, "step": 1250 }, { "epoch": 0.3727513128980595, "grad_norm": 0.421006977558136, "learning_rate": 7.906441898850693e-06, "loss": 0.7166, "step": 1251 }, { "epoch": 0.3730492755782338, "grad_norm": 0.4450724422931671, "learning_rate": 7.902209433534515e-06, "loss": 0.7898, "step": 1252 }, { "epoch": 0.37334723825840815, "grad_norm": 0.41672611236572266, "learning_rate": 7.89797382968313e-06, "loss": 0.7618, "step": 1253 }, { "epoch": 0.37364520093858244, "grad_norm": 0.42059627175331116, "learning_rate": 7.893735091877041e-06, "loss": 0.7821, "step": 1254 }, { "epoch": 0.37394316361875674, "grad_norm": 0.4378127455711365, "learning_rate": 7.889493224700147e-06, "loss": 0.7896, "step": 1255 }, { "epoch": 0.3742411262989311, "grad_norm": 0.4272556006908417, "learning_rate": 7.885248232739729e-06, "loss": 0.7865, "step": 1256 }, { "epoch": 0.3745390889791054, "grad_norm": 0.443332701921463, "learning_rate": 7.881000120586446e-06, "loss": 0.7354, "step": 1257 }, { "epoch": 0.37483705165927966, "grad_norm": 0.42500534653663635, "learning_rate": 7.876748892834331e-06, "loss": 0.6991, "step": 1258 }, { "epoch": 0.375135014339454, "grad_norm": 0.42817673087120056, "learning_rate": 7.87249455408079e-06, "loss": 0.7993, "step": 1259 }, { "epoch": 0.3754329770196283, "grad_norm": 0.4016212224960327, "learning_rate": 7.86823710892659e-06, "loss": 0.7421, "step": 1260 }, { "epoch": 0.3757309396998026, "grad_norm": 0.41569772362709045, "learning_rate": 7.86397656197586e-06, "loss": 0.7628, "step": 1261 }, { "epoch": 0.3760289023799769, "grad_norm": 0.4199194014072418, "learning_rate": 7.859712917836075e-06, "loss": 0.7591, "step": 1262 }, { "epoch": 0.3763268650601512, "grad_norm": 0.413968026638031, "learning_rate": 7.855446181118074e-06, "loss": 0.7582, "step": 1263 }, { "epoch": 0.3766248277403255, "grad_norm": 0.4255465865135193, "learning_rate": 7.851176356436028e-06, "loss": 0.8063, "step": 1264 }, { "epoch": 0.3769227904204998, "grad_norm": 0.43615594506263733, "learning_rate": 7.846903448407454e-06, "loss": 0.7209, "step": 1265 }, { "epoch": 0.37722075310067416, "grad_norm": 0.4406788945198059, "learning_rate": 7.842627461653198e-06, "loss": 0.7863, "step": 1266 }, { "epoch": 0.37751871578084845, "grad_norm": 0.4182646870613098, "learning_rate": 7.838348400797443e-06, "loss": 0.7392, "step": 1267 }, { "epoch": 0.37781667846102274, "grad_norm": 0.4302336871623993, "learning_rate": 7.83406627046769e-06, "loss": 0.7663, "step": 1268 }, { "epoch": 0.3781146411411971, "grad_norm": 0.43965375423431396, "learning_rate": 7.829781075294762e-06, "loss": 0.801, "step": 1269 }, { "epoch": 0.3784126038213714, "grad_norm": 0.4432234466075897, "learning_rate": 7.825492819912792e-06, "loss": 0.7936, "step": 1270 }, { "epoch": 0.37871056650154566, "grad_norm": 0.4336239993572235, "learning_rate": 7.821201508959233e-06, "loss": 0.7912, "step": 1271 }, { "epoch": 0.37900852918172, "grad_norm": 0.43910470604896545, "learning_rate": 7.816907147074832e-06, "loss": 0.762, "step": 1272 }, { "epoch": 0.3793064918618943, "grad_norm": 0.41593313217163086, "learning_rate": 7.81260973890364e-06, "loss": 0.7793, "step": 1273 }, { "epoch": 0.3796044545420686, "grad_norm": 0.4045245051383972, "learning_rate": 7.808309289093e-06, "loss": 0.7362, "step": 1274 }, { "epoch": 0.37990241722224294, "grad_norm": 0.43610960245132446, "learning_rate": 7.804005802293547e-06, "loss": 0.7572, "step": 1275 }, { "epoch": 0.38020037990241723, "grad_norm": 0.4366176426410675, "learning_rate": 7.799699283159199e-06, "loss": 0.7504, "step": 1276 }, { "epoch": 0.3804983425825915, "grad_norm": 0.4121038019657135, "learning_rate": 7.795389736347152e-06, "loss": 0.7285, "step": 1277 }, { "epoch": 0.38079630526276587, "grad_norm": 0.42790961265563965, "learning_rate": 7.791077166517881e-06, "loss": 0.7538, "step": 1278 }, { "epoch": 0.38109426794294016, "grad_norm": 0.4392932057380676, "learning_rate": 7.786761578335123e-06, "loss": 0.7744, "step": 1279 }, { "epoch": 0.38139223062311445, "grad_norm": 0.41220176219940186, "learning_rate": 7.782442976465885e-06, "loss": 0.7064, "step": 1280 }, { "epoch": 0.38169019330328874, "grad_norm": 0.4391211271286011, "learning_rate": 7.778121365580428e-06, "loss": 0.7975, "step": 1281 }, { "epoch": 0.3819881559834631, "grad_norm": 0.4090668559074402, "learning_rate": 7.773796750352274e-06, "loss": 0.7432, "step": 1282 }, { "epoch": 0.3822861186636374, "grad_norm": 0.4221076965332031, "learning_rate": 7.769469135458187e-06, "loss": 0.7434, "step": 1283 }, { "epoch": 0.38258408134381167, "grad_norm": 0.4447765052318573, "learning_rate": 7.765138525578179e-06, "loss": 0.7629, "step": 1284 }, { "epoch": 0.382882044023986, "grad_norm": 0.4213871657848358, "learning_rate": 7.760804925395502e-06, "loss": 0.7572, "step": 1285 }, { "epoch": 0.3831800067041603, "grad_norm": 0.4042508602142334, "learning_rate": 7.756468339596634e-06, "loss": 0.771, "step": 1286 }, { "epoch": 0.3834779693843346, "grad_norm": 0.413955956697464, "learning_rate": 7.752128772871292e-06, "loss": 0.7879, "step": 1287 }, { "epoch": 0.38377593206450894, "grad_norm": 0.41381534934043884, "learning_rate": 7.74778622991241e-06, "loss": 0.81, "step": 1288 }, { "epoch": 0.38407389474468323, "grad_norm": 0.44178399443626404, "learning_rate": 7.743440715416144e-06, "loss": 0.7526, "step": 1289 }, { "epoch": 0.3843718574248575, "grad_norm": 0.4154306948184967, "learning_rate": 7.73909223408186e-06, "loss": 0.7251, "step": 1290 }, { "epoch": 0.38466982010503187, "grad_norm": 0.440555602312088, "learning_rate": 7.734740790612137e-06, "loss": 0.7891, "step": 1291 }, { "epoch": 0.38496778278520616, "grad_norm": 0.4190932810306549, "learning_rate": 7.730386389712749e-06, "loss": 0.751, "step": 1292 }, { "epoch": 0.38526574546538045, "grad_norm": 0.4299873411655426, "learning_rate": 7.726029036092682e-06, "loss": 0.7627, "step": 1293 }, { "epoch": 0.3855637081455548, "grad_norm": 0.42644360661506653, "learning_rate": 7.721668734464103e-06, "loss": 0.775, "step": 1294 }, { "epoch": 0.3858616708257291, "grad_norm": 0.42981818318367004, "learning_rate": 7.71730548954237e-06, "loss": 0.736, "step": 1295 }, { "epoch": 0.3861596335059034, "grad_norm": 0.435674250125885, "learning_rate": 7.71293930604603e-06, "loss": 0.7854, "step": 1296 }, { "epoch": 0.38645759618607767, "grad_norm": 0.4008498191833496, "learning_rate": 7.708570188696798e-06, "loss": 0.7294, "step": 1297 }, { "epoch": 0.386755558866252, "grad_norm": 0.4344206154346466, "learning_rate": 7.70419814221957e-06, "loss": 0.7457, "step": 1298 }, { "epoch": 0.3870535215464263, "grad_norm": 0.4119266867637634, "learning_rate": 7.699823171342404e-06, "loss": 0.7669, "step": 1299 }, { "epoch": 0.3873514842266006, "grad_norm": 0.42819297313690186, "learning_rate": 7.695445280796527e-06, "loss": 0.7921, "step": 1300 }, { "epoch": 0.38764944690677494, "grad_norm": 0.4224448502063751, "learning_rate": 7.691064475316314e-06, "loss": 0.7711, "step": 1301 }, { "epoch": 0.38794740958694923, "grad_norm": 0.43162328004837036, "learning_rate": 7.686680759639304e-06, "loss": 0.7536, "step": 1302 }, { "epoch": 0.3882453722671235, "grad_norm": 0.4317812919616699, "learning_rate": 7.682294138506171e-06, "loss": 0.7053, "step": 1303 }, { "epoch": 0.38854333494729787, "grad_norm": 0.42532142996788025, "learning_rate": 7.677904616660742e-06, "loss": 0.7753, "step": 1304 }, { "epoch": 0.38884129762747216, "grad_norm": 0.40439435839653015, "learning_rate": 7.673512198849973e-06, "loss": 0.7299, "step": 1305 }, { "epoch": 0.38913926030764645, "grad_norm": 0.4271869659423828, "learning_rate": 7.669116889823955e-06, "loss": 0.7583, "step": 1306 }, { "epoch": 0.3894372229878208, "grad_norm": 0.4402204751968384, "learning_rate": 7.664718694335904e-06, "loss": 0.8032, "step": 1307 }, { "epoch": 0.3897351856679951, "grad_norm": 0.4161050617694855, "learning_rate": 7.660317617142163e-06, "loss": 0.7019, "step": 1308 }, { "epoch": 0.3900331483481694, "grad_norm": 0.4266067445278168, "learning_rate": 7.655913663002181e-06, "loss": 0.7739, "step": 1309 }, { "epoch": 0.3903311110283437, "grad_norm": 0.4075363874435425, "learning_rate": 7.651506836678531e-06, "loss": 0.736, "step": 1310 }, { "epoch": 0.390629073708518, "grad_norm": 0.4212097227573395, "learning_rate": 7.647097142936881e-06, "loss": 0.7503, "step": 1311 }, { "epoch": 0.3909270363886923, "grad_norm": 0.41902589797973633, "learning_rate": 7.642684586546008e-06, "loss": 0.7603, "step": 1312 }, { "epoch": 0.3912249990688666, "grad_norm": 0.4070022404193878, "learning_rate": 7.638269172277777e-06, "loss": 0.7606, "step": 1313 }, { "epoch": 0.39152296174904094, "grad_norm": 0.4232437014579773, "learning_rate": 7.633850904907149e-06, "loss": 0.7716, "step": 1314 }, { "epoch": 0.39182092442921523, "grad_norm": 0.44182655215263367, "learning_rate": 7.62942978921217e-06, "loss": 0.8091, "step": 1315 }, { "epoch": 0.3921188871093895, "grad_norm": 0.4218367040157318, "learning_rate": 7.625005829973966e-06, "loss": 0.7416, "step": 1316 }, { "epoch": 0.39241684978956387, "grad_norm": 0.41140156984329224, "learning_rate": 7.6205790319767385e-06, "loss": 0.7577, "step": 1317 }, { "epoch": 0.39271481246973816, "grad_norm": 0.42180532217025757, "learning_rate": 7.616149400007753e-06, "loss": 0.7348, "step": 1318 }, { "epoch": 0.39301277514991245, "grad_norm": 0.3995833396911621, "learning_rate": 7.611716938857349e-06, "loss": 0.748, "step": 1319 }, { "epoch": 0.3933107378300868, "grad_norm": 0.42666950821876526, "learning_rate": 7.60728165331892e-06, "loss": 0.7483, "step": 1320 }, { "epoch": 0.3936087005102611, "grad_norm": 0.43507495522499084, "learning_rate": 7.602843548188915e-06, "loss": 0.8089, "step": 1321 }, { "epoch": 0.3939066631904354, "grad_norm": 0.4356527328491211, "learning_rate": 7.598402628266832e-06, "loss": 0.7452, "step": 1322 }, { "epoch": 0.3942046258706097, "grad_norm": 0.41399919986724854, "learning_rate": 7.5939588983552145e-06, "loss": 0.7603, "step": 1323 }, { "epoch": 0.394502588550784, "grad_norm": 0.4259139597415924, "learning_rate": 7.589512363259643e-06, "loss": 0.7756, "step": 1324 }, { "epoch": 0.3948005512309583, "grad_norm": 0.42768535017967224, "learning_rate": 7.58506302778873e-06, "loss": 0.7465, "step": 1325 }, { "epoch": 0.39509851391113265, "grad_norm": 0.42649754881858826, "learning_rate": 7.580610896754122e-06, "loss": 0.7372, "step": 1326 }, { "epoch": 0.39539647659130694, "grad_norm": 0.4322483539581299, "learning_rate": 7.576155974970485e-06, "loss": 0.7693, "step": 1327 }, { "epoch": 0.39569443927148124, "grad_norm": 0.41652461886405945, "learning_rate": 7.5716982672555e-06, "loss": 0.7764, "step": 1328 }, { "epoch": 0.3959924019516556, "grad_norm": 0.4124981164932251, "learning_rate": 7.567237778429868e-06, "loss": 0.7475, "step": 1329 }, { "epoch": 0.3962903646318299, "grad_norm": 0.3989923298358917, "learning_rate": 7.562774513317293e-06, "loss": 0.7229, "step": 1330 }, { "epoch": 0.39658832731200416, "grad_norm": 0.41557246446609497, "learning_rate": 7.558308476744478e-06, "loss": 0.7503, "step": 1331 }, { "epoch": 0.39688628999217845, "grad_norm": 0.4041295051574707, "learning_rate": 7.553839673541133e-06, "loss": 0.7339, "step": 1332 }, { "epoch": 0.3971842526723528, "grad_norm": 0.4372117221355438, "learning_rate": 7.54936810853995e-06, "loss": 0.7584, "step": 1333 }, { "epoch": 0.3974822153525271, "grad_norm": 0.4198533594608307, "learning_rate": 7.544893786576612e-06, "loss": 0.7624, "step": 1334 }, { "epoch": 0.3977801780327014, "grad_norm": 0.4439017176628113, "learning_rate": 7.540416712489786e-06, "loss": 0.8231, "step": 1335 }, { "epoch": 0.3980781407128757, "grad_norm": 0.4390065371990204, "learning_rate": 7.5359368911211115e-06, "loss": 0.7142, "step": 1336 }, { "epoch": 0.39837610339305, "grad_norm": 0.4036790430545807, "learning_rate": 7.5314543273151986e-06, "loss": 0.7458, "step": 1337 }, { "epoch": 0.3986740660732243, "grad_norm": 0.43398842215538025, "learning_rate": 7.5269690259196235e-06, "loss": 0.7881, "step": 1338 }, { "epoch": 0.39897202875339866, "grad_norm": 0.4433099031448364, "learning_rate": 7.522480991784928e-06, "loss": 0.7741, "step": 1339 }, { "epoch": 0.39926999143357295, "grad_norm": 0.41631844639778137, "learning_rate": 7.517990229764602e-06, "loss": 0.7323, "step": 1340 }, { "epoch": 0.39956795411374724, "grad_norm": 0.43654537200927734, "learning_rate": 7.51349674471509e-06, "loss": 0.7332, "step": 1341 }, { "epoch": 0.3998659167939216, "grad_norm": 0.42507266998291016, "learning_rate": 7.509000541495777e-06, "loss": 0.7597, "step": 1342 }, { "epoch": 0.4001638794740959, "grad_norm": 0.4104708135128021, "learning_rate": 7.504501624968995e-06, "loss": 0.7441, "step": 1343 }, { "epoch": 0.40046184215427016, "grad_norm": 0.415763258934021, "learning_rate": 7.500000000000001e-06, "loss": 0.7459, "step": 1344 }, { "epoch": 0.4007598048344445, "grad_norm": 0.41063833236694336, "learning_rate": 7.495495671456987e-06, "loss": 0.742, "step": 1345 }, { "epoch": 0.4010577675146188, "grad_norm": 0.4284031093120575, "learning_rate": 7.4909886442110694e-06, "loss": 0.7591, "step": 1346 }, { "epoch": 0.4013557301947931, "grad_norm": 0.4156349003314972, "learning_rate": 7.4864789231362776e-06, "loss": 0.7683, "step": 1347 }, { "epoch": 0.4016536928749674, "grad_norm": 0.422329843044281, "learning_rate": 7.481966513109561e-06, "loss": 0.7323, "step": 1348 }, { "epoch": 0.40195165555514173, "grad_norm": 0.4287894070148468, "learning_rate": 7.477451419010768e-06, "loss": 0.7342, "step": 1349 }, { "epoch": 0.402249618235316, "grad_norm": 0.40589234232902527, "learning_rate": 7.472933645722662e-06, "loss": 0.7642, "step": 1350 }, { "epoch": 0.4025475809154903, "grad_norm": 0.42224130034446716, "learning_rate": 7.468413198130891e-06, "loss": 0.7288, "step": 1351 }, { "epoch": 0.40284554359566466, "grad_norm": 0.433326780796051, "learning_rate": 7.463890081124005e-06, "loss": 0.7924, "step": 1352 }, { "epoch": 0.40314350627583895, "grad_norm": 0.39268532395362854, "learning_rate": 7.459364299593433e-06, "loss": 0.6931, "step": 1353 }, { "epoch": 0.40344146895601324, "grad_norm": 0.41443511843681335, "learning_rate": 7.4548358584334924e-06, "loss": 0.7505, "step": 1354 }, { "epoch": 0.4037394316361876, "grad_norm": 0.4311977028846741, "learning_rate": 7.4503047625413715e-06, "loss": 0.7849, "step": 1355 }, { "epoch": 0.4040373943163619, "grad_norm": 0.42603302001953125, "learning_rate": 7.445771016817132e-06, "loss": 0.7755, "step": 1356 }, { "epoch": 0.40433535699653617, "grad_norm": 0.419454962015152, "learning_rate": 7.4412346261637e-06, "loss": 0.7299, "step": 1357 }, { "epoch": 0.4046333196767105, "grad_norm": 0.4183696508407593, "learning_rate": 7.436695595486865e-06, "loss": 0.7846, "step": 1358 }, { "epoch": 0.4049312823568848, "grad_norm": 0.4117453992366791, "learning_rate": 7.432153929695268e-06, "loss": 0.7586, "step": 1359 }, { "epoch": 0.4052292450370591, "grad_norm": 0.42735040187835693, "learning_rate": 7.427609633700399e-06, "loss": 0.7615, "step": 1360 }, { "epoch": 0.40552720771723344, "grad_norm": 0.4226491451263428, "learning_rate": 7.4230627124165975e-06, "loss": 0.7847, "step": 1361 }, { "epoch": 0.40582517039740773, "grad_norm": 0.40541863441467285, "learning_rate": 7.418513170761036e-06, "loss": 0.7348, "step": 1362 }, { "epoch": 0.406123133077582, "grad_norm": 0.5737308263778687, "learning_rate": 7.413961013653725e-06, "loss": 0.7987, "step": 1363 }, { "epoch": 0.40642109575775637, "grad_norm": 0.4181864559650421, "learning_rate": 7.409406246017501e-06, "loss": 0.7746, "step": 1364 }, { "epoch": 0.40671905843793066, "grad_norm": 0.42666494846343994, "learning_rate": 7.404848872778028e-06, "loss": 0.7393, "step": 1365 }, { "epoch": 0.40701702111810495, "grad_norm": 0.4203423857688904, "learning_rate": 7.400288898863779e-06, "loss": 0.7602, "step": 1366 }, { "epoch": 0.40731498379827924, "grad_norm": 0.4137178063392639, "learning_rate": 7.395726329206048e-06, "loss": 0.7705, "step": 1367 }, { "epoch": 0.4076129464784536, "grad_norm": 0.44370102882385254, "learning_rate": 7.3911611687389314e-06, "loss": 0.7377, "step": 1368 }, { "epoch": 0.4079109091586279, "grad_norm": 0.4083855152130127, "learning_rate": 7.386593422399331e-06, "loss": 0.7214, "step": 1369 }, { "epoch": 0.40820887183880217, "grad_norm": 0.4298425316810608, "learning_rate": 7.382023095126941e-06, "loss": 0.7707, "step": 1370 }, { "epoch": 0.4085068345189765, "grad_norm": 0.44515231251716614, "learning_rate": 7.377450191864249e-06, "loss": 0.7834, "step": 1371 }, { "epoch": 0.4088047971991508, "grad_norm": 0.4382359981536865, "learning_rate": 7.372874717556529e-06, "loss": 0.7679, "step": 1372 }, { "epoch": 0.4091027598793251, "grad_norm": 0.417392373085022, "learning_rate": 7.368296677151834e-06, "loss": 0.7495, "step": 1373 }, { "epoch": 0.40940072255949944, "grad_norm": 0.4449048340320587, "learning_rate": 7.363716075600993e-06, "loss": 0.7477, "step": 1374 }, { "epoch": 0.40969868523967373, "grad_norm": 0.42793670296669006, "learning_rate": 7.359132917857601e-06, "loss": 0.8068, "step": 1375 }, { "epoch": 0.409996647919848, "grad_norm": 0.43676117062568665, "learning_rate": 7.354547208878025e-06, "loss": 0.7482, "step": 1376 }, { "epoch": 0.41029461060002237, "grad_norm": 0.41651538014411926, "learning_rate": 7.349958953621383e-06, "loss": 0.693, "step": 1377 }, { "epoch": 0.41059257328019666, "grad_norm": 0.42439186573028564, "learning_rate": 7.34536815704955e-06, "loss": 0.7363, "step": 1378 }, { "epoch": 0.41089053596037095, "grad_norm": 0.4144623577594757, "learning_rate": 7.340774824127153e-06, "loss": 0.7194, "step": 1379 }, { "epoch": 0.4111884986405453, "grad_norm": 0.42826560139656067, "learning_rate": 7.336178959821555e-06, "loss": 0.7529, "step": 1380 }, { "epoch": 0.4114864613207196, "grad_norm": 0.4292297661304474, "learning_rate": 7.3315805691028615e-06, "loss": 0.7688, "step": 1381 }, { "epoch": 0.4117844240008939, "grad_norm": 0.4229586124420166, "learning_rate": 7.326979656943907e-06, "loss": 0.7452, "step": 1382 }, { "epoch": 0.41208238668106817, "grad_norm": 0.4257849156856537, "learning_rate": 7.322376228320254e-06, "loss": 0.7808, "step": 1383 }, { "epoch": 0.4123803493612425, "grad_norm": 0.45161929726600647, "learning_rate": 7.317770288210187e-06, "loss": 0.7898, "step": 1384 }, { "epoch": 0.4126783120414168, "grad_norm": 0.4150608479976654, "learning_rate": 7.313161841594708e-06, "loss": 0.7411, "step": 1385 }, { "epoch": 0.4129762747215911, "grad_norm": 0.4321781098842621, "learning_rate": 7.308550893457524e-06, "loss": 0.8008, "step": 1386 }, { "epoch": 0.41327423740176544, "grad_norm": 0.4279387891292572, "learning_rate": 7.303937448785052e-06, "loss": 0.7508, "step": 1387 }, { "epoch": 0.41357220008193973, "grad_norm": 0.4056704640388489, "learning_rate": 7.29932151256641e-06, "loss": 0.7161, "step": 1388 }, { "epoch": 0.413870162762114, "grad_norm": 0.4097346067428589, "learning_rate": 7.294703089793406e-06, "loss": 0.7178, "step": 1389 }, { "epoch": 0.41416812544228837, "grad_norm": 0.4417518973350525, "learning_rate": 7.290082185460539e-06, "loss": 0.7807, "step": 1390 }, { "epoch": 0.41446608812246266, "grad_norm": 0.4345901608467102, "learning_rate": 7.285458804564991e-06, "loss": 0.7812, "step": 1391 }, { "epoch": 0.41476405080263695, "grad_norm": 0.4186284840106964, "learning_rate": 7.280832952106627e-06, "loss": 0.7779, "step": 1392 }, { "epoch": 0.4150620134828113, "grad_norm": 0.4079751968383789, "learning_rate": 7.276204633087976e-06, "loss": 0.7309, "step": 1393 }, { "epoch": 0.4153599761629856, "grad_norm": 0.4316011369228363, "learning_rate": 7.271573852514242e-06, "loss": 0.7751, "step": 1394 }, { "epoch": 0.4156579388431599, "grad_norm": 0.4291500747203827, "learning_rate": 7.266940615393288e-06, "loss": 0.8154, "step": 1395 }, { "epoch": 0.4159559015233342, "grad_norm": 0.4209052324295044, "learning_rate": 7.262304926735633e-06, "loss": 0.7949, "step": 1396 }, { "epoch": 0.4162538642035085, "grad_norm": 0.42992156744003296, "learning_rate": 7.257666791554448e-06, "loss": 0.7878, "step": 1397 }, { "epoch": 0.4165518268836828, "grad_norm": 0.42956528067588806, "learning_rate": 7.253026214865549e-06, "loss": 0.7604, "step": 1398 }, { "epoch": 0.41684978956385715, "grad_norm": 0.41761088371276855, "learning_rate": 7.2483832016873955e-06, "loss": 0.7618, "step": 1399 }, { "epoch": 0.41714775224403144, "grad_norm": 0.4143986701965332, "learning_rate": 7.243737757041077e-06, "loss": 0.7209, "step": 1400 }, { "epoch": 0.41744571492420574, "grad_norm": 0.4181540012359619, "learning_rate": 7.239089885950317e-06, "loss": 0.7822, "step": 1401 }, { "epoch": 0.41774367760438, "grad_norm": 0.42317330837249756, "learning_rate": 7.234439593441458e-06, "loss": 0.7581, "step": 1402 }, { "epoch": 0.41804164028455437, "grad_norm": 0.4166615903377533, "learning_rate": 7.2297868845434674e-06, "loss": 0.7416, "step": 1403 }, { "epoch": 0.41833960296472866, "grad_norm": 0.4076935648918152, "learning_rate": 7.225131764287919e-06, "loss": 0.7423, "step": 1404 }, { "epoch": 0.41863756564490295, "grad_norm": 0.4171837270259857, "learning_rate": 7.220474237709001e-06, "loss": 0.7502, "step": 1405 }, { "epoch": 0.4189355283250773, "grad_norm": 0.44579628109931946, "learning_rate": 7.215814309843496e-06, "loss": 0.8137, "step": 1406 }, { "epoch": 0.4192334910052516, "grad_norm": 0.4234749376773834, "learning_rate": 7.211151985730794e-06, "loss": 0.725, "step": 1407 }, { "epoch": 0.4195314536854259, "grad_norm": 0.42978689074516296, "learning_rate": 7.206487270412866e-06, "loss": 0.7727, "step": 1408 }, { "epoch": 0.4198294163656002, "grad_norm": 0.4192066788673401, "learning_rate": 7.2018201689342745e-06, "loss": 0.7638, "step": 1409 }, { "epoch": 0.4201273790457745, "grad_norm": 0.4143064022064209, "learning_rate": 7.197150686342161e-06, "loss": 0.7527, "step": 1410 }, { "epoch": 0.4204253417259488, "grad_norm": 0.43450862169265747, "learning_rate": 7.192478827686242e-06, "loss": 0.7769, "step": 1411 }, { "epoch": 0.42072330440612316, "grad_norm": 0.4161491394042969, "learning_rate": 7.187804598018806e-06, "loss": 0.7545, "step": 1412 }, { "epoch": 0.42102126708629745, "grad_norm": 0.4148748219013214, "learning_rate": 7.183128002394699e-06, "loss": 0.767, "step": 1413 }, { "epoch": 0.42131922976647174, "grad_norm": 0.42876750230789185, "learning_rate": 7.178449045871335e-06, "loss": 0.7673, "step": 1414 }, { "epoch": 0.4216171924466461, "grad_norm": 0.41839900612831116, "learning_rate": 7.173767733508672e-06, "loss": 0.7258, "step": 1415 }, { "epoch": 0.4219151551268204, "grad_norm": 0.4298432469367981, "learning_rate": 7.169084070369223e-06, "loss": 0.7544, "step": 1416 }, { "epoch": 0.42221311780699466, "grad_norm": 0.4193957448005676, "learning_rate": 7.164398061518036e-06, "loss": 0.7454, "step": 1417 }, { "epoch": 0.42251108048716896, "grad_norm": 0.40669599175453186, "learning_rate": 7.159709712022705e-06, "loss": 0.7237, "step": 1418 }, { "epoch": 0.4228090431673433, "grad_norm": 0.4243616461753845, "learning_rate": 7.1550190269533435e-06, "loss": 0.7765, "step": 1419 }, { "epoch": 0.4231070058475176, "grad_norm": 0.4437214136123657, "learning_rate": 7.1503260113826035e-06, "loss": 0.7835, "step": 1420 }, { "epoch": 0.4234049685276919, "grad_norm": 0.4286574721336365, "learning_rate": 7.145630670385647e-06, "loss": 0.7892, "step": 1421 }, { "epoch": 0.42370293120786623, "grad_norm": 0.4140666127204895, "learning_rate": 7.1409330090401564e-06, "loss": 0.7386, "step": 1422 }, { "epoch": 0.4240008938880405, "grad_norm": 0.4383534789085388, "learning_rate": 7.136233032426322e-06, "loss": 0.7703, "step": 1423 }, { "epoch": 0.4242988565682148, "grad_norm": 0.406421422958374, "learning_rate": 7.131530745626836e-06, "loss": 0.7656, "step": 1424 }, { "epoch": 0.42459681924838916, "grad_norm": 0.4072284698486328, "learning_rate": 7.126826153726893e-06, "loss": 0.7291, "step": 1425 }, { "epoch": 0.42489478192856345, "grad_norm": 0.42674052715301514, "learning_rate": 7.122119261814175e-06, "loss": 0.7584, "step": 1426 }, { "epoch": 0.42519274460873774, "grad_norm": 0.4246309995651245, "learning_rate": 7.117410074978858e-06, "loss": 0.7668, "step": 1427 }, { "epoch": 0.4254907072889121, "grad_norm": 0.4239397346973419, "learning_rate": 7.112698598313591e-06, "loss": 0.7371, "step": 1428 }, { "epoch": 0.4257886699690864, "grad_norm": 0.43355709314346313, "learning_rate": 7.10798483691351e-06, "loss": 0.769, "step": 1429 }, { "epoch": 0.42608663264926067, "grad_norm": 0.42314520478248596, "learning_rate": 7.103268795876212e-06, "loss": 0.7634, "step": 1430 }, { "epoch": 0.426384595329435, "grad_norm": 0.42701730132102966, "learning_rate": 7.098550480301765e-06, "loss": 0.7857, "step": 1431 }, { "epoch": 0.4266825580096093, "grad_norm": 0.4175165295600891, "learning_rate": 7.093829895292695e-06, "loss": 0.7779, "step": 1432 }, { "epoch": 0.4269805206897836, "grad_norm": 0.41399118304252625, "learning_rate": 7.089107045953983e-06, "loss": 0.7646, "step": 1433 }, { "epoch": 0.42727848336995794, "grad_norm": 0.4138357937335968, "learning_rate": 7.084381937393059e-06, "loss": 0.7591, "step": 1434 }, { "epoch": 0.42757644605013223, "grad_norm": 0.41874101758003235, "learning_rate": 7.0796545747197924e-06, "loss": 0.7773, "step": 1435 }, { "epoch": 0.4278744087303065, "grad_norm": 0.43565741181373596, "learning_rate": 7.0749249630464935e-06, "loss": 0.7483, "step": 1436 }, { "epoch": 0.4281723714104808, "grad_norm": 0.4324802756309509, "learning_rate": 7.070193107487906e-06, "loss": 0.8143, "step": 1437 }, { "epoch": 0.42847033409065516, "grad_norm": 0.43232670426368713, "learning_rate": 7.0654590131612e-06, "loss": 0.7796, "step": 1438 }, { "epoch": 0.42876829677082945, "grad_norm": 0.40568041801452637, "learning_rate": 7.060722685185961e-06, "loss": 0.7366, "step": 1439 }, { "epoch": 0.42906625945100374, "grad_norm": 0.4250433146953583, "learning_rate": 7.0559841286841975e-06, "loss": 0.7601, "step": 1440 }, { "epoch": 0.4293642221311781, "grad_norm": 0.439263254404068, "learning_rate": 7.0512433487803245e-06, "loss": 0.7444, "step": 1441 }, { "epoch": 0.4296621848113524, "grad_norm": 0.42873579263687134, "learning_rate": 7.04650035060116e-06, "loss": 0.784, "step": 1442 }, { "epoch": 0.42996014749152667, "grad_norm": 0.4230228066444397, "learning_rate": 7.041755139275925e-06, "loss": 0.7377, "step": 1443 }, { "epoch": 0.430258110171701, "grad_norm": 0.4337446391582489, "learning_rate": 7.03700771993623e-06, "loss": 0.7417, "step": 1444 }, { "epoch": 0.4305560728518753, "grad_norm": 0.4101364314556122, "learning_rate": 7.032258097716076e-06, "loss": 0.7559, "step": 1445 }, { "epoch": 0.4308540355320496, "grad_norm": 0.44110795855522156, "learning_rate": 7.027506277751843e-06, "loss": 0.8001, "step": 1446 }, { "epoch": 0.43115199821222394, "grad_norm": 0.42024466395378113, "learning_rate": 7.022752265182292e-06, "loss": 0.7348, "step": 1447 }, { "epoch": 0.43144996089239823, "grad_norm": 0.41125401854515076, "learning_rate": 7.017996065148553e-06, "loss": 0.8091, "step": 1448 }, { "epoch": 0.4317479235725725, "grad_norm": 0.4192337989807129, "learning_rate": 7.01323768279412e-06, "loss": 0.7416, "step": 1449 }, { "epoch": 0.43204588625274687, "grad_norm": 0.4032820761203766, "learning_rate": 7.008477123264849e-06, "loss": 0.7004, "step": 1450 }, { "epoch": 0.43234384893292116, "grad_norm": 0.4296363294124603, "learning_rate": 7.0037143917089485e-06, "loss": 0.7787, "step": 1451 }, { "epoch": 0.43264181161309545, "grad_norm": 0.41232022643089294, "learning_rate": 6.9989494932769805e-06, "loss": 0.7424, "step": 1452 }, { "epoch": 0.43293977429326974, "grad_norm": 0.4449349641799927, "learning_rate": 6.9941824331218465e-06, "loss": 0.787, "step": 1453 }, { "epoch": 0.4332377369734441, "grad_norm": 0.41432490944862366, "learning_rate": 6.989413216398786e-06, "loss": 0.7426, "step": 1454 }, { "epoch": 0.4335356996536184, "grad_norm": 0.4493269622325897, "learning_rate": 6.98464184826537e-06, "loss": 0.8144, "step": 1455 }, { "epoch": 0.43383366233379267, "grad_norm": 0.3982318937778473, "learning_rate": 6.979868333881499e-06, "loss": 0.7263, "step": 1456 }, { "epoch": 0.434131625013967, "grad_norm": 0.4245948791503906, "learning_rate": 6.975092678409392e-06, "loss": 0.7877, "step": 1457 }, { "epoch": 0.4344295876941413, "grad_norm": 0.4272557497024536, "learning_rate": 6.970314887013585e-06, "loss": 0.7497, "step": 1458 }, { "epoch": 0.4347275503743156, "grad_norm": 0.432786226272583, "learning_rate": 6.965534964860921e-06, "loss": 0.731, "step": 1459 }, { "epoch": 0.43502551305448994, "grad_norm": 0.42844119668006897, "learning_rate": 6.960752917120552e-06, "loss": 0.7579, "step": 1460 }, { "epoch": 0.43532347573466423, "grad_norm": 0.42431870102882385, "learning_rate": 6.955968748963924e-06, "loss": 0.7786, "step": 1461 }, { "epoch": 0.4356214384148385, "grad_norm": 0.40737029910087585, "learning_rate": 6.9511824655647786e-06, "loss": 0.7403, "step": 1462 }, { "epoch": 0.43591940109501287, "grad_norm": 0.4208509027957916, "learning_rate": 6.946394072099145e-06, "loss": 0.7551, "step": 1463 }, { "epoch": 0.43621736377518716, "grad_norm": 0.4238913953304291, "learning_rate": 6.941603573745334e-06, "loss": 0.7633, "step": 1464 }, { "epoch": 0.43651532645536145, "grad_norm": 0.42684584856033325, "learning_rate": 6.936810975683931e-06, "loss": 0.7652, "step": 1465 }, { "epoch": 0.4368132891355358, "grad_norm": 0.4150695204734802, "learning_rate": 6.932016283097793e-06, "loss": 0.7425, "step": 1466 }, { "epoch": 0.4371112518157101, "grad_norm": 0.4127320945262909, "learning_rate": 6.927219501172046e-06, "loss": 0.7914, "step": 1467 }, { "epoch": 0.4374092144958844, "grad_norm": 0.41298285126686096, "learning_rate": 6.922420635094067e-06, "loss": 0.7591, "step": 1468 }, { "epoch": 0.4377071771760587, "grad_norm": 0.4197395145893097, "learning_rate": 6.9176196900534975e-06, "loss": 0.7628, "step": 1469 }, { "epoch": 0.438005139856233, "grad_norm": 0.4192826449871063, "learning_rate": 6.912816671242215e-06, "loss": 0.7607, "step": 1470 }, { "epoch": 0.4383031025364073, "grad_norm": 0.4042337238788605, "learning_rate": 6.908011583854353e-06, "loss": 0.6951, "step": 1471 }, { "epoch": 0.4386010652165816, "grad_norm": 0.4131634533405304, "learning_rate": 6.90320443308627e-06, "loss": 0.766, "step": 1472 }, { "epoch": 0.43889902789675594, "grad_norm": 0.4269515573978424, "learning_rate": 6.898395224136565e-06, "loss": 0.7805, "step": 1473 }, { "epoch": 0.43919699057693024, "grad_norm": 0.4271021783351898, "learning_rate": 6.8935839622060564e-06, "loss": 0.8168, "step": 1474 }, { "epoch": 0.4394949532571045, "grad_norm": 0.4018744230270386, "learning_rate": 6.888770652497785e-06, "loss": 0.755, "step": 1475 }, { "epoch": 0.43979291593727887, "grad_norm": 0.4159916043281555, "learning_rate": 6.88395530021701e-06, "loss": 0.74, "step": 1476 }, { "epoch": 0.44009087861745316, "grad_norm": 0.42056897282600403, "learning_rate": 6.879137910571191e-06, "loss": 0.7153, "step": 1477 }, { "epoch": 0.44038884129762745, "grad_norm": 0.4232238233089447, "learning_rate": 6.87431848877e-06, "loss": 0.7604, "step": 1478 }, { "epoch": 0.4406868039778018, "grad_norm": 0.40519949793815613, "learning_rate": 6.8694970400253e-06, "loss": 0.752, "step": 1479 }, { "epoch": 0.4409847666579761, "grad_norm": 0.42621520161628723, "learning_rate": 6.86467356955115e-06, "loss": 0.7631, "step": 1480 }, { "epoch": 0.4412827293381504, "grad_norm": 0.40618953108787537, "learning_rate": 6.8598480825637916e-06, "loss": 0.7715, "step": 1481 }, { "epoch": 0.4415806920183247, "grad_norm": 0.4289984405040741, "learning_rate": 6.855020584281651e-06, "loss": 0.7577, "step": 1482 }, { "epoch": 0.441878654698499, "grad_norm": 0.4101161062717438, "learning_rate": 6.850191079925328e-06, "loss": 0.7559, "step": 1483 }, { "epoch": 0.4421766173786733, "grad_norm": 0.4357437789440155, "learning_rate": 6.845359574717591e-06, "loss": 0.7396, "step": 1484 }, { "epoch": 0.44247458005884766, "grad_norm": 0.4237337112426758, "learning_rate": 6.8405260738833715e-06, "loss": 0.7199, "step": 1485 }, { "epoch": 0.44277254273902195, "grad_norm": 0.42737624049186707, "learning_rate": 6.835690582649762e-06, "loss": 0.7638, "step": 1486 }, { "epoch": 0.44307050541919624, "grad_norm": 0.42396315932273865, "learning_rate": 6.830853106246007e-06, "loss": 0.7432, "step": 1487 }, { "epoch": 0.4433684680993705, "grad_norm": 0.4084084928035736, "learning_rate": 6.826013649903495e-06, "loss": 0.7566, "step": 1488 }, { "epoch": 0.4436664307795449, "grad_norm": 0.42464542388916016, "learning_rate": 6.821172218855756e-06, "loss": 0.7647, "step": 1489 }, { "epoch": 0.44396439345971916, "grad_norm": 0.4080066978931427, "learning_rate": 6.81632881833846e-06, "loss": 0.742, "step": 1490 }, { "epoch": 0.44426235613989346, "grad_norm": 0.4154869616031647, "learning_rate": 6.811483453589403e-06, "loss": 0.7579, "step": 1491 }, { "epoch": 0.4445603188200678, "grad_norm": 0.41644084453582764, "learning_rate": 6.806636129848504e-06, "loss": 0.7504, "step": 1492 }, { "epoch": 0.4448582815002421, "grad_norm": 0.413764089345932, "learning_rate": 6.801786852357804e-06, "loss": 0.7259, "step": 1493 }, { "epoch": 0.4451562441804164, "grad_norm": 0.4061482846736908, "learning_rate": 6.796935626361454e-06, "loss": 0.7619, "step": 1494 }, { "epoch": 0.44545420686059073, "grad_norm": 0.4003064036369324, "learning_rate": 6.792082457105714e-06, "loss": 0.7358, "step": 1495 }, { "epoch": 0.445752169540765, "grad_norm": 0.45082637667655945, "learning_rate": 6.787227349838946e-06, "loss": 0.7915, "step": 1496 }, { "epoch": 0.4460501322209393, "grad_norm": 0.4260353147983551, "learning_rate": 6.782370309811605e-06, "loss": 0.7218, "step": 1497 }, { "epoch": 0.44634809490111366, "grad_norm": 0.4147459864616394, "learning_rate": 6.777511342276242e-06, "loss": 0.7893, "step": 1498 }, { "epoch": 0.44664605758128795, "grad_norm": 0.43019166588783264, "learning_rate": 6.772650452487482e-06, "loss": 0.7851, "step": 1499 }, { "epoch": 0.44694402026146224, "grad_norm": 0.40980443358421326, "learning_rate": 6.767787645702039e-06, "loss": 0.7539, "step": 1500 }, { "epoch": 0.4472419829416366, "grad_norm": 0.41804051399230957, "learning_rate": 6.762922927178696e-06, "loss": 0.7229, "step": 1501 }, { "epoch": 0.4475399456218109, "grad_norm": 0.41674575209617615, "learning_rate": 6.7580563021783045e-06, "loss": 0.7914, "step": 1502 }, { "epoch": 0.44783790830198517, "grad_norm": 0.4301901161670685, "learning_rate": 6.753187775963773e-06, "loss": 0.7459, "step": 1503 }, { "epoch": 0.4481358709821595, "grad_norm": 0.4142482876777649, "learning_rate": 6.7483173538000734e-06, "loss": 0.7596, "step": 1504 }, { "epoch": 0.4484338336623338, "grad_norm": 0.42101266980171204, "learning_rate": 6.743445040954223e-06, "loss": 0.7949, "step": 1505 }, { "epoch": 0.4487317963425081, "grad_norm": 0.42256978154182434, "learning_rate": 6.738570842695287e-06, "loss": 0.7771, "step": 1506 }, { "epoch": 0.4490297590226824, "grad_norm": 0.40780338644981384, "learning_rate": 6.7336947642943665e-06, "loss": 0.7704, "step": 1507 }, { "epoch": 0.44932772170285673, "grad_norm": 0.4173721671104431, "learning_rate": 6.728816811024594e-06, "loss": 0.7672, "step": 1508 }, { "epoch": 0.449625684383031, "grad_norm": 0.4159814119338989, "learning_rate": 6.723936988161138e-06, "loss": 0.7421, "step": 1509 }, { "epoch": 0.4499236470632053, "grad_norm": 0.4115670621395111, "learning_rate": 6.719055300981181e-06, "loss": 0.7692, "step": 1510 }, { "epoch": 0.45022160974337966, "grad_norm": 0.4083748161792755, "learning_rate": 6.714171754763923e-06, "loss": 0.7779, "step": 1511 }, { "epoch": 0.45051957242355395, "grad_norm": 0.42115432024002075, "learning_rate": 6.709286354790577e-06, "loss": 0.7554, "step": 1512 }, { "epoch": 0.45081753510372824, "grad_norm": 0.40441468358039856, "learning_rate": 6.704399106344359e-06, "loss": 0.7255, "step": 1513 }, { "epoch": 0.4511154977839026, "grad_norm": 0.40472057461738586, "learning_rate": 6.699510014710484e-06, "loss": 0.6787, "step": 1514 }, { "epoch": 0.4514134604640769, "grad_norm": 0.42571139335632324, "learning_rate": 6.694619085176159e-06, "loss": 0.7412, "step": 1515 }, { "epoch": 0.45171142314425117, "grad_norm": 0.4025956690311432, "learning_rate": 6.689726323030582e-06, "loss": 0.726, "step": 1516 }, { "epoch": 0.4520093858244255, "grad_norm": 0.41559621691703796, "learning_rate": 6.684831733564929e-06, "loss": 0.7741, "step": 1517 }, { "epoch": 0.4523073485045998, "grad_norm": 0.4175313413143158, "learning_rate": 6.679935322072358e-06, "loss": 0.7438, "step": 1518 }, { "epoch": 0.4526053111847741, "grad_norm": 0.4239237606525421, "learning_rate": 6.6750370938479895e-06, "loss": 0.7551, "step": 1519 }, { "epoch": 0.45290327386494844, "grad_norm": 0.41169679164886475, "learning_rate": 6.670137054188912e-06, "loss": 0.7535, "step": 1520 }, { "epoch": 0.45320123654512273, "grad_norm": 0.4318566620349884, "learning_rate": 6.665235208394175e-06, "loss": 0.7596, "step": 1521 }, { "epoch": 0.453499199225297, "grad_norm": 0.4227028787136078, "learning_rate": 6.660331561764781e-06, "loss": 0.7899, "step": 1522 }, { "epoch": 0.4537971619054713, "grad_norm": 0.4185028076171875, "learning_rate": 6.6554261196036755e-06, "loss": 0.7579, "step": 1523 }, { "epoch": 0.45409512458564566, "grad_norm": 0.451593279838562, "learning_rate": 6.6505188872157525e-06, "loss": 0.7607, "step": 1524 }, { "epoch": 0.45439308726581995, "grad_norm": 0.4055825173854828, "learning_rate": 6.645609869907835e-06, "loss": 0.775, "step": 1525 }, { "epoch": 0.45469104994599424, "grad_norm": 0.4261016547679901, "learning_rate": 6.640699072988681e-06, "loss": 0.7472, "step": 1526 }, { "epoch": 0.4549890126261686, "grad_norm": 0.43738311529159546, "learning_rate": 6.635786501768973e-06, "loss": 0.7921, "step": 1527 }, { "epoch": 0.4552869753063429, "grad_norm": 0.41713571548461914, "learning_rate": 6.6308721615613106e-06, "loss": 0.7361, "step": 1528 }, { "epoch": 0.45558493798651717, "grad_norm": 0.41634663939476013, "learning_rate": 6.6259560576802055e-06, "loss": 0.7177, "step": 1529 }, { "epoch": 0.4558829006666915, "grad_norm": 0.4361664652824402, "learning_rate": 6.621038195442078e-06, "loss": 0.7898, "step": 1530 }, { "epoch": 0.4561808633468658, "grad_norm": 0.43086275458335876, "learning_rate": 6.6161185801652495e-06, "loss": 0.7399, "step": 1531 }, { "epoch": 0.4564788260270401, "grad_norm": 0.4243473410606384, "learning_rate": 6.61119721716994e-06, "loss": 0.7602, "step": 1532 }, { "epoch": 0.45677678870721444, "grad_norm": 0.40631866455078125, "learning_rate": 6.606274111778257e-06, "loss": 0.7682, "step": 1533 }, { "epoch": 0.45707475138738873, "grad_norm": 0.42651253938674927, "learning_rate": 6.601349269314188e-06, "loss": 0.7426, "step": 1534 }, { "epoch": 0.457372714067563, "grad_norm": 0.4395078718662262, "learning_rate": 6.596422695103609e-06, "loss": 0.8164, "step": 1535 }, { "epoch": 0.45767067674773737, "grad_norm": 0.417476624250412, "learning_rate": 6.591494394474261e-06, "loss": 0.7173, "step": 1536 }, { "epoch": 0.45796863942791166, "grad_norm": 0.4121398627758026, "learning_rate": 6.586564372755754e-06, "loss": 0.7461, "step": 1537 }, { "epoch": 0.45826660210808595, "grad_norm": 0.4245747923851013, "learning_rate": 6.581632635279558e-06, "loss": 0.7696, "step": 1538 }, { "epoch": 0.45856456478826024, "grad_norm": 0.4338565468788147, "learning_rate": 6.576699187379003e-06, "loss": 0.7906, "step": 1539 }, { "epoch": 0.4588625274684346, "grad_norm": 0.44849637150764465, "learning_rate": 6.571764034389263e-06, "loss": 0.7914, "step": 1540 }, { "epoch": 0.4591604901486089, "grad_norm": 0.42695313692092896, "learning_rate": 6.566827181647361e-06, "loss": 0.7365, "step": 1541 }, { "epoch": 0.45945845282878317, "grad_norm": 0.43655329942703247, "learning_rate": 6.561888634492153e-06, "loss": 0.7474, "step": 1542 }, { "epoch": 0.4597564155089575, "grad_norm": 0.4254245460033417, "learning_rate": 6.556948398264332e-06, "loss": 0.7564, "step": 1543 }, { "epoch": 0.4600543781891318, "grad_norm": 0.41453203558921814, "learning_rate": 6.552006478306416e-06, "loss": 0.7308, "step": 1544 }, { "epoch": 0.4603523408693061, "grad_norm": 0.43700653314590454, "learning_rate": 6.547062879962742e-06, "loss": 0.7549, "step": 1545 }, { "epoch": 0.46065030354948044, "grad_norm": 0.42417776584625244, "learning_rate": 6.5421176085794645e-06, "loss": 0.7371, "step": 1546 }, { "epoch": 0.46094826622965474, "grad_norm": 0.4269201159477234, "learning_rate": 6.537170669504547e-06, "loss": 0.7504, "step": 1547 }, { "epoch": 0.461246228909829, "grad_norm": 0.4198196232318878, "learning_rate": 6.532222068087754e-06, "loss": 0.716, "step": 1548 }, { "epoch": 0.46154419159000337, "grad_norm": 0.4342532455921173, "learning_rate": 6.527271809680651e-06, "loss": 0.7595, "step": 1549 }, { "epoch": 0.46184215427017766, "grad_norm": 0.4008735716342926, "learning_rate": 6.522319899636594e-06, "loss": 0.7101, "step": 1550 }, { "epoch": 0.46214011695035195, "grad_norm": 0.4136100113391876, "learning_rate": 6.517366343310726e-06, "loss": 0.7564, "step": 1551 }, { "epoch": 0.4624380796305263, "grad_norm": 0.4406328797340393, "learning_rate": 6.512411146059967e-06, "loss": 0.7355, "step": 1552 }, { "epoch": 0.4627360423107006, "grad_norm": 0.40765517950057983, "learning_rate": 6.507454313243016e-06, "loss": 0.7443, "step": 1553 }, { "epoch": 0.4630340049908749, "grad_norm": 0.418593168258667, "learning_rate": 6.502495850220337e-06, "loss": 0.7818, "step": 1554 }, { "epoch": 0.4633319676710492, "grad_norm": 0.4101206958293915, "learning_rate": 6.497535762354162e-06, "loss": 0.7134, "step": 1555 }, { "epoch": 0.4636299303512235, "grad_norm": 0.4457070827484131, "learning_rate": 6.492574055008474e-06, "loss": 0.756, "step": 1556 }, { "epoch": 0.4639278930313978, "grad_norm": 0.4158073961734772, "learning_rate": 6.4876107335490106e-06, "loss": 0.7487, "step": 1557 }, { "epoch": 0.4642258557115721, "grad_norm": 0.4253988564014435, "learning_rate": 6.482645803343255e-06, "loss": 0.7218, "step": 1558 }, { "epoch": 0.46452381839174645, "grad_norm": 0.4238702654838562, "learning_rate": 6.4776792697604305e-06, "loss": 0.7543, "step": 1559 }, { "epoch": 0.46482178107192074, "grad_norm": 0.4382781386375427, "learning_rate": 6.472711138171492e-06, "loss": 0.7234, "step": 1560 }, { "epoch": 0.465119743752095, "grad_norm": 0.41743335127830505, "learning_rate": 6.467741413949124e-06, "loss": 0.7032, "step": 1561 }, { "epoch": 0.4654177064322694, "grad_norm": 0.4467609226703644, "learning_rate": 6.462770102467736e-06, "loss": 0.7761, "step": 1562 }, { "epoch": 0.46571566911244366, "grad_norm": 0.41901764273643494, "learning_rate": 6.457797209103449e-06, "loss": 0.7398, "step": 1563 }, { "epoch": 0.46601363179261795, "grad_norm": 0.45777711272239685, "learning_rate": 6.452822739234097e-06, "loss": 0.7641, "step": 1564 }, { "epoch": 0.4663115944727923, "grad_norm": 0.4191468358039856, "learning_rate": 6.447846698239221e-06, "loss": 0.7425, "step": 1565 }, { "epoch": 0.4666095571529666, "grad_norm": 0.42025595903396606, "learning_rate": 6.442869091500058e-06, "loss": 0.7796, "step": 1566 }, { "epoch": 0.4669075198331409, "grad_norm": 0.41758546233177185, "learning_rate": 6.437889924399539e-06, "loss": 0.7638, "step": 1567 }, { "epoch": 0.46720548251331523, "grad_norm": 0.41302821040153503, "learning_rate": 6.4329092023222825e-06, "loss": 0.7336, "step": 1568 }, { "epoch": 0.4675034451934895, "grad_norm": 0.42900779843330383, "learning_rate": 6.427926930654589e-06, "loss": 0.7841, "step": 1569 }, { "epoch": 0.4678014078736638, "grad_norm": 0.42111918330192566, "learning_rate": 6.422943114784437e-06, "loss": 0.7613, "step": 1570 }, { "epoch": 0.46809937055383816, "grad_norm": 0.417671799659729, "learning_rate": 6.417957760101467e-06, "loss": 0.7484, "step": 1571 }, { "epoch": 0.46839733323401245, "grad_norm": 0.4312800168991089, "learning_rate": 6.412970871996995e-06, "loss": 0.7391, "step": 1572 }, { "epoch": 0.46869529591418674, "grad_norm": 0.40557584166526794, "learning_rate": 6.407982455863986e-06, "loss": 0.6973, "step": 1573 }, { "epoch": 0.46899325859436103, "grad_norm": 0.4075579345226288, "learning_rate": 6.402992517097062e-06, "loss": 0.7604, "step": 1574 }, { "epoch": 0.4692912212745354, "grad_norm": 0.41097551584243774, "learning_rate": 6.398001061092492e-06, "loss": 0.7373, "step": 1575 }, { "epoch": 0.46958918395470967, "grad_norm": 0.4276979863643646, "learning_rate": 6.39300809324818e-06, "loss": 0.7527, "step": 1576 }, { "epoch": 0.46988714663488396, "grad_norm": 0.40917807817459106, "learning_rate": 6.388013618963674e-06, "loss": 0.7059, "step": 1577 }, { "epoch": 0.4701851093150583, "grad_norm": 0.41270890831947327, "learning_rate": 6.383017643640144e-06, "loss": 0.7242, "step": 1578 }, { "epoch": 0.4704830719952326, "grad_norm": 0.4280890226364136, "learning_rate": 6.378020172680386e-06, "loss": 0.7686, "step": 1579 }, { "epoch": 0.4707810346754069, "grad_norm": 0.430791437625885, "learning_rate": 6.373021211488812e-06, "loss": 0.7696, "step": 1580 }, { "epoch": 0.47107899735558123, "grad_norm": 0.4117489457130432, "learning_rate": 6.36802076547145e-06, "loss": 0.7613, "step": 1581 }, { "epoch": 0.4713769600357555, "grad_norm": 0.43257904052734375, "learning_rate": 6.363018840035926e-06, "loss": 0.7356, "step": 1582 }, { "epoch": 0.4716749227159298, "grad_norm": 0.4123290181159973, "learning_rate": 6.358015440591472e-06, "loss": 0.7342, "step": 1583 }, { "epoch": 0.47197288539610416, "grad_norm": 0.44685089588165283, "learning_rate": 6.3530105725489136e-06, "loss": 0.7003, "step": 1584 }, { "epoch": 0.47227084807627845, "grad_norm": 0.40533214807510376, "learning_rate": 6.348004241320662e-06, "loss": 0.7865, "step": 1585 }, { "epoch": 0.47256881075645274, "grad_norm": 0.4086616039276123, "learning_rate": 6.342996452320713e-06, "loss": 0.7439, "step": 1586 }, { "epoch": 0.4728667734366271, "grad_norm": 0.42993593215942383, "learning_rate": 6.337987210964636e-06, "loss": 0.7582, "step": 1587 }, { "epoch": 0.4731647361168014, "grad_norm": 0.423460990190506, "learning_rate": 6.332976522669576e-06, "loss": 0.7413, "step": 1588 }, { "epoch": 0.47346269879697567, "grad_norm": 0.43371692299842834, "learning_rate": 6.327964392854237e-06, "loss": 0.7746, "step": 1589 }, { "epoch": 0.47376066147715, "grad_norm": 0.42651933431625366, "learning_rate": 6.322950826938885e-06, "loss": 0.7536, "step": 1590 }, { "epoch": 0.4740586241573243, "grad_norm": 0.4330197870731354, "learning_rate": 6.3179358303453386e-06, "loss": 0.8055, "step": 1591 }, { "epoch": 0.4743565868374986, "grad_norm": 0.4229655861854553, "learning_rate": 6.3129194084969655e-06, "loss": 0.7512, "step": 1592 }, { "epoch": 0.4746545495176729, "grad_norm": 0.42545217275619507, "learning_rate": 6.30790156681867e-06, "loss": 0.7412, "step": 1593 }, { "epoch": 0.47495251219784723, "grad_norm": 0.42011532187461853, "learning_rate": 6.3028823107368965e-06, "loss": 0.7265, "step": 1594 }, { "epoch": 0.4752504748780215, "grad_norm": 0.4164109528064728, "learning_rate": 6.297861645679616e-06, "loss": 0.7349, "step": 1595 }, { "epoch": 0.4755484375581958, "grad_norm": 0.4242698550224304, "learning_rate": 6.292839577076326e-06, "loss": 0.7233, "step": 1596 }, { "epoch": 0.47584640023837016, "grad_norm": 0.4278213083744049, "learning_rate": 6.2878161103580395e-06, "loss": 0.7834, "step": 1597 }, { "epoch": 0.47614436291854445, "grad_norm": 0.43776217103004456, "learning_rate": 6.28279125095728e-06, "loss": 0.7799, "step": 1598 }, { "epoch": 0.47644232559871874, "grad_norm": 0.4515795409679413, "learning_rate": 6.277765004308083e-06, "loss": 0.8182, "step": 1599 }, { "epoch": 0.4767402882788931, "grad_norm": 0.39705896377563477, "learning_rate": 6.2727373758459765e-06, "loss": 0.7643, "step": 1600 }, { "epoch": 0.4770382509590674, "grad_norm": 0.42140883207321167, "learning_rate": 6.267708371007991e-06, "loss": 0.7589, "step": 1601 }, { "epoch": 0.47733621363924167, "grad_norm": 0.41087716817855835, "learning_rate": 6.262677995232637e-06, "loss": 0.7324, "step": 1602 }, { "epoch": 0.477634176319416, "grad_norm": 0.40221107006073, "learning_rate": 6.2576462539599145e-06, "loss": 0.7054, "step": 1603 }, { "epoch": 0.4779321389995903, "grad_norm": 0.4330417513847351, "learning_rate": 6.252613152631297e-06, "loss": 0.7367, "step": 1604 }, { "epoch": 0.4782301016797646, "grad_norm": 0.40460506081581116, "learning_rate": 6.247578696689729e-06, "loss": 0.7369, "step": 1605 }, { "epoch": 0.47852806435993894, "grad_norm": 0.42739930748939514, "learning_rate": 6.242542891579619e-06, "loss": 0.7868, "step": 1606 }, { "epoch": 0.47882602704011323, "grad_norm": 0.4340622127056122, "learning_rate": 6.237505742746839e-06, "loss": 0.7502, "step": 1607 }, { "epoch": 0.4791239897202875, "grad_norm": 0.41937345266342163, "learning_rate": 6.232467255638709e-06, "loss": 0.751, "step": 1608 }, { "epoch": 0.4794219524004618, "grad_norm": 0.4325072765350342, "learning_rate": 6.227427435703997e-06, "loss": 0.7515, "step": 1609 }, { "epoch": 0.47971991508063616, "grad_norm": 0.409756600856781, "learning_rate": 6.222386288392914e-06, "loss": 0.745, "step": 1610 }, { "epoch": 0.48001787776081045, "grad_norm": 0.4101891815662384, "learning_rate": 6.217343819157106e-06, "loss": 0.7049, "step": 1611 }, { "epoch": 0.48031584044098474, "grad_norm": 0.4222071170806885, "learning_rate": 6.212300033449652e-06, "loss": 0.7959, "step": 1612 }, { "epoch": 0.4806138031211591, "grad_norm": 0.4142032265663147, "learning_rate": 6.2072549367250465e-06, "loss": 0.7132, "step": 1613 }, { "epoch": 0.4809117658013334, "grad_norm": 0.4161210358142853, "learning_rate": 6.202208534439208e-06, "loss": 0.7321, "step": 1614 }, { "epoch": 0.48120972848150767, "grad_norm": 0.4110502302646637, "learning_rate": 6.197160832049466e-06, "loss": 0.7407, "step": 1615 }, { "epoch": 0.481507691161682, "grad_norm": 0.4301973879337311, "learning_rate": 6.192111835014554e-06, "loss": 0.7345, "step": 1616 }, { "epoch": 0.4818056538418563, "grad_norm": 0.41788768768310547, "learning_rate": 6.187061548794609e-06, "loss": 0.7467, "step": 1617 }, { "epoch": 0.4821036165220306, "grad_norm": 0.41017264127731323, "learning_rate": 6.182009978851158e-06, "loss": 0.7098, "step": 1618 }, { "epoch": 0.48240157920220494, "grad_norm": 0.4400678873062134, "learning_rate": 6.17695713064712e-06, "loss": 0.7861, "step": 1619 }, { "epoch": 0.48269954188237923, "grad_norm": 0.407147616147995, "learning_rate": 6.171903009646792e-06, "loss": 0.7835, "step": 1620 }, { "epoch": 0.4829975045625535, "grad_norm": 0.41501620411872864, "learning_rate": 6.1668476213158525e-06, "loss": 0.7324, "step": 1621 }, { "epoch": 0.48329546724272787, "grad_norm": 0.4054969549179077, "learning_rate": 6.161790971121349e-06, "loss": 0.7487, "step": 1622 }, { "epoch": 0.48359342992290216, "grad_norm": 0.4138328433036804, "learning_rate": 6.1567330645316906e-06, "loss": 0.7081, "step": 1623 }, { "epoch": 0.48389139260307645, "grad_norm": 0.42272600531578064, "learning_rate": 6.151673907016646e-06, "loss": 0.7654, "step": 1624 }, { "epoch": 0.4841893552832508, "grad_norm": 0.4159351587295532, "learning_rate": 6.146613504047342e-06, "loss": 0.7813, "step": 1625 }, { "epoch": 0.4844873179634251, "grad_norm": 0.42444831132888794, "learning_rate": 6.1415518610962445e-06, "loss": 0.7335, "step": 1626 }, { "epoch": 0.4847852806435994, "grad_norm": 0.4191901683807373, "learning_rate": 6.136488983637165e-06, "loss": 0.7595, "step": 1627 }, { "epoch": 0.48508324332377367, "grad_norm": 0.4251089096069336, "learning_rate": 6.131424877145252e-06, "loss": 0.7984, "step": 1628 }, { "epoch": 0.485381206003948, "grad_norm": 0.4156152904033661, "learning_rate": 6.126359547096975e-06, "loss": 0.783, "step": 1629 }, { "epoch": 0.4856791686841223, "grad_norm": 0.40349531173706055, "learning_rate": 6.121292998970138e-06, "loss": 0.7275, "step": 1630 }, { "epoch": 0.4859771313642966, "grad_norm": 0.44632115960121155, "learning_rate": 6.11622523824385e-06, "loss": 0.8035, "step": 1631 }, { "epoch": 0.48627509404447095, "grad_norm": 0.42304009199142456, "learning_rate": 6.111156270398542e-06, "loss": 0.7565, "step": 1632 }, { "epoch": 0.48657305672464524, "grad_norm": 0.4415636658668518, "learning_rate": 6.106086100915942e-06, "loss": 0.7514, "step": 1633 }, { "epoch": 0.4868710194048195, "grad_norm": 0.442160427570343, "learning_rate": 6.1010147352790875e-06, "loss": 0.722, "step": 1634 }, { "epoch": 0.4871689820849939, "grad_norm": 0.4188246428966522, "learning_rate": 6.095942178972296e-06, "loss": 0.7349, "step": 1635 }, { "epoch": 0.48746694476516816, "grad_norm": 0.41654735803604126, "learning_rate": 6.090868437481185e-06, "loss": 0.732, "step": 1636 }, { "epoch": 0.48776490744534245, "grad_norm": 0.43010467290878296, "learning_rate": 6.085793516292647e-06, "loss": 0.7667, "step": 1637 }, { "epoch": 0.4880628701255168, "grad_norm": 0.4185599088668823, "learning_rate": 6.080717420894852e-06, "loss": 0.7515, "step": 1638 }, { "epoch": 0.4883608328056911, "grad_norm": 0.423115998506546, "learning_rate": 6.075640156777243e-06, "loss": 0.7805, "step": 1639 }, { "epoch": 0.4886587954858654, "grad_norm": 0.42163342237472534, "learning_rate": 6.070561729430518e-06, "loss": 0.7515, "step": 1640 }, { "epoch": 0.48895675816603973, "grad_norm": 0.4198808968067169, "learning_rate": 6.065482144346644e-06, "loss": 0.7094, "step": 1641 }, { "epoch": 0.489254720846214, "grad_norm": 0.43005016446113586, "learning_rate": 6.060401407018832e-06, "loss": 0.7835, "step": 1642 }, { "epoch": 0.4895526835263883, "grad_norm": 0.431854784488678, "learning_rate": 6.055319522941543e-06, "loss": 0.7756, "step": 1643 }, { "epoch": 0.4898506462065626, "grad_norm": 0.4222675561904907, "learning_rate": 6.0502364976104734e-06, "loss": 0.7308, "step": 1644 }, { "epoch": 0.49014860888673695, "grad_norm": 0.40350571274757385, "learning_rate": 6.045152336522562e-06, "loss": 0.7208, "step": 1645 }, { "epoch": 0.49044657156691124, "grad_norm": 0.4296424388885498, "learning_rate": 6.040067045175969e-06, "loss": 0.7707, "step": 1646 }, { "epoch": 0.49074453424708553, "grad_norm": 0.42658230662345886, "learning_rate": 6.034980629070078e-06, "loss": 0.7956, "step": 1647 }, { "epoch": 0.4910424969272599, "grad_norm": 0.4275640547275543, "learning_rate": 6.029893093705492e-06, "loss": 0.746, "step": 1648 }, { "epoch": 0.49134045960743417, "grad_norm": 0.4200149476528168, "learning_rate": 6.0248044445840215e-06, "loss": 0.7644, "step": 1649 }, { "epoch": 0.49163842228760846, "grad_norm": 0.40972739458084106, "learning_rate": 6.019714687208684e-06, "loss": 0.7393, "step": 1650 }, { "epoch": 0.4919363849677828, "grad_norm": 0.4072873294353485, "learning_rate": 6.0146238270836895e-06, "loss": 0.6877, "step": 1651 }, { "epoch": 0.4922343476479571, "grad_norm": 0.43277212977409363, "learning_rate": 6.00953186971445e-06, "loss": 0.7561, "step": 1652 }, { "epoch": 0.4925323103281314, "grad_norm": 0.4115188419818878, "learning_rate": 6.004438820607554e-06, "loss": 0.7191, "step": 1653 }, { "epoch": 0.49283027300830573, "grad_norm": 0.4096651077270508, "learning_rate": 5.999344685270782e-06, "loss": 0.7819, "step": 1654 }, { "epoch": 0.49312823568848, "grad_norm": 0.4046769440174103, "learning_rate": 5.9942494692130744e-06, "loss": 0.6998, "step": 1655 }, { "epoch": 0.4934261983686543, "grad_norm": 0.4084392488002777, "learning_rate": 5.989153177944555e-06, "loss": 0.7341, "step": 1656 }, { "epoch": 0.49372416104882866, "grad_norm": 0.4243430495262146, "learning_rate": 5.984055816976504e-06, "loss": 0.71, "step": 1657 }, { "epoch": 0.49402212372900295, "grad_norm": 0.4338114857673645, "learning_rate": 5.978957391821354e-06, "loss": 0.7579, "step": 1658 }, { "epoch": 0.49432008640917724, "grad_norm": 0.41517770290374756, "learning_rate": 5.973857907992698e-06, "loss": 0.7255, "step": 1659 }, { "epoch": 0.4946180490893516, "grad_norm": 0.40864789485931396, "learning_rate": 5.968757371005265e-06, "loss": 0.7707, "step": 1660 }, { "epoch": 0.4949160117695259, "grad_norm": 0.41645941138267517, "learning_rate": 5.963655786374929e-06, "loss": 0.7655, "step": 1661 }, { "epoch": 0.49521397444970017, "grad_norm": 0.4150625169277191, "learning_rate": 5.958553159618693e-06, "loss": 0.7697, "step": 1662 }, { "epoch": 0.49551193712987446, "grad_norm": 0.41089800000190735, "learning_rate": 5.95344949625469e-06, "loss": 0.738, "step": 1663 }, { "epoch": 0.4958098998100488, "grad_norm": 0.41153329610824585, "learning_rate": 5.948344801802172e-06, "loss": 0.7249, "step": 1664 }, { "epoch": 0.4961078624902231, "grad_norm": 0.4157087206840515, "learning_rate": 5.943239081781508e-06, "loss": 0.7291, "step": 1665 }, { "epoch": 0.4964058251703974, "grad_norm": 0.4415079355239868, "learning_rate": 5.938132341714173e-06, "loss": 0.7922, "step": 1666 }, { "epoch": 0.49670378785057173, "grad_norm": 0.4293401837348938, "learning_rate": 5.933024587122745e-06, "loss": 0.7878, "step": 1667 }, { "epoch": 0.497001750530746, "grad_norm": 0.41637614369392395, "learning_rate": 5.927915823530907e-06, "loss": 0.7944, "step": 1668 }, { "epoch": 0.4972997132109203, "grad_norm": 0.41283151507377625, "learning_rate": 5.922806056463421e-06, "loss": 0.7635, "step": 1669 }, { "epoch": 0.49759767589109466, "grad_norm": 0.4281843304634094, "learning_rate": 5.917695291446146e-06, "loss": 0.7463, "step": 1670 }, { "epoch": 0.49789563857126895, "grad_norm": 0.416308730840683, "learning_rate": 5.91258353400601e-06, "loss": 0.7406, "step": 1671 }, { "epoch": 0.49819360125144324, "grad_norm": 0.43074557185173035, "learning_rate": 5.9074707896710225e-06, "loss": 0.7725, "step": 1672 }, { "epoch": 0.4984915639316176, "grad_norm": 0.41739097237586975, "learning_rate": 5.9023570639702544e-06, "loss": 0.7566, "step": 1673 }, { "epoch": 0.4987895266117919, "grad_norm": 0.40861406922340393, "learning_rate": 5.8972423624338395e-06, "loss": 0.7582, "step": 1674 }, { "epoch": 0.49908748929196617, "grad_norm": 0.4410512447357178, "learning_rate": 5.892126690592969e-06, "loss": 0.7696, "step": 1675 }, { "epoch": 0.4993854519721405, "grad_norm": 0.41618219017982483, "learning_rate": 5.887010053979881e-06, "loss": 0.7333, "step": 1676 }, { "epoch": 0.4996834146523148, "grad_norm": 0.4145171046257019, "learning_rate": 5.881892458127858e-06, "loss": 0.759, "step": 1677 }, { "epoch": 0.4999813773324891, "grad_norm": 0.42233115434646606, "learning_rate": 5.87677390857122e-06, "loss": 0.7641, "step": 1678 }, { "epoch": 0.5002793400126634, "grad_norm": 0.4378186762332916, "learning_rate": 5.871654410845317e-06, "loss": 0.7472, "step": 1679 }, { "epoch": 0.5005773026928377, "grad_norm": 0.40987905859947205, "learning_rate": 5.866533970486529e-06, "loss": 0.724, "step": 1680 }, { "epoch": 0.5008752653730121, "grad_norm": 0.4189298152923584, "learning_rate": 5.861412593032247e-06, "loss": 0.7603, "step": 1681 }, { "epoch": 0.5011732280531863, "grad_norm": 0.4255460798740387, "learning_rate": 5.856290284020883e-06, "loss": 0.7548, "step": 1682 }, { "epoch": 0.5014711907333607, "grad_norm": 0.4227476716041565, "learning_rate": 5.851167048991853e-06, "loss": 0.7465, "step": 1683 }, { "epoch": 0.501769153413535, "grad_norm": 0.3998330235481262, "learning_rate": 5.846042893485575e-06, "loss": 0.7286, "step": 1684 }, { "epoch": 0.5020671160937092, "grad_norm": 0.42391958832740784, "learning_rate": 5.8409178230434615e-06, "loss": 0.7546, "step": 1685 }, { "epoch": 0.5023650787738836, "grad_norm": 0.4327545762062073, "learning_rate": 5.835791843207916e-06, "loss": 0.7452, "step": 1686 }, { "epoch": 0.5026630414540578, "grad_norm": 0.440319299697876, "learning_rate": 5.830664959522328e-06, "loss": 0.7819, "step": 1687 }, { "epoch": 0.5029610041342322, "grad_norm": 0.4276452958583832, "learning_rate": 5.825537177531057e-06, "loss": 0.7612, "step": 1688 }, { "epoch": 0.5032589668144065, "grad_norm": 0.42737245559692383, "learning_rate": 5.82040850277944e-06, "loss": 0.7507, "step": 1689 }, { "epoch": 0.5035569294945808, "grad_norm": 0.4190289080142975, "learning_rate": 5.815278940813777e-06, "loss": 0.729, "step": 1690 }, { "epoch": 0.5038548921747551, "grad_norm": 0.4288443326950073, "learning_rate": 5.810148497181328e-06, "loss": 0.7807, "step": 1691 }, { "epoch": 0.5041528548549294, "grad_norm": 0.41055840253829956, "learning_rate": 5.80501717743031e-06, "loss": 0.7483, "step": 1692 }, { "epoch": 0.5044508175351037, "grad_norm": 0.42178279161453247, "learning_rate": 5.799884987109878e-06, "loss": 0.7567, "step": 1693 }, { "epoch": 0.504748780215278, "grad_norm": 0.4185313284397125, "learning_rate": 5.794751931770142e-06, "loss": 0.7636, "step": 1694 }, { "epoch": 0.5050467428954524, "grad_norm": 0.4304468631744385, "learning_rate": 5.789618016962134e-06, "loss": 0.7748, "step": 1695 }, { "epoch": 0.5053447055756266, "grad_norm": 0.4272032380104065, "learning_rate": 5.7844832482378245e-06, "loss": 0.7829, "step": 1696 }, { "epoch": 0.505642668255801, "grad_norm": 0.4228638708591461, "learning_rate": 5.779347631150101e-06, "loss": 0.7745, "step": 1697 }, { "epoch": 0.5059406309359753, "grad_norm": 0.4103485643863678, "learning_rate": 5.774211171252777e-06, "loss": 0.739, "step": 1698 }, { "epoch": 0.5062385936161495, "grad_norm": 0.4145384430885315, "learning_rate": 5.769073874100569e-06, "loss": 0.7347, "step": 1699 }, { "epoch": 0.5065365562963239, "grad_norm": 0.4036468267440796, "learning_rate": 5.763935745249103e-06, "loss": 0.7609, "step": 1700 }, { "epoch": 0.5068345189764982, "grad_norm": 0.426704466342926, "learning_rate": 5.758796790254902e-06, "loss": 0.7553, "step": 1701 }, { "epoch": 0.5071324816566725, "grad_norm": 0.4068341851234436, "learning_rate": 5.7536570146753874e-06, "loss": 0.7423, "step": 1702 }, { "epoch": 0.5074304443368468, "grad_norm": 0.4228654205799103, "learning_rate": 5.748516424068864e-06, "loss": 0.72, "step": 1703 }, { "epoch": 0.5077284070170212, "grad_norm": 0.41845396161079407, "learning_rate": 5.743375023994514e-06, "loss": 0.7157, "step": 1704 }, { "epoch": 0.5080263696971954, "grad_norm": 0.39111408591270447, "learning_rate": 5.738232820012407e-06, "loss": 0.7146, "step": 1705 }, { "epoch": 0.5083243323773697, "grad_norm": 0.4307633936405182, "learning_rate": 5.733089817683469e-06, "loss": 0.7701, "step": 1706 }, { "epoch": 0.5086222950575441, "grad_norm": 0.4309898316860199, "learning_rate": 5.7279460225694985e-06, "loss": 0.7344, "step": 1707 }, { "epoch": 0.5089202577377183, "grad_norm": 0.42719435691833496, "learning_rate": 5.722801440233145e-06, "loss": 0.7531, "step": 1708 }, { "epoch": 0.5092182204178927, "grad_norm": 0.4361584484577179, "learning_rate": 5.7176560762379144e-06, "loss": 0.756, "step": 1709 }, { "epoch": 0.509516183098067, "grad_norm": 0.40878501534461975, "learning_rate": 5.712509936148153e-06, "loss": 0.7514, "step": 1710 }, { "epoch": 0.5098141457782412, "grad_norm": 0.4180915653705597, "learning_rate": 5.7073630255290515e-06, "loss": 0.7179, "step": 1711 }, { "epoch": 0.5101121084584156, "grad_norm": 0.41091781854629517, "learning_rate": 5.70221534994663e-06, "loss": 0.744, "step": 1712 }, { "epoch": 0.5104100711385899, "grad_norm": 0.41649574041366577, "learning_rate": 5.6970669149677395e-06, "loss": 0.7387, "step": 1713 }, { "epoch": 0.5107080338187642, "grad_norm": 0.4178697466850281, "learning_rate": 5.691917726160049e-06, "loss": 0.7765, "step": 1714 }, { "epoch": 0.5110059964989385, "grad_norm": 0.423127681016922, "learning_rate": 5.686767789092041e-06, "loss": 0.7307, "step": 1715 }, { "epoch": 0.5113039591791129, "grad_norm": 0.4184946119785309, "learning_rate": 5.6816171093330145e-06, "loss": 0.7752, "step": 1716 }, { "epoch": 0.5116019218592871, "grad_norm": 0.4245437979698181, "learning_rate": 5.676465692453063e-06, "loss": 0.7716, "step": 1717 }, { "epoch": 0.5118998845394614, "grad_norm": 0.42974284291267395, "learning_rate": 5.671313544023084e-06, "loss": 0.7343, "step": 1718 }, { "epoch": 0.5121978472196358, "grad_norm": 0.415322870016098, "learning_rate": 5.666160669614761e-06, "loss": 0.7643, "step": 1719 }, { "epoch": 0.51249580989981, "grad_norm": 0.41199418902397156, "learning_rate": 5.661007074800569e-06, "loss": 0.7272, "step": 1720 }, { "epoch": 0.5127937725799844, "grad_norm": 0.4084899425506592, "learning_rate": 5.655852765153752e-06, "loss": 0.7091, "step": 1721 }, { "epoch": 0.5130917352601586, "grad_norm": 0.42653077840805054, "learning_rate": 5.650697746248338e-06, "loss": 0.7605, "step": 1722 }, { "epoch": 0.513389697940333, "grad_norm": 0.41063791513442993, "learning_rate": 5.645542023659115e-06, "loss": 0.7312, "step": 1723 }, { "epoch": 0.5136876606205073, "grad_norm": 0.40550047159194946, "learning_rate": 5.640385602961634e-06, "loss": 0.7468, "step": 1724 }, { "epoch": 0.5139856233006815, "grad_norm": 0.4107825756072998, "learning_rate": 5.635228489732204e-06, "loss": 0.7817, "step": 1725 }, { "epoch": 0.5142835859808559, "grad_norm": 0.42436328530311584, "learning_rate": 5.630070689547875e-06, "loss": 0.7305, "step": 1726 }, { "epoch": 0.5145815486610302, "grad_norm": 0.4121907949447632, "learning_rate": 5.624912207986448e-06, "loss": 0.7238, "step": 1727 }, { "epoch": 0.5148795113412045, "grad_norm": 0.41771507263183594, "learning_rate": 5.619753050626458e-06, "loss": 0.7572, "step": 1728 }, { "epoch": 0.5151774740213788, "grad_norm": 0.3964030146598816, "learning_rate": 5.614593223047169e-06, "loss": 0.7157, "step": 1729 }, { "epoch": 0.5154754367015532, "grad_norm": 0.425343781709671, "learning_rate": 5.609432730828571e-06, "loss": 0.7681, "step": 1730 }, { "epoch": 0.5157733993817274, "grad_norm": 0.4268225133419037, "learning_rate": 5.604271579551375e-06, "loss": 0.7906, "step": 1731 }, { "epoch": 0.5160713620619017, "grad_norm": 0.41018426418304443, "learning_rate": 5.599109774797e-06, "loss": 0.7768, "step": 1732 }, { "epoch": 0.5163693247420761, "grad_norm": 0.42251598834991455, "learning_rate": 5.593947322147577e-06, "loss": 0.7095, "step": 1733 }, { "epoch": 0.5166672874222503, "grad_norm": 0.40326982736587524, "learning_rate": 5.588784227185936e-06, "loss": 0.7339, "step": 1734 }, { "epoch": 0.5169652501024247, "grad_norm": 0.41833508014678955, "learning_rate": 5.583620495495596e-06, "loss": 0.7188, "step": 1735 }, { "epoch": 0.517263212782599, "grad_norm": 0.41312336921691895, "learning_rate": 5.578456132660774e-06, "loss": 0.7073, "step": 1736 }, { "epoch": 0.5175611754627732, "grad_norm": 0.4036800265312195, "learning_rate": 5.573291144266364e-06, "loss": 0.7246, "step": 1737 }, { "epoch": 0.5178591381429476, "grad_norm": 0.4259328842163086, "learning_rate": 5.5681255358979355e-06, "loss": 0.7764, "step": 1738 }, { "epoch": 0.5181571008231219, "grad_norm": 0.40341201424598694, "learning_rate": 5.562959313141732e-06, "loss": 0.7309, "step": 1739 }, { "epoch": 0.5184550635032962, "grad_norm": 0.4262118935585022, "learning_rate": 5.557792481584661e-06, "loss": 0.7878, "step": 1740 }, { "epoch": 0.5187530261834705, "grad_norm": 0.4327569007873535, "learning_rate": 5.552625046814283e-06, "loss": 0.7517, "step": 1741 }, { "epoch": 0.5190509888636449, "grad_norm": 0.41983869671821594, "learning_rate": 5.547457014418818e-06, "loss": 0.8, "step": 1742 }, { "epoch": 0.5193489515438191, "grad_norm": 0.4388618767261505, "learning_rate": 5.542288389987128e-06, "loss": 0.7637, "step": 1743 }, { "epoch": 0.5196469142239935, "grad_norm": 0.41374140977859497, "learning_rate": 5.5371191791087185e-06, "loss": 0.7614, "step": 1744 }, { "epoch": 0.5199448769041678, "grad_norm": 0.4400937557220459, "learning_rate": 5.531949387373725e-06, "loss": 0.7939, "step": 1745 }, { "epoch": 0.520242839584342, "grad_norm": 0.4292343556880951, "learning_rate": 5.526779020372913e-06, "loss": 0.7514, "step": 1746 }, { "epoch": 0.5205408022645164, "grad_norm": 0.4206315577030182, "learning_rate": 5.521608083697673e-06, "loss": 0.7334, "step": 1747 }, { "epoch": 0.5208387649446907, "grad_norm": 0.41921359300613403, "learning_rate": 5.516436582940007e-06, "loss": 0.7557, "step": 1748 }, { "epoch": 0.521136727624865, "grad_norm": 0.42275798320770264, "learning_rate": 5.511264523692531e-06, "loss": 0.7549, "step": 1749 }, { "epoch": 0.5214346903050393, "grad_norm": 0.4251478314399719, "learning_rate": 5.5060919115484594e-06, "loss": 0.7387, "step": 1750 }, { "epoch": 0.5217326529852137, "grad_norm": 0.41317999362945557, "learning_rate": 5.500918752101611e-06, "loss": 0.7472, "step": 1751 }, { "epoch": 0.5220306156653879, "grad_norm": 0.4308857023715973, "learning_rate": 5.495745050946394e-06, "loss": 0.7731, "step": 1752 }, { "epoch": 0.5223285783455622, "grad_norm": 0.42264774441719055, "learning_rate": 5.4905708136778e-06, "loss": 0.7117, "step": 1753 }, { "epoch": 0.5226265410257366, "grad_norm": 0.44053274393081665, "learning_rate": 5.485396045891404e-06, "loss": 0.8017, "step": 1754 }, { "epoch": 0.5229245037059108, "grad_norm": 0.41573989391326904, "learning_rate": 5.480220753183353e-06, "loss": 0.7292, "step": 1755 }, { "epoch": 0.5232224663860852, "grad_norm": 0.42802026867866516, "learning_rate": 5.475044941150361e-06, "loss": 0.7523, "step": 1756 }, { "epoch": 0.5235204290662594, "grad_norm": 0.4277799129486084, "learning_rate": 5.469868615389703e-06, "loss": 0.7641, "step": 1757 }, { "epoch": 0.5238183917464337, "grad_norm": 0.4125003218650818, "learning_rate": 5.4646917814992125e-06, "loss": 0.781, "step": 1758 }, { "epoch": 0.5241163544266081, "grad_norm": 0.4128820300102234, "learning_rate": 5.459514445077272e-06, "loss": 0.7664, "step": 1759 }, { "epoch": 0.5244143171067823, "grad_norm": 0.4197518527507782, "learning_rate": 5.454336611722807e-06, "loss": 0.7541, "step": 1760 }, { "epoch": 0.5247122797869567, "grad_norm": 0.4234422445297241, "learning_rate": 5.449158287035274e-06, "loss": 0.757, "step": 1761 }, { "epoch": 0.525010242467131, "grad_norm": 0.4273323118686676, "learning_rate": 5.443979476614674e-06, "loss": 0.7315, "step": 1762 }, { "epoch": 0.5253082051473053, "grad_norm": 0.43033096194267273, "learning_rate": 5.4388001860615225e-06, "loss": 0.7632, "step": 1763 }, { "epoch": 0.5256061678274796, "grad_norm": 0.43124809861183167, "learning_rate": 5.4336204209768584e-06, "loss": 0.7752, "step": 1764 }, { "epoch": 0.5259041305076539, "grad_norm": 0.40140900015830994, "learning_rate": 5.4284401869622306e-06, "loss": 0.7451, "step": 1765 }, { "epoch": 0.5262020931878282, "grad_norm": 0.4155518114566803, "learning_rate": 5.423259489619701e-06, "loss": 0.7886, "step": 1766 }, { "epoch": 0.5265000558680025, "grad_norm": 0.4166567027568817, "learning_rate": 5.418078334551826e-06, "loss": 0.7376, "step": 1767 }, { "epoch": 0.5267980185481769, "grad_norm": 0.42524638772010803, "learning_rate": 5.412896727361663e-06, "loss": 0.7756, "step": 1768 }, { "epoch": 0.5270959812283511, "grad_norm": 0.4090453088283539, "learning_rate": 5.407714673652753e-06, "loss": 0.7191, "step": 1769 }, { "epoch": 0.5273939439085255, "grad_norm": 0.39983639121055603, "learning_rate": 5.402532179029123e-06, "loss": 0.7204, "step": 1770 }, { "epoch": 0.5276919065886998, "grad_norm": 0.4308376610279083, "learning_rate": 5.397349249095279e-06, "loss": 0.7643, "step": 1771 }, { "epoch": 0.527989869268874, "grad_norm": 0.41261640191078186, "learning_rate": 5.392165889456189e-06, "loss": 0.7758, "step": 1772 }, { "epoch": 0.5282878319490484, "grad_norm": 0.4529683589935303, "learning_rate": 5.386982105717298e-06, "loss": 0.8002, "step": 1773 }, { "epoch": 0.5285857946292227, "grad_norm": 0.41529229283332825, "learning_rate": 5.381797903484498e-06, "loss": 0.76, "step": 1774 }, { "epoch": 0.528883757309397, "grad_norm": 0.4131626486778259, "learning_rate": 5.376613288364142e-06, "loss": 0.7323, "step": 1775 }, { "epoch": 0.5291817199895713, "grad_norm": 0.40386584401130676, "learning_rate": 5.371428265963024e-06, "loss": 0.6704, "step": 1776 }, { "epoch": 0.5294796826697457, "grad_norm": 0.42746710777282715, "learning_rate": 5.366242841888384e-06, "loss": 0.7703, "step": 1777 }, { "epoch": 0.5297776453499199, "grad_norm": 0.4215255677700043, "learning_rate": 5.3610570217478895e-06, "loss": 0.7314, "step": 1778 }, { "epoch": 0.5300756080300942, "grad_norm": 0.42057353258132935, "learning_rate": 5.355870811149643e-06, "loss": 0.7438, "step": 1779 }, { "epoch": 0.5303735707102686, "grad_norm": 0.4245503544807434, "learning_rate": 5.3506842157021635e-06, "loss": 0.7708, "step": 1780 }, { "epoch": 0.5306715333904428, "grad_norm": 0.4192019999027252, "learning_rate": 5.34549724101439e-06, "loss": 0.7375, "step": 1781 }, { "epoch": 0.5309694960706172, "grad_norm": 0.39441004395484924, "learning_rate": 5.340309892695672e-06, "loss": 0.7373, "step": 1782 }, { "epoch": 0.5312674587507915, "grad_norm": 0.4011537432670593, "learning_rate": 5.335122176355759e-06, "loss": 0.7192, "step": 1783 }, { "epoch": 0.5315654214309657, "grad_norm": 0.4111559987068176, "learning_rate": 5.3299340976048035e-06, "loss": 0.7763, "step": 1784 }, { "epoch": 0.5318633841111401, "grad_norm": 0.4052737355232239, "learning_rate": 5.324745662053344e-06, "loss": 0.7252, "step": 1785 }, { "epoch": 0.5321613467913144, "grad_norm": 0.434222012758255, "learning_rate": 5.319556875312313e-06, "loss": 0.7802, "step": 1786 }, { "epoch": 0.5324593094714887, "grad_norm": 0.43211445212364197, "learning_rate": 5.314367742993014e-06, "loss": 0.7522, "step": 1787 }, { "epoch": 0.532757272151663, "grad_norm": 0.429675817489624, "learning_rate": 5.30917827070713e-06, "loss": 0.7566, "step": 1788 }, { "epoch": 0.5330552348318374, "grad_norm": 0.42022308707237244, "learning_rate": 5.3039884640667115e-06, "loss": 0.7878, "step": 1789 }, { "epoch": 0.5333531975120116, "grad_norm": 0.42533957958221436, "learning_rate": 5.298798328684166e-06, "loss": 0.7506, "step": 1790 }, { "epoch": 0.533651160192186, "grad_norm": 0.4366130232810974, "learning_rate": 5.2936078701722615e-06, "loss": 0.7773, "step": 1791 }, { "epoch": 0.5339491228723602, "grad_norm": 0.43641233444213867, "learning_rate": 5.288417094144113e-06, "loss": 0.791, "step": 1792 }, { "epoch": 0.5342470855525345, "grad_norm": 0.40789636969566345, "learning_rate": 5.28322600621318e-06, "loss": 0.728, "step": 1793 }, { "epoch": 0.5345450482327089, "grad_norm": 0.4139827489852905, "learning_rate": 5.278034611993258e-06, "loss": 0.7806, "step": 1794 }, { "epoch": 0.5348430109128831, "grad_norm": 0.4079539179801941, "learning_rate": 5.272842917098474e-06, "loss": 0.7228, "step": 1795 }, { "epoch": 0.5351409735930575, "grad_norm": 0.42347198724746704, "learning_rate": 5.2676509271432815e-06, "loss": 0.7461, "step": 1796 }, { "epoch": 0.5354389362732318, "grad_norm": 0.4191540777683258, "learning_rate": 5.262458647742454e-06, "loss": 0.7454, "step": 1797 }, { "epoch": 0.535736898953406, "grad_norm": 0.4120540916919708, "learning_rate": 5.25726608451107e-06, "loss": 0.7311, "step": 1798 }, { "epoch": 0.5360348616335804, "grad_norm": 0.4422626495361328, "learning_rate": 5.2520732430645275e-06, "loss": 0.7563, "step": 1799 }, { "epoch": 0.5363328243137547, "grad_norm": 0.4033385217189789, "learning_rate": 5.246880129018515e-06, "loss": 0.6934, "step": 1800 }, { "epoch": 0.536630786993929, "grad_norm": 0.43625926971435547, "learning_rate": 5.241686747989023e-06, "loss": 0.7616, "step": 1801 }, { "epoch": 0.5369287496741033, "grad_norm": 0.43050000071525574, "learning_rate": 5.236493105592326e-06, "loss": 0.8261, "step": 1802 }, { "epoch": 0.5372267123542777, "grad_norm": 0.4144790768623352, "learning_rate": 5.231299207444981e-06, "loss": 0.7389, "step": 1803 }, { "epoch": 0.5375246750344519, "grad_norm": 0.41443172097206116, "learning_rate": 5.226105059163826e-06, "loss": 0.763, "step": 1804 }, { "epoch": 0.5378226377146262, "grad_norm": 0.4017443358898163, "learning_rate": 5.220910666365966e-06, "loss": 0.751, "step": 1805 }, { "epoch": 0.5381206003948006, "grad_norm": 0.4197418987751007, "learning_rate": 5.21571603466877e-06, "loss": 0.7079, "step": 1806 }, { "epoch": 0.5384185630749748, "grad_norm": 0.41983088850975037, "learning_rate": 5.210521169689866e-06, "loss": 0.732, "step": 1807 }, { "epoch": 0.5387165257551492, "grad_norm": 0.4189915955066681, "learning_rate": 5.205326077047138e-06, "loss": 0.7227, "step": 1808 }, { "epoch": 0.5390144884353235, "grad_norm": 0.4161134958267212, "learning_rate": 5.200130762358711e-06, "loss": 0.7473, "step": 1809 }, { "epoch": 0.5393124511154977, "grad_norm": 0.41901764273643494, "learning_rate": 5.1949352312429515e-06, "loss": 0.7724, "step": 1810 }, { "epoch": 0.5396104137956721, "grad_norm": 0.4030422270298004, "learning_rate": 5.189739489318461e-06, "loss": 0.7334, "step": 1811 }, { "epoch": 0.5399083764758464, "grad_norm": 0.3990989327430725, "learning_rate": 5.184543542204068e-06, "loss": 0.7149, "step": 1812 }, { "epoch": 0.5402063391560207, "grad_norm": 0.3978815972805023, "learning_rate": 5.179347395518827e-06, "loss": 0.7204, "step": 1813 }, { "epoch": 0.540504301836195, "grad_norm": 0.4164794087409973, "learning_rate": 5.174151054881999e-06, "loss": 0.6979, "step": 1814 }, { "epoch": 0.5408022645163694, "grad_norm": 0.41425642371177673, "learning_rate": 5.168954525913068e-06, "loss": 0.7426, "step": 1815 }, { "epoch": 0.5411002271965436, "grad_norm": 0.40607571601867676, "learning_rate": 5.163757814231708e-06, "loss": 0.7328, "step": 1816 }, { "epoch": 0.541398189876718, "grad_norm": 0.4419354498386383, "learning_rate": 5.158560925457801e-06, "loss": 0.7914, "step": 1817 }, { "epoch": 0.5416961525568923, "grad_norm": 0.432871013879776, "learning_rate": 5.153363865211411e-06, "loss": 0.7788, "step": 1818 }, { "epoch": 0.5419941152370665, "grad_norm": 0.4156447947025299, "learning_rate": 5.148166639112799e-06, "loss": 0.7013, "step": 1819 }, { "epoch": 0.5422920779172409, "grad_norm": 0.4269291162490845, "learning_rate": 5.142969252782397e-06, "loss": 0.765, "step": 1820 }, { "epoch": 0.5425900405974152, "grad_norm": 0.41579586267471313, "learning_rate": 5.137771711840811e-06, "loss": 0.7588, "step": 1821 }, { "epoch": 0.5428880032775895, "grad_norm": 0.41332152485847473, "learning_rate": 5.132574021908816e-06, "loss": 0.7532, "step": 1822 }, { "epoch": 0.5431859659577638, "grad_norm": 0.41672608256340027, "learning_rate": 5.1273761886073496e-06, "loss": 0.746, "step": 1823 }, { "epoch": 0.5434839286379382, "grad_norm": 0.41137874126434326, "learning_rate": 5.122178217557502e-06, "loss": 0.7141, "step": 1824 }, { "epoch": 0.5437818913181124, "grad_norm": 0.4233442544937134, "learning_rate": 5.116980114380511e-06, "loss": 0.7442, "step": 1825 }, { "epoch": 0.5440798539982867, "grad_norm": 0.4179309904575348, "learning_rate": 5.111781884697762e-06, "loss": 0.7486, "step": 1826 }, { "epoch": 0.544377816678461, "grad_norm": 0.4062498211860657, "learning_rate": 5.106583534130773e-06, "loss": 0.7475, "step": 1827 }, { "epoch": 0.5446757793586353, "grad_norm": 0.42390862107276917, "learning_rate": 5.101385068301194e-06, "loss": 0.7607, "step": 1828 }, { "epoch": 0.5449737420388097, "grad_norm": 0.40766555070877075, "learning_rate": 5.0961864928308005e-06, "loss": 0.7231, "step": 1829 }, { "epoch": 0.5452717047189839, "grad_norm": 0.43810999393463135, "learning_rate": 5.090987813341486e-06, "loss": 0.7514, "step": 1830 }, { "epoch": 0.5455696673991582, "grad_norm": 0.42822471261024475, "learning_rate": 5.085789035455256e-06, "loss": 0.7263, "step": 1831 }, { "epoch": 0.5458676300793326, "grad_norm": 0.40355852246284485, "learning_rate": 5.0805901647942226e-06, "loss": 0.7258, "step": 1832 }, { "epoch": 0.5461655927595068, "grad_norm": 0.4210696220397949, "learning_rate": 5.0753912069806e-06, "loss": 0.7308, "step": 1833 }, { "epoch": 0.5464635554396812, "grad_norm": 0.41088956594467163, "learning_rate": 5.070192167636693e-06, "loss": 0.7468, "step": 1834 }, { "epoch": 0.5467615181198555, "grad_norm": 0.4268404245376587, "learning_rate": 5.064993052384899e-06, "loss": 0.7003, "step": 1835 }, { "epoch": 0.5470594808000298, "grad_norm": 0.4162386953830719, "learning_rate": 5.059793866847692e-06, "loss": 0.7529, "step": 1836 }, { "epoch": 0.5473574434802041, "grad_norm": 0.4230744242668152, "learning_rate": 5.054594616647628e-06, "loss": 0.7394, "step": 1837 }, { "epoch": 0.5476554061603784, "grad_norm": 0.4292016327381134, "learning_rate": 5.049395307407329e-06, "loss": 0.7795, "step": 1838 }, { "epoch": 0.5479533688405527, "grad_norm": 0.4010887145996094, "learning_rate": 5.044195944749482e-06, "loss": 0.7457, "step": 1839 }, { "epoch": 0.548251331520727, "grad_norm": 0.40281057357788086, "learning_rate": 5.0389965342968316e-06, "loss": 0.7384, "step": 1840 }, { "epoch": 0.5485492942009014, "grad_norm": 0.42106863856315613, "learning_rate": 5.033797081672176e-06, "loss": 0.7607, "step": 1841 }, { "epoch": 0.5488472568810756, "grad_norm": 0.4178571403026581, "learning_rate": 5.0285975924983546e-06, "loss": 0.746, "step": 1842 }, { "epoch": 0.54914521956125, "grad_norm": 0.4276307225227356, "learning_rate": 5.023398072398249e-06, "loss": 0.7679, "step": 1843 }, { "epoch": 0.5494431822414243, "grad_norm": 0.4141428470611572, "learning_rate": 5.0181985269947754e-06, "loss": 0.7667, "step": 1844 }, { "epoch": 0.5497411449215985, "grad_norm": 0.41870513558387756, "learning_rate": 5.012998961910876e-06, "loss": 0.7816, "step": 1845 }, { "epoch": 0.5500391076017729, "grad_norm": 0.41629573702812195, "learning_rate": 5.007799382769516e-06, "loss": 0.7634, "step": 1846 }, { "epoch": 0.5503370702819472, "grad_norm": 0.42141273617744446, "learning_rate": 5.002599795193671e-06, "loss": 0.7559, "step": 1847 }, { "epoch": 0.5506350329621215, "grad_norm": 0.4244276285171509, "learning_rate": 4.9974002048063314e-06, "loss": 0.762, "step": 1848 }, { "epoch": 0.5509329956422958, "grad_norm": 0.40807273983955383, "learning_rate": 4.9922006172304855e-06, "loss": 0.7242, "step": 1849 }, { "epoch": 0.5512309583224702, "grad_norm": 0.41207119822502136, "learning_rate": 4.987001038089124e-06, "loss": 0.7411, "step": 1850 }, { "epoch": 0.5515289210026444, "grad_norm": 0.42448246479034424, "learning_rate": 4.981801473005226e-06, "loss": 0.7246, "step": 1851 }, { "epoch": 0.5518268836828187, "grad_norm": 0.4166666269302368, "learning_rate": 4.976601927601752e-06, "loss": 0.706, "step": 1852 }, { "epoch": 0.5521248463629931, "grad_norm": 0.42458873987197876, "learning_rate": 4.971402407501649e-06, "loss": 0.756, "step": 1853 }, { "epoch": 0.5524228090431673, "grad_norm": 0.40440189838409424, "learning_rate": 4.966202918327826e-06, "loss": 0.7532, "step": 1854 }, { "epoch": 0.5527207717233417, "grad_norm": 0.4150710701942444, "learning_rate": 4.961003465703168e-06, "loss": 0.74, "step": 1855 }, { "epoch": 0.553018734403516, "grad_norm": 0.42995816469192505, "learning_rate": 4.955804055250519e-06, "loss": 0.7332, "step": 1856 }, { "epoch": 0.5533166970836902, "grad_norm": 0.41072604060173035, "learning_rate": 4.9506046925926725e-06, "loss": 0.7239, "step": 1857 }, { "epoch": 0.5536146597638646, "grad_norm": 0.40858787298202515, "learning_rate": 4.945405383352372e-06, "loss": 0.7475, "step": 1858 }, { "epoch": 0.5539126224440389, "grad_norm": 0.39616289734840393, "learning_rate": 4.94020613315231e-06, "loss": 0.7375, "step": 1859 }, { "epoch": 0.5542105851242132, "grad_norm": 0.4078272879123688, "learning_rate": 4.935006947615103e-06, "loss": 0.7545, "step": 1860 }, { "epoch": 0.5545085478043875, "grad_norm": 0.4218798279762268, "learning_rate": 4.929807832363308e-06, "loss": 0.7587, "step": 1861 }, { "epoch": 0.5548065104845618, "grad_norm": 0.4236418902873993, "learning_rate": 4.9246087930194016e-06, "loss": 0.7854, "step": 1862 }, { "epoch": 0.5551044731647361, "grad_norm": 0.42399102449417114, "learning_rate": 4.919409835205778e-06, "loss": 0.709, "step": 1863 }, { "epoch": 0.5554024358449104, "grad_norm": 0.43494337797164917, "learning_rate": 4.914210964544747e-06, "loss": 0.7599, "step": 1864 }, { "epoch": 0.5557003985250847, "grad_norm": 0.431159645318985, "learning_rate": 4.9090121866585155e-06, "loss": 0.7465, "step": 1865 }, { "epoch": 0.555998361205259, "grad_norm": 0.4185139536857605, "learning_rate": 4.9038135071692e-06, "loss": 0.7772, "step": 1866 }, { "epoch": 0.5562963238854334, "grad_norm": 0.41682150959968567, "learning_rate": 4.898614931698808e-06, "loss": 0.727, "step": 1867 }, { "epoch": 0.5565942865656076, "grad_norm": 0.40183204412460327, "learning_rate": 4.893416465869229e-06, "loss": 0.7562, "step": 1868 }, { "epoch": 0.556892249245782, "grad_norm": 0.41225665807724, "learning_rate": 4.888218115302238e-06, "loss": 0.7261, "step": 1869 }, { "epoch": 0.5571902119259563, "grad_norm": 0.4158567488193512, "learning_rate": 4.883019885619491e-06, "loss": 0.7621, "step": 1870 }, { "epoch": 0.5574881746061305, "grad_norm": 0.4105948805809021, "learning_rate": 4.8778217824425e-06, "loss": 0.7156, "step": 1871 }, { "epoch": 0.5577861372863049, "grad_norm": 0.4392630159854889, "learning_rate": 4.872623811392652e-06, "loss": 0.7595, "step": 1872 }, { "epoch": 0.5580840999664792, "grad_norm": 0.4194263517856598, "learning_rate": 4.867425978091185e-06, "loss": 0.7488, "step": 1873 }, { "epoch": 0.5583820626466535, "grad_norm": 0.4097282588481903, "learning_rate": 4.862228288159191e-06, "loss": 0.7466, "step": 1874 }, { "epoch": 0.5586800253268278, "grad_norm": 0.42875537276268005, "learning_rate": 4.857030747217606e-06, "loss": 0.8227, "step": 1875 }, { "epoch": 0.5589779880070022, "grad_norm": 0.4271450638771057, "learning_rate": 4.8518333608872015e-06, "loss": 0.7602, "step": 1876 }, { "epoch": 0.5592759506871764, "grad_norm": 0.4113244116306305, "learning_rate": 4.846636134788589e-06, "loss": 0.7145, "step": 1877 }, { "epoch": 0.5595739133673507, "grad_norm": 0.40590837597846985, "learning_rate": 4.841439074542202e-06, "loss": 0.712, "step": 1878 }, { "epoch": 0.5598718760475251, "grad_norm": 0.39654046297073364, "learning_rate": 4.836242185768293e-06, "loss": 0.716, "step": 1879 }, { "epoch": 0.5601698387276993, "grad_norm": 0.41102418303489685, "learning_rate": 4.831045474086932e-06, "loss": 0.7426, "step": 1880 }, { "epoch": 0.5604678014078737, "grad_norm": 0.4282558560371399, "learning_rate": 4.8258489451180014e-06, "loss": 0.7371, "step": 1881 }, { "epoch": 0.560765764088048, "grad_norm": 0.3982420563697815, "learning_rate": 4.820652604481175e-06, "loss": 0.71, "step": 1882 }, { "epoch": 0.5610637267682222, "grad_norm": 0.4026240408420563, "learning_rate": 4.815456457795933e-06, "loss": 0.7056, "step": 1883 }, { "epoch": 0.5613616894483966, "grad_norm": 0.4201667010784149, "learning_rate": 4.810260510681541e-06, "loss": 0.7408, "step": 1884 }, { "epoch": 0.5616596521285709, "grad_norm": 0.42020440101623535, "learning_rate": 4.805064768757051e-06, "loss": 0.7764, "step": 1885 }, { "epoch": 0.5619576148087452, "grad_norm": 0.4300926625728607, "learning_rate": 4.799869237641292e-06, "loss": 0.7987, "step": 1886 }, { "epoch": 0.5622555774889195, "grad_norm": 0.41482165455818176, "learning_rate": 4.794673922952863e-06, "loss": 0.7313, "step": 1887 }, { "epoch": 0.5625535401690939, "grad_norm": 0.40758150815963745, "learning_rate": 4.789478830310134e-06, "loss": 0.7154, "step": 1888 }, { "epoch": 0.5628515028492681, "grad_norm": 0.40623360872268677, "learning_rate": 4.784283965331232e-06, "loss": 0.7059, "step": 1889 }, { "epoch": 0.5631494655294425, "grad_norm": 0.43937063217163086, "learning_rate": 4.779089333634036e-06, "loss": 0.7701, "step": 1890 }, { "epoch": 0.5634474282096168, "grad_norm": 0.41792380809783936, "learning_rate": 4.773894940836174e-06, "loss": 0.7407, "step": 1891 }, { "epoch": 0.563745390889791, "grad_norm": 0.4050884544849396, "learning_rate": 4.76870079255502e-06, "loss": 0.7353, "step": 1892 }, { "epoch": 0.5640433535699654, "grad_norm": 0.4137275815010071, "learning_rate": 4.763506894407675e-06, "loss": 0.7236, "step": 1893 }, { "epoch": 0.5643413162501397, "grad_norm": 0.4158051013946533, "learning_rate": 4.7583132520109784e-06, "loss": 0.7762, "step": 1894 }, { "epoch": 0.564639278930314, "grad_norm": 0.4179161787033081, "learning_rate": 4.753119870981486e-06, "loss": 0.707, "step": 1895 }, { "epoch": 0.5649372416104883, "grad_norm": 0.40873193740844727, "learning_rate": 4.747926756935474e-06, "loss": 0.721, "step": 1896 }, { "epoch": 0.5652352042906625, "grad_norm": 0.4177382290363312, "learning_rate": 4.742733915488932e-06, "loss": 0.7517, "step": 1897 }, { "epoch": 0.5655331669708369, "grad_norm": 0.4085400700569153, "learning_rate": 4.737541352257549e-06, "loss": 0.6908, "step": 1898 }, { "epoch": 0.5658311296510112, "grad_norm": 0.4144119620323181, "learning_rate": 4.732349072856719e-06, "loss": 0.7344, "step": 1899 }, { "epoch": 0.5661290923311855, "grad_norm": 0.4040220379829407, "learning_rate": 4.727157082901527e-06, "loss": 0.7104, "step": 1900 }, { "epoch": 0.5664270550113598, "grad_norm": 0.41264522075653076, "learning_rate": 4.721965388006743e-06, "loss": 0.7355, "step": 1901 }, { "epoch": 0.5667250176915342, "grad_norm": 0.45604878664016724, "learning_rate": 4.716773993786822e-06, "loss": 0.7708, "step": 1902 }, { "epoch": 0.5670229803717084, "grad_norm": 0.4117855131626129, "learning_rate": 4.711582905855889e-06, "loss": 0.7366, "step": 1903 }, { "epoch": 0.5673209430518827, "grad_norm": 0.41091251373291016, "learning_rate": 4.706392129827739e-06, "loss": 0.7461, "step": 1904 }, { "epoch": 0.5676189057320571, "grad_norm": 0.4039057195186615, "learning_rate": 4.7012016713158355e-06, "loss": 0.6907, "step": 1905 }, { "epoch": 0.5679168684122313, "grad_norm": 0.4205576777458191, "learning_rate": 4.69601153593329e-06, "loss": 0.7653, "step": 1906 }, { "epoch": 0.5682148310924057, "grad_norm": 0.44450855255126953, "learning_rate": 4.6908217292928705e-06, "loss": 0.8305, "step": 1907 }, { "epoch": 0.56851279377258, "grad_norm": 0.4235401451587677, "learning_rate": 4.685632257006988e-06, "loss": 0.7603, "step": 1908 }, { "epoch": 0.5688107564527543, "grad_norm": 0.4110579192638397, "learning_rate": 4.680443124687688e-06, "loss": 0.707, "step": 1909 }, { "epoch": 0.5691087191329286, "grad_norm": 0.4077126085758209, "learning_rate": 4.675254337946656e-06, "loss": 0.7189, "step": 1910 }, { "epoch": 0.5694066818131029, "grad_norm": 0.40424802899360657, "learning_rate": 4.670065902395199e-06, "loss": 0.7206, "step": 1911 }, { "epoch": 0.5697046444932772, "grad_norm": 0.42563310265541077, "learning_rate": 4.664877823644242e-06, "loss": 0.7752, "step": 1912 }, { "epoch": 0.5700026071734515, "grad_norm": 0.4315684735774994, "learning_rate": 4.659690107304331e-06, "loss": 0.7838, "step": 1913 }, { "epoch": 0.5703005698536259, "grad_norm": 0.41235530376434326, "learning_rate": 4.654502758985611e-06, "loss": 0.7051, "step": 1914 }, { "epoch": 0.5705985325338001, "grad_norm": 0.4013887345790863, "learning_rate": 4.649315784297837e-06, "loss": 0.7212, "step": 1915 }, { "epoch": 0.5708964952139745, "grad_norm": 0.43051424622535706, "learning_rate": 4.644129188850359e-06, "loss": 0.7512, "step": 1916 }, { "epoch": 0.5711944578941488, "grad_norm": 0.4274933636188507, "learning_rate": 4.638942978252111e-06, "loss": 0.7321, "step": 1917 }, { "epoch": 0.571492420574323, "grad_norm": 0.41361722350120544, "learning_rate": 4.633757158111617e-06, "loss": 0.7293, "step": 1918 }, { "epoch": 0.5717903832544974, "grad_norm": 0.40967097878456116, "learning_rate": 4.6285717340369774e-06, "loss": 0.7139, "step": 1919 }, { "epoch": 0.5720883459346717, "grad_norm": 0.441385954618454, "learning_rate": 4.6233867116358586e-06, "loss": 0.7658, "step": 1920 }, { "epoch": 0.572386308614846, "grad_norm": 0.4277268946170807, "learning_rate": 4.618202096515505e-06, "loss": 0.7178, "step": 1921 }, { "epoch": 0.5726842712950203, "grad_norm": 0.41798168420791626, "learning_rate": 4.6130178942827045e-06, "loss": 0.7251, "step": 1922 }, { "epoch": 0.5729822339751947, "grad_norm": 0.43137815594673157, "learning_rate": 4.607834110543812e-06, "loss": 0.7746, "step": 1923 }, { "epoch": 0.5732801966553689, "grad_norm": 0.40609511733055115, "learning_rate": 4.602650750904724e-06, "loss": 0.7358, "step": 1924 }, { "epoch": 0.5735781593355432, "grad_norm": 0.41860562562942505, "learning_rate": 4.597467820970879e-06, "loss": 0.7486, "step": 1925 }, { "epoch": 0.5738761220157176, "grad_norm": 0.41176602244377136, "learning_rate": 4.5922853263472475e-06, "loss": 0.7587, "step": 1926 }, { "epoch": 0.5741740846958918, "grad_norm": 0.4014931619167328, "learning_rate": 4.587103272638339e-06, "loss": 0.7296, "step": 1927 }, { "epoch": 0.5744720473760662, "grad_norm": 0.4227864444255829, "learning_rate": 4.5819216654481756e-06, "loss": 0.7648, "step": 1928 }, { "epoch": 0.5747700100562405, "grad_norm": 0.39504462480545044, "learning_rate": 4.576740510380301e-06, "loss": 0.6974, "step": 1929 }, { "epoch": 0.5750679727364147, "grad_norm": 0.4270729720592499, "learning_rate": 4.571559813037771e-06, "loss": 0.7364, "step": 1930 }, { "epoch": 0.5753659354165891, "grad_norm": 0.42046594619750977, "learning_rate": 4.566379579023143e-06, "loss": 0.7719, "step": 1931 }, { "epoch": 0.5756638980967633, "grad_norm": 0.4060092270374298, "learning_rate": 4.56119981393848e-06, "loss": 0.7261, "step": 1932 }, { "epoch": 0.5759618607769377, "grad_norm": 0.41615304350852966, "learning_rate": 4.556020523385326e-06, "loss": 0.7051, "step": 1933 }, { "epoch": 0.576259823457112, "grad_norm": 0.41677922010421753, "learning_rate": 4.550841712964725e-06, "loss": 0.7293, "step": 1934 }, { "epoch": 0.5765577861372863, "grad_norm": 0.4129596948623657, "learning_rate": 4.545663388277196e-06, "loss": 0.7013, "step": 1935 }, { "epoch": 0.5768557488174606, "grad_norm": 0.4105396866798401, "learning_rate": 4.540485554922729e-06, "loss": 0.7264, "step": 1936 }, { "epoch": 0.577153711497635, "grad_norm": 0.39716649055480957, "learning_rate": 4.535308218500787e-06, "loss": 0.7288, "step": 1937 }, { "epoch": 0.5774516741778092, "grad_norm": 0.41149744391441345, "learning_rate": 4.530131384610299e-06, "loss": 0.7837, "step": 1938 }, { "epoch": 0.5777496368579835, "grad_norm": 0.4145060181617737, "learning_rate": 4.524955058849641e-06, "loss": 0.7403, "step": 1939 }, { "epoch": 0.5780475995381579, "grad_norm": 0.41767367720603943, "learning_rate": 4.51977924681665e-06, "loss": 0.7836, "step": 1940 }, { "epoch": 0.5783455622183321, "grad_norm": 0.40490424633026123, "learning_rate": 4.514603954108597e-06, "loss": 0.7535, "step": 1941 }, { "epoch": 0.5786435248985065, "grad_norm": 0.41554492712020874, "learning_rate": 4.5094291863222e-06, "loss": 0.7461, "step": 1942 }, { "epoch": 0.5789414875786808, "grad_norm": 0.42319101095199585, "learning_rate": 4.504254949053608e-06, "loss": 0.7393, "step": 1943 }, { "epoch": 0.579239450258855, "grad_norm": 0.4117283225059509, "learning_rate": 4.4990812478983895e-06, "loss": 0.7383, "step": 1944 }, { "epoch": 0.5795374129390294, "grad_norm": 0.41115203499794006, "learning_rate": 4.493908088451541e-06, "loss": 0.7583, "step": 1945 }, { "epoch": 0.5798353756192037, "grad_norm": 0.41332003474235535, "learning_rate": 4.488735476307472e-06, "loss": 0.7868, "step": 1946 }, { "epoch": 0.580133338299378, "grad_norm": 0.42866283655166626, "learning_rate": 4.483563417059995e-06, "loss": 0.7707, "step": 1947 }, { "epoch": 0.5804313009795523, "grad_norm": 0.4217878580093384, "learning_rate": 4.478391916302327e-06, "loss": 0.751, "step": 1948 }, { "epoch": 0.5807292636597267, "grad_norm": 0.4109710454940796, "learning_rate": 4.473220979627088e-06, "loss": 0.7387, "step": 1949 }, { "epoch": 0.5810272263399009, "grad_norm": 0.41321882605552673, "learning_rate": 4.468050612626277e-06, "loss": 0.7451, "step": 1950 }, { "epoch": 0.5813251890200752, "grad_norm": 0.4039456248283386, "learning_rate": 4.462880820891284e-06, "loss": 0.7271, "step": 1951 }, { "epoch": 0.5816231517002496, "grad_norm": 0.41406941413879395, "learning_rate": 4.457711610012873e-06, "loss": 0.755, "step": 1952 }, { "epoch": 0.5819211143804238, "grad_norm": 0.42365142703056335, "learning_rate": 4.452542985581184e-06, "loss": 0.7512, "step": 1953 }, { "epoch": 0.5822190770605982, "grad_norm": 0.43334272503852844, "learning_rate": 4.44737495318572e-06, "loss": 0.7667, "step": 1954 }, { "epoch": 0.5825170397407725, "grad_norm": 0.41110551357269287, "learning_rate": 4.442207518415341e-06, "loss": 0.7271, "step": 1955 }, { "epoch": 0.5828150024209467, "grad_norm": 0.4264329969882965, "learning_rate": 4.4370406868582684e-06, "loss": 0.7621, "step": 1956 }, { "epoch": 0.5831129651011211, "grad_norm": 0.4122719168663025, "learning_rate": 4.431874464102065e-06, "loss": 0.7343, "step": 1957 }, { "epoch": 0.5834109277812954, "grad_norm": 0.4079119861125946, "learning_rate": 4.426708855733637e-06, "loss": 0.7283, "step": 1958 }, { "epoch": 0.5837088904614697, "grad_norm": 0.422280490398407, "learning_rate": 4.421543867339227e-06, "loss": 0.7529, "step": 1959 }, { "epoch": 0.584006853141644, "grad_norm": 0.4280681312084198, "learning_rate": 4.4163795045044055e-06, "loss": 0.7469, "step": 1960 }, { "epoch": 0.5843048158218184, "grad_norm": 0.4085499048233032, "learning_rate": 4.411215772814066e-06, "loss": 0.7161, "step": 1961 }, { "epoch": 0.5846027785019926, "grad_norm": 0.43589451909065247, "learning_rate": 4.4060526778524245e-06, "loss": 0.779, "step": 1962 }, { "epoch": 0.584900741182167, "grad_norm": 0.42634084820747375, "learning_rate": 4.400890225203001e-06, "loss": 0.7485, "step": 1963 }, { "epoch": 0.5851987038623413, "grad_norm": 0.41059428453445435, "learning_rate": 4.395728420448627e-06, "loss": 0.755, "step": 1964 }, { "epoch": 0.5854966665425155, "grad_norm": 0.40567854046821594, "learning_rate": 4.3905672691714315e-06, "loss": 0.7407, "step": 1965 }, { "epoch": 0.5857946292226899, "grad_norm": 0.4206496477127075, "learning_rate": 4.385406776952833e-06, "loss": 0.7505, "step": 1966 }, { "epoch": 0.5860925919028641, "grad_norm": 0.4188379943370819, "learning_rate": 4.380246949373543e-06, "loss": 0.7567, "step": 1967 }, { "epoch": 0.5863905545830385, "grad_norm": 0.4289083182811737, "learning_rate": 4.375087792013553e-06, "loss": 0.7583, "step": 1968 }, { "epoch": 0.5866885172632128, "grad_norm": 0.41483402252197266, "learning_rate": 4.369929310452126e-06, "loss": 0.766, "step": 1969 }, { "epoch": 0.586986479943387, "grad_norm": 0.4211726486682892, "learning_rate": 4.364771510267798e-06, "loss": 0.7582, "step": 1970 }, { "epoch": 0.5872844426235614, "grad_norm": 0.4256436824798584, "learning_rate": 4.3596143970383665e-06, "loss": 0.7636, "step": 1971 }, { "epoch": 0.5875824053037357, "grad_norm": 0.4167037606239319, "learning_rate": 4.3544579763408855e-06, "loss": 0.7531, "step": 1972 }, { "epoch": 0.58788036798391, "grad_norm": 0.40595850348472595, "learning_rate": 4.3493022537516634e-06, "loss": 0.6923, "step": 1973 }, { "epoch": 0.5881783306640843, "grad_norm": 0.4079906642436981, "learning_rate": 4.344147234846249e-06, "loss": 0.7281, "step": 1974 }, { "epoch": 0.5884762933442587, "grad_norm": 0.41655340790748596, "learning_rate": 4.338992925199433e-06, "loss": 0.7389, "step": 1975 }, { "epoch": 0.5887742560244329, "grad_norm": 0.42848271131515503, "learning_rate": 4.333839330385241e-06, "loss": 0.7105, "step": 1976 }, { "epoch": 0.5890722187046072, "grad_norm": 0.4202434718608856, "learning_rate": 4.328686455976917e-06, "loss": 0.7297, "step": 1977 }, { "epoch": 0.5893701813847816, "grad_norm": 0.40047043561935425, "learning_rate": 4.323534307546938e-06, "loss": 0.7358, "step": 1978 }, { "epoch": 0.5896681440649558, "grad_norm": 0.4223696291446686, "learning_rate": 4.318382890666988e-06, "loss": 0.7616, "step": 1979 }, { "epoch": 0.5899661067451302, "grad_norm": 0.414849191904068, "learning_rate": 4.313232210907959e-06, "loss": 0.7623, "step": 1980 }, { "epoch": 0.5902640694253045, "grad_norm": 0.4242858290672302, "learning_rate": 4.308082273839953e-06, "loss": 0.7302, "step": 1981 }, { "epoch": 0.5905620321054788, "grad_norm": 0.41924822330474854, "learning_rate": 4.302933085032262e-06, "loss": 0.7421, "step": 1982 }, { "epoch": 0.5908599947856531, "grad_norm": 0.42674970626831055, "learning_rate": 4.29778465005337e-06, "loss": 0.7565, "step": 1983 }, { "epoch": 0.5911579574658274, "grad_norm": 0.4174593687057495, "learning_rate": 4.29263697447095e-06, "loss": 0.7441, "step": 1984 }, { "epoch": 0.5914559201460017, "grad_norm": 0.40316957235336304, "learning_rate": 4.287490063851848e-06, "loss": 0.6995, "step": 1985 }, { "epoch": 0.591753882826176, "grad_norm": 0.40519094467163086, "learning_rate": 4.282343923762088e-06, "loss": 0.7523, "step": 1986 }, { "epoch": 0.5920518455063504, "grad_norm": 0.4047006368637085, "learning_rate": 4.277198559766858e-06, "loss": 0.7322, "step": 1987 }, { "epoch": 0.5923498081865246, "grad_norm": 0.4245583713054657, "learning_rate": 4.272053977430503e-06, "loss": 0.7703, "step": 1988 }, { "epoch": 0.592647770866699, "grad_norm": 0.4122294485569, "learning_rate": 4.266910182316533e-06, "loss": 0.7066, "step": 1989 }, { "epoch": 0.5929457335468733, "grad_norm": 0.4069257080554962, "learning_rate": 4.261767179987595e-06, "loss": 0.7085, "step": 1990 }, { "epoch": 0.5932436962270475, "grad_norm": 0.41777312755584717, "learning_rate": 4.256624976005485e-06, "loss": 0.7569, "step": 1991 }, { "epoch": 0.5935416589072219, "grad_norm": 0.41323649883270264, "learning_rate": 4.251483575931139e-06, "loss": 0.7342, "step": 1992 }, { "epoch": 0.5938396215873962, "grad_norm": 0.42368587851524353, "learning_rate": 4.246342985324614e-06, "loss": 0.7261, "step": 1993 }, { "epoch": 0.5941375842675705, "grad_norm": 0.42891785502433777, "learning_rate": 4.241203209745098e-06, "loss": 0.7437, "step": 1994 }, { "epoch": 0.5944355469477448, "grad_norm": 0.4087558388710022, "learning_rate": 4.236064254750899e-06, "loss": 0.7473, "step": 1995 }, { "epoch": 0.5947335096279192, "grad_norm": 0.41087907552719116, "learning_rate": 4.230926125899432e-06, "loss": 0.7118, "step": 1996 }, { "epoch": 0.5950314723080934, "grad_norm": 0.4258415400981903, "learning_rate": 4.225788828747224e-06, "loss": 0.7338, "step": 1997 }, { "epoch": 0.5953294349882677, "grad_norm": 0.42956599593162537, "learning_rate": 4.2206523688499e-06, "loss": 0.7546, "step": 1998 }, { "epoch": 0.5956273976684421, "grad_norm": 0.4405895173549652, "learning_rate": 4.215516751762177e-06, "loss": 0.8171, "step": 1999 }, { "epoch": 0.5959253603486163, "grad_norm": 0.4291638433933258, "learning_rate": 4.210381983037869e-06, "loss": 0.7791, "step": 2000 } ], "logging_steps": 1, "max_steps": 3357, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.1083765476491264e+19, "train_batch_size": 10, "trial_name": null, "trial_params": null }