{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.8360039013515397, "eval_steps": 500, "global_step": 3000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00027866796711717987, "grad_norm": 0.5877133476749101, "learning_rate": 8.356545961002785e-07, "loss": 1.8941, "step": 1 }, { "epoch": 0.0005573359342343597, "grad_norm": 0.48042275085315894, "learning_rate": 1.671309192200557e-06, "loss": 1.9876, "step": 2 }, { "epoch": 0.0008360039013515396, "grad_norm": 0.502387651889168, "learning_rate": 2.5069637883008354e-06, "loss": 1.9278, "step": 3 }, { "epoch": 0.0011146718684687195, "grad_norm": 0.5073506635516672, "learning_rate": 3.342618384401114e-06, "loss": 1.9378, "step": 4 }, { "epoch": 0.0013933398355858995, "grad_norm": 0.5322417595474097, "learning_rate": 4.178272980501392e-06, "loss": 1.9444, "step": 5 }, { "epoch": 0.0016720078027030792, "grad_norm": 0.5388403569480659, "learning_rate": 5.013927576601671e-06, "loss": 1.907, "step": 6 }, { "epoch": 0.0019506757698202592, "grad_norm": 0.5061026016906961, "learning_rate": 5.84958217270195e-06, "loss": 1.8816, "step": 7 }, { "epoch": 0.002229343736937439, "grad_norm": 0.534913567853892, "learning_rate": 6.685236768802228e-06, "loss": 1.9394, "step": 8 }, { "epoch": 0.002508011704054619, "grad_norm": 0.5318205122465077, "learning_rate": 7.520891364902506e-06, "loss": 1.8753, "step": 9 }, { "epoch": 0.002786679671171799, "grad_norm": 0.5039549743987566, "learning_rate": 8.356545961002784e-06, "loss": 1.9061, "step": 10 }, { "epoch": 0.0030653476382889785, "grad_norm": 0.47974635562772167, "learning_rate": 9.192200557103063e-06, "loss": 1.8704, "step": 11 }, { "epoch": 0.0033440156054061585, "grad_norm": 0.543208801061286, "learning_rate": 1.0027855153203342e-05, "loss": 1.8178, "step": 12 }, { "epoch": 0.0036226835725233384, "grad_norm": 0.4883512998069601, "learning_rate": 1.086350974930362e-05, "loss": 1.8709, "step": 13 }, { "epoch": 0.0039013515396405184, "grad_norm": 0.5005884076949138, "learning_rate": 1.16991643454039e-05, "loss": 1.8348, "step": 14 }, { "epoch": 0.004180019506757698, "grad_norm": 0.5067979441957903, "learning_rate": 1.2534818941504177e-05, "loss": 1.9196, "step": 15 }, { "epoch": 0.004458687473874878, "grad_norm": 0.5004015967700227, "learning_rate": 1.3370473537604456e-05, "loss": 1.8998, "step": 16 }, { "epoch": 0.004737355440992058, "grad_norm": 0.5424934514850706, "learning_rate": 1.4206128133704734e-05, "loss": 1.9342, "step": 17 }, { "epoch": 0.005016023408109238, "grad_norm": 0.5011473644219502, "learning_rate": 1.5041782729805012e-05, "loss": 1.9062, "step": 18 }, { "epoch": 0.0052946913752264175, "grad_norm": 0.4593432636617286, "learning_rate": 1.587743732590529e-05, "loss": 1.8507, "step": 19 }, { "epoch": 0.005573359342343598, "grad_norm": 0.47706719354796917, "learning_rate": 1.6713091922005568e-05, "loss": 1.8762, "step": 20 }, { "epoch": 0.005852027309460777, "grad_norm": 0.5892800627209591, "learning_rate": 1.754874651810585e-05, "loss": 1.8923, "step": 21 }, { "epoch": 0.006130695276577957, "grad_norm": 0.5796503870686479, "learning_rate": 1.8384401114206126e-05, "loss": 1.8369, "step": 22 }, { "epoch": 0.006409363243695137, "grad_norm": 0.5146478085922461, "learning_rate": 1.9220055710306406e-05, "loss": 1.8706, "step": 23 }, { "epoch": 0.006688031210812317, "grad_norm": 0.5259182839728231, "learning_rate": 2.0055710306406683e-05, "loss": 1.8872, "step": 24 }, { "epoch": 0.006966699177929497, "grad_norm": 0.49382471982864695, "learning_rate": 2.0891364902506964e-05, "loss": 1.8581, "step": 25 }, { "epoch": 0.007245367145046677, "grad_norm": 0.44986915107685793, "learning_rate": 2.172701949860724e-05, "loss": 1.9162, "step": 26 }, { "epoch": 0.007524035112163856, "grad_norm": 0.5109771583906041, "learning_rate": 2.256267409470752e-05, "loss": 1.8335, "step": 27 }, { "epoch": 0.007802703079281037, "grad_norm": 0.4398725925789665, "learning_rate": 2.33983286908078e-05, "loss": 1.8472, "step": 28 }, { "epoch": 0.008081371046398217, "grad_norm": 0.4411346912656378, "learning_rate": 2.4233983286908076e-05, "loss": 1.8879, "step": 29 }, { "epoch": 0.008360039013515397, "grad_norm": 0.44529208077045485, "learning_rate": 2.5069637883008353e-05, "loss": 1.7645, "step": 30 }, { "epoch": 0.008638706980632576, "grad_norm": 0.5380333840394416, "learning_rate": 2.5905292479108634e-05, "loss": 1.8526, "step": 31 }, { "epoch": 0.008917374947749756, "grad_norm": 0.391095128581943, "learning_rate": 2.674094707520891e-05, "loss": 1.8126, "step": 32 }, { "epoch": 0.009196042914866935, "grad_norm": 0.3400238075610412, "learning_rate": 2.757660167130919e-05, "loss": 1.7898, "step": 33 }, { "epoch": 0.009474710881984117, "grad_norm": 0.35559630080735066, "learning_rate": 2.841225626740947e-05, "loss": 1.8132, "step": 34 }, { "epoch": 0.009753378849101296, "grad_norm": 0.3547992126025832, "learning_rate": 2.9247910863509746e-05, "loss": 1.7705, "step": 35 }, { "epoch": 0.010032046816218476, "grad_norm": 0.3239004516563582, "learning_rate": 3.0083565459610023e-05, "loss": 1.8292, "step": 36 }, { "epoch": 0.010310714783335655, "grad_norm": 0.3812171571687621, "learning_rate": 3.09192200557103e-05, "loss": 1.7844, "step": 37 }, { "epoch": 0.010589382750452835, "grad_norm": 0.28283563893092023, "learning_rate": 3.175487465181058e-05, "loss": 1.8311, "step": 38 }, { "epoch": 0.010868050717570016, "grad_norm": 0.2872878549816208, "learning_rate": 3.259052924791086e-05, "loss": 1.7855, "step": 39 }, { "epoch": 0.011146718684687196, "grad_norm": 0.2782792872240412, "learning_rate": 3.3426183844011136e-05, "loss": 1.7682, "step": 40 }, { "epoch": 0.011425386651804375, "grad_norm": 0.40131944071113085, "learning_rate": 3.4261838440111416e-05, "loss": 1.7621, "step": 41 }, { "epoch": 0.011704054618921555, "grad_norm": 0.27750837021751823, "learning_rate": 3.50974930362117e-05, "loss": 1.7759, "step": 42 }, { "epoch": 0.011982722586038734, "grad_norm": 0.2848056280847543, "learning_rate": 3.593314763231197e-05, "loss": 1.8282, "step": 43 }, { "epoch": 0.012261390553155914, "grad_norm": 0.2684622671077532, "learning_rate": 3.676880222841225e-05, "loss": 1.7378, "step": 44 }, { "epoch": 0.012540058520273095, "grad_norm": 0.2622125700914813, "learning_rate": 3.760445682451253e-05, "loss": 1.823, "step": 45 }, { "epoch": 0.012818726487390275, "grad_norm": 0.2931923072279208, "learning_rate": 3.844011142061281e-05, "loss": 1.7348, "step": 46 }, { "epoch": 0.013097394454507454, "grad_norm": 0.2824089016455819, "learning_rate": 3.9275766016713086e-05, "loss": 1.7748, "step": 47 }, { "epoch": 0.013376062421624634, "grad_norm": 0.27219447277449416, "learning_rate": 4.011142061281337e-05, "loss": 1.781, "step": 48 }, { "epoch": 0.013654730388741813, "grad_norm": 0.24957910459340735, "learning_rate": 4.094707520891364e-05, "loss": 1.7374, "step": 49 }, { "epoch": 0.013933398355858995, "grad_norm": 0.28001096263037983, "learning_rate": 4.178272980501393e-05, "loss": 1.7545, "step": 50 }, { "epoch": 0.014212066322976174, "grad_norm": 0.24985575001197666, "learning_rate": 4.26183844011142e-05, "loss": 1.722, "step": 51 }, { "epoch": 0.014490734290093354, "grad_norm": 0.2795134519668688, "learning_rate": 4.345403899721448e-05, "loss": 1.7859, "step": 52 }, { "epoch": 0.014769402257210533, "grad_norm": 0.2562679985626021, "learning_rate": 4.4289693593314756e-05, "loss": 1.7126, "step": 53 }, { "epoch": 0.015048070224327713, "grad_norm": 0.2540170960662595, "learning_rate": 4.512534818941504e-05, "loss": 1.6695, "step": 54 }, { "epoch": 0.015326738191444894, "grad_norm": 0.24912576563381056, "learning_rate": 4.596100278551531e-05, "loss": 1.6716, "step": 55 }, { "epoch": 0.015605406158562074, "grad_norm": 0.24131476668338983, "learning_rate": 4.67966573816156e-05, "loss": 1.7166, "step": 56 }, { "epoch": 0.015884074125679253, "grad_norm": 0.2425290716484195, "learning_rate": 4.763231197771587e-05, "loss": 1.6267, "step": 57 }, { "epoch": 0.016162742092796435, "grad_norm": 0.2145207057547455, "learning_rate": 4.846796657381615e-05, "loss": 1.6766, "step": 58 }, { "epoch": 0.016441410059913612, "grad_norm": 0.22589581875916917, "learning_rate": 4.9303621169916426e-05, "loss": 1.7231, "step": 59 }, { "epoch": 0.016720078027030794, "grad_norm": 0.2171874947937498, "learning_rate": 5.013927576601671e-05, "loss": 1.6811, "step": 60 }, { "epoch": 0.01699874599414797, "grad_norm": 0.22177865460713175, "learning_rate": 5.097493036211698e-05, "loss": 1.6697, "step": 61 }, { "epoch": 0.017277413961265153, "grad_norm": 0.19578567565605623, "learning_rate": 5.181058495821727e-05, "loss": 1.6489, "step": 62 }, { "epoch": 0.017556081928382334, "grad_norm": 0.2196388453627918, "learning_rate": 5.264623955431754e-05, "loss": 1.6507, "step": 63 }, { "epoch": 0.017834749895499512, "grad_norm": 0.21791256018831742, "learning_rate": 5.348189415041782e-05, "loss": 1.6736, "step": 64 }, { "epoch": 0.018113417862616693, "grad_norm": 0.200944911022821, "learning_rate": 5.4317548746518096e-05, "loss": 1.605, "step": 65 }, { "epoch": 0.01839208582973387, "grad_norm": 0.21375065325798911, "learning_rate": 5.515320334261838e-05, "loss": 1.6782, "step": 66 }, { "epoch": 0.018670753796851052, "grad_norm": 0.19482774575296682, "learning_rate": 5.5988857938718664e-05, "loss": 1.6182, "step": 67 }, { "epoch": 0.018949421763968233, "grad_norm": 0.1906900568035442, "learning_rate": 5.682451253481894e-05, "loss": 1.6825, "step": 68 }, { "epoch": 0.01922808973108541, "grad_norm": 0.19336619659644877, "learning_rate": 5.766016713091922e-05, "loss": 1.5939, "step": 69 }, { "epoch": 0.019506757698202593, "grad_norm": 0.17351099203037493, "learning_rate": 5.849582172701949e-05, "loss": 1.6953, "step": 70 }, { "epoch": 0.01978542566531977, "grad_norm": 0.17578900760598187, "learning_rate": 5.933147632311977e-05, "loss": 1.5874, "step": 71 }, { "epoch": 0.02006409363243695, "grad_norm": 0.17028990761975357, "learning_rate": 6.016713091922005e-05, "loss": 1.6755, "step": 72 }, { "epoch": 0.020342761599554133, "grad_norm": 0.17818820803719806, "learning_rate": 6.1002785515320334e-05, "loss": 1.6464, "step": 73 }, { "epoch": 0.02062142956667131, "grad_norm": 0.18447608616655192, "learning_rate": 6.18384401114206e-05, "loss": 1.6545, "step": 74 }, { "epoch": 0.020900097533788492, "grad_norm": 0.18173312769243338, "learning_rate": 6.26740947075209e-05, "loss": 1.539, "step": 75 }, { "epoch": 0.02117876550090567, "grad_norm": 0.1648261859540007, "learning_rate": 6.350974930362116e-05, "loss": 1.5616, "step": 76 }, { "epoch": 0.02145743346802285, "grad_norm": 0.15505636403713774, "learning_rate": 6.434540389972144e-05, "loss": 1.5921, "step": 77 }, { "epoch": 0.021736101435140032, "grad_norm": 0.15106160226437845, "learning_rate": 6.518105849582172e-05, "loss": 1.5056, "step": 78 }, { "epoch": 0.02201476940225721, "grad_norm": 0.1553138477760284, "learning_rate": 6.6016713091922e-05, "loss": 1.5713, "step": 79 }, { "epoch": 0.02229343736937439, "grad_norm": 0.1507497814924371, "learning_rate": 6.685236768802227e-05, "loss": 1.5792, "step": 80 }, { "epoch": 0.02257210533649157, "grad_norm": 0.13243131841695419, "learning_rate": 6.768802228412257e-05, "loss": 1.5935, "step": 81 }, { "epoch": 0.02285077330360875, "grad_norm": 0.13733087988152534, "learning_rate": 6.852367688022283e-05, "loss": 1.5191, "step": 82 }, { "epoch": 0.02312944127072593, "grad_norm": 0.13936360067724934, "learning_rate": 6.935933147632311e-05, "loss": 1.5165, "step": 83 }, { "epoch": 0.02340810923784311, "grad_norm": 0.13057023206980967, "learning_rate": 7.01949860724234e-05, "loss": 1.5651, "step": 84 }, { "epoch": 0.02368677720496029, "grad_norm": 0.13076877730555964, "learning_rate": 7.103064066852367e-05, "loss": 1.5606, "step": 85 }, { "epoch": 0.02396544517207747, "grad_norm": 0.12579189843526994, "learning_rate": 7.186629526462394e-05, "loss": 1.4906, "step": 86 }, { "epoch": 0.02424411313919465, "grad_norm": 0.13373671095983983, "learning_rate": 7.270194986072424e-05, "loss": 1.5505, "step": 87 }, { "epoch": 0.024522781106311828, "grad_norm": 0.1401989633497508, "learning_rate": 7.35376044568245e-05, "loss": 1.5047, "step": 88 }, { "epoch": 0.02480144907342901, "grad_norm": 0.12856257656342457, "learning_rate": 7.437325905292478e-05, "loss": 1.56, "step": 89 }, { "epoch": 0.02508011704054619, "grad_norm": 0.1100672485883676, "learning_rate": 7.520891364902506e-05, "loss": 1.5081, "step": 90 }, { "epoch": 0.025358785007663368, "grad_norm": 0.11540836216275797, "learning_rate": 7.604456824512534e-05, "loss": 1.5322, "step": 91 }, { "epoch": 0.02563745297478055, "grad_norm": 0.11112745861129263, "learning_rate": 7.688022284122562e-05, "loss": 1.4834, "step": 92 }, { "epoch": 0.025916120941897727, "grad_norm": 0.10536329673977879, "learning_rate": 7.771587743732589e-05, "loss": 1.5392, "step": 93 }, { "epoch": 0.02619478890901491, "grad_norm": 0.09776049461691359, "learning_rate": 7.855153203342617e-05, "loss": 1.5147, "step": 94 }, { "epoch": 0.02647345687613209, "grad_norm": 0.09400903495586489, "learning_rate": 7.938718662952645e-05, "loss": 1.5486, "step": 95 }, { "epoch": 0.026752124843249268, "grad_norm": 0.09528223151558249, "learning_rate": 8.022284122562673e-05, "loss": 1.5168, "step": 96 }, { "epoch": 0.02703079281036645, "grad_norm": 0.09595563142888357, "learning_rate": 8.1058495821727e-05, "loss": 1.5274, "step": 97 }, { "epoch": 0.027309460777483627, "grad_norm": 0.09500669164988852, "learning_rate": 8.189415041782728e-05, "loss": 1.4967, "step": 98 }, { "epoch": 0.027588128744600808, "grad_norm": 0.09441021693281004, "learning_rate": 8.272980501392758e-05, "loss": 1.5019, "step": 99 }, { "epoch": 0.02786679671171799, "grad_norm": 0.08841842781238164, "learning_rate": 8.356545961002786e-05, "loss": 1.5054, "step": 100 }, { "epoch": 0.028145464678835167, "grad_norm": 0.08473379478995909, "learning_rate": 8.440111420612814e-05, "loss": 1.5141, "step": 101 }, { "epoch": 0.02842413264595235, "grad_norm": 0.07901726483512692, "learning_rate": 8.52367688022284e-05, "loss": 1.4994, "step": 102 }, { "epoch": 0.028702800613069526, "grad_norm": 0.08166277955304611, "learning_rate": 8.607242339832868e-05, "loss": 1.5179, "step": 103 }, { "epoch": 0.028981468580186708, "grad_norm": 0.07281424787729403, "learning_rate": 8.690807799442896e-05, "loss": 1.4746, "step": 104 }, { "epoch": 0.02926013654730389, "grad_norm": 0.07635996248357141, "learning_rate": 8.774373259052925e-05, "loss": 1.4128, "step": 105 }, { "epoch": 0.029538804514421067, "grad_norm": 0.07489632322341418, "learning_rate": 8.857938718662951e-05, "loss": 1.5046, "step": 106 }, { "epoch": 0.029817472481538248, "grad_norm": 0.07051015211515885, "learning_rate": 8.941504178272979e-05, "loss": 1.4678, "step": 107 }, { "epoch": 0.030096140448655426, "grad_norm": 0.0659442256083889, "learning_rate": 9.025069637883007e-05, "loss": 1.5349, "step": 108 }, { "epoch": 0.030374808415772607, "grad_norm": 0.0665309198706812, "learning_rate": 9.108635097493037e-05, "loss": 1.3719, "step": 109 }, { "epoch": 0.03065347638288979, "grad_norm": 0.06913811948373796, "learning_rate": 9.192200557103062e-05, "loss": 1.4592, "step": 110 }, { "epoch": 0.030932144350006966, "grad_norm": 0.06267051679372977, "learning_rate": 9.275766016713092e-05, "loss": 1.3778, "step": 111 }, { "epoch": 0.031210812317124147, "grad_norm": 0.06362367252046176, "learning_rate": 9.35933147632312e-05, "loss": 1.4435, "step": 112 }, { "epoch": 0.031489480284241325, "grad_norm": 0.062005010954373864, "learning_rate": 9.442896935933148e-05, "loss": 1.4157, "step": 113 }, { "epoch": 0.031768148251358506, "grad_norm": 0.06072616124836147, "learning_rate": 9.526462395543174e-05, "loss": 1.4326, "step": 114 }, { "epoch": 0.03204681621847569, "grad_norm": 0.05843805880483854, "learning_rate": 9.610027855153202e-05, "loss": 1.5034, "step": 115 }, { "epoch": 0.03232548418559287, "grad_norm": 0.061605037319217834, "learning_rate": 9.69359331476323e-05, "loss": 1.4778, "step": 116 }, { "epoch": 0.03260415215271004, "grad_norm": 0.053465877983160034, "learning_rate": 9.777158774373259e-05, "loss": 1.4364, "step": 117 }, { "epoch": 0.032882820119827225, "grad_norm": 0.05790550968205393, "learning_rate": 9.860724233983285e-05, "loss": 1.4832, "step": 118 }, { "epoch": 0.033161488086944406, "grad_norm": 0.06598511603339223, "learning_rate": 9.944289693593313e-05, "loss": 1.4706, "step": 119 }, { "epoch": 0.03344015605406159, "grad_norm": 0.05403786593846714, "learning_rate": 0.00010027855153203341, "loss": 1.5305, "step": 120 }, { "epoch": 0.03371882402117877, "grad_norm": 0.05363560747950985, "learning_rate": 0.00010111420612813371, "loss": 1.4287, "step": 121 }, { "epoch": 0.03399749198829594, "grad_norm": 0.054665385643599615, "learning_rate": 0.00010194986072423396, "loss": 1.3756, "step": 122 }, { "epoch": 0.034276159955413124, "grad_norm": 0.05528757127743845, "learning_rate": 0.00010278551532033426, "loss": 1.4032, "step": 123 }, { "epoch": 0.034554827922530305, "grad_norm": 0.05066044291759129, "learning_rate": 0.00010362116991643454, "loss": 1.4209, "step": 124 }, { "epoch": 0.03483349588964749, "grad_norm": 0.08515397484750788, "learning_rate": 0.00010445682451253482, "loss": 1.4317, "step": 125 }, { "epoch": 0.03511216385676467, "grad_norm": 0.051363435081123864, "learning_rate": 0.00010529247910863508, "loss": 1.4455, "step": 126 }, { "epoch": 0.03539083182388184, "grad_norm": 0.052563894185618736, "learning_rate": 0.00010612813370473536, "loss": 1.347, "step": 127 }, { "epoch": 0.035669499790999024, "grad_norm": 0.04910606095136241, "learning_rate": 0.00010696378830083564, "loss": 1.4389, "step": 128 }, { "epoch": 0.035948167758116205, "grad_norm": 0.04634703525330851, "learning_rate": 0.00010779944289693593, "loss": 1.4666, "step": 129 }, { "epoch": 0.036226835725233386, "grad_norm": 0.051259977771383705, "learning_rate": 0.00010863509749303619, "loss": 1.3978, "step": 130 }, { "epoch": 0.03650550369235057, "grad_norm": 0.04889807490098387, "learning_rate": 0.00010947075208913647, "loss": 1.4722, "step": 131 }, { "epoch": 0.03678417165946774, "grad_norm": 0.0522380420810742, "learning_rate": 0.00011030640668523675, "loss": 1.3738, "step": 132 }, { "epoch": 0.03706283962658492, "grad_norm": 0.04971758999735424, "learning_rate": 0.00011114206128133705, "loss": 1.3908, "step": 133 }, { "epoch": 0.037341507593702104, "grad_norm": 0.04600146303599781, "learning_rate": 0.00011197771587743733, "loss": 1.4292, "step": 134 }, { "epoch": 0.037620175560819286, "grad_norm": 0.04600059063909686, "learning_rate": 0.0001128133704735376, "loss": 1.3801, "step": 135 }, { "epoch": 0.03789884352793647, "grad_norm": 0.06406957561580823, "learning_rate": 0.00011364902506963788, "loss": 1.3575, "step": 136 }, { "epoch": 0.03817751149505364, "grad_norm": 0.04427429743800022, "learning_rate": 0.00011448467966573816, "loss": 1.4424, "step": 137 }, { "epoch": 0.03845617946217082, "grad_norm": 0.04366029341469882, "learning_rate": 0.00011532033426183844, "loss": 1.4339, "step": 138 }, { "epoch": 0.038734847429288004, "grad_norm": 0.04628256541620269, "learning_rate": 0.0001161559888579387, "loss": 1.4227, "step": 139 }, { "epoch": 0.039013515396405185, "grad_norm": 0.047356020331605186, "learning_rate": 0.00011699164345403898, "loss": 1.4098, "step": 140 }, { "epoch": 0.039292183363522366, "grad_norm": 0.041159367476988715, "learning_rate": 0.00011782729805013927, "loss": 1.3672, "step": 141 }, { "epoch": 0.03957085133063954, "grad_norm": 0.043320885702419064, "learning_rate": 0.00011866295264623955, "loss": 1.3775, "step": 142 }, { "epoch": 0.03984951929775672, "grad_norm": 0.04076098052755779, "learning_rate": 0.00011949860724233981, "loss": 1.4284, "step": 143 }, { "epoch": 0.0401281872648739, "grad_norm": 0.0455318003855874, "learning_rate": 0.0001203342618384401, "loss": 1.4172, "step": 144 }, { "epoch": 0.040406855231991085, "grad_norm": 0.04084230331151954, "learning_rate": 0.00012116991643454039, "loss": 1.3782, "step": 145 }, { "epoch": 0.040685523199108266, "grad_norm": 0.04169877248390086, "learning_rate": 0.00012200557103064067, "loss": 1.3664, "step": 146 }, { "epoch": 0.04096419116622544, "grad_norm": 0.04939001171339211, "learning_rate": 0.00012284122562674092, "loss": 1.4173, "step": 147 }, { "epoch": 0.04124285913334262, "grad_norm": 0.048921187129098834, "learning_rate": 0.0001236768802228412, "loss": 1.3819, "step": 148 }, { "epoch": 0.0415215271004598, "grad_norm": 0.04159189516826921, "learning_rate": 0.00012451253481894148, "loss": 1.3972, "step": 149 }, { "epoch": 0.041800195067576984, "grad_norm": 0.047027959574838615, "learning_rate": 0.0001253481894150418, "loss": 1.3516, "step": 150 }, { "epoch": 0.042078863034694165, "grad_norm": 0.03840366569810316, "learning_rate": 0.00012618384401114204, "loss": 1.4, "step": 151 }, { "epoch": 0.04235753100181134, "grad_norm": 0.038691897598889066, "learning_rate": 0.00012701949860724232, "loss": 1.3911, "step": 152 }, { "epoch": 0.04263619896892852, "grad_norm": 0.04742391658671507, "learning_rate": 0.0001278551532033426, "loss": 1.3656, "step": 153 }, { "epoch": 0.0429148669360457, "grad_norm": 0.04166606606675214, "learning_rate": 0.00012869080779944289, "loss": 1.4143, "step": 154 }, { "epoch": 0.04319353490316288, "grad_norm": 0.043288145540043385, "learning_rate": 0.00012952646239554317, "loss": 1.3256, "step": 155 }, { "epoch": 0.043472202870280065, "grad_norm": 0.04712314945188257, "learning_rate": 0.00013036211699164345, "loss": 1.4797, "step": 156 }, { "epoch": 0.04375087083739724, "grad_norm": 0.04212192138957906, "learning_rate": 0.00013119777158774373, "loss": 1.3923, "step": 157 }, { "epoch": 0.04402953880451442, "grad_norm": 0.0409921438975744, "learning_rate": 0.000132033426183844, "loss": 1.3385, "step": 158 }, { "epoch": 0.0443082067716316, "grad_norm": 0.03904243839127973, "learning_rate": 0.00013286908077994426, "loss": 1.4297, "step": 159 }, { "epoch": 0.04458687473874878, "grad_norm": 0.047035390384443536, "learning_rate": 0.00013370473537604454, "loss": 1.4315, "step": 160 }, { "epoch": 0.04486554270586596, "grad_norm": 0.04144376702070777, "learning_rate": 0.00013454038997214482, "loss": 1.3797, "step": 161 }, { "epoch": 0.04514421067298314, "grad_norm": 0.0405712185938395, "learning_rate": 0.00013537604456824513, "loss": 1.3795, "step": 162 }, { "epoch": 0.04542287864010032, "grad_norm": 0.043099680888341846, "learning_rate": 0.00013621169916434538, "loss": 1.3653, "step": 163 }, { "epoch": 0.0457015466072175, "grad_norm": 0.04069252959005777, "learning_rate": 0.00013704735376044566, "loss": 1.3409, "step": 164 }, { "epoch": 0.04598021457433468, "grad_norm": 0.0388550321787244, "learning_rate": 0.00013788300835654595, "loss": 1.3765, "step": 165 }, { "epoch": 0.04625888254145186, "grad_norm": 0.04709770955796097, "learning_rate": 0.00013871866295264623, "loss": 1.3737, "step": 166 }, { "epoch": 0.04653755050856904, "grad_norm": 0.04065238773467595, "learning_rate": 0.0001395543175487465, "loss": 1.4027, "step": 167 }, { "epoch": 0.04681621847568622, "grad_norm": 0.040606208747916886, "learning_rate": 0.0001403899721448468, "loss": 1.4102, "step": 168 }, { "epoch": 0.0470948864428034, "grad_norm": 0.03905843534661932, "learning_rate": 0.00014122562674094707, "loss": 1.378, "step": 169 }, { "epoch": 0.04737355440992058, "grad_norm": 0.03975122027448924, "learning_rate": 0.00014206128133704735, "loss": 1.3815, "step": 170 }, { "epoch": 0.047652222377037756, "grad_norm": 0.03677446115177432, "learning_rate": 0.00014289693593314763, "loss": 1.3883, "step": 171 }, { "epoch": 0.04793089034415494, "grad_norm": 0.03626934755711194, "learning_rate": 0.00014373259052924788, "loss": 1.3216, "step": 172 }, { "epoch": 0.04820955831127212, "grad_norm": 0.04201577782036242, "learning_rate": 0.00014456824512534816, "loss": 1.3337, "step": 173 }, { "epoch": 0.0484882262783893, "grad_norm": 0.0395680720346748, "learning_rate": 0.00014540389972144847, "loss": 1.3082, "step": 174 }, { "epoch": 0.04876689424550648, "grad_norm": 0.03731886152780645, "learning_rate": 0.00014623955431754875, "loss": 1.3884, "step": 175 }, { "epoch": 0.049045562212623656, "grad_norm": 0.04156596984922078, "learning_rate": 0.000147075208913649, "loss": 1.3542, "step": 176 }, { "epoch": 0.04932423017974084, "grad_norm": 0.03616598936024348, "learning_rate": 0.00014791086350974929, "loss": 1.4051, "step": 177 }, { "epoch": 0.04960289814685802, "grad_norm": 0.036017623752631664, "learning_rate": 0.00014874651810584957, "loss": 1.3313, "step": 178 }, { "epoch": 0.0498815661139752, "grad_norm": 0.035933512697923456, "learning_rate": 0.00014958217270194985, "loss": 1.3694, "step": 179 }, { "epoch": 0.05016023408109238, "grad_norm": 0.04164510517343828, "learning_rate": 0.00015041782729805013, "loss": 1.3854, "step": 180 }, { "epoch": 0.050438902048209555, "grad_norm": 0.035743792357748175, "learning_rate": 0.0001512534818941504, "loss": 1.3613, "step": 181 }, { "epoch": 0.050717570015326736, "grad_norm": 0.0387436387728396, "learning_rate": 0.0001520891364902507, "loss": 1.314, "step": 182 }, { "epoch": 0.05099623798244392, "grad_norm": 0.04009063734574423, "learning_rate": 0.00015292479108635094, "loss": 1.3503, "step": 183 }, { "epoch": 0.0512749059495611, "grad_norm": 0.03941731572882044, "learning_rate": 0.00015376044568245125, "loss": 1.3485, "step": 184 }, { "epoch": 0.05155357391667828, "grad_norm": 0.0398214691043806, "learning_rate": 0.0001545961002785515, "loss": 1.343, "step": 185 }, { "epoch": 0.051832241883795455, "grad_norm": 0.03838768063732558, "learning_rate": 0.00015543175487465178, "loss": 1.4059, "step": 186 }, { "epoch": 0.052110909850912636, "grad_norm": 0.03774397813152961, "learning_rate": 0.0001562674094707521, "loss": 1.3577, "step": 187 }, { "epoch": 0.05238957781802982, "grad_norm": 0.03745526593493334, "learning_rate": 0.00015710306406685234, "loss": 1.3272, "step": 188 }, { "epoch": 0.052668245785147, "grad_norm": 0.04091806322486861, "learning_rate": 0.00015793871866295265, "loss": 1.3441, "step": 189 }, { "epoch": 0.05294691375226418, "grad_norm": 0.0431662426598939, "learning_rate": 0.0001587743732590529, "loss": 1.3793, "step": 190 }, { "epoch": 0.053225581719381354, "grad_norm": 0.041973774075651185, "learning_rate": 0.0001596100278551532, "loss": 1.3335, "step": 191 }, { "epoch": 0.053504249686498535, "grad_norm": 0.04387667879794686, "learning_rate": 0.00016044568245125347, "loss": 1.3488, "step": 192 }, { "epoch": 0.05378291765361572, "grad_norm": 0.0376026727388932, "learning_rate": 0.00016128133704735375, "loss": 1.2725, "step": 193 }, { "epoch": 0.0540615856207329, "grad_norm": 0.04545386450839355, "learning_rate": 0.000162116991643454, "loss": 1.3186, "step": 194 }, { "epoch": 0.05434025358785008, "grad_norm": 0.040886290242198225, "learning_rate": 0.0001629526462395543, "loss": 1.3417, "step": 195 }, { "epoch": 0.054618921554967254, "grad_norm": 0.03823435202461821, "learning_rate": 0.00016378830083565456, "loss": 1.3655, "step": 196 }, { "epoch": 0.054897589522084435, "grad_norm": 0.03961702456649532, "learning_rate": 0.00016462395543175487, "loss": 1.3587, "step": 197 }, { "epoch": 0.055176257489201616, "grad_norm": 0.03767587018946527, "learning_rate": 0.00016545961002785515, "loss": 1.3356, "step": 198 }, { "epoch": 0.0554549254563188, "grad_norm": 0.04038332432298699, "learning_rate": 0.0001662952646239554, "loss": 1.3191, "step": 199 }, { "epoch": 0.05573359342343598, "grad_norm": 0.04335863102952142, "learning_rate": 0.0001671309192200557, "loss": 1.3155, "step": 200 }, { "epoch": 0.05601226139055315, "grad_norm": 0.045775497228563264, "learning_rate": 0.00016796657381615597, "loss": 1.3333, "step": 201 }, { "epoch": 0.056290929357670334, "grad_norm": 0.044397583288125334, "learning_rate": 0.00016880222841225627, "loss": 1.3266, "step": 202 }, { "epoch": 0.056569597324787516, "grad_norm": 0.038492786002120354, "learning_rate": 0.00016963788300835653, "loss": 1.3365, "step": 203 }, { "epoch": 0.0568482652919047, "grad_norm": 0.04077275592560795, "learning_rate": 0.0001704735376044568, "loss": 1.325, "step": 204 }, { "epoch": 0.05712693325902188, "grad_norm": 0.0417468499082688, "learning_rate": 0.0001713091922005571, "loss": 1.3783, "step": 205 }, { "epoch": 0.05740560122613905, "grad_norm": 0.04107666892736845, "learning_rate": 0.00017214484679665737, "loss": 1.3391, "step": 206 }, { "epoch": 0.057684269193256234, "grad_norm": 0.03808960938311759, "learning_rate": 0.00017298050139275762, "loss": 1.3445, "step": 207 }, { "epoch": 0.057962937160373415, "grad_norm": 0.03729154869936284, "learning_rate": 0.00017381615598885793, "loss": 1.3689, "step": 208 }, { "epoch": 0.058241605127490596, "grad_norm": 0.03782359285614817, "learning_rate": 0.00017465181058495818, "loss": 1.3881, "step": 209 }, { "epoch": 0.05852027309460778, "grad_norm": 0.03732397091419006, "learning_rate": 0.0001754874651810585, "loss": 1.4025, "step": 210 }, { "epoch": 0.05879894106172495, "grad_norm": 0.036371981877785245, "learning_rate": 0.00017632311977715877, "loss": 1.3335, "step": 211 }, { "epoch": 0.05907760902884213, "grad_norm": 0.037298287357022446, "learning_rate": 0.00017715877437325902, "loss": 1.3137, "step": 212 }, { "epoch": 0.059356276995959315, "grad_norm": 0.03602010923485085, "learning_rate": 0.00017799442896935933, "loss": 1.3689, "step": 213 }, { "epoch": 0.059634944963076496, "grad_norm": 0.03298371621439949, "learning_rate": 0.00017883008356545959, "loss": 1.3836, "step": 214 }, { "epoch": 0.05991361293019368, "grad_norm": 0.03631845849923131, "learning_rate": 0.00017966573816155987, "loss": 1.3309, "step": 215 }, { "epoch": 0.06019228089731085, "grad_norm": 0.04631361147659327, "learning_rate": 0.00018050139275766015, "loss": 1.4069, "step": 216 }, { "epoch": 0.06047094886442803, "grad_norm": 0.042212812961369556, "learning_rate": 0.00018133704735376043, "loss": 1.3288, "step": 217 }, { "epoch": 0.060749616831545214, "grad_norm": 0.04203760509845599, "learning_rate": 0.00018217270194986074, "loss": 1.3978, "step": 218 }, { "epoch": 0.061028284798662395, "grad_norm": 0.03679325769709726, "learning_rate": 0.000183008356545961, "loss": 1.3296, "step": 219 }, { "epoch": 0.06130695276577958, "grad_norm": 0.04020888640916022, "learning_rate": 0.00018384401114206124, "loss": 1.4188, "step": 220 }, { "epoch": 0.06158562073289675, "grad_norm": 0.04430811810918821, "learning_rate": 0.00018467966573816155, "loss": 1.3386, "step": 221 }, { "epoch": 0.06186428870001393, "grad_norm": 0.03742697939960308, "learning_rate": 0.00018551532033426183, "loss": 1.3613, "step": 222 }, { "epoch": 0.06214295666713111, "grad_norm": 0.04268612639938782, "learning_rate": 0.00018635097493036208, "loss": 1.3717, "step": 223 }, { "epoch": 0.062421624634248295, "grad_norm": 0.03842758798915902, "learning_rate": 0.0001871866295264624, "loss": 1.3589, "step": 224 }, { "epoch": 0.06270029260136548, "grad_norm": 0.03417144935270837, "learning_rate": 0.00018802228412256265, "loss": 1.3059, "step": 225 }, { "epoch": 0.06297896056848265, "grad_norm": 0.04471194063350292, "learning_rate": 0.00018885793871866295, "loss": 1.3138, "step": 226 }, { "epoch": 0.06325762853559984, "grad_norm": 0.04207921585304418, "learning_rate": 0.0001896935933147632, "loss": 1.3185, "step": 227 }, { "epoch": 0.06353629650271701, "grad_norm": 0.04128777861761759, "learning_rate": 0.0001905292479108635, "loss": 1.3239, "step": 228 }, { "epoch": 0.06381496446983419, "grad_norm": 0.037738545293647015, "learning_rate": 0.00019136490250696377, "loss": 1.3657, "step": 229 }, { "epoch": 0.06409363243695138, "grad_norm": 0.03258212473233103, "learning_rate": 0.00019220055710306405, "loss": 1.318, "step": 230 }, { "epoch": 0.06437230040406855, "grad_norm": 0.035791977918305025, "learning_rate": 0.00019303621169916436, "loss": 1.3421, "step": 231 }, { "epoch": 0.06465096837118574, "grad_norm": 0.040996563109248874, "learning_rate": 0.0001938718662952646, "loss": 1.3314, "step": 232 }, { "epoch": 0.06492963633830291, "grad_norm": 0.040061324813887626, "learning_rate": 0.00019470752089136486, "loss": 1.3124, "step": 233 }, { "epoch": 0.06520830430542009, "grad_norm": 0.0399904211187729, "learning_rate": 0.00019554317548746517, "loss": 1.3312, "step": 234 }, { "epoch": 0.06548697227253727, "grad_norm": 0.035814115081118765, "learning_rate": 0.00019637883008356545, "loss": 1.307, "step": 235 }, { "epoch": 0.06576564023965445, "grad_norm": 0.03831352508433097, "learning_rate": 0.0001972144846796657, "loss": 1.3501, "step": 236 }, { "epoch": 0.06604430820677164, "grad_norm": 0.04393877349311175, "learning_rate": 0.000198050139275766, "loss": 1.3076, "step": 237 }, { "epoch": 0.06632297617388881, "grad_norm": 0.04086389679168002, "learning_rate": 0.00019888579387186627, "loss": 1.3107, "step": 238 }, { "epoch": 0.06660164414100599, "grad_norm": 0.03623670831620614, "learning_rate": 0.00019972144846796657, "loss": 1.365, "step": 239 }, { "epoch": 0.06688031210812317, "grad_norm": 0.03770464974550928, "learning_rate": 0.00020055710306406683, "loss": 1.3033, "step": 240 }, { "epoch": 0.06715898007524035, "grad_norm": 0.04375453458048074, "learning_rate": 0.0002013927576601671, "loss": 1.3111, "step": 241 }, { "epoch": 0.06743764804235754, "grad_norm": 0.038266167135978385, "learning_rate": 0.00020222841225626742, "loss": 1.2754, "step": 242 }, { "epoch": 0.06771631600947471, "grad_norm": 0.03850879415669324, "learning_rate": 0.00020306406685236767, "loss": 1.3134, "step": 243 }, { "epoch": 0.06799498397659189, "grad_norm": 0.0395287637663834, "learning_rate": 0.00020389972144846792, "loss": 1.2585, "step": 244 }, { "epoch": 0.06827365194370907, "grad_norm": 0.03660082760074572, "learning_rate": 0.00020473537604456823, "loss": 1.2947, "step": 245 }, { "epoch": 0.06855231991082625, "grad_norm": 0.03603073798604866, "learning_rate": 0.0002055710306406685, "loss": 1.2844, "step": 246 }, { "epoch": 0.06883098787794344, "grad_norm": 0.03561043112106522, "learning_rate": 0.0002064066852367688, "loss": 1.3279, "step": 247 }, { "epoch": 0.06910965584506061, "grad_norm": 0.03812320601527198, "learning_rate": 0.00020724233983286907, "loss": 1.3152, "step": 248 }, { "epoch": 0.06938832381217779, "grad_norm": 0.037958129521944446, "learning_rate": 0.00020807799442896933, "loss": 1.3054, "step": 249 }, { "epoch": 0.06966699177929497, "grad_norm": 0.038728755166226224, "learning_rate": 0.00020891364902506963, "loss": 1.2799, "step": 250 }, { "epoch": 0.06994565974641215, "grad_norm": 0.03568799612328925, "learning_rate": 0.0002097493036211699, "loss": 1.3436, "step": 251 }, { "epoch": 0.07022432771352934, "grad_norm": 0.03516485741146774, "learning_rate": 0.00021058495821727017, "loss": 1.3613, "step": 252 }, { "epoch": 0.07050299568064651, "grad_norm": 0.03529857159298365, "learning_rate": 0.00021142061281337045, "loss": 1.3154, "step": 253 }, { "epoch": 0.07078166364776368, "grad_norm": 0.039407697480436335, "learning_rate": 0.00021225626740947073, "loss": 1.3925, "step": 254 }, { "epoch": 0.07106033161488087, "grad_norm": 0.034053863413450104, "learning_rate": 0.00021309192200557104, "loss": 1.2727, "step": 255 }, { "epoch": 0.07133899958199805, "grad_norm": 0.03822481902584161, "learning_rate": 0.0002139275766016713, "loss": 1.3121, "step": 256 }, { "epoch": 0.07161766754911524, "grad_norm": 0.04486791570897054, "learning_rate": 0.00021476323119777157, "loss": 1.3082, "step": 257 }, { "epoch": 0.07189633551623241, "grad_norm": 0.03740195430828842, "learning_rate": 0.00021559888579387185, "loss": 1.2713, "step": 258 }, { "epoch": 0.07217500348334958, "grad_norm": 0.03674633925317708, "learning_rate": 0.00021643454038997213, "loss": 1.2668, "step": 259 }, { "epoch": 0.07245367145046677, "grad_norm": 0.03942431953554539, "learning_rate": 0.00021727019498607238, "loss": 1.3357, "step": 260 }, { "epoch": 0.07273233941758395, "grad_norm": 0.0369751732663348, "learning_rate": 0.0002181058495821727, "loss": 1.3373, "step": 261 }, { "epoch": 0.07301100738470113, "grad_norm": 0.042985650787072935, "learning_rate": 0.00021894150417827295, "loss": 1.3609, "step": 262 }, { "epoch": 0.07328967535181831, "grad_norm": 0.032363325451324615, "learning_rate": 0.00021977715877437325, "loss": 1.3194, "step": 263 }, { "epoch": 0.07356834331893548, "grad_norm": 0.037024337476480505, "learning_rate": 0.0002206128133704735, "loss": 1.3027, "step": 264 }, { "epoch": 0.07384701128605267, "grad_norm": 0.034274818237915845, "learning_rate": 0.0002214484679665738, "loss": 1.3368, "step": 265 }, { "epoch": 0.07412567925316985, "grad_norm": 0.03358885100437799, "learning_rate": 0.0002222841225626741, "loss": 1.3381, "step": 266 }, { "epoch": 0.07440434722028703, "grad_norm": 0.03905583640973506, "learning_rate": 0.00022311977715877435, "loss": 1.2169, "step": 267 }, { "epoch": 0.07468301518740421, "grad_norm": 0.04128308703774118, "learning_rate": 0.00022395543175487466, "loss": 1.3598, "step": 268 }, { "epoch": 0.07496168315452138, "grad_norm": 0.034519308773934526, "learning_rate": 0.0002247910863509749, "loss": 1.2902, "step": 269 }, { "epoch": 0.07524035112163857, "grad_norm": 0.03719240833528644, "learning_rate": 0.0002256267409470752, "loss": 1.2845, "step": 270 }, { "epoch": 0.07551901908875575, "grad_norm": 0.038285560615557525, "learning_rate": 0.00022646239554317547, "loss": 1.2501, "step": 271 }, { "epoch": 0.07579768705587293, "grad_norm": 0.037997776873394895, "learning_rate": 0.00022729805013927575, "loss": 1.2905, "step": 272 }, { "epoch": 0.07607635502299011, "grad_norm": 0.039375573032237494, "learning_rate": 0.000228133704735376, "loss": 1.2364, "step": 273 }, { "epoch": 0.07635502299010728, "grad_norm": 0.03778815639390618, "learning_rate": 0.0002289693593314763, "loss": 1.3173, "step": 274 }, { "epoch": 0.07663369095722447, "grad_norm": 0.036952536527226944, "learning_rate": 0.00022980501392757657, "loss": 1.2288, "step": 275 }, { "epoch": 0.07691235892434165, "grad_norm": 0.03558412960794384, "learning_rate": 0.00023064066852367687, "loss": 1.3175, "step": 276 }, { "epoch": 0.07719102689145883, "grad_norm": 0.052062695786391236, "learning_rate": 0.00023147632311977713, "loss": 1.3182, "step": 277 }, { "epoch": 0.07746969485857601, "grad_norm": 0.051887283755342774, "learning_rate": 0.0002323119777158774, "loss": 1.3439, "step": 278 }, { "epoch": 0.07774836282569318, "grad_norm": 0.0455263784330644, "learning_rate": 0.00023314763231197772, "loss": 1.2767, "step": 279 }, { "epoch": 0.07802703079281037, "grad_norm": 0.035817813495240734, "learning_rate": 0.00023398328690807797, "loss": 1.2827, "step": 280 }, { "epoch": 0.07830569875992754, "grad_norm": 0.031521032840016776, "learning_rate": 0.00023481894150417825, "loss": 1.2724, "step": 281 }, { "epoch": 0.07858436672704473, "grad_norm": 0.03669203814979694, "learning_rate": 0.00023565459610027853, "loss": 1.3324, "step": 282 }, { "epoch": 0.07886303469416191, "grad_norm": 0.04337816544384796, "learning_rate": 0.0002364902506963788, "loss": 1.277, "step": 283 }, { "epoch": 0.07914170266127908, "grad_norm": 0.043841401051926146, "learning_rate": 0.0002373259052924791, "loss": 1.2928, "step": 284 }, { "epoch": 0.07942037062839627, "grad_norm": 0.04653438753205171, "learning_rate": 0.00023816155988857937, "loss": 1.3489, "step": 285 }, { "epoch": 0.07969903859551344, "grad_norm": 0.038014248409780765, "learning_rate": 0.00023899721448467963, "loss": 1.2757, "step": 286 }, { "epoch": 0.07997770656263063, "grad_norm": 0.03868877160577901, "learning_rate": 0.00023983286908077993, "loss": 1.3731, "step": 287 }, { "epoch": 0.0802563745297478, "grad_norm": 0.04747794017565251, "learning_rate": 0.0002406685236768802, "loss": 1.3661, "step": 288 }, { "epoch": 0.08053504249686498, "grad_norm": 0.04440995800773329, "learning_rate": 0.00024150417827298047, "loss": 1.3802, "step": 289 }, { "epoch": 0.08081371046398217, "grad_norm": 0.037982485989411736, "learning_rate": 0.00024233983286908078, "loss": 1.3618, "step": 290 }, { "epoch": 0.08109237843109934, "grad_norm": 0.03835822451237514, "learning_rate": 0.00024317548746518103, "loss": 1.413, "step": 291 }, { "epoch": 0.08137104639821653, "grad_norm": 0.04795154942218908, "learning_rate": 0.00024401114206128134, "loss": 1.3603, "step": 292 }, { "epoch": 0.0816497143653337, "grad_norm": 0.04174942524844225, "learning_rate": 0.0002448467966573816, "loss": 1.2677, "step": 293 }, { "epoch": 0.08192838233245088, "grad_norm": 0.040711011370140625, "learning_rate": 0.00024568245125348184, "loss": 1.2565, "step": 294 }, { "epoch": 0.08220705029956807, "grad_norm": 0.04087372481447051, "learning_rate": 0.0002465181058495822, "loss": 1.2927, "step": 295 }, { "epoch": 0.08248571826668524, "grad_norm": 0.05341419160262984, "learning_rate": 0.0002473537604456824, "loss": 1.309, "step": 296 }, { "epoch": 0.08276438623380243, "grad_norm": 0.051418309009162624, "learning_rate": 0.00024818941504178274, "loss": 1.326, "step": 297 }, { "epoch": 0.0830430542009196, "grad_norm": 0.03703277178633016, "learning_rate": 0.00024902506963788297, "loss": 1.3449, "step": 298 }, { "epoch": 0.08332172216803678, "grad_norm": 0.04069817842435225, "learning_rate": 0.00024986072423398325, "loss": 1.2951, "step": 299 }, { "epoch": 0.08360039013515397, "grad_norm": 0.03623013416117769, "learning_rate": 0.0002506963788300836, "loss": 1.3466, "step": 300 }, { "epoch": 0.08387905810227114, "grad_norm": 0.04084948785082388, "learning_rate": 0.0002515320334261838, "loss": 1.2969, "step": 301 }, { "epoch": 0.08415772606938833, "grad_norm": 0.04400457581072801, "learning_rate": 0.0002523676880222841, "loss": 1.2722, "step": 302 }, { "epoch": 0.0844363940365055, "grad_norm": 0.03016474710264561, "learning_rate": 0.00025320334261838437, "loss": 1.3079, "step": 303 }, { "epoch": 0.08471506200362268, "grad_norm": 0.035395605029690164, "learning_rate": 0.00025403899721448465, "loss": 1.2907, "step": 304 }, { "epoch": 0.08499372997073987, "grad_norm": 0.0375481919489677, "learning_rate": 0.00025487465181058493, "loss": 1.2649, "step": 305 }, { "epoch": 0.08527239793785704, "grad_norm": 0.04802618308677891, "learning_rate": 0.0002557103064066852, "loss": 1.3048, "step": 306 }, { "epoch": 0.08555106590497423, "grad_norm": 0.044195379710511885, "learning_rate": 0.0002565459610027855, "loss": 1.3483, "step": 307 }, { "epoch": 0.0858297338720914, "grad_norm": 0.03562288567292837, "learning_rate": 0.00025738161559888577, "loss": 1.2882, "step": 308 }, { "epoch": 0.08610840183920858, "grad_norm": 0.04180397884306972, "learning_rate": 0.00025821727019498605, "loss": 1.2959, "step": 309 }, { "epoch": 0.08638706980632577, "grad_norm": 0.038986588522461015, "learning_rate": 0.00025905292479108633, "loss": 1.3237, "step": 310 }, { "epoch": 0.08666573777344294, "grad_norm": 0.04335764742744036, "learning_rate": 0.0002598885793871866, "loss": 1.3016, "step": 311 }, { "epoch": 0.08694440574056013, "grad_norm": 0.03887893076168805, "learning_rate": 0.0002607242339832869, "loss": 1.2791, "step": 312 }, { "epoch": 0.0872230737076773, "grad_norm": 0.03887571919666219, "learning_rate": 0.0002615598885793872, "loss": 1.2959, "step": 313 }, { "epoch": 0.08750174167479448, "grad_norm": 0.037356087043231366, "learning_rate": 0.00026239554317548746, "loss": 1.2577, "step": 314 }, { "epoch": 0.08778040964191167, "grad_norm": 0.037267674627406856, "learning_rate": 0.00026323119777158774, "loss": 1.2801, "step": 315 }, { "epoch": 0.08805907760902884, "grad_norm": 0.035998445020558274, "learning_rate": 0.000264066852367688, "loss": 1.2739, "step": 316 }, { "epoch": 0.08833774557614602, "grad_norm": 0.039753264389863506, "learning_rate": 0.0002649025069637883, "loss": 1.3489, "step": 317 }, { "epoch": 0.0886164135432632, "grad_norm": 0.0399186915652619, "learning_rate": 0.0002657381615598885, "loss": 1.2767, "step": 318 }, { "epoch": 0.08889508151038038, "grad_norm": 0.03653593539112823, "learning_rate": 0.00026657381615598886, "loss": 1.2507, "step": 319 }, { "epoch": 0.08917374947749757, "grad_norm": 0.03593445795881327, "learning_rate": 0.0002674094707520891, "loss": 1.319, "step": 320 }, { "epoch": 0.08945241744461474, "grad_norm": 0.03512323145400372, "learning_rate": 0.0002682451253481894, "loss": 1.2772, "step": 321 }, { "epoch": 0.08973108541173191, "grad_norm": 0.03645352438551946, "learning_rate": 0.00026908077994428965, "loss": 1.3359, "step": 322 }, { "epoch": 0.0900097533788491, "grad_norm": 0.041879029401921605, "learning_rate": 0.0002699164345403899, "loss": 1.4035, "step": 323 }, { "epoch": 0.09028842134596628, "grad_norm": 0.03374781642227113, "learning_rate": 0.00027075208913649026, "loss": 1.2085, "step": 324 }, { "epoch": 0.09056708931308347, "grad_norm": 0.040219150023632244, "learning_rate": 0.0002715877437325905, "loss": 1.3618, "step": 325 }, { "epoch": 0.09084575728020064, "grad_norm": 0.03729367021841523, "learning_rate": 0.00027242339832869077, "loss": 1.285, "step": 326 }, { "epoch": 0.09112442524731781, "grad_norm": 0.03750981645127858, "learning_rate": 0.00027325905292479105, "loss": 1.2913, "step": 327 }, { "epoch": 0.091403093214435, "grad_norm": 0.0418170876109838, "learning_rate": 0.00027409470752089133, "loss": 1.3278, "step": 328 }, { "epoch": 0.09168176118155218, "grad_norm": 0.04564010889085313, "learning_rate": 0.0002749303621169916, "loss": 1.2873, "step": 329 }, { "epoch": 0.09196042914866936, "grad_norm": 0.0382136064603039, "learning_rate": 0.0002757660167130919, "loss": 1.3227, "step": 330 }, { "epoch": 0.09223909711578654, "grad_norm": 0.0330453928756551, "learning_rate": 0.00027660167130919217, "loss": 1.2978, "step": 331 }, { "epoch": 0.09251776508290371, "grad_norm": 0.04546655224898232, "learning_rate": 0.00027743732590529245, "loss": 1.2651, "step": 332 }, { "epoch": 0.0927964330500209, "grad_norm": 0.03729098548877643, "learning_rate": 0.00027827298050139273, "loss": 1.2853, "step": 333 }, { "epoch": 0.09307510101713808, "grad_norm": 0.040595511504103725, "learning_rate": 0.000279108635097493, "loss": 1.3144, "step": 334 }, { "epoch": 0.09335376898425526, "grad_norm": 0.04072357776814218, "learning_rate": 0.0002799442896935933, "loss": 1.2463, "step": 335 }, { "epoch": 0.09363243695137244, "grad_norm": 0.03814625419214398, "learning_rate": 0.0002807799442896936, "loss": 1.352, "step": 336 }, { "epoch": 0.09391110491848961, "grad_norm": 0.029653804222801627, "learning_rate": 0.00028161559888579385, "loss": 1.2759, "step": 337 }, { "epoch": 0.0941897728856068, "grad_norm": 0.03181780152553538, "learning_rate": 0.00028245125348189414, "loss": 1.2722, "step": 338 }, { "epoch": 0.09446844085272398, "grad_norm": 0.03756551132012242, "learning_rate": 0.0002832869080779944, "loss": 1.2587, "step": 339 }, { "epoch": 0.09474710881984116, "grad_norm": 0.04019816426146754, "learning_rate": 0.0002841225626740947, "loss": 1.3551, "step": 340 }, { "epoch": 0.09502577678695834, "grad_norm": 0.03504723755195628, "learning_rate": 0.000284958217270195, "loss": 1.3287, "step": 341 }, { "epoch": 0.09530444475407551, "grad_norm": 0.03912593437513201, "learning_rate": 0.00028579387186629526, "loss": 1.213, "step": 342 }, { "epoch": 0.0955831127211927, "grad_norm": 0.03492992250614735, "learning_rate": 0.00028662952646239554, "loss": 1.2413, "step": 343 }, { "epoch": 0.09586178068830987, "grad_norm": 0.03619235728586668, "learning_rate": 0.00028746518105849576, "loss": 1.3448, "step": 344 }, { "epoch": 0.09614044865542706, "grad_norm": 0.04336982303391341, "learning_rate": 0.0002883008356545961, "loss": 1.3693, "step": 345 }, { "epoch": 0.09641911662254424, "grad_norm": 0.0400565300895545, "learning_rate": 0.0002891364902506963, "loss": 1.2474, "step": 346 }, { "epoch": 0.09669778458966141, "grad_norm": 0.04034919783984268, "learning_rate": 0.0002899721448467966, "loss": 1.3876, "step": 347 }, { "epoch": 0.0969764525567786, "grad_norm": 0.04467446707956057, "learning_rate": 0.00029080779944289694, "loss": 1.264, "step": 348 }, { "epoch": 0.09725512052389577, "grad_norm": 0.038601106819258046, "learning_rate": 0.00029164345403899717, "loss": 1.2408, "step": 349 }, { "epoch": 0.09753378849101296, "grad_norm": 0.03749574897210531, "learning_rate": 0.0002924791086350975, "loss": 1.3036, "step": 350 }, { "epoch": 0.09781245645813014, "grad_norm": 0.039381370958679135, "learning_rate": 0.00029331476323119773, "loss": 1.2345, "step": 351 }, { "epoch": 0.09809112442524731, "grad_norm": 0.04125143381523446, "learning_rate": 0.000294150417827298, "loss": 1.2438, "step": 352 }, { "epoch": 0.0983697923923645, "grad_norm": 0.04082813640302393, "learning_rate": 0.0002949860724233983, "loss": 1.2524, "step": 353 }, { "epoch": 0.09864846035948167, "grad_norm": 0.03568826466058468, "learning_rate": 0.00029582172701949857, "loss": 1.3074, "step": 354 }, { "epoch": 0.09892712832659886, "grad_norm": 0.03346926566150839, "learning_rate": 0.00029665738161559885, "loss": 1.2345, "step": 355 }, { "epoch": 0.09920579629371604, "grad_norm": 0.0349226341781067, "learning_rate": 0.00029749303621169913, "loss": 1.1989, "step": 356 }, { "epoch": 0.09948446426083321, "grad_norm": 0.04024821119025489, "learning_rate": 0.0002983286908077994, "loss": 1.2854, "step": 357 }, { "epoch": 0.0997631322279504, "grad_norm": 0.037313908729313255, "learning_rate": 0.0002991643454038997, "loss": 1.292, "step": 358 }, { "epoch": 0.10004180019506757, "grad_norm": 0.03265056473686412, "learning_rate": 0.0003, "loss": 1.2482, "step": 359 }, { "epoch": 0.10032046816218476, "grad_norm": 0.04457505594433796, "learning_rate": 0.0002999999290054706, "loss": 1.2954, "step": 360 }, { "epoch": 0.10059913612930194, "grad_norm": 0.04044682777549126, "learning_rate": 0.00029999971602194975, "loss": 1.2702, "step": 361 }, { "epoch": 0.10087780409641911, "grad_norm": 0.03555156072265829, "learning_rate": 0.00029999936104963893, "loss": 1.2506, "step": 362 }, { "epoch": 0.1011564720635363, "grad_norm": 0.03269190060695945, "learning_rate": 0.0002999988640888743, "loss": 1.3068, "step": 363 }, { "epoch": 0.10143514003065347, "grad_norm": 0.03567683223423196, "learning_rate": 0.00029999822514012616, "loss": 1.2364, "step": 364 }, { "epoch": 0.10171380799777066, "grad_norm": 0.03961713976996479, "learning_rate": 0.00029999744420399936, "loss": 1.2735, "step": 365 }, { "epoch": 0.10199247596488784, "grad_norm": 0.04155899208770768, "learning_rate": 0.00029999652128123315, "loss": 1.2489, "step": 366 }, { "epoch": 0.10227114393200501, "grad_norm": 0.043829526131339065, "learning_rate": 0.0002999954563727011, "loss": 1.3347, "step": 367 }, { "epoch": 0.1025498118991222, "grad_norm": 0.03511121104521967, "learning_rate": 0.00029999424947941135, "loss": 1.253, "step": 368 }, { "epoch": 0.10282847986623937, "grad_norm": 0.042427450928630095, "learning_rate": 0.0002999929006025063, "loss": 1.2248, "step": 369 }, { "epoch": 0.10310714783335656, "grad_norm": 0.048324418521388635, "learning_rate": 0.00029999140974326275, "loss": 1.2776, "step": 370 }, { "epoch": 0.10338581580047373, "grad_norm": 0.041413431399281726, "learning_rate": 0.00029998977690309195, "loss": 1.2384, "step": 371 }, { "epoch": 0.10366448376759091, "grad_norm": 0.033461774693597304, "learning_rate": 0.0002999880020835396, "loss": 1.3284, "step": 372 }, { "epoch": 0.1039431517347081, "grad_norm": 0.036233757212834956, "learning_rate": 0.00029998608528628565, "loss": 1.2199, "step": 373 }, { "epoch": 0.10422181970182527, "grad_norm": 0.03973986791757179, "learning_rate": 0.0002999840265131446, "loss": 1.3167, "step": 374 }, { "epoch": 0.10450048766894246, "grad_norm": 0.03838441823211355, "learning_rate": 0.00029998182576606517, "loss": 1.2106, "step": 375 }, { "epoch": 0.10477915563605963, "grad_norm": 0.03677934501469638, "learning_rate": 0.00029997948304713064, "loss": 1.2605, "step": 376 }, { "epoch": 0.10505782360317681, "grad_norm": 0.032569483031651185, "learning_rate": 0.0002999769983585587, "loss": 1.2647, "step": 377 }, { "epoch": 0.105336491570294, "grad_norm": 0.037266430945655356, "learning_rate": 0.00029997437170270117, "loss": 1.2894, "step": 378 }, { "epoch": 0.10561515953741117, "grad_norm": 0.04466026931731475, "learning_rate": 0.00029997160308204454, "loss": 1.3406, "step": 379 }, { "epoch": 0.10589382750452836, "grad_norm": 0.036008092136707366, "learning_rate": 0.0002999686924992095, "loss": 1.2795, "step": 380 }, { "epoch": 0.10617249547164553, "grad_norm": 0.04240923332623951, "learning_rate": 0.0002999656399569512, "loss": 1.2768, "step": 381 }, { "epoch": 0.10645116343876271, "grad_norm": 0.03905209447071194, "learning_rate": 0.00029996244545815926, "loss": 1.2983, "step": 382 }, { "epoch": 0.1067298314058799, "grad_norm": 0.04065415844792499, "learning_rate": 0.0002999591090058575, "loss": 1.2488, "step": 383 }, { "epoch": 0.10700849937299707, "grad_norm": 0.03762251157606461, "learning_rate": 0.0002999556306032041, "loss": 1.2659, "step": 384 }, { "epoch": 0.10728716734011426, "grad_norm": 0.041289795818664335, "learning_rate": 0.0002999520102534918, "loss": 1.2157, "step": 385 }, { "epoch": 0.10756583530723143, "grad_norm": 0.034255092109944596, "learning_rate": 0.00029994824796014766, "loss": 1.2306, "step": 386 }, { "epoch": 0.10784450327434861, "grad_norm": 0.0350644743108959, "learning_rate": 0.00029994434372673285, "loss": 1.2529, "step": 387 }, { "epoch": 0.1081231712414658, "grad_norm": 0.03670973306293993, "learning_rate": 0.00029994029755694323, "loss": 1.4282, "step": 388 }, { "epoch": 0.10840183920858297, "grad_norm": 0.042690012349982445, "learning_rate": 0.00029993610945460884, "loss": 1.2973, "step": 389 }, { "epoch": 0.10868050717570016, "grad_norm": 0.05247220131233175, "learning_rate": 0.0002999317794236941, "loss": 1.3085, "step": 390 }, { "epoch": 0.10895917514281733, "grad_norm": 0.03819312544170647, "learning_rate": 0.0002999273074682979, "loss": 1.2666, "step": 391 }, { "epoch": 0.10923784310993451, "grad_norm": 0.03838234053305805, "learning_rate": 0.0002999226935926532, "loss": 1.2746, "step": 392 }, { "epoch": 0.1095165110770517, "grad_norm": 0.04625464733879025, "learning_rate": 0.0002999179378011276, "loss": 1.2878, "step": 393 }, { "epoch": 0.10979517904416887, "grad_norm": 0.042832533412025316, "learning_rate": 0.00029991304009822277, "loss": 1.3263, "step": 394 }, { "epoch": 0.11007384701128606, "grad_norm": 0.03973229221162098, "learning_rate": 0.00029990800048857494, "loss": 1.2802, "step": 395 }, { "epoch": 0.11035251497840323, "grad_norm": 0.030864606003933277, "learning_rate": 0.00029990281897695456, "loss": 1.1859, "step": 396 }, { "epoch": 0.1106311829455204, "grad_norm": 0.037895328474979714, "learning_rate": 0.0002998974955682664, "loss": 1.3217, "step": 397 }, { "epoch": 0.1109098509126376, "grad_norm": 0.043397005337646156, "learning_rate": 0.0002998920302675496, "loss": 1.2287, "step": 398 }, { "epoch": 0.11118851887975477, "grad_norm": 0.036202404553004205, "learning_rate": 0.00029988642307997754, "loss": 1.2662, "step": 399 }, { "epoch": 0.11146718684687196, "grad_norm": 0.03912930173736913, "learning_rate": 0.00029988067401085796, "loss": 1.1928, "step": 400 }, { "epoch": 0.11174585481398913, "grad_norm": 0.033542856212233775, "learning_rate": 0.0002998747830656329, "loss": 1.3284, "step": 401 }, { "epoch": 0.1120245227811063, "grad_norm": 0.04562875203526185, "learning_rate": 0.0002998687502498786, "loss": 1.2687, "step": 402 }, { "epoch": 0.1123031907482235, "grad_norm": 0.03536051630450146, "learning_rate": 0.0002998625755693059, "loss": 1.2411, "step": 403 }, { "epoch": 0.11258185871534067, "grad_norm": 0.034091115265485424, "learning_rate": 0.0002998562590297595, "loss": 1.2177, "step": 404 }, { "epoch": 0.11286052668245786, "grad_norm": 0.03978256364032817, "learning_rate": 0.0002998498006372187, "loss": 1.2854, "step": 405 }, { "epoch": 0.11313919464957503, "grad_norm": 0.04015454414311876, "learning_rate": 0.000299843200397797, "loss": 1.2424, "step": 406 }, { "epoch": 0.1134178626166922, "grad_norm": 0.0400834354252404, "learning_rate": 0.00029983645831774204, "loss": 1.2093, "step": 407 }, { "epoch": 0.1136965305838094, "grad_norm": 0.039424257409755845, "learning_rate": 0.00029982957440343594, "loss": 1.2699, "step": 408 }, { "epoch": 0.11397519855092657, "grad_norm": 0.039566602539892214, "learning_rate": 0.00029982254866139484, "loss": 1.2226, "step": 409 }, { "epoch": 0.11425386651804376, "grad_norm": 0.03239116245122768, "learning_rate": 0.0002998153810982694, "loss": 1.2056, "step": 410 }, { "epoch": 0.11453253448516093, "grad_norm": 0.03162183234220862, "learning_rate": 0.00029980807172084436, "loss": 1.2098, "step": 411 }, { "epoch": 0.1148112024522781, "grad_norm": 0.03311469429192412, "learning_rate": 0.0002998006205360387, "loss": 1.2857, "step": 412 }, { "epoch": 0.1150898704193953, "grad_norm": 0.03997627092104008, "learning_rate": 0.00029979302755090564, "loss": 1.2721, "step": 413 }, { "epoch": 0.11536853838651247, "grad_norm": 0.04387863415043744, "learning_rate": 0.0002997852927726327, "loss": 1.3139, "step": 414 }, { "epoch": 0.11564720635362966, "grad_norm": 0.03577847461857504, "learning_rate": 0.0002997774162085415, "loss": 1.3347, "step": 415 }, { "epoch": 0.11592587432074683, "grad_norm": 0.04114839165275904, "learning_rate": 0.0002997693978660881, "loss": 1.3598, "step": 416 }, { "epoch": 0.116204542287864, "grad_norm": 0.04014674369206999, "learning_rate": 0.00029976123775286243, "loss": 1.2604, "step": 417 }, { "epoch": 0.11648321025498119, "grad_norm": 0.032781824875065556, "learning_rate": 0.0002997529358765889, "loss": 1.2425, "step": 418 }, { "epoch": 0.11676187822209837, "grad_norm": 0.036659953990480526, "learning_rate": 0.000299744492245126, "loss": 1.2703, "step": 419 }, { "epoch": 0.11704054618921556, "grad_norm": 0.03349481878372905, "learning_rate": 0.00029973590686646645, "loss": 1.2664, "step": 420 }, { "epoch": 0.11731921415633273, "grad_norm": 0.038429361417740074, "learning_rate": 0.00029972717974873707, "loss": 1.2585, "step": 421 }, { "epoch": 0.1175978821234499, "grad_norm": 0.03493423306857423, "learning_rate": 0.0002997183109001989, "loss": 1.2729, "step": 422 }, { "epoch": 0.11787655009056709, "grad_norm": 0.034966498477053264, "learning_rate": 0.00029970930032924715, "loss": 1.2836, "step": 423 }, { "epoch": 0.11815521805768427, "grad_norm": 0.03452995315990418, "learning_rate": 0.0002997001480444112, "loss": 1.2632, "step": 424 }, { "epoch": 0.11843388602480145, "grad_norm": 0.03593322750579843, "learning_rate": 0.00029969085405435444, "loss": 1.2403, "step": 425 }, { "epoch": 0.11871255399191863, "grad_norm": 0.035974001169753245, "learning_rate": 0.0002996814183678746, "loss": 1.2443, "step": 426 }, { "epoch": 0.1189912219590358, "grad_norm": 0.03916274269985071, "learning_rate": 0.0002996718409939035, "loss": 1.2285, "step": 427 }, { "epoch": 0.11926988992615299, "grad_norm": 0.03796021083673309, "learning_rate": 0.0002996621219415068, "loss": 1.3156, "step": 428 }, { "epoch": 0.11954855789327017, "grad_norm": 0.033826020962284604, "learning_rate": 0.00029965226121988467, "loss": 1.275, "step": 429 }, { "epoch": 0.11982722586038735, "grad_norm": 0.04603300963734349, "learning_rate": 0.00029964225883837123, "loss": 1.2277, "step": 430 }, { "epoch": 0.12010589382750453, "grad_norm": 0.033627145865942606, "learning_rate": 0.0002996321148064346, "loss": 1.2202, "step": 431 }, { "epoch": 0.1203845617946217, "grad_norm": 0.03443735952415964, "learning_rate": 0.000299621829133677, "loss": 1.2767, "step": 432 }, { "epoch": 0.12066322976173889, "grad_norm": 0.03972227946541685, "learning_rate": 0.0002996114018298349, "loss": 1.2567, "step": 433 }, { "epoch": 0.12094189772885607, "grad_norm": 0.031546992210430065, "learning_rate": 0.00029960083290477864, "loss": 1.2029, "step": 434 }, { "epoch": 0.12122056569597325, "grad_norm": 0.03317043527583936, "learning_rate": 0.00029959012236851275, "loss": 1.2558, "step": 435 }, { "epoch": 0.12149923366309043, "grad_norm": 0.0383371055241894, "learning_rate": 0.0002995792702311757, "loss": 1.2142, "step": 436 }, { "epoch": 0.1217779016302076, "grad_norm": 0.034336297689633484, "learning_rate": 0.00029956827650304014, "loss": 1.2062, "step": 437 }, { "epoch": 0.12205656959732479, "grad_norm": 0.03357746221127229, "learning_rate": 0.00029955714119451256, "loss": 1.2587, "step": 438 }, { "epoch": 0.12233523756444196, "grad_norm": 0.03707488185503544, "learning_rate": 0.00029954586431613366, "loss": 1.2074, "step": 439 }, { "epoch": 0.12261390553155915, "grad_norm": 0.033948517174638336, "learning_rate": 0.00029953444587857805, "loss": 1.3066, "step": 440 }, { "epoch": 0.12289257349867633, "grad_norm": 0.03539583137476195, "learning_rate": 0.0002995228858926543, "loss": 1.2565, "step": 441 }, { "epoch": 0.1231712414657935, "grad_norm": 0.03645684236669165, "learning_rate": 0.00029951118436930506, "loss": 1.2291, "step": 442 }, { "epoch": 0.12344990943291069, "grad_norm": 0.033057904710367306, "learning_rate": 0.0002994993413196069, "loss": 1.2359, "step": 443 }, { "epoch": 0.12372857740002786, "grad_norm": 0.037553448337432724, "learning_rate": 0.0002994873567547704, "loss": 1.2173, "step": 444 }, { "epoch": 0.12400724536714505, "grad_norm": 0.03578952287041568, "learning_rate": 0.00029947523068614006, "loss": 1.3233, "step": 445 }, { "epoch": 0.12428591333426223, "grad_norm": 0.03250365624640425, "learning_rate": 0.0002994629631251943, "loss": 1.2996, "step": 446 }, { "epoch": 0.1245645813013794, "grad_norm": 0.03270295403211792, "learning_rate": 0.00029945055408354563, "loss": 1.2841, "step": 447 }, { "epoch": 0.12484324926849659, "grad_norm": 0.03460972993350578, "learning_rate": 0.00029943800357294024, "loss": 1.2079, "step": 448 }, { "epoch": 0.12512191723561378, "grad_norm": 0.03492846705329515, "learning_rate": 0.00029942531160525844, "loss": 1.2893, "step": 449 }, { "epoch": 0.12540058520273095, "grad_norm": 0.03904407603803568, "learning_rate": 0.0002994124781925144, "loss": 1.1778, "step": 450 }, { "epoch": 0.12567925316984813, "grad_norm": 0.03645101985453827, "learning_rate": 0.000299399503346856, "loss": 1.2125, "step": 451 }, { "epoch": 0.1259579211369653, "grad_norm": 0.03310908039156772, "learning_rate": 0.00029938638708056526, "loss": 1.2418, "step": 452 }, { "epoch": 0.12623658910408248, "grad_norm": 0.03185167098624464, "learning_rate": 0.000299373129406058, "loss": 1.1824, "step": 453 }, { "epoch": 0.12651525707119968, "grad_norm": 0.03855146676566342, "learning_rate": 0.00029935973033588375, "loss": 1.2762, "step": 454 }, { "epoch": 0.12679392503831685, "grad_norm": 0.03633507123880622, "learning_rate": 0.00029934618988272606, "loss": 1.2846, "step": 455 }, { "epoch": 0.12707259300543403, "grad_norm": 0.031785197553858245, "learning_rate": 0.00029933250805940214, "loss": 1.2949, "step": 456 }, { "epoch": 0.1273512609725512, "grad_norm": 0.03112789802000031, "learning_rate": 0.0002993186848788633, "loss": 1.2509, "step": 457 }, { "epoch": 0.12762992893966837, "grad_norm": 0.04045628336819305, "learning_rate": 0.00029930472035419423, "loss": 1.3006, "step": 458 }, { "epoch": 0.12790859690678558, "grad_norm": 0.0352409848550557, "learning_rate": 0.00029929061449861385, "loss": 1.2663, "step": 459 }, { "epoch": 0.12818726487390275, "grad_norm": 0.033619411707288334, "learning_rate": 0.0002992763673254746, "loss": 1.237, "step": 460 }, { "epoch": 0.12846593284101993, "grad_norm": 0.0341338623463431, "learning_rate": 0.0002992619788482628, "loss": 1.2686, "step": 461 }, { "epoch": 0.1287446008081371, "grad_norm": 0.03915613674977482, "learning_rate": 0.0002992474490805985, "loss": 1.3115, "step": 462 }, { "epoch": 0.12902326877525427, "grad_norm": 0.03361836944766046, "learning_rate": 0.0002992327780362354, "loss": 1.1914, "step": 463 }, { "epoch": 0.12930193674237148, "grad_norm": 0.034595143623701856, "learning_rate": 0.00029921796572906107, "loss": 1.2051, "step": 464 }, { "epoch": 0.12958060470948865, "grad_norm": 0.03211630638499192, "learning_rate": 0.0002992030121730968, "loss": 1.2026, "step": 465 }, { "epoch": 0.12985927267660582, "grad_norm": 0.03606865234814787, "learning_rate": 0.00029918791738249746, "loss": 1.2491, "step": 466 }, { "epoch": 0.130137940643723, "grad_norm": 0.031835350096135494, "learning_rate": 0.00029917268137155174, "loss": 1.2797, "step": 467 }, { "epoch": 0.13041660861084017, "grad_norm": 0.03234378986209017, "learning_rate": 0.0002991573041546819, "loss": 1.2886, "step": 468 }, { "epoch": 0.13069527657795738, "grad_norm": 0.031821021072897054, "learning_rate": 0.00029914178574644393, "loss": 1.2096, "step": 469 }, { "epoch": 0.13097394454507455, "grad_norm": 0.03819777118972632, "learning_rate": 0.00029912612616152744, "loss": 1.1984, "step": 470 }, { "epoch": 0.13125261251219172, "grad_norm": 0.03199605488457973, "learning_rate": 0.00029911032541475574, "loss": 1.1356, "step": 471 }, { "epoch": 0.1315312804793089, "grad_norm": 0.035309271728970665, "learning_rate": 0.00029909438352108573, "loss": 1.2378, "step": 472 }, { "epoch": 0.13180994844642607, "grad_norm": 0.03657428784633037, "learning_rate": 0.0002990783004956079, "loss": 1.2443, "step": 473 }, { "epoch": 0.13208861641354327, "grad_norm": 0.029126732119635245, "learning_rate": 0.00029906207635354627, "loss": 1.287, "step": 474 }, { "epoch": 0.13236728438066045, "grad_norm": 0.03182437609782121, "learning_rate": 0.0002990457111102586, "loss": 1.2996, "step": 475 }, { "epoch": 0.13264595234777762, "grad_norm": 0.040738025294536004, "learning_rate": 0.00029902920478123605, "loss": 1.2722, "step": 476 }, { "epoch": 0.1329246203148948, "grad_norm": 0.03350161020410013, "learning_rate": 0.00029901255738210345, "loss": 1.2111, "step": 477 }, { "epoch": 0.13320328828201197, "grad_norm": 0.030905457706756186, "learning_rate": 0.0002989957689286191, "loss": 1.2367, "step": 478 }, { "epoch": 0.13348195624912917, "grad_norm": 0.031132391138530016, "learning_rate": 0.0002989788394366749, "loss": 1.3186, "step": 479 }, { "epoch": 0.13376062421624635, "grad_norm": 0.03201412261755017, "learning_rate": 0.00029896176892229615, "loss": 1.2645, "step": 480 }, { "epoch": 0.13403929218336352, "grad_norm": 0.03063610548219971, "learning_rate": 0.0002989445574016417, "loss": 1.3161, "step": 481 }, { "epoch": 0.1343179601504807, "grad_norm": 0.0337931928862511, "learning_rate": 0.00029892720489100384, "loss": 1.2351, "step": 482 }, { "epoch": 0.13459662811759787, "grad_norm": 0.035012465954637105, "learning_rate": 0.00029890971140680844, "loss": 1.227, "step": 483 }, { "epoch": 0.13487529608471507, "grad_norm": 0.033595896523660414, "learning_rate": 0.0002988920769656146, "loss": 1.2131, "step": 484 }, { "epoch": 0.13515396405183225, "grad_norm": 0.03155037063534228, "learning_rate": 0.000298874301584115, "loss": 1.2174, "step": 485 }, { "epoch": 0.13543263201894942, "grad_norm": 0.031229201614909972, "learning_rate": 0.0002988563852791358, "loss": 1.2766, "step": 486 }, { "epoch": 0.1357112999860666, "grad_norm": 0.032856623622484, "learning_rate": 0.0002988383280676364, "loss": 1.2575, "step": 487 }, { "epoch": 0.13598996795318377, "grad_norm": 0.032561315733754224, "learning_rate": 0.0002988201299667097, "loss": 1.239, "step": 488 }, { "epoch": 0.13626863592030097, "grad_norm": 0.034367899165125775, "learning_rate": 0.00029880179099358174, "loss": 1.2217, "step": 489 }, { "epoch": 0.13654730388741815, "grad_norm": 0.03293640728598069, "learning_rate": 0.00029878331116561227, "loss": 1.2764, "step": 490 }, { "epoch": 0.13682597185453532, "grad_norm": 0.03428048656113143, "learning_rate": 0.00029876469050029407, "loss": 1.2592, "step": 491 }, { "epoch": 0.1371046398216525, "grad_norm": 0.033315319173189306, "learning_rate": 0.00029874592901525334, "loss": 1.2705, "step": 492 }, { "epoch": 0.13738330778876967, "grad_norm": 0.031956669597670365, "learning_rate": 0.00029872702672824965, "loss": 1.2278, "step": 493 }, { "epoch": 0.13766197575588687, "grad_norm": 0.03333250715974156, "learning_rate": 0.00029870798365717573, "loss": 1.2868, "step": 494 }, { "epoch": 0.13794064372300405, "grad_norm": 0.03492804654421135, "learning_rate": 0.00029868879982005765, "loss": 1.2868, "step": 495 }, { "epoch": 0.13821931169012122, "grad_norm": 0.030721366537666106, "learning_rate": 0.00029866947523505476, "loss": 1.2526, "step": 496 }, { "epoch": 0.1384979796572384, "grad_norm": 0.030909108961067182, "learning_rate": 0.0002986500099204595, "loss": 1.2494, "step": 497 }, { "epoch": 0.13877664762435557, "grad_norm": 0.035973346515423645, "learning_rate": 0.00029863040389469767, "loss": 1.2724, "step": 498 }, { "epoch": 0.13905531559147277, "grad_norm": 0.031047008530174898, "learning_rate": 0.00029861065717632816, "loss": 1.2694, "step": 499 }, { "epoch": 0.13933398355858995, "grad_norm": 0.03299056646373238, "learning_rate": 0.00029859076978404315, "loss": 1.235, "step": 500 }, { "epoch": 0.13961265152570712, "grad_norm": 0.0310670980812163, "learning_rate": 0.00029857074173666797, "loss": 1.239, "step": 501 }, { "epoch": 0.1398913194928243, "grad_norm": 0.03216683795773354, "learning_rate": 0.00029855057305316093, "loss": 1.2159, "step": 502 }, { "epoch": 0.14016998745994147, "grad_norm": 0.03023645604100203, "learning_rate": 0.0002985302637526136, "loss": 1.2271, "step": 503 }, { "epoch": 0.14044865542705867, "grad_norm": 0.03490265189031319, "learning_rate": 0.00029850981385425067, "loss": 1.2858, "step": 504 }, { "epoch": 0.14072732339417585, "grad_norm": 0.03576928647898232, "learning_rate": 0.00029848922337742987, "loss": 1.3171, "step": 505 }, { "epoch": 0.14100599136129302, "grad_norm": 0.02977549560371122, "learning_rate": 0.00029846849234164204, "loss": 1.2665, "step": 506 }, { "epoch": 0.1412846593284102, "grad_norm": 0.02928962305237433, "learning_rate": 0.000298447620766511, "loss": 1.2323, "step": 507 }, { "epoch": 0.14156332729552737, "grad_norm": 0.03186165715883666, "learning_rate": 0.0002984266086717937, "loss": 1.2604, "step": 508 }, { "epoch": 0.14184199526264457, "grad_norm": 0.033459698551049216, "learning_rate": 0.00029840545607738003, "loss": 1.2309, "step": 509 }, { "epoch": 0.14212066322976175, "grad_norm": 0.03596719398554589, "learning_rate": 0.00029838416300329295, "loss": 1.1948, "step": 510 }, { "epoch": 0.14239933119687892, "grad_norm": 0.03303415092755014, "learning_rate": 0.0002983627294696883, "loss": 1.2035, "step": 511 }, { "epoch": 0.1426779991639961, "grad_norm": 0.032316451309728045, "learning_rate": 0.0002983411554968549, "loss": 1.3255, "step": 512 }, { "epoch": 0.14295666713111327, "grad_norm": 0.033415792824567546, "learning_rate": 0.0002983194411052146, "loss": 1.2359, "step": 513 }, { "epoch": 0.14323533509823047, "grad_norm": 0.033887147296120315, "learning_rate": 0.0002982975863153221, "loss": 1.3031, "step": 514 }, { "epoch": 0.14351400306534765, "grad_norm": 0.030668764375619578, "learning_rate": 0.000298275591147865, "loss": 1.2407, "step": 515 }, { "epoch": 0.14379267103246482, "grad_norm": 0.03610754168311715, "learning_rate": 0.0002982534556236637, "loss": 1.2381, "step": 516 }, { "epoch": 0.144071338999582, "grad_norm": 0.03372038523111608, "learning_rate": 0.0002982311797636717, "loss": 1.3252, "step": 517 }, { "epoch": 0.14435000696669917, "grad_norm": 0.03574905359592408, "learning_rate": 0.0002982087635889751, "loss": 1.2264, "step": 518 }, { "epoch": 0.14462867493381637, "grad_norm": 0.0360185442616471, "learning_rate": 0.00029818620712079294, "loss": 1.2593, "step": 519 }, { "epoch": 0.14490734290093354, "grad_norm": 0.031288492576710285, "learning_rate": 0.00029816351038047703, "loss": 1.189, "step": 520 }, { "epoch": 0.14518601086805072, "grad_norm": 0.030435081274563396, "learning_rate": 0.0002981406733895119, "loss": 1.201, "step": 521 }, { "epoch": 0.1454646788351679, "grad_norm": 0.031118289159488388, "learning_rate": 0.00029811769616951497, "loss": 1.2347, "step": 522 }, { "epoch": 0.14574334680228507, "grad_norm": 0.03926472912524905, "learning_rate": 0.00029809457874223637, "loss": 1.2619, "step": 523 }, { "epoch": 0.14602201476940227, "grad_norm": 0.039399631484528554, "learning_rate": 0.0002980713211295588, "loss": 1.2289, "step": 524 }, { "epoch": 0.14630068273651944, "grad_norm": 0.032473836475091625, "learning_rate": 0.00029804792335349784, "loss": 1.2766, "step": 525 }, { "epoch": 0.14657935070363662, "grad_norm": 0.03719190506045399, "learning_rate": 0.00029802438543620165, "loss": 1.2449, "step": 526 }, { "epoch": 0.1468580186707538, "grad_norm": 0.04144503592101877, "learning_rate": 0.0002980007073999511, "loss": 1.261, "step": 527 }, { "epoch": 0.14713668663787097, "grad_norm": 0.032103437974660975, "learning_rate": 0.0002979768892671597, "loss": 1.323, "step": 528 }, { "epoch": 0.14741535460498817, "grad_norm": 0.03480453496660747, "learning_rate": 0.0002979529310603734, "loss": 1.1901, "step": 529 }, { "epoch": 0.14769402257210534, "grad_norm": 0.040095141040747295, "learning_rate": 0.0002979288328022711, "loss": 1.1765, "step": 530 }, { "epoch": 0.14797269053922252, "grad_norm": 0.03578511532883455, "learning_rate": 0.00029790459451566384, "loss": 1.277, "step": 531 }, { "epoch": 0.1482513585063397, "grad_norm": 0.03703695166279059, "learning_rate": 0.00029788021622349564, "loss": 1.2488, "step": 532 }, { "epoch": 0.14853002647345687, "grad_norm": 0.03664150072963262, "learning_rate": 0.0002978556979488428, "loss": 1.2072, "step": 533 }, { "epoch": 0.14880869444057407, "grad_norm": 0.03143720108833376, "learning_rate": 0.000297831039714914, "loss": 1.2478, "step": 534 }, { "epoch": 0.14908736240769124, "grad_norm": 0.03501965310304291, "learning_rate": 0.0002978062415450507, "loss": 1.1644, "step": 535 }, { "epoch": 0.14936603037480842, "grad_norm": 0.031918968581831936, "learning_rate": 0.00029778130346272663, "loss": 1.2491, "step": 536 }, { "epoch": 0.1496446983419256, "grad_norm": 0.03529559366554864, "learning_rate": 0.0002977562254915481, "loss": 1.226, "step": 537 }, { "epoch": 0.14992336630904277, "grad_norm": 0.02869805154659447, "learning_rate": 0.00029773100765525374, "loss": 1.2417, "step": 538 }, { "epoch": 0.15020203427615997, "grad_norm": 0.03670009181658782, "learning_rate": 0.0002977056499777145, "loss": 1.2407, "step": 539 }, { "epoch": 0.15048070224327714, "grad_norm": 0.03808820094143069, "learning_rate": 0.000297680152482934, "loss": 1.2254, "step": 540 }, { "epoch": 0.15075937021039432, "grad_norm": 0.041487239677854774, "learning_rate": 0.00029765451519504776, "loss": 1.1868, "step": 541 }, { "epoch": 0.1510380381775115, "grad_norm": 0.03373119156959721, "learning_rate": 0.00029762873813832404, "loss": 1.2154, "step": 542 }, { "epoch": 0.15131670614462867, "grad_norm": 0.03833210851105886, "learning_rate": 0.0002976028213371632, "loss": 1.3028, "step": 543 }, { "epoch": 0.15159537411174587, "grad_norm": 0.041791226798413454, "learning_rate": 0.00029757676481609796, "loss": 1.2254, "step": 544 }, { "epoch": 0.15187404207886304, "grad_norm": 0.03789009759964915, "learning_rate": 0.00029755056859979317, "loss": 1.2343, "step": 545 }, { "epoch": 0.15215271004598022, "grad_norm": 0.03643837204237201, "learning_rate": 0.0002975242327130461, "loss": 1.251, "step": 546 }, { "epoch": 0.1524313780130974, "grad_norm": 0.03571554273337526, "learning_rate": 0.00029749775718078604, "loss": 1.2218, "step": 547 }, { "epoch": 0.15271004598021456, "grad_norm": 0.031134952343501825, "learning_rate": 0.00029747114202807463, "loss": 1.1644, "step": 548 }, { "epoch": 0.15298871394733177, "grad_norm": 0.03634100927399638, "learning_rate": 0.0002974443872801056, "loss": 1.2851, "step": 549 }, { "epoch": 0.15326738191444894, "grad_norm": 0.03304674248038632, "learning_rate": 0.0002974174929622048, "loss": 1.2788, "step": 550 }, { "epoch": 0.15354604988156612, "grad_norm": 0.032820736175422006, "learning_rate": 0.0002973904590998302, "loss": 1.2034, "step": 551 }, { "epoch": 0.1538247178486833, "grad_norm": 0.031244760451738827, "learning_rate": 0.00029736328571857204, "loss": 1.2452, "step": 552 }, { "epoch": 0.15410338581580046, "grad_norm": 0.040956440442661265, "learning_rate": 0.0002973359728441522, "loss": 1.2927, "step": 553 }, { "epoch": 0.15438205378291767, "grad_norm": 0.03338693989662646, "learning_rate": 0.00029730852050242516, "loss": 1.2907, "step": 554 }, { "epoch": 0.15466072175003484, "grad_norm": 0.031658219981052345, "learning_rate": 0.00029728092871937693, "loss": 1.2146, "step": 555 }, { "epoch": 0.15493938971715202, "grad_norm": 0.033175358507475716, "learning_rate": 0.0002972531975211258, "loss": 1.2191, "step": 556 }, { "epoch": 0.1552180576842692, "grad_norm": 0.037086362514976545, "learning_rate": 0.00029722532693392197, "loss": 1.2304, "step": 557 }, { "epoch": 0.15549672565138636, "grad_norm": 0.0348739053817665, "learning_rate": 0.0002971973169841475, "loss": 1.272, "step": 558 }, { "epoch": 0.15577539361850357, "grad_norm": 0.031686943555653366, "learning_rate": 0.0002971691676983165, "loss": 1.2715, "step": 559 }, { "epoch": 0.15605406158562074, "grad_norm": 0.0317423085415442, "learning_rate": 0.00029714087910307483, "loss": 1.1952, "step": 560 }, { "epoch": 0.15633272955273791, "grad_norm": 0.0344709644544451, "learning_rate": 0.00029711245122520036, "loss": 1.2642, "step": 561 }, { "epoch": 0.1566113975198551, "grad_norm": 0.03673359874586938, "learning_rate": 0.00029708388409160273, "loss": 1.2384, "step": 562 }, { "epoch": 0.15689006548697226, "grad_norm": 0.0318497836612939, "learning_rate": 0.0002970551777293234, "loss": 1.1984, "step": 563 }, { "epoch": 0.15716873345408947, "grad_norm": 0.0324160339479552, "learning_rate": 0.0002970263321655356, "loss": 1.2147, "step": 564 }, { "epoch": 0.15744740142120664, "grad_norm": 0.03386989898974232, "learning_rate": 0.00029699734742754443, "loss": 1.2495, "step": 565 }, { "epoch": 0.15772606938832381, "grad_norm": 0.03228701603921586, "learning_rate": 0.00029696822354278665, "loss": 1.3367, "step": 566 }, { "epoch": 0.158004737355441, "grad_norm": 0.03326984521923263, "learning_rate": 0.00029693896053883067, "loss": 1.168, "step": 567 }, { "epoch": 0.15828340532255816, "grad_norm": 0.03179131724831811, "learning_rate": 0.0002969095584433767, "loss": 1.3088, "step": 568 }, { "epoch": 0.15856207328967536, "grad_norm": 0.031872336210780325, "learning_rate": 0.00029688001728425667, "loss": 1.2484, "step": 569 }, { "epoch": 0.15884074125679254, "grad_norm": 0.0320352788594697, "learning_rate": 0.0002968503370894339, "loss": 1.287, "step": 570 }, { "epoch": 0.1591194092239097, "grad_norm": 0.032899027637249584, "learning_rate": 0.0002968205178870036, "loss": 1.252, "step": 571 }, { "epoch": 0.1593980771910269, "grad_norm": 0.03137680111739331, "learning_rate": 0.0002967905597051924, "loss": 1.2085, "step": 572 }, { "epoch": 0.15967674515814406, "grad_norm": 0.030183576463153537, "learning_rate": 0.00029676046257235853, "loss": 1.2402, "step": 573 }, { "epoch": 0.15995541312526126, "grad_norm": 0.031081475781780624, "learning_rate": 0.0002967302265169917, "loss": 1.2821, "step": 574 }, { "epoch": 0.16023408109237844, "grad_norm": 0.03629937006181477, "learning_rate": 0.00029669985156771325, "loss": 1.2958, "step": 575 }, { "epoch": 0.1605127490594956, "grad_norm": 0.03216426108447607, "learning_rate": 0.00029666933775327587, "loss": 1.2936, "step": 576 }, { "epoch": 0.1607914170266128, "grad_norm": 0.03168640968755723, "learning_rate": 0.00029663868510256377, "loss": 1.2086, "step": 577 }, { "epoch": 0.16107008499372996, "grad_norm": 0.03150168080151473, "learning_rate": 0.0002966078936445925, "loss": 1.2073, "step": 578 }, { "epoch": 0.16134875296084716, "grad_norm": 0.03276230293914223, "learning_rate": 0.0002965769634085091, "loss": 1.1953, "step": 579 }, { "epoch": 0.16162742092796434, "grad_norm": 0.030783589764623386, "learning_rate": 0.000296545894423592, "loss": 1.1728, "step": 580 }, { "epoch": 0.1619060888950815, "grad_norm": 0.033772694546961135, "learning_rate": 0.0002965146867192508, "loss": 1.2295, "step": 581 }, { "epoch": 0.1621847568621987, "grad_norm": 0.02991866021141229, "learning_rate": 0.0002964833403250265, "loss": 1.2591, "step": 582 }, { "epoch": 0.16246342482931586, "grad_norm": 0.03312117995199067, "learning_rate": 0.00029645185527059153, "loss": 1.2017, "step": 583 }, { "epoch": 0.16274209279643306, "grad_norm": 0.03244403766308769, "learning_rate": 0.0002964202315857494, "loss": 1.1959, "step": 584 }, { "epoch": 0.16302076076355024, "grad_norm": 0.02718651779594869, "learning_rate": 0.0002963884693004348, "loss": 1.2627, "step": 585 }, { "epoch": 0.1632994287306674, "grad_norm": 0.034266041769027096, "learning_rate": 0.00029635656844471383, "loss": 1.2266, "step": 586 }, { "epoch": 0.1635780966977846, "grad_norm": 0.03192357554309568, "learning_rate": 0.0002963245290487836, "loss": 1.1535, "step": 587 }, { "epoch": 0.16385676466490176, "grad_norm": 0.03372433295288647, "learning_rate": 0.00029629235114297237, "loss": 1.2997, "step": 588 }, { "epoch": 0.16413543263201896, "grad_norm": 0.034452958439948445, "learning_rate": 0.00029626003475773956, "loss": 1.2374, "step": 589 }, { "epoch": 0.16441410059913614, "grad_norm": 0.032081952824296975, "learning_rate": 0.00029622757992367563, "loss": 1.2179, "step": 590 }, { "epoch": 0.1646927685662533, "grad_norm": 0.031092513223434745, "learning_rate": 0.0002961949866715022, "loss": 1.2282, "step": 591 }, { "epoch": 0.16497143653337049, "grad_norm": 0.029891365122045363, "learning_rate": 0.00029616225503207173, "loss": 1.2421, "step": 592 }, { "epoch": 0.16525010450048766, "grad_norm": 0.031704762444395074, "learning_rate": 0.00029612938503636787, "loss": 1.2287, "step": 593 }, { "epoch": 0.16552877246760486, "grad_norm": 0.03637187071353171, "learning_rate": 0.0002960963767155052, "loss": 1.1959, "step": 594 }, { "epoch": 0.16580744043472204, "grad_norm": 0.031505137746574215, "learning_rate": 0.000296063230100729, "loss": 1.2229, "step": 595 }, { "epoch": 0.1660861084018392, "grad_norm": 0.03555381812891544, "learning_rate": 0.0002960299452234158, "loss": 1.2123, "step": 596 }, { "epoch": 0.16636477636895639, "grad_norm": 0.0321243515300127, "learning_rate": 0.00029599652211507283, "loss": 1.3055, "step": 597 }, { "epoch": 0.16664344433607356, "grad_norm": 0.04025715709968515, "learning_rate": 0.0002959629608073382, "loss": 1.2572, "step": 598 }, { "epoch": 0.16692211230319076, "grad_norm": 0.033381819286415544, "learning_rate": 0.0002959292613319808, "loss": 1.2345, "step": 599 }, { "epoch": 0.16720078027030794, "grad_norm": 0.030751612053479132, "learning_rate": 0.00029589542372090034, "loss": 1.2739, "step": 600 }, { "epoch": 0.1674794482374251, "grad_norm": 0.02898629192644695, "learning_rate": 0.0002958614480061274, "loss": 1.2793, "step": 601 }, { "epoch": 0.16775811620454228, "grad_norm": 0.033620128571143285, "learning_rate": 0.000295827334219823, "loss": 1.2723, "step": 602 }, { "epoch": 0.16803678417165946, "grad_norm": 0.037738221101748196, "learning_rate": 0.00029579308239427915, "loss": 1.2216, "step": 603 }, { "epoch": 0.16831545213877666, "grad_norm": 0.03420358718964332, "learning_rate": 0.0002957586925619184, "loss": 1.2385, "step": 604 }, { "epoch": 0.16859412010589384, "grad_norm": 0.03274345740442958, "learning_rate": 0.00029572416475529386, "loss": 1.224, "step": 605 }, { "epoch": 0.168872788073011, "grad_norm": 0.0333164458773006, "learning_rate": 0.00029568949900708947, "loss": 1.232, "step": 606 }, { "epoch": 0.16915145604012818, "grad_norm": 0.03448125589579309, "learning_rate": 0.0002956546953501196, "loss": 1.1926, "step": 607 }, { "epoch": 0.16943012400724536, "grad_norm": 0.03906731910986016, "learning_rate": 0.0002956197538173291, "loss": 1.2352, "step": 608 }, { "epoch": 0.16970879197436256, "grad_norm": 0.036285160064337345, "learning_rate": 0.00029558467444179337, "loss": 1.2525, "step": 609 }, { "epoch": 0.16998745994147973, "grad_norm": 0.03109105868641257, "learning_rate": 0.0002955494572567184, "loss": 1.2099, "step": 610 }, { "epoch": 0.1702661279085969, "grad_norm": 0.0342747522489378, "learning_rate": 0.0002955141022954406, "loss": 1.2677, "step": 611 }, { "epoch": 0.17054479587571408, "grad_norm": 0.03653682785157159, "learning_rate": 0.00029547860959142666, "loss": 1.2437, "step": 612 }, { "epoch": 0.17082346384283126, "grad_norm": 0.03397779640876986, "learning_rate": 0.0002954429791782738, "loss": 1.1994, "step": 613 }, { "epoch": 0.17110213180994846, "grad_norm": 0.030746933264785604, "learning_rate": 0.00029540721108970954, "loss": 1.3327, "step": 614 }, { "epoch": 0.17138079977706563, "grad_norm": 0.030562487089842265, "learning_rate": 0.0002953713053595917, "loss": 1.1881, "step": 615 }, { "epoch": 0.1716594677441828, "grad_norm": 0.03582496324166883, "learning_rate": 0.00029533526202190843, "loss": 1.2207, "step": 616 }, { "epoch": 0.17193813571129998, "grad_norm": 0.03155702496297025, "learning_rate": 0.0002952990811107782, "loss": 1.1687, "step": 617 }, { "epoch": 0.17221680367841716, "grad_norm": 0.03387361723792257, "learning_rate": 0.0002952627626604495, "loss": 1.2749, "step": 618 }, { "epoch": 0.17249547164553436, "grad_norm": 0.030352428002168228, "learning_rate": 0.0002952263067053013, "loss": 1.2139, "step": 619 }, { "epoch": 0.17277413961265153, "grad_norm": 0.03892828513164867, "learning_rate": 0.0002951897132798424, "loss": 1.1723, "step": 620 }, { "epoch": 0.1730528075797687, "grad_norm": 0.03346392210882643, "learning_rate": 0.0002951529824187121, "loss": 1.2637, "step": 621 }, { "epoch": 0.17333147554688588, "grad_norm": 0.03235125482908361, "learning_rate": 0.00029511611415667944, "loss": 1.2801, "step": 622 }, { "epoch": 0.17361014351400306, "grad_norm": 0.2762380873792376, "learning_rate": 0.00029507910852864377, "loss": 1.2532, "step": 623 }, { "epoch": 0.17388881148112026, "grad_norm": 0.03410112304217243, "learning_rate": 0.00029504196556963435, "loss": 1.2076, "step": 624 }, { "epoch": 0.17416747944823743, "grad_norm": 0.034177238055424455, "learning_rate": 0.00029500468531481057, "loss": 1.2414, "step": 625 }, { "epoch": 0.1744461474153546, "grad_norm": 0.032246423798919226, "learning_rate": 0.0002949672677994615, "loss": 1.2103, "step": 626 }, { "epoch": 0.17472481538247178, "grad_norm": 0.030585096241688467, "learning_rate": 0.0002949297130590065, "loss": 1.2502, "step": 627 }, { "epoch": 0.17500348334958896, "grad_norm": 0.0341517878675751, "learning_rate": 0.00029489202112899454, "loss": 1.2481, "step": 628 }, { "epoch": 0.17528215131670616, "grad_norm": 0.03206322106789338, "learning_rate": 0.00029485419204510464, "loss": 1.3309, "step": 629 }, { "epoch": 0.17556081928382333, "grad_norm": 0.03640053053395937, "learning_rate": 0.00029481622584314555, "loss": 1.2486, "step": 630 }, { "epoch": 0.1758394872509405, "grad_norm": 0.0324733111254804, "learning_rate": 0.00029477812255905583, "loss": 1.2104, "step": 631 }, { "epoch": 0.17611815521805768, "grad_norm": 0.031243407334748403, "learning_rate": 0.0002947398822289037, "loss": 1.2289, "step": 632 }, { "epoch": 0.17639682318517486, "grad_norm": 0.03685364274928173, "learning_rate": 0.00029470150488888745, "loss": 1.2338, "step": 633 }, { "epoch": 0.17667549115229203, "grad_norm": 0.0352694192880338, "learning_rate": 0.00029466299057533465, "loss": 1.2853, "step": 634 }, { "epoch": 0.17695415911940923, "grad_norm": 0.04186145268437417, "learning_rate": 0.0002946243393247028, "loss": 1.3655, "step": 635 }, { "epoch": 0.1772328270865264, "grad_norm": 0.03345862020308538, "learning_rate": 0.0002945855511735788, "loss": 1.2819, "step": 636 }, { "epoch": 0.17751149505364358, "grad_norm": 0.0288004768785742, "learning_rate": 0.0002945466261586795, "loss": 1.2515, "step": 637 }, { "epoch": 0.17779016302076076, "grad_norm": 0.0306240592130457, "learning_rate": 0.00029450756431685085, "loss": 1.218, "step": 638 }, { "epoch": 0.17806883098787793, "grad_norm": 0.03606719882729208, "learning_rate": 0.00029446836568506864, "loss": 1.2691, "step": 639 }, { "epoch": 0.17834749895499513, "grad_norm": 0.0309885243642602, "learning_rate": 0.00029442903030043807, "loss": 1.271, "step": 640 }, { "epoch": 0.1786261669221123, "grad_norm": 0.028897609854402376, "learning_rate": 0.00029438955820019374, "loss": 1.2222, "step": 641 }, { "epoch": 0.17890483488922948, "grad_norm": 0.03196798079120124, "learning_rate": 0.00029434994942169963, "loss": 1.2165, "step": 642 }, { "epoch": 0.17918350285634665, "grad_norm": 0.02918640637911947, "learning_rate": 0.00029431020400244926, "loss": 1.2051, "step": 643 }, { "epoch": 0.17946217082346383, "grad_norm": 0.03445123119382498, "learning_rate": 0.0002942703219800654, "loss": 1.1739, "step": 644 }, { "epoch": 0.17974083879058103, "grad_norm": 0.03293927085376705, "learning_rate": 0.00029423030339230003, "loss": 1.2883, "step": 645 }, { "epoch": 0.1800195067576982, "grad_norm": 0.038212546053950074, "learning_rate": 0.00029419014827703453, "loss": 1.2669, "step": 646 }, { "epoch": 0.18029817472481538, "grad_norm": 0.029862536421020484, "learning_rate": 0.0002941498566722795, "loss": 1.2296, "step": 647 }, { "epoch": 0.18057684269193255, "grad_norm": 0.03250391378026314, "learning_rate": 0.00029410942861617475, "loss": 1.1705, "step": 648 }, { "epoch": 0.18085551065904973, "grad_norm": 0.03406600653519755, "learning_rate": 0.0002940688641469892, "loss": 1.2075, "step": 649 }, { "epoch": 0.18113417862616693, "grad_norm": 0.030770298578135007, "learning_rate": 0.00029402816330312086, "loss": 1.3156, "step": 650 }, { "epoch": 0.1814128465932841, "grad_norm": 0.029217063155958806, "learning_rate": 0.00029398732612309695, "loss": 1.1711, "step": 651 }, { "epoch": 0.18169151456040128, "grad_norm": 0.033373323247100545, "learning_rate": 0.00029394635264557367, "loss": 1.195, "step": 652 }, { "epoch": 0.18197018252751845, "grad_norm": 0.03198744426971185, "learning_rate": 0.0002939052429093363, "loss": 1.2467, "step": 653 }, { "epoch": 0.18224885049463563, "grad_norm": 0.032672717037886406, "learning_rate": 0.00029386399695329905, "loss": 1.1986, "step": 654 }, { "epoch": 0.18252751846175283, "grad_norm": 0.033917474512994754, "learning_rate": 0.00029382261481650506, "loss": 1.2269, "step": 655 }, { "epoch": 0.18280618642887, "grad_norm": 0.02976741701702417, "learning_rate": 0.00029378109653812635, "loss": 1.2054, "step": 656 }, { "epoch": 0.18308485439598718, "grad_norm": 0.03725221714704739, "learning_rate": 0.0002937394421574639, "loss": 1.2206, "step": 657 }, { "epoch": 0.18336352236310435, "grad_norm": 0.03284488691461176, "learning_rate": 0.00029369765171394756, "loss": 1.2628, "step": 658 }, { "epoch": 0.18364219033022153, "grad_norm": 0.03125819754063577, "learning_rate": 0.00029365572524713576, "loss": 1.1859, "step": 659 }, { "epoch": 0.18392085829733873, "grad_norm": 0.029615376234914666, "learning_rate": 0.000293613662796716, "loss": 1.2494, "step": 660 }, { "epoch": 0.1841995262644559, "grad_norm": 0.03178442206629729, "learning_rate": 0.0002935714644025042, "loss": 1.2376, "step": 661 }, { "epoch": 0.18447819423157308, "grad_norm": 0.031117852597061964, "learning_rate": 0.0002935291301044452, "loss": 1.0939, "step": 662 }, { "epoch": 0.18475686219869025, "grad_norm": 0.03474969669223066, "learning_rate": 0.0002934866599426122, "loss": 1.2363, "step": 663 }, { "epoch": 0.18503553016580743, "grad_norm": 0.0319012233603725, "learning_rate": 0.00029344405395720737, "loss": 1.176, "step": 664 }, { "epoch": 0.18531419813292463, "grad_norm": 0.028796626783525788, "learning_rate": 0.00029340131218856124, "loss": 1.1909, "step": 665 }, { "epoch": 0.1855928661000418, "grad_norm": 0.028860764015560723, "learning_rate": 0.0002933584346771328, "loss": 1.2801, "step": 666 }, { "epoch": 0.18587153406715898, "grad_norm": 0.034264172369688574, "learning_rate": 0.0002933154214635097, "loss": 1.2863, "step": 667 }, { "epoch": 0.18615020203427615, "grad_norm": 0.03388705464826639, "learning_rate": 0.00029327227258840803, "loss": 1.2558, "step": 668 }, { "epoch": 0.18642887000139333, "grad_norm": 0.032576081713449334, "learning_rate": 0.0002932289880926721, "loss": 1.2187, "step": 669 }, { "epoch": 0.18670753796851053, "grad_norm": 0.035029091333189113, "learning_rate": 0.00029318556801727493, "loss": 1.1753, "step": 670 }, { "epoch": 0.1869862059356277, "grad_norm": 0.03524698134683569, "learning_rate": 0.0002931420124033176, "loss": 1.273, "step": 671 }, { "epoch": 0.18726487390274488, "grad_norm": 0.029941304446871552, "learning_rate": 0.0002930983212920295, "loss": 1.2492, "step": 672 }, { "epoch": 0.18754354186986205, "grad_norm": 0.03324484148159335, "learning_rate": 0.0002930544947247684, "loss": 1.2515, "step": 673 }, { "epoch": 0.18782220983697923, "grad_norm": 0.032844848918596174, "learning_rate": 0.00029301053274302045, "loss": 1.2738, "step": 674 }, { "epoch": 0.18810087780409643, "grad_norm": 0.03143774120494214, "learning_rate": 0.0002929664353883995, "loss": 1.2452, "step": 675 }, { "epoch": 0.1883795457712136, "grad_norm": 0.02872088845536618, "learning_rate": 0.000292922202702648, "loss": 1.2349, "step": 676 }, { "epoch": 0.18865821373833078, "grad_norm": 0.030438470004820677, "learning_rate": 0.0002928778347276363, "loss": 1.2122, "step": 677 }, { "epoch": 0.18893688170544795, "grad_norm": 0.03588975895551123, "learning_rate": 0.00029283333150536276, "loss": 1.2208, "step": 678 }, { "epoch": 0.18921554967256513, "grad_norm": 0.037964764285418146, "learning_rate": 0.00029278869307795396, "loss": 1.2371, "step": 679 }, { "epoch": 0.18949421763968233, "grad_norm": 0.031115966395500145, "learning_rate": 0.0002927439194876644, "loss": 1.2673, "step": 680 }, { "epoch": 0.1897728856067995, "grad_norm": 0.033635162912311015, "learning_rate": 0.0002926990107768763, "loss": 1.1861, "step": 681 }, { "epoch": 0.19005155357391668, "grad_norm": 0.0324477851045557, "learning_rate": 0.00029265396698810014, "loss": 1.2116, "step": 682 }, { "epoch": 0.19033022154103385, "grad_norm": 0.031925666247301386, "learning_rate": 0.00029260878816397395, "loss": 1.3223, "step": 683 }, { "epoch": 0.19060888950815102, "grad_norm": 0.029200252443867997, "learning_rate": 0.0002925634743472638, "loss": 1.2161, "step": 684 }, { "epoch": 0.19088755747526823, "grad_norm": 0.03647630258758756, "learning_rate": 0.00029251802558086346, "loss": 1.2271, "step": 685 }, { "epoch": 0.1911662254423854, "grad_norm": 0.03136809650983609, "learning_rate": 0.00029247244190779443, "loss": 1.2064, "step": 686 }, { "epoch": 0.19144489340950258, "grad_norm": 0.03300948014931483, "learning_rate": 0.0002924267233712059, "loss": 1.2531, "step": 687 }, { "epoch": 0.19172356137661975, "grad_norm": 0.03281169562936837, "learning_rate": 0.00029238087001437485, "loss": 1.1924, "step": 688 }, { "epoch": 0.19200222934373692, "grad_norm": 0.03342848154240899, "learning_rate": 0.00029233488188070566, "loss": 1.3032, "step": 689 }, { "epoch": 0.19228089731085413, "grad_norm": 0.03185681452275513, "learning_rate": 0.0002922887590137305, "loss": 1.2234, "step": 690 }, { "epoch": 0.1925595652779713, "grad_norm": 0.029327012899203794, "learning_rate": 0.0002922425014571089, "loss": 1.3597, "step": 691 }, { "epoch": 0.19283823324508848, "grad_norm": 0.030683973239651458, "learning_rate": 0.00029219610925462807, "loss": 1.2002, "step": 692 }, { "epoch": 0.19311690121220565, "grad_norm": 0.033604104845350445, "learning_rate": 0.0002921495824502025, "loss": 1.1541, "step": 693 }, { "epoch": 0.19339556917932282, "grad_norm": 0.033955354961977005, "learning_rate": 0.00029210292108787424, "loss": 1.2687, "step": 694 }, { "epoch": 0.19367423714644003, "grad_norm": 0.033010048348530545, "learning_rate": 0.00029205612521181255, "loss": 1.2326, "step": 695 }, { "epoch": 0.1939529051135572, "grad_norm": 0.028766066656968087, "learning_rate": 0.0002920091948663142, "loss": 1.1769, "step": 696 }, { "epoch": 0.19423157308067437, "grad_norm": 0.03078376615933629, "learning_rate": 0.00029196213009580316, "loss": 1.288, "step": 697 }, { "epoch": 0.19451024104779155, "grad_norm": 0.033574307640636125, "learning_rate": 0.0002919149309448305, "loss": 1.286, "step": 698 }, { "epoch": 0.19478890901490872, "grad_norm": 0.029675863166482755, "learning_rate": 0.00029186759745807484, "loss": 1.1899, "step": 699 }, { "epoch": 0.19506757698202593, "grad_norm": 0.029533163620540554, "learning_rate": 0.0002918201296803417, "loss": 1.1787, "step": 700 }, { "epoch": 0.1953462449491431, "grad_norm": 0.03333872024123354, "learning_rate": 0.00029177252765656366, "loss": 1.263, "step": 701 }, { "epoch": 0.19562491291626027, "grad_norm": 0.02985799230730299, "learning_rate": 0.00029172479143180067, "loss": 1.2043, "step": 702 }, { "epoch": 0.19590358088337745, "grad_norm": 0.03695094214151629, "learning_rate": 0.00029167692105123945, "loss": 1.222, "step": 703 }, { "epoch": 0.19618224885049462, "grad_norm": 0.03501800630089795, "learning_rate": 0.0002916289165601938, "loss": 1.2225, "step": 704 }, { "epoch": 0.19646091681761182, "grad_norm": 0.03158673711733493, "learning_rate": 0.0002915807780041045, "loss": 1.1457, "step": 705 }, { "epoch": 0.196739584784729, "grad_norm": 0.0335523571347806, "learning_rate": 0.00029153250542853914, "loss": 1.2529, "step": 706 }, { "epoch": 0.19701825275184617, "grad_norm": 0.029796712798822934, "learning_rate": 0.0002914840988791923, "loss": 1.1863, "step": 707 }, { "epoch": 0.19729692071896335, "grad_norm": 0.029917653782959814, "learning_rate": 0.0002914355584018853, "loss": 1.2454, "step": 708 }, { "epoch": 0.19757558868608052, "grad_norm": 0.02863685728098729, "learning_rate": 0.0002913868840425663, "loss": 1.1986, "step": 709 }, { "epoch": 0.19785425665319772, "grad_norm": 0.029864505223681916, "learning_rate": 0.00029133807584731013, "loss": 1.1697, "step": 710 }, { "epoch": 0.1981329246203149, "grad_norm": 0.026434081037196418, "learning_rate": 0.0002912891338623182, "loss": 1.2531, "step": 711 }, { "epoch": 0.19841159258743207, "grad_norm": 0.03390507175347794, "learning_rate": 0.0002912400581339188, "loss": 1.2212, "step": 712 }, { "epoch": 0.19869026055454925, "grad_norm": 0.028814584178117934, "learning_rate": 0.0002911908487085667, "loss": 1.2241, "step": 713 }, { "epoch": 0.19896892852166642, "grad_norm": 0.030539704785422245, "learning_rate": 0.00029114150563284324, "loss": 1.2583, "step": 714 }, { "epoch": 0.19924759648878362, "grad_norm": 0.03111673779097384, "learning_rate": 0.0002910920289534562, "loss": 1.2055, "step": 715 }, { "epoch": 0.1995262644559008, "grad_norm": 0.029567918713413712, "learning_rate": 0.0002910424187172399, "loss": 1.2244, "step": 716 }, { "epoch": 0.19980493242301797, "grad_norm": 0.030106149455426043, "learning_rate": 0.0002909926749711552, "loss": 1.2245, "step": 717 }, { "epoch": 0.20008360039013515, "grad_norm": 0.0302980257923478, "learning_rate": 0.00029094279776228904, "loss": 1.2092, "step": 718 }, { "epoch": 0.20036226835725232, "grad_norm": 0.03039529947266515, "learning_rate": 0.000290892787137855, "loss": 1.2208, "step": 719 }, { "epoch": 0.20064093632436952, "grad_norm": 0.03264444958759995, "learning_rate": 0.0002908426431451928, "loss": 1.2196, "step": 720 }, { "epoch": 0.2009196042914867, "grad_norm": 0.030556605595877435, "learning_rate": 0.0002907923658317684, "loss": 1.1914, "step": 721 }, { "epoch": 0.20119827225860387, "grad_norm": 0.03794325768029324, "learning_rate": 0.00029074195524517404, "loss": 1.1964, "step": 722 }, { "epoch": 0.20147694022572105, "grad_norm": 0.031874106936574415, "learning_rate": 0.00029069141143312795, "loss": 1.1825, "step": 723 }, { "epoch": 0.20175560819283822, "grad_norm": 0.03531334153537209, "learning_rate": 0.00029064073444347473, "loss": 1.192, "step": 724 }, { "epoch": 0.20203427615995542, "grad_norm": 0.03179624874467737, "learning_rate": 0.00029058992432418485, "loss": 1.2195, "step": 725 }, { "epoch": 0.2023129441270726, "grad_norm": 0.03170503830716214, "learning_rate": 0.0002905389811233548, "loss": 1.2343, "step": 726 }, { "epoch": 0.20259161209418977, "grad_norm": 0.030169057911507813, "learning_rate": 0.0002904879048892071, "loss": 1.2854, "step": 727 }, { "epoch": 0.20287028006130695, "grad_norm": 0.028476225560777415, "learning_rate": 0.00029043669567009025, "loss": 1.2314, "step": 728 }, { "epoch": 0.20314894802842412, "grad_norm": 0.03282722586186673, "learning_rate": 0.00029038535351447853, "loss": 1.2065, "step": 729 }, { "epoch": 0.20342761599554132, "grad_norm": 0.033195823784900075, "learning_rate": 0.00029033387847097217, "loss": 1.1412, "step": 730 }, { "epoch": 0.2037062839626585, "grad_norm": 0.03342823644488795, "learning_rate": 0.0002902822705882971, "loss": 1.2739, "step": 731 }, { "epoch": 0.20398495192977567, "grad_norm": 0.03573018125224969, "learning_rate": 0.0002902305299153049, "loss": 1.1927, "step": 732 }, { "epoch": 0.20426361989689285, "grad_norm": 0.030981558075224718, "learning_rate": 0.0002901786565009731, "loss": 1.2081, "step": 733 }, { "epoch": 0.20454228786401002, "grad_norm": 0.02930678475988091, "learning_rate": 0.0002901266503944047, "loss": 1.2145, "step": 734 }, { "epoch": 0.20482095583112722, "grad_norm": 0.036078922618339025, "learning_rate": 0.00029007451164482837, "loss": 1.1821, "step": 735 }, { "epoch": 0.2050996237982444, "grad_norm": 0.0315560505592102, "learning_rate": 0.0002900222403015983, "loss": 1.1877, "step": 736 }, { "epoch": 0.20537829176536157, "grad_norm": 0.0334054862334709, "learning_rate": 0.00028996983641419425, "loss": 1.1858, "step": 737 }, { "epoch": 0.20565695973247874, "grad_norm": 0.04201578181880877, "learning_rate": 0.00028991730003222134, "loss": 1.2261, "step": 738 }, { "epoch": 0.20593562769959592, "grad_norm": 0.03047050585229659, "learning_rate": 0.00028986463120541023, "loss": 1.2183, "step": 739 }, { "epoch": 0.20621429566671312, "grad_norm": 0.03237488113077084, "learning_rate": 0.000289811829983617, "loss": 1.1404, "step": 740 }, { "epoch": 0.2064929636338303, "grad_norm": 0.029564743029408903, "learning_rate": 0.00028975889641682276, "loss": 1.2449, "step": 741 }, { "epoch": 0.20677163160094747, "grad_norm": 0.03255815870336853, "learning_rate": 0.00028970583055513424, "loss": 1.1929, "step": 742 }, { "epoch": 0.20705029956806464, "grad_norm": 0.030298653105629045, "learning_rate": 0.0002896526324487831, "loss": 1.2384, "step": 743 }, { "epoch": 0.20732896753518182, "grad_norm": 0.02694767372272005, "learning_rate": 0.0002895993021481266, "loss": 1.2517, "step": 744 }, { "epoch": 0.20760763550229902, "grad_norm": 0.027105824682383064, "learning_rate": 0.00028954583970364656, "loss": 1.2107, "step": 745 }, { "epoch": 0.2078863034694162, "grad_norm": 0.03014435501508531, "learning_rate": 0.00028949224516595045, "loss": 1.2024, "step": 746 }, { "epoch": 0.20816497143653337, "grad_norm": 0.0280943765345841, "learning_rate": 0.0002894385185857703, "loss": 1.2262, "step": 747 }, { "epoch": 0.20844363940365054, "grad_norm": 0.028200250173294805, "learning_rate": 0.00028938466001396347, "loss": 1.1331, "step": 748 }, { "epoch": 0.20872230737076772, "grad_norm": 0.0320757609140061, "learning_rate": 0.0002893306695015122, "loss": 1.2724, "step": 749 }, { "epoch": 0.20900097533788492, "grad_norm": 0.02977577975844749, "learning_rate": 0.0002892765470995235, "loss": 1.2285, "step": 750 }, { "epoch": 0.2092796433050021, "grad_norm": 0.029625156331136583, "learning_rate": 0.0002892222928592292, "loss": 1.1931, "step": 751 }, { "epoch": 0.20955831127211927, "grad_norm": 0.029326297281886182, "learning_rate": 0.00028916790683198624, "loss": 1.2003, "step": 752 }, { "epoch": 0.20983697923923644, "grad_norm": 0.02899175433759914, "learning_rate": 0.000289113389069276, "loss": 1.2629, "step": 753 }, { "epoch": 0.21011564720635362, "grad_norm": 0.02945270369868384, "learning_rate": 0.0002890587396227046, "loss": 1.2343, "step": 754 }, { "epoch": 0.21039431517347082, "grad_norm": 0.031094921037894033, "learning_rate": 0.0002890039585440028, "loss": 1.2963, "step": 755 }, { "epoch": 0.210672983140588, "grad_norm": 0.03651482141328621, "learning_rate": 0.00028894904588502623, "loss": 1.2789, "step": 756 }, { "epoch": 0.21095165110770517, "grad_norm": 0.029911035477468838, "learning_rate": 0.00028889400169775477, "loss": 1.1974, "step": 757 }, { "epoch": 0.21123031907482234, "grad_norm": 0.027602879693413813, "learning_rate": 0.0002888388260342928, "loss": 1.295, "step": 758 }, { "epoch": 0.21150898704193952, "grad_norm": 0.030165425764972522, "learning_rate": 0.00028878351894686945, "loss": 1.204, "step": 759 }, { "epoch": 0.21178765500905672, "grad_norm": 0.03212501289874619, "learning_rate": 0.0002887280804878379, "loss": 1.1988, "step": 760 }, { "epoch": 0.2120663229761739, "grad_norm": 0.033180144379316094, "learning_rate": 0.0002886725107096759, "loss": 1.2523, "step": 761 }, { "epoch": 0.21234499094329107, "grad_norm": 0.027884608114542176, "learning_rate": 0.00028861680966498545, "loss": 1.2425, "step": 762 }, { "epoch": 0.21262365891040824, "grad_norm": 0.03203850023574579, "learning_rate": 0.00028856097740649286, "loss": 1.2163, "step": 763 }, { "epoch": 0.21290232687752542, "grad_norm": 0.029686804014248298, "learning_rate": 0.00028850501398704854, "loss": 1.1946, "step": 764 }, { "epoch": 0.21318099484464262, "grad_norm": 0.032233051761576884, "learning_rate": 0.00028844891945962716, "loss": 1.2775, "step": 765 }, { "epoch": 0.2134596628117598, "grad_norm": 0.028943565648642953, "learning_rate": 0.0002883926938773274, "loss": 1.2408, "step": 766 }, { "epoch": 0.21373833077887697, "grad_norm": 0.03065737415503869, "learning_rate": 0.000288336337293372, "loss": 1.2691, "step": 767 }, { "epoch": 0.21401699874599414, "grad_norm": 0.03130621186591336, "learning_rate": 0.0002882798497611079, "loss": 1.2257, "step": 768 }, { "epoch": 0.21429566671311132, "grad_norm": 0.03220510067159029, "learning_rate": 0.00028822323133400566, "loss": 1.2097, "step": 769 }, { "epoch": 0.21457433468022852, "grad_norm": 0.03029019677589418, "learning_rate": 0.00028816648206566003, "loss": 1.2724, "step": 770 }, { "epoch": 0.2148530026473457, "grad_norm": 0.02936460974367629, "learning_rate": 0.00028810960200978956, "loss": 1.2525, "step": 771 }, { "epoch": 0.21513167061446287, "grad_norm": 0.03058794912294211, "learning_rate": 0.00028805259122023646, "loss": 1.1951, "step": 772 }, { "epoch": 0.21541033858158004, "grad_norm": 0.02754010413776028, "learning_rate": 0.0002879954497509668, "loss": 1.1687, "step": 773 }, { "epoch": 0.21568900654869722, "grad_norm": 0.03127972932986371, "learning_rate": 0.0002879381776560704, "loss": 1.2103, "step": 774 }, { "epoch": 0.21596767451581442, "grad_norm": 0.028314146428764668, "learning_rate": 0.0002878807749897606, "loss": 1.242, "step": 775 }, { "epoch": 0.2162463424829316, "grad_norm": 0.03189143073422141, "learning_rate": 0.0002878232418063744, "loss": 1.2132, "step": 776 }, { "epoch": 0.21652501045004877, "grad_norm": 0.027178960386265345, "learning_rate": 0.0002877655781603724, "loss": 1.2293, "step": 777 }, { "epoch": 0.21680367841716594, "grad_norm": 0.028317933702712454, "learning_rate": 0.0002877077841063386, "loss": 1.2867, "step": 778 }, { "epoch": 0.21708234638428311, "grad_norm": 0.036983884928400594, "learning_rate": 0.0002876498596989805, "loss": 1.1817, "step": 779 }, { "epoch": 0.21736101435140032, "grad_norm": 0.03240285088932405, "learning_rate": 0.0002875918049931291, "loss": 1.2325, "step": 780 }, { "epoch": 0.2176396823185175, "grad_norm": 0.02925472150684447, "learning_rate": 0.0002875336200437384, "loss": 1.2213, "step": 781 }, { "epoch": 0.21791835028563467, "grad_norm": 0.035568293456674616, "learning_rate": 0.00028747530490588606, "loss": 1.1725, "step": 782 }, { "epoch": 0.21819701825275184, "grad_norm": 0.028521415715862822, "learning_rate": 0.0002874168596347728, "loss": 1.2202, "step": 783 }, { "epoch": 0.21847568621986901, "grad_norm": 0.03078579174030746, "learning_rate": 0.00028735828428572244, "loss": 1.2716, "step": 784 }, { "epoch": 0.21875435418698622, "grad_norm": 0.02966582460421171, "learning_rate": 0.0002872995789141822, "loss": 1.1634, "step": 785 }, { "epoch": 0.2190330221541034, "grad_norm": 0.02754603255794348, "learning_rate": 0.0002872407435757221, "loss": 1.2096, "step": 786 }, { "epoch": 0.21931169012122056, "grad_norm": 0.030733733672169243, "learning_rate": 0.00028718177832603536, "loss": 1.2725, "step": 787 }, { "epoch": 0.21959035808833774, "grad_norm": 0.08630639145784218, "learning_rate": 0.0002871226832209381, "loss": 1.2287, "step": 788 }, { "epoch": 0.2198690260554549, "grad_norm": 0.029666565969298023, "learning_rate": 0.0002870634583163694, "loss": 1.1842, "step": 789 }, { "epoch": 0.22014769402257212, "grad_norm": 0.029881150462222006, "learning_rate": 0.0002870041036683912, "loss": 1.1999, "step": 790 }, { "epoch": 0.2204263619896893, "grad_norm": 0.02936625760952875, "learning_rate": 0.0002869446193331881, "loss": 1.2155, "step": 791 }, { "epoch": 0.22070502995680646, "grad_norm": 0.033801865075315476, "learning_rate": 0.00028688500536706775, "loss": 1.1844, "step": 792 }, { "epoch": 0.22098369792392364, "grad_norm": 0.03327564730104723, "learning_rate": 0.0002868252618264603, "loss": 1.2562, "step": 793 }, { "epoch": 0.2212623658910408, "grad_norm": 0.03275344126327679, "learning_rate": 0.0002867653887679186, "loss": 1.1786, "step": 794 }, { "epoch": 0.22154103385815802, "grad_norm": 0.03425186702302083, "learning_rate": 0.00028670538624811807, "loss": 1.211, "step": 795 }, { "epoch": 0.2218197018252752, "grad_norm": 0.03572367539602343, "learning_rate": 0.0002866452543238569, "loss": 1.2367, "step": 796 }, { "epoch": 0.22209836979239236, "grad_norm": 0.03247920362433536, "learning_rate": 0.00028658499305205535, "loss": 1.1513, "step": 797 }, { "epoch": 0.22237703775950954, "grad_norm": 0.03320063996862529, "learning_rate": 0.0002865246024897565, "loss": 1.2311, "step": 798 }, { "epoch": 0.2226557057266267, "grad_norm": 0.036831915791145024, "learning_rate": 0.0002864640826941257, "loss": 1.2096, "step": 799 }, { "epoch": 0.22293437369374391, "grad_norm": 0.030633454674668867, "learning_rate": 0.0002864034337224505, "loss": 1.2236, "step": 800 }, { "epoch": 0.2232130416608611, "grad_norm": 0.029893627565152316, "learning_rate": 0.00028634265563214095, "loss": 1.1916, "step": 801 }, { "epoch": 0.22349170962797826, "grad_norm": 0.04091699622036334, "learning_rate": 0.0002862817484807291, "loss": 1.2216, "step": 802 }, { "epoch": 0.22377037759509544, "grad_norm": 0.03563146358440802, "learning_rate": 0.00028622071232586936, "loss": 1.223, "step": 803 }, { "epoch": 0.2240490455622126, "grad_norm": 0.03581445438943783, "learning_rate": 0.00028615954722533815, "loss": 1.2284, "step": 804 }, { "epoch": 0.22432771352932981, "grad_norm": 0.029693700805500753, "learning_rate": 0.000286098253237034, "loss": 1.2044, "step": 805 }, { "epoch": 0.224606381496447, "grad_norm": 0.04045724156143032, "learning_rate": 0.0002860368304189773, "loss": 1.2144, "step": 806 }, { "epoch": 0.22488504946356416, "grad_norm": 0.037513763665251484, "learning_rate": 0.00028597527882931066, "loss": 1.1592, "step": 807 }, { "epoch": 0.22516371743068134, "grad_norm": 0.04033652246043391, "learning_rate": 0.00028591359852629835, "loss": 1.1282, "step": 808 }, { "epoch": 0.2254423853977985, "grad_norm": 0.037162938237324535, "learning_rate": 0.0002858517895683265, "loss": 1.1756, "step": 809 }, { "epoch": 0.2257210533649157, "grad_norm": 0.0328698553837938, "learning_rate": 0.0002857898520139032, "loss": 1.1624, "step": 810 }, { "epoch": 0.2259997213320329, "grad_norm": 0.03517802699527156, "learning_rate": 0.0002857277859216581, "loss": 1.2067, "step": 811 }, { "epoch": 0.22627838929915006, "grad_norm": 0.03504461898191018, "learning_rate": 0.0002856655913503425, "loss": 1.1907, "step": 812 }, { "epoch": 0.22655705726626724, "grad_norm": 0.03370488982512282, "learning_rate": 0.0002856032683588295, "loss": 1.2235, "step": 813 }, { "epoch": 0.2268357252333844, "grad_norm": 0.027329470823207604, "learning_rate": 0.0002855408170061136, "loss": 1.1729, "step": 814 }, { "epoch": 0.2271143932005016, "grad_norm": 0.03428896787248513, "learning_rate": 0.0002854782373513109, "loss": 1.1673, "step": 815 }, { "epoch": 0.2273930611676188, "grad_norm": 0.03611403842182311, "learning_rate": 0.00028541552945365883, "loss": 1.2419, "step": 816 }, { "epoch": 0.22767172913473596, "grad_norm": 0.034511106987087814, "learning_rate": 0.00028535269337251636, "loss": 1.1229, "step": 817 }, { "epoch": 0.22795039710185314, "grad_norm": 0.0303040907868437, "learning_rate": 0.00028528972916736374, "loss": 1.1659, "step": 818 }, { "epoch": 0.2282290650689703, "grad_norm": 0.02911998676455371, "learning_rate": 0.0002852266368978024, "loss": 1.2227, "step": 819 }, { "epoch": 0.2285077330360875, "grad_norm": 0.03261340090836511, "learning_rate": 0.00028516341662355516, "loss": 1.2308, "step": 820 }, { "epoch": 0.2287864010032047, "grad_norm": 0.02921707494733937, "learning_rate": 0.000285100068404466, "loss": 1.1617, "step": 821 }, { "epoch": 0.22906506897032186, "grad_norm": 0.030565455370204535, "learning_rate": 0.0002850365923004998, "loss": 1.2028, "step": 822 }, { "epoch": 0.22934373693743904, "grad_norm": 0.03110415477574272, "learning_rate": 0.0002849729883717427, "loss": 1.1992, "step": 823 }, { "epoch": 0.2296224049045562, "grad_norm": 0.02773341375676052, "learning_rate": 0.0002849092566784018, "loss": 1.1474, "step": 824 }, { "epoch": 0.2299010728716734, "grad_norm": 0.03222106921235472, "learning_rate": 0.00028484539728080514, "loss": 1.2206, "step": 825 }, { "epoch": 0.2301797408387906, "grad_norm": 0.029118701385041232, "learning_rate": 0.00028478141023940154, "loss": 1.0952, "step": 826 }, { "epoch": 0.23045840880590776, "grad_norm": 0.028544330285277832, "learning_rate": 0.00028471729561476084, "loss": 1.2334, "step": 827 }, { "epoch": 0.23073707677302494, "grad_norm": 0.03512337726540897, "learning_rate": 0.00028465305346757347, "loss": 1.2035, "step": 828 }, { "epoch": 0.2310157447401421, "grad_norm": 0.028754764531371324, "learning_rate": 0.00028458868385865065, "loss": 1.2514, "step": 829 }, { "epoch": 0.2312944127072593, "grad_norm": 0.03170349203497191, "learning_rate": 0.00028452418684892433, "loss": 1.1413, "step": 830 }, { "epoch": 0.23157308067437649, "grad_norm": 0.02875360214724969, "learning_rate": 0.00028445956249944685, "loss": 1.2431, "step": 831 }, { "epoch": 0.23185174864149366, "grad_norm": 0.029139109575966996, "learning_rate": 0.00028439481087139123, "loss": 1.2418, "step": 832 }, { "epoch": 0.23213041660861083, "grad_norm": 0.029484291241721083, "learning_rate": 0.00028432993202605106, "loss": 1.1974, "step": 833 }, { "epoch": 0.232409084575728, "grad_norm": 0.027320737073748293, "learning_rate": 0.00028426492602484014, "loss": 1.1832, "step": 834 }, { "epoch": 0.2326877525428452, "grad_norm": 0.026810274701596578, "learning_rate": 0.0002841997929292928, "loss": 1.1742, "step": 835 }, { "epoch": 0.23296642050996239, "grad_norm": 0.028726382677481628, "learning_rate": 0.00028413453280106365, "loss": 1.2399, "step": 836 }, { "epoch": 0.23324508847707956, "grad_norm": 0.02746520401870028, "learning_rate": 0.00028406914570192743, "loss": 1.1939, "step": 837 }, { "epoch": 0.23352375644419673, "grad_norm": 0.028008102674588066, "learning_rate": 0.00028400363169377917, "loss": 1.2055, "step": 838 }, { "epoch": 0.2338024244113139, "grad_norm": 0.033048322610744876, "learning_rate": 0.0002839379908386341, "loss": 1.187, "step": 839 }, { "epoch": 0.2340810923784311, "grad_norm": 0.03217206924758465, "learning_rate": 0.00028387222319862734, "loss": 1.1738, "step": 840 }, { "epoch": 0.23435976034554828, "grad_norm": 0.02985424254145104, "learning_rate": 0.00028380632883601424, "loss": 1.1503, "step": 841 }, { "epoch": 0.23463842831266546, "grad_norm": 0.03602694522310205, "learning_rate": 0.0002837403078131699, "loss": 1.2047, "step": 842 }, { "epoch": 0.23491709627978263, "grad_norm": 0.035221969541459466, "learning_rate": 0.00028367416019258937, "loss": 1.2539, "step": 843 }, { "epoch": 0.2351957642468998, "grad_norm": 0.03178127128930625, "learning_rate": 0.00028360788603688775, "loss": 1.2491, "step": 844 }, { "epoch": 0.235474432214017, "grad_norm": 0.02830777233551559, "learning_rate": 0.00028354148540879953, "loss": 1.2384, "step": 845 }, { "epoch": 0.23575310018113418, "grad_norm": 0.03660532651228503, "learning_rate": 0.00028347495837117927, "loss": 1.2696, "step": 846 }, { "epoch": 0.23603176814825136, "grad_norm": 0.03192778000982763, "learning_rate": 0.00028340830498700095, "loss": 1.1654, "step": 847 }, { "epoch": 0.23631043611536853, "grad_norm": 0.03268712335896338, "learning_rate": 0.0002833415253193584, "loss": 1.1481, "step": 848 }, { "epoch": 0.2365891040824857, "grad_norm": 0.029500193993104464, "learning_rate": 0.00028327461943146464, "loss": 1.2827, "step": 849 }, { "epoch": 0.2368677720496029, "grad_norm": 0.032638215998844686, "learning_rate": 0.00028320758738665245, "loss": 1.2306, "step": 850 }, { "epoch": 0.23714644001672008, "grad_norm": 0.029198848896496293, "learning_rate": 0.00028314042924837396, "loss": 1.1338, "step": 851 }, { "epoch": 0.23742510798383726, "grad_norm": 0.033426860550312514, "learning_rate": 0.0002830731450802006, "loss": 1.1494, "step": 852 }, { "epoch": 0.23770377595095443, "grad_norm": 0.031431717344926495, "learning_rate": 0.00028300573494582324, "loss": 1.1972, "step": 853 }, { "epoch": 0.2379824439180716, "grad_norm": 0.03317464796857756, "learning_rate": 0.0002829381989090517, "loss": 1.2187, "step": 854 }, { "epoch": 0.2382611118851888, "grad_norm": 0.030431831164430868, "learning_rate": 0.00028287053703381543, "loss": 1.2187, "step": 855 }, { "epoch": 0.23853977985230598, "grad_norm": 0.03045579834670146, "learning_rate": 0.00028280274938416255, "loss": 1.229, "step": 856 }, { "epoch": 0.23881844781942316, "grad_norm": 0.029163694481551896, "learning_rate": 0.0002827348360242604, "loss": 1.1975, "step": 857 }, { "epoch": 0.23909711578654033, "grad_norm": 0.029488391945541332, "learning_rate": 0.00028266679701839545, "loss": 1.248, "step": 858 }, { "epoch": 0.2393757837536575, "grad_norm": 0.029665651293663615, "learning_rate": 0.000282598632430973, "loss": 1.2187, "step": 859 }, { "epoch": 0.2396544517207747, "grad_norm": 0.03059918273439699, "learning_rate": 0.0002825303423265171, "loss": 1.2484, "step": 860 }, { "epoch": 0.23993311968789188, "grad_norm": 0.031092596302116332, "learning_rate": 0.00028246192676967084, "loss": 1.2952, "step": 861 }, { "epoch": 0.24021178765500906, "grad_norm": 0.030473449451533307, "learning_rate": 0.00028239338582519593, "loss": 1.2017, "step": 862 }, { "epoch": 0.24049045562212623, "grad_norm": 0.02877366381042146, "learning_rate": 0.00028232471955797275, "loss": 1.154, "step": 863 }, { "epoch": 0.2407691235892434, "grad_norm": 0.03269777230480638, "learning_rate": 0.00028225592803300046, "loss": 1.1935, "step": 864 }, { "epoch": 0.2410477915563606, "grad_norm": 0.02943629361093985, "learning_rate": 0.00028218701131539663, "loss": 1.1804, "step": 865 }, { "epoch": 0.24132645952347778, "grad_norm": 0.026902059580154805, "learning_rate": 0.0002821179694703974, "loss": 1.1907, "step": 866 }, { "epoch": 0.24160512749059496, "grad_norm": 0.029778712774734036, "learning_rate": 0.00028204880256335726, "loss": 1.2194, "step": 867 }, { "epoch": 0.24188379545771213, "grad_norm": 0.028230873083258112, "learning_rate": 0.00028197951065974926, "loss": 1.19, "step": 868 }, { "epoch": 0.2421624634248293, "grad_norm": 0.028072201330806568, "learning_rate": 0.0002819100938251647, "loss": 1.1389, "step": 869 }, { "epoch": 0.2424411313919465, "grad_norm": 0.026438930704970637, "learning_rate": 0.00028184055212531313, "loss": 1.1864, "step": 870 }, { "epoch": 0.24271979935906368, "grad_norm": 0.029179930741893217, "learning_rate": 0.00028177088562602215, "loss": 1.2191, "step": 871 }, { "epoch": 0.24299846732618086, "grad_norm": 0.02773273547589574, "learning_rate": 0.0002817010943932378, "loss": 1.1714, "step": 872 }, { "epoch": 0.24327713529329803, "grad_norm": 0.027657979485207995, "learning_rate": 0.0002816311784930239, "loss": 1.2654, "step": 873 }, { "epoch": 0.2435558032604152, "grad_norm": 0.029577552077285123, "learning_rate": 0.0002815611379915625, "loss": 1.1969, "step": 874 }, { "epoch": 0.2438344712275324, "grad_norm": 0.02729013996501855, "learning_rate": 0.0002814909729551534, "loss": 1.2255, "step": 875 }, { "epoch": 0.24411313919464958, "grad_norm": 0.027788649096849267, "learning_rate": 0.0002814206834502145, "loss": 1.1958, "step": 876 }, { "epoch": 0.24439180716176676, "grad_norm": 0.03083544261249631, "learning_rate": 0.0002813502695432813, "loss": 1.2766, "step": 877 }, { "epoch": 0.24467047512888393, "grad_norm": 0.031935555680789894, "learning_rate": 0.00028127973130100717, "loss": 1.2047, "step": 878 }, { "epoch": 0.2449491430960011, "grad_norm": 0.02865729822675791, "learning_rate": 0.0002812090687901632, "loss": 1.1774, "step": 879 }, { "epoch": 0.2452278110631183, "grad_norm": 0.028957672601802778, "learning_rate": 0.00028113828207763816, "loss": 1.15, "step": 880 }, { "epoch": 0.24550647903023548, "grad_norm": 0.028922618296531054, "learning_rate": 0.0002810673712304381, "loss": 1.1699, "step": 881 }, { "epoch": 0.24578514699735265, "grad_norm": 0.029900354927513407, "learning_rate": 0.000280996336315687, "loss": 1.2905, "step": 882 }, { "epoch": 0.24606381496446983, "grad_norm": 0.030729913177330495, "learning_rate": 0.000280925177400626, "loss": 1.2521, "step": 883 }, { "epoch": 0.246342482931587, "grad_norm": 0.03201638767699121, "learning_rate": 0.00028085389455261353, "loss": 1.2127, "step": 884 }, { "epoch": 0.2466211508987042, "grad_norm": 0.028331239184826534, "learning_rate": 0.0002807824878391257, "loss": 1.243, "step": 885 }, { "epoch": 0.24689981886582138, "grad_norm": 0.028161457381165326, "learning_rate": 0.0002807109573277556, "loss": 1.1867, "step": 886 }, { "epoch": 0.24717848683293855, "grad_norm": 0.029173326954527547, "learning_rate": 0.0002806393030862134, "loss": 1.1322, "step": 887 }, { "epoch": 0.24745715480005573, "grad_norm": 0.02983044841692644, "learning_rate": 0.0002805675251823268, "loss": 1.1989, "step": 888 }, { "epoch": 0.2477358227671729, "grad_norm": 0.029512691201397653, "learning_rate": 0.0002804956236840402, "loss": 1.1293, "step": 889 }, { "epoch": 0.2480144907342901, "grad_norm": 0.028451863072342248, "learning_rate": 0.0002804235986594151, "loss": 1.1383, "step": 890 }, { "epoch": 0.24829315870140728, "grad_norm": 0.03461336924215736, "learning_rate": 0.00028035145017662983, "loss": 1.2828, "step": 891 }, { "epoch": 0.24857182666852445, "grad_norm": 0.028900850568776018, "learning_rate": 0.0002802791783039799, "loss": 1.1767, "step": 892 }, { "epoch": 0.24885049463564163, "grad_norm": 0.03161671722036808, "learning_rate": 0.00028020678310987737, "loss": 1.2166, "step": 893 }, { "epoch": 0.2491291626027588, "grad_norm": 0.027851837448806763, "learning_rate": 0.00028013426466285096, "loss": 1.1516, "step": 894 }, { "epoch": 0.249407830569876, "grad_norm": 0.02993974237805629, "learning_rate": 0.00028006162303154624, "loss": 1.2294, "step": 895 }, { "epoch": 0.24968649853699318, "grad_norm": 0.028958696838163094, "learning_rate": 0.0002799888582847253, "loss": 1.2477, "step": 896 }, { "epoch": 0.24996516650411035, "grad_norm": 0.03744633024180111, "learning_rate": 0.0002799159704912668, "loss": 1.1527, "step": 897 }, { "epoch": 0.25024383447122756, "grad_norm": 0.03128219177913374, "learning_rate": 0.000279842959720166, "loss": 1.2374, "step": 898 }, { "epoch": 0.25052250243834473, "grad_norm": 0.02876772322540513, "learning_rate": 0.00027976982604053423, "loss": 1.2089, "step": 899 }, { "epoch": 0.2508011704054619, "grad_norm": 0.02595691492648724, "learning_rate": 0.0002796965695215995, "loss": 1.2058, "step": 900 }, { "epoch": 0.2510798383725791, "grad_norm": 0.027282086460047903, "learning_rate": 0.0002796231902327059, "loss": 1.1902, "step": 901 }, { "epoch": 0.25135850633969625, "grad_norm": 0.032636491149783585, "learning_rate": 0.0002795496882433138, "loss": 1.1985, "step": 902 }, { "epoch": 0.2516371743068134, "grad_norm": 0.029015435597665843, "learning_rate": 0.0002794760636229998, "loss": 1.1987, "step": 903 }, { "epoch": 0.2519158422739306, "grad_norm": 0.02634838996284572, "learning_rate": 0.00027940231644145647, "loss": 1.117, "step": 904 }, { "epoch": 0.2521945102410478, "grad_norm": 0.025598796379539338, "learning_rate": 0.00027932844676849245, "loss": 1.1777, "step": 905 }, { "epoch": 0.25247317820816495, "grad_norm": 0.02759805725896378, "learning_rate": 0.00027925445467403225, "loss": 1.1907, "step": 906 }, { "epoch": 0.2527518461752821, "grad_norm": 0.03111491985378998, "learning_rate": 0.00027918034022811637, "loss": 1.1525, "step": 907 }, { "epoch": 0.25303051414239935, "grad_norm": 0.027063556151074832, "learning_rate": 0.00027910610350090104, "loss": 1.1952, "step": 908 }, { "epoch": 0.25330918210951653, "grad_norm": 0.029753324670131375, "learning_rate": 0.0002790317445626583, "loss": 1.2353, "step": 909 }, { "epoch": 0.2535878500766337, "grad_norm": 0.027632753736768604, "learning_rate": 0.00027895726348377585, "loss": 1.2433, "step": 910 }, { "epoch": 0.2538665180437509, "grad_norm": 0.028219576539031066, "learning_rate": 0.0002788826603347571, "loss": 1.2541, "step": 911 }, { "epoch": 0.25414518601086805, "grad_norm": 0.03164929628435335, "learning_rate": 0.00027880793518622073, "loss": 1.2299, "step": 912 }, { "epoch": 0.2544238539779852, "grad_norm": 0.03249601396068038, "learning_rate": 0.0002787330881089013, "loss": 1.1845, "step": 913 }, { "epoch": 0.2547025219451024, "grad_norm": 0.02925094901495183, "learning_rate": 0.0002786581191736484, "loss": 1.226, "step": 914 }, { "epoch": 0.2549811899122196, "grad_norm": 0.02725182486539517, "learning_rate": 0.00027858302845142736, "loss": 1.1452, "step": 915 }, { "epoch": 0.25525985787933675, "grad_norm": 0.030045880760025505, "learning_rate": 0.0002785078160133184, "loss": 1.2702, "step": 916 }, { "epoch": 0.2555385258464539, "grad_norm": 0.03437066045622484, "learning_rate": 0.0002784324819305172, "loss": 1.2633, "step": 917 }, { "epoch": 0.25581719381357115, "grad_norm": 0.026865027752643024, "learning_rate": 0.00027835702627433456, "loss": 1.2289, "step": 918 }, { "epoch": 0.2560958617806883, "grad_norm": 0.02981019464618125, "learning_rate": 0.00027828144911619634, "loss": 1.2267, "step": 919 }, { "epoch": 0.2563745297478055, "grad_norm": 0.031173538347632007, "learning_rate": 0.00027820575052764335, "loss": 1.2428, "step": 920 }, { "epoch": 0.2566531977149227, "grad_norm": 0.028069647078395255, "learning_rate": 0.0002781299305803315, "loss": 1.1673, "step": 921 }, { "epoch": 0.25693186568203985, "grad_norm": 0.027326993275807253, "learning_rate": 0.0002780539893460313, "loss": 1.179, "step": 922 }, { "epoch": 0.257210533649157, "grad_norm": 0.02615493639957378, "learning_rate": 0.00027797792689662846, "loss": 1.1861, "step": 923 }, { "epoch": 0.2574892016162742, "grad_norm": 0.028455154258675758, "learning_rate": 0.000277901743304123, "loss": 1.2312, "step": 924 }, { "epoch": 0.2577678695833914, "grad_norm": 0.02647863305420611, "learning_rate": 0.00027782543864063, "loss": 1.232, "step": 925 }, { "epoch": 0.25804653755050855, "grad_norm": 0.02587589805073937, "learning_rate": 0.0002777490129783789, "loss": 1.1991, "step": 926 }, { "epoch": 0.2583252055176257, "grad_norm": 0.029189351641699654, "learning_rate": 0.00027767246638971376, "loss": 1.1698, "step": 927 }, { "epoch": 0.25860387348474295, "grad_norm": 0.02689318432293204, "learning_rate": 0.0002775957989470931, "loss": 1.1795, "step": 928 }, { "epoch": 0.2588825414518601, "grad_norm": 0.026143166276756412, "learning_rate": 0.00027751901072308984, "loss": 1.2208, "step": 929 }, { "epoch": 0.2591612094189773, "grad_norm": 0.02826997395491209, "learning_rate": 0.00027744210179039125, "loss": 1.2239, "step": 930 }, { "epoch": 0.2594398773860945, "grad_norm": 0.0302340301271777, "learning_rate": 0.00027736507222179887, "loss": 1.2302, "step": 931 }, { "epoch": 0.25971854535321165, "grad_norm": 0.02633574198851119, "learning_rate": 0.0002772879220902283, "loss": 1.1877, "step": 932 }, { "epoch": 0.2599972133203288, "grad_norm": 0.02853582726361333, "learning_rate": 0.00027721065146870945, "loss": 1.1937, "step": 933 }, { "epoch": 0.260275881287446, "grad_norm": 0.028335368426218244, "learning_rate": 0.00027713326043038623, "loss": 1.1935, "step": 934 }, { "epoch": 0.2605545492545632, "grad_norm": 0.026513981750256484, "learning_rate": 0.0002770557490485165, "loss": 1.2161, "step": 935 }, { "epoch": 0.26083321722168035, "grad_norm": 0.028276410950949144, "learning_rate": 0.00027697811739647193, "loss": 1.1969, "step": 936 }, { "epoch": 0.2611118851887975, "grad_norm": 0.030152112472123124, "learning_rate": 0.00027690036554773835, "loss": 1.1675, "step": 937 }, { "epoch": 0.26139055315591475, "grad_norm": 0.02893106694126961, "learning_rate": 0.000276822493575915, "loss": 1.2468, "step": 938 }, { "epoch": 0.2616692211230319, "grad_norm": 0.028594630139034346, "learning_rate": 0.00027674450155471507, "loss": 1.2079, "step": 939 }, { "epoch": 0.2619478890901491, "grad_norm": 0.02891484797935135, "learning_rate": 0.0002766663895579653, "loss": 1.2412, "step": 940 }, { "epoch": 0.2622265570572663, "grad_norm": 0.027942611622127425, "learning_rate": 0.00027658815765960605, "loss": 1.2252, "step": 941 }, { "epoch": 0.26250522502438345, "grad_norm": 0.029100580491235463, "learning_rate": 0.0002765098059336911, "loss": 1.2124, "step": 942 }, { "epoch": 0.2627838929915006, "grad_norm": 0.02872625816140515, "learning_rate": 0.0002764313344543878, "loss": 1.1848, "step": 943 }, { "epoch": 0.2630625609586178, "grad_norm": 0.02703243510080367, "learning_rate": 0.00027635274329597663, "loss": 1.1518, "step": 944 }, { "epoch": 0.26334122892573497, "grad_norm": 0.03249448577743148, "learning_rate": 0.00027627403253285154, "loss": 1.2084, "step": 945 }, { "epoch": 0.26361989689285215, "grad_norm": 0.034241054409852575, "learning_rate": 0.00027619520223951975, "loss": 1.2735, "step": 946 }, { "epoch": 0.2638985648599693, "grad_norm": 0.026717677903242972, "learning_rate": 0.0002761162524906013, "loss": 1.2038, "step": 947 }, { "epoch": 0.26417723282708655, "grad_norm": 0.030434258892648444, "learning_rate": 0.0002760371833608297, "loss": 1.139, "step": 948 }, { "epoch": 0.2644559007942037, "grad_norm": 0.030764306268186162, "learning_rate": 0.00027595799492505127, "loss": 1.1778, "step": 949 }, { "epoch": 0.2647345687613209, "grad_norm": 0.02705070122581133, "learning_rate": 0.0002758786872582252, "loss": 1.2507, "step": 950 }, { "epoch": 0.2650132367284381, "grad_norm": 0.031505273734383554, "learning_rate": 0.0002757992604354237, "loss": 1.1906, "step": 951 }, { "epoch": 0.26529190469555525, "grad_norm": 0.028879769145302357, "learning_rate": 0.00027571971453183166, "loss": 1.197, "step": 952 }, { "epoch": 0.2655705726626724, "grad_norm": 0.03090529910324884, "learning_rate": 0.00027564004962274684, "loss": 1.1652, "step": 953 }, { "epoch": 0.2658492406297896, "grad_norm": 0.02946708094268516, "learning_rate": 0.00027556026578357944, "loss": 1.1537, "step": 954 }, { "epoch": 0.26612790859690677, "grad_norm": 0.02725814996265266, "learning_rate": 0.0002754803630898523, "loss": 1.1691, "step": 955 }, { "epoch": 0.26640657656402394, "grad_norm": 0.03233451104033823, "learning_rate": 0.0002754003416172009, "loss": 1.1655, "step": 956 }, { "epoch": 0.2666852445311411, "grad_norm": 0.03089859033662423, "learning_rate": 0.0002753202014413731, "loss": 1.2183, "step": 957 }, { "epoch": 0.26696391249825835, "grad_norm": 0.026540249288881622, "learning_rate": 0.00027523994263822896, "loss": 1.2607, "step": 958 }, { "epoch": 0.2672425804653755, "grad_norm": 0.03145233041490099, "learning_rate": 0.00027515956528374107, "loss": 1.1692, "step": 959 }, { "epoch": 0.2675212484324927, "grad_norm": 0.02799595876177431, "learning_rate": 0.000275079069453994, "loss": 1.1774, "step": 960 }, { "epoch": 0.26779991639960987, "grad_norm": 0.029693993872239297, "learning_rate": 0.0002749984552251848, "loss": 1.2115, "step": 961 }, { "epoch": 0.26807858436672705, "grad_norm": 0.02897978240410119, "learning_rate": 0.0002749177226736222, "loss": 1.1804, "step": 962 }, { "epoch": 0.2683572523338442, "grad_norm": 0.028232384822610628, "learning_rate": 0.0002748368718757272, "loss": 1.1728, "step": 963 }, { "epoch": 0.2686359203009614, "grad_norm": 0.02987320366833796, "learning_rate": 0.0002747559029080327, "loss": 1.1582, "step": 964 }, { "epoch": 0.26891458826807857, "grad_norm": 0.030782014512188594, "learning_rate": 0.00027467481584718336, "loss": 1.1766, "step": 965 }, { "epoch": 0.26919325623519574, "grad_norm": 0.027888049297504814, "learning_rate": 0.00027459361076993566, "loss": 1.1421, "step": 966 }, { "epoch": 0.2694719242023129, "grad_norm": 0.030661239868919655, "learning_rate": 0.0002745122877531579, "loss": 1.2321, "step": 967 }, { "epoch": 0.26975059216943015, "grad_norm": 0.026422423381665432, "learning_rate": 0.00027443084687382984, "loss": 1.1729, "step": 968 }, { "epoch": 0.2700292601365473, "grad_norm": 0.02898916254852715, "learning_rate": 0.000274349288209043, "loss": 1.223, "step": 969 }, { "epoch": 0.2703079281036645, "grad_norm": 0.03246135640834918, "learning_rate": 0.0002742676118360003, "loss": 1.1604, "step": 970 }, { "epoch": 0.27058659607078167, "grad_norm": 0.0296581963026962, "learning_rate": 0.000274185817832016, "loss": 1.1875, "step": 971 }, { "epoch": 0.27086526403789885, "grad_norm": 0.028128358293463884, "learning_rate": 0.0002741039062745158, "loss": 1.1625, "step": 972 }, { "epoch": 0.271143932005016, "grad_norm": 0.026173468876550126, "learning_rate": 0.00027402187724103675, "loss": 1.1598, "step": 973 }, { "epoch": 0.2714225999721332, "grad_norm": 0.0332652717467563, "learning_rate": 0.00027393973080922694, "loss": 1.1801, "step": 974 }, { "epoch": 0.27170126793925037, "grad_norm": 0.03391541875595992, "learning_rate": 0.0002738574670568457, "loss": 1.1912, "step": 975 }, { "epoch": 0.27197993590636754, "grad_norm": 0.03296442106715249, "learning_rate": 0.0002737750860617634, "loss": 1.2053, "step": 976 }, { "epoch": 0.2722586038734847, "grad_norm": 0.02902626882744729, "learning_rate": 0.0002736925879019613, "loss": 1.2253, "step": 977 }, { "epoch": 0.27253727184060195, "grad_norm": 0.03463531154540163, "learning_rate": 0.0002736099726555317, "loss": 1.2077, "step": 978 }, { "epoch": 0.2728159398077191, "grad_norm": 0.03162445089814455, "learning_rate": 0.0002735272404006777, "loss": 1.1765, "step": 979 }, { "epoch": 0.2730946077748363, "grad_norm": 0.02910282058232896, "learning_rate": 0.000273444391215713, "loss": 1.1635, "step": 980 }, { "epoch": 0.27337327574195347, "grad_norm": 0.02814476684970541, "learning_rate": 0.00027336142517906233, "loss": 1.2228, "step": 981 }, { "epoch": 0.27365194370907064, "grad_norm": 0.0265117790921299, "learning_rate": 0.00027327834236926066, "loss": 1.1742, "step": 982 }, { "epoch": 0.2739306116761878, "grad_norm": 0.029933458849974864, "learning_rate": 0.0002731951428649537, "loss": 1.2701, "step": 983 }, { "epoch": 0.274209279643305, "grad_norm": 0.027904009783295138, "learning_rate": 0.00027311182674489757, "loss": 1.1256, "step": 984 }, { "epoch": 0.27448794761042217, "grad_norm": 0.029540967178923815, "learning_rate": 0.0002730283940879588, "loss": 1.2187, "step": 985 }, { "epoch": 0.27476661557753934, "grad_norm": 0.02825676630236914, "learning_rate": 0.0002729448449731142, "loss": 1.1697, "step": 986 }, { "epoch": 0.2750452835446565, "grad_norm": 0.02861913188526291, "learning_rate": 0.0002728611794794509, "loss": 1.125, "step": 987 }, { "epoch": 0.27532395151177375, "grad_norm": 0.029568251813700413, "learning_rate": 0.00027277739768616604, "loss": 1.1509, "step": 988 }, { "epoch": 0.2756026194788909, "grad_norm": 0.028907402792537756, "learning_rate": 0.00027269349967256695, "loss": 1.2604, "step": 989 }, { "epoch": 0.2758812874460081, "grad_norm": 0.0285814302014814, "learning_rate": 0.00027260948551807105, "loss": 1.1646, "step": 990 }, { "epoch": 0.27615995541312527, "grad_norm": 0.03546692180970777, "learning_rate": 0.00027252535530220554, "loss": 1.1871, "step": 991 }, { "epoch": 0.27643862338024244, "grad_norm": 0.031239708552437805, "learning_rate": 0.00027244110910460757, "loss": 1.2119, "step": 992 }, { "epoch": 0.2767172913473596, "grad_norm": 0.038012009012169466, "learning_rate": 0.0002723567470050241, "loss": 1.2778, "step": 993 }, { "epoch": 0.2769959593144768, "grad_norm": 0.028030658752909922, "learning_rate": 0.0002722722690833117, "loss": 1.1743, "step": 994 }, { "epoch": 0.27727462728159397, "grad_norm": 0.02901582841701563, "learning_rate": 0.00027218767541943665, "loss": 1.1677, "step": 995 }, { "epoch": 0.27755329524871114, "grad_norm": 0.028892985002465746, "learning_rate": 0.00027210296609347484, "loss": 1.152, "step": 996 }, { "epoch": 0.2778319632158283, "grad_norm": 0.027718864682356122, "learning_rate": 0.00027201814118561155, "loss": 1.2311, "step": 997 }, { "epoch": 0.27811063118294554, "grad_norm": 0.030435011110325913, "learning_rate": 0.00027193320077614145, "loss": 1.2123, "step": 998 }, { "epoch": 0.2783892991500627, "grad_norm": 0.029301463829308372, "learning_rate": 0.0002718481449454687, "loss": 1.2082, "step": 999 }, { "epoch": 0.2786679671171799, "grad_norm": 0.02755463412513793, "learning_rate": 0.0002717629737741066, "loss": 1.1972, "step": 1000 }, { "epoch": 0.27894663508429707, "grad_norm": 0.02672880040157707, "learning_rate": 0.0002716776873426775, "loss": 1.1411, "step": 1001 }, { "epoch": 0.27922530305141424, "grad_norm": 0.031467337992614655, "learning_rate": 0.0002715922857319132, "loss": 1.2311, "step": 1002 }, { "epoch": 0.2795039710185314, "grad_norm": 0.026343021994981182, "learning_rate": 0.00027150676902265423, "loss": 1.1919, "step": 1003 }, { "epoch": 0.2797826389856486, "grad_norm": 0.025002833001770464, "learning_rate": 0.00027142113729585013, "loss": 1.1843, "step": 1004 }, { "epoch": 0.28006130695276577, "grad_norm": 0.02727755096181744, "learning_rate": 0.0002713353906325595, "loss": 1.2214, "step": 1005 }, { "epoch": 0.28033997491988294, "grad_norm": 0.029834017755233047, "learning_rate": 0.0002712495291139495, "loss": 1.24, "step": 1006 }, { "epoch": 0.2806186428870001, "grad_norm": 0.02811137002152119, "learning_rate": 0.000271163552821296, "loss": 1.1706, "step": 1007 }, { "epoch": 0.28089731085411734, "grad_norm": 0.031188138260615623, "learning_rate": 0.0002710774618359838, "loss": 1.1807, "step": 1008 }, { "epoch": 0.2811759788212345, "grad_norm": 0.024253755657584832, "learning_rate": 0.000270991256239506, "loss": 1.1968, "step": 1009 }, { "epoch": 0.2814546467883517, "grad_norm": 0.03243878508471089, "learning_rate": 0.0002709049361134643, "loss": 1.2278, "step": 1010 }, { "epoch": 0.28173331475546887, "grad_norm": 0.029329947353422987, "learning_rate": 0.0002708185015395687, "loss": 1.2131, "step": 1011 }, { "epoch": 0.28201198272258604, "grad_norm": 0.02625871117645347, "learning_rate": 0.00027073195259963776, "loss": 1.2031, "step": 1012 }, { "epoch": 0.2822906506897032, "grad_norm": 0.027934067795810084, "learning_rate": 0.00027064528937559816, "loss": 1.1669, "step": 1013 }, { "epoch": 0.2825693186568204, "grad_norm": 0.03265757476064567, "learning_rate": 0.00027055851194948466, "loss": 1.2669, "step": 1014 }, { "epoch": 0.28284798662393756, "grad_norm": 0.027899748348727594, "learning_rate": 0.0002704716204034403, "loss": 1.1443, "step": 1015 }, { "epoch": 0.28312665459105474, "grad_norm": 0.030203452357943066, "learning_rate": 0.000270384614819716, "loss": 1.1713, "step": 1016 }, { "epoch": 0.2834053225581719, "grad_norm": 0.034103313918130054, "learning_rate": 0.0002702974952806708, "loss": 1.2971, "step": 1017 }, { "epoch": 0.28368399052528914, "grad_norm": 0.029663755196579326, "learning_rate": 0.00027021026186877146, "loss": 1.1934, "step": 1018 }, { "epoch": 0.2839626584924063, "grad_norm": 0.0341401948192984, "learning_rate": 0.00027012291466659255, "loss": 1.1694, "step": 1019 }, { "epoch": 0.2842413264595235, "grad_norm": 0.03435181100704848, "learning_rate": 0.0002700354537568164, "loss": 1.2296, "step": 1020 }, { "epoch": 0.28451999442664067, "grad_norm": 0.027794390114510747, "learning_rate": 0.00026994787922223303, "loss": 1.2538, "step": 1021 }, { "epoch": 0.28479866239375784, "grad_norm": 0.02868167667419449, "learning_rate": 0.0002698601911457399, "loss": 1.2212, "step": 1022 }, { "epoch": 0.285077330360875, "grad_norm": 0.030316986214831136, "learning_rate": 0.00026977238961034186, "loss": 1.2128, "step": 1023 }, { "epoch": 0.2853559983279922, "grad_norm": 0.029922906681876478, "learning_rate": 0.0002696844746991515, "loss": 1.1454, "step": 1024 }, { "epoch": 0.28563466629510936, "grad_norm": 0.026359641197984313, "learning_rate": 0.0002695964464953884, "loss": 1.1981, "step": 1025 }, { "epoch": 0.28591333426222654, "grad_norm": 0.028192853602487567, "learning_rate": 0.00026950830508237956, "loss": 1.1953, "step": 1026 }, { "epoch": 0.2861920022293437, "grad_norm": 0.027283902767999347, "learning_rate": 0.00026942005054355906, "loss": 1.2115, "step": 1027 }, { "epoch": 0.28647067019646094, "grad_norm": 0.029955146557610023, "learning_rate": 0.0002693316829624681, "loss": 1.1697, "step": 1028 }, { "epoch": 0.2867493381635781, "grad_norm": 0.027882471919264573, "learning_rate": 0.0002692432024227549, "loss": 1.1922, "step": 1029 }, { "epoch": 0.2870280061306953, "grad_norm": 0.027427515497484626, "learning_rate": 0.00026915460900817453, "loss": 1.199, "step": 1030 }, { "epoch": 0.28730667409781246, "grad_norm": 0.028187169582022113, "learning_rate": 0.000269065902802589, "loss": 1.1896, "step": 1031 }, { "epoch": 0.28758534206492964, "grad_norm": 0.028511618841731454, "learning_rate": 0.00026897708388996707, "loss": 1.1665, "step": 1032 }, { "epoch": 0.2878640100320468, "grad_norm": 0.032896491418762615, "learning_rate": 0.0002688881523543841, "loss": 1.2079, "step": 1033 }, { "epoch": 0.288142677999164, "grad_norm": 0.035650105377530564, "learning_rate": 0.00026879910828002217, "loss": 1.14, "step": 1034 }, { "epoch": 0.28842134596628116, "grad_norm": 0.031200926513565003, "learning_rate": 0.00026870995175116985, "loss": 1.2084, "step": 1035 }, { "epoch": 0.28870001393339834, "grad_norm": 0.024714841048485404, "learning_rate": 0.00026862068285222205, "loss": 1.1192, "step": 1036 }, { "epoch": 0.2889786819005155, "grad_norm": 0.029599936217712668, "learning_rate": 0.00026853130166768033, "loss": 1.1631, "step": 1037 }, { "epoch": 0.28925734986763274, "grad_norm": 0.028385434010538942, "learning_rate": 0.0002684418082821522, "loss": 1.1686, "step": 1038 }, { "epoch": 0.2895360178347499, "grad_norm": 0.027027115829378515, "learning_rate": 0.00026835220278035167, "loss": 1.1779, "step": 1039 }, { "epoch": 0.2898146858018671, "grad_norm": 0.02667227537863127, "learning_rate": 0.0002682624852470986, "loss": 1.2445, "step": 1040 }, { "epoch": 0.29009335376898426, "grad_norm": 0.030023360722093915, "learning_rate": 0.00026817265576731914, "loss": 1.1584, "step": 1041 }, { "epoch": 0.29037202173610144, "grad_norm": 0.026628277353311015, "learning_rate": 0.0002680827144260453, "loss": 1.226, "step": 1042 }, { "epoch": 0.2906506897032186, "grad_norm": 0.030079309950487104, "learning_rate": 0.0002679926613084149, "loss": 1.2036, "step": 1043 }, { "epoch": 0.2909293576703358, "grad_norm": 0.02689753310655362, "learning_rate": 0.0002679024964996718, "loss": 1.2277, "step": 1044 }, { "epoch": 0.29120802563745296, "grad_norm": 0.028045229500240573, "learning_rate": 0.00026781222008516545, "loss": 1.2842, "step": 1045 }, { "epoch": 0.29148669360457014, "grad_norm": 0.030521485669541656, "learning_rate": 0.00026772183215035075, "loss": 1.2061, "step": 1046 }, { "epoch": 0.2917653615716873, "grad_norm": 0.027306059997848117, "learning_rate": 0.0002676313327807885, "loss": 1.2098, "step": 1047 }, { "epoch": 0.29204402953880454, "grad_norm": 0.03197891544109674, "learning_rate": 0.0002675407220621448, "loss": 1.132, "step": 1048 }, { "epoch": 0.2923226975059217, "grad_norm": 0.02743865239814558, "learning_rate": 0.0002674500000801912, "loss": 1.1857, "step": 1049 }, { "epoch": 0.2926013654730389, "grad_norm": 0.028000480343765156, "learning_rate": 0.00026735916692080454, "loss": 1.2053, "step": 1050 }, { "epoch": 0.29288003344015606, "grad_norm": 0.034674860942841924, "learning_rate": 0.0002672682226699669, "loss": 1.2436, "step": 1051 }, { "epoch": 0.29315870140727324, "grad_norm": 0.03759683852130606, "learning_rate": 0.00026717716741376564, "loss": 1.2264, "step": 1052 }, { "epoch": 0.2934373693743904, "grad_norm": 0.03044379814460284, "learning_rate": 0.00026708600123839294, "loss": 1.1842, "step": 1053 }, { "epoch": 0.2937160373415076, "grad_norm": 0.027679175731263474, "learning_rate": 0.00026699472423014623, "loss": 1.1405, "step": 1054 }, { "epoch": 0.29399470530862476, "grad_norm": 0.025439743019130337, "learning_rate": 0.00026690333647542774, "loss": 1.2231, "step": 1055 }, { "epoch": 0.29427337327574193, "grad_norm": 0.02873834202035625, "learning_rate": 0.0002668118380607445, "loss": 1.1947, "step": 1056 }, { "epoch": 0.2945520412428591, "grad_norm": 0.028899477368393035, "learning_rate": 0.0002667202290727084, "loss": 1.2176, "step": 1057 }, { "epoch": 0.29483070920997634, "grad_norm": 0.03350365206368117, "learning_rate": 0.0002666285095980359, "loss": 1.2005, "step": 1058 }, { "epoch": 0.2951093771770935, "grad_norm": 0.026626001359168643, "learning_rate": 0.00026653667972354806, "loss": 1.2098, "step": 1059 }, { "epoch": 0.2953880451442107, "grad_norm": 0.026391474875532884, "learning_rate": 0.00026644473953617054, "loss": 1.2641, "step": 1060 }, { "epoch": 0.29566671311132786, "grad_norm": 0.026562463731344705, "learning_rate": 0.0002663526891229332, "loss": 1.2024, "step": 1061 }, { "epoch": 0.29594538107844504, "grad_norm": 0.027261888037712616, "learning_rate": 0.00026626052857097056, "loss": 1.0983, "step": 1062 }, { "epoch": 0.2962240490455622, "grad_norm": 0.02996025572138953, "learning_rate": 0.0002661682579675211, "loss": 1.1911, "step": 1063 }, { "epoch": 0.2965027170126794, "grad_norm": 0.02584383105060872, "learning_rate": 0.00026607587739992767, "loss": 1.1602, "step": 1064 }, { "epoch": 0.29678138497979656, "grad_norm": 0.027081009778411932, "learning_rate": 0.00026598338695563704, "loss": 1.1812, "step": 1065 }, { "epoch": 0.29706005294691373, "grad_norm": 0.027925500858472063, "learning_rate": 0.0002658907867222001, "loss": 1.1976, "step": 1066 }, { "epoch": 0.2973387209140309, "grad_norm": 0.027066525676267245, "learning_rate": 0.0002657980767872717, "loss": 1.2854, "step": 1067 }, { "epoch": 0.29761738888114814, "grad_norm": 0.030424860580067888, "learning_rate": 0.0002657052572386106, "loss": 1.1294, "step": 1068 }, { "epoch": 0.2978960568482653, "grad_norm": 0.028531332302489786, "learning_rate": 0.00026561232816407895, "loss": 1.1934, "step": 1069 }, { "epoch": 0.2981747248153825, "grad_norm": 0.026578124712567232, "learning_rate": 0.000265519289651643, "loss": 1.1701, "step": 1070 }, { "epoch": 0.29845339278249966, "grad_norm": 0.026935472609211145, "learning_rate": 0.0002654261417893723, "loss": 1.234, "step": 1071 }, { "epoch": 0.29873206074961683, "grad_norm": 0.027626767266223762, "learning_rate": 0.0002653328846654402, "loss": 1.2687, "step": 1072 }, { "epoch": 0.299010728716734, "grad_norm": 0.028907131164565673, "learning_rate": 0.0002652395183681232, "loss": 1.1665, "step": 1073 }, { "epoch": 0.2992893966838512, "grad_norm": 0.024535648890856267, "learning_rate": 0.0002651460429858012, "loss": 1.1317, "step": 1074 }, { "epoch": 0.29956806465096836, "grad_norm": 0.02677212618712895, "learning_rate": 0.00026505245860695747, "loss": 1.0948, "step": 1075 }, { "epoch": 0.29984673261808553, "grad_norm": 0.027068551017338092, "learning_rate": 0.00026495876532017847, "loss": 1.1692, "step": 1076 }, { "epoch": 0.3001254005852027, "grad_norm": 0.027640372830409467, "learning_rate": 0.0002648649632141536, "loss": 1.2435, "step": 1077 }, { "epoch": 0.30040406855231994, "grad_norm": 0.025888008412051265, "learning_rate": 0.0002647710523776753, "loss": 1.1843, "step": 1078 }, { "epoch": 0.3006827365194371, "grad_norm": 0.0284852050886714, "learning_rate": 0.00026467703289963907, "loss": 1.1856, "step": 1079 }, { "epoch": 0.3009614044865543, "grad_norm": 0.026235770799366617, "learning_rate": 0.0002645829048690431, "loss": 1.2491, "step": 1080 }, { "epoch": 0.30124007245367146, "grad_norm": 0.028010433296734, "learning_rate": 0.00026448866837498844, "loss": 1.1675, "step": 1081 }, { "epoch": 0.30151874042078863, "grad_norm": 0.027488849963098933, "learning_rate": 0.0002643943235066787, "loss": 1.1735, "step": 1082 }, { "epoch": 0.3017974083879058, "grad_norm": 0.02913451868953299, "learning_rate": 0.0002642998703534202, "loss": 1.2069, "step": 1083 }, { "epoch": 0.302076076355023, "grad_norm": 0.028983576794078543, "learning_rate": 0.00026420530900462164, "loss": 1.1647, "step": 1084 }, { "epoch": 0.30235474432214016, "grad_norm": 0.024278013816423163, "learning_rate": 0.00026411063954979427, "loss": 1.1787, "step": 1085 }, { "epoch": 0.30263341228925733, "grad_norm": 0.02934022037729058, "learning_rate": 0.0002640158620785516, "loss": 1.1911, "step": 1086 }, { "epoch": 0.3029120802563745, "grad_norm": 0.02696163672330174, "learning_rate": 0.00026392097668060933, "loss": 1.2415, "step": 1087 }, { "epoch": 0.30319074822349174, "grad_norm": 0.027347418141268738, "learning_rate": 0.0002638259834457854, "loss": 1.1244, "step": 1088 }, { "epoch": 0.3034694161906089, "grad_norm": 0.0322439916739625, "learning_rate": 0.0002637308824639999, "loss": 1.2086, "step": 1089 }, { "epoch": 0.3037480841577261, "grad_norm": 0.027639587719408953, "learning_rate": 0.0002636356738252746, "loss": 1.2209, "step": 1090 }, { "epoch": 0.30402675212484326, "grad_norm": 0.02697274115671578, "learning_rate": 0.0002635403576197336, "loss": 1.1308, "step": 1091 }, { "epoch": 0.30430542009196043, "grad_norm": 0.026347949987039467, "learning_rate": 0.0002634449339376026, "loss": 1.177, "step": 1092 }, { "epoch": 0.3045840880590776, "grad_norm": 0.030302372418001825, "learning_rate": 0.0002633494028692091, "loss": 1.1915, "step": 1093 }, { "epoch": 0.3048627560261948, "grad_norm": 0.03112014194142545, "learning_rate": 0.00026325376450498197, "loss": 1.1549, "step": 1094 }, { "epoch": 0.30514142399331196, "grad_norm": 0.026834320232639985, "learning_rate": 0.00026315801893545215, "loss": 1.1949, "step": 1095 }, { "epoch": 0.30542009196042913, "grad_norm": 0.02656317023963919, "learning_rate": 0.0002630621662512517, "loss": 1.1949, "step": 1096 }, { "epoch": 0.3056987599275463, "grad_norm": 0.027272992005802323, "learning_rate": 0.0002629662065431141, "loss": 1.1412, "step": 1097 }, { "epoch": 0.30597742789466353, "grad_norm": 0.030411682429310697, "learning_rate": 0.00026287013990187433, "loss": 1.1707, "step": 1098 }, { "epoch": 0.3062560958617807, "grad_norm": 0.02917294675671277, "learning_rate": 0.00026277396641846836, "loss": 1.1851, "step": 1099 }, { "epoch": 0.3065347638288979, "grad_norm": 0.027877070665800828, "learning_rate": 0.00026267768618393346, "loss": 1.1127, "step": 1100 }, { "epoch": 0.30681343179601506, "grad_norm": 0.02975720506468015, "learning_rate": 0.00026258129928940794, "loss": 1.1486, "step": 1101 }, { "epoch": 0.30709209976313223, "grad_norm": 0.028111150117244636, "learning_rate": 0.000262484805826131, "loss": 1.2039, "step": 1102 }, { "epoch": 0.3073707677302494, "grad_norm": 0.028380522080340267, "learning_rate": 0.0002623882058854427, "loss": 1.1506, "step": 1103 }, { "epoch": 0.3076494356973666, "grad_norm": 0.026151119004527674, "learning_rate": 0.00026229149955878393, "loss": 1.1515, "step": 1104 }, { "epoch": 0.30792810366448375, "grad_norm": 0.029809729084949797, "learning_rate": 0.0002621946869376964, "loss": 1.1361, "step": 1105 }, { "epoch": 0.30820677163160093, "grad_norm": 0.028563357995502647, "learning_rate": 0.0002620977681138222, "loss": 1.1744, "step": 1106 }, { "epoch": 0.3084854395987181, "grad_norm": 0.029740106994492515, "learning_rate": 0.0002620007431789042, "loss": 1.1995, "step": 1107 }, { "epoch": 0.30876410756583533, "grad_norm": 0.024857241627236824, "learning_rate": 0.0002619036122247855, "loss": 1.1729, "step": 1108 }, { "epoch": 0.3090427755329525, "grad_norm": 0.0315272492662345, "learning_rate": 0.0002618063753434097, "loss": 1.1902, "step": 1109 }, { "epoch": 0.3093214435000697, "grad_norm": 0.030720630492385523, "learning_rate": 0.00026170903262682057, "loss": 1.2113, "step": 1110 }, { "epoch": 0.30960011146718686, "grad_norm": 0.02753562671930939, "learning_rate": 0.0002616115841671622, "loss": 1.1887, "step": 1111 }, { "epoch": 0.30987877943430403, "grad_norm": 0.02828184344249186, "learning_rate": 0.00026151403005667857, "loss": 1.1696, "step": 1112 }, { "epoch": 0.3101574474014212, "grad_norm": 0.02697992863937695, "learning_rate": 0.0002614163703877139, "loss": 1.2221, "step": 1113 }, { "epoch": 0.3104361153685384, "grad_norm": 0.02579925793465696, "learning_rate": 0.00026131860525271223, "loss": 1.1233, "step": 1114 }, { "epoch": 0.31071478333565555, "grad_norm": 0.028007871315908466, "learning_rate": 0.00026122073474421734, "loss": 1.2226, "step": 1115 }, { "epoch": 0.3109934513027727, "grad_norm": 0.02646301391961401, "learning_rate": 0.0002611227589548729, "loss": 1.1821, "step": 1116 }, { "epoch": 0.3112721192698899, "grad_norm": 0.03014879395346492, "learning_rate": 0.0002610246779774221, "loss": 1.1453, "step": 1117 }, { "epoch": 0.31155078723700713, "grad_norm": 0.031148086274540607, "learning_rate": 0.0002609264919047078, "loss": 1.1978, "step": 1118 }, { "epoch": 0.3118294552041243, "grad_norm": 0.030165113845583572, "learning_rate": 0.00026082820082967245, "loss": 1.2064, "step": 1119 }, { "epoch": 0.3121081231712415, "grad_norm": 0.02938487665888267, "learning_rate": 0.0002607298048453576, "loss": 1.1453, "step": 1120 }, { "epoch": 0.31238679113835865, "grad_norm": 0.030041755297738032, "learning_rate": 0.0002606313040449043, "loss": 1.191, "step": 1121 }, { "epoch": 0.31266545910547583, "grad_norm": 0.027232670097278762, "learning_rate": 0.0002605326985215529, "loss": 1.2236, "step": 1122 }, { "epoch": 0.312944127072593, "grad_norm": 0.031170990597912382, "learning_rate": 0.0002604339883686427, "loss": 1.2212, "step": 1123 }, { "epoch": 0.3132227950397102, "grad_norm": 0.028837036490614726, "learning_rate": 0.0002603351736796121, "loss": 1.2315, "step": 1124 }, { "epoch": 0.31350146300682735, "grad_norm": 0.02680611743364442, "learning_rate": 0.0002602362545479984, "loss": 1.1807, "step": 1125 }, { "epoch": 0.3137801309739445, "grad_norm": 0.02592393469350422, "learning_rate": 0.00026013723106743794, "loss": 1.2157, "step": 1126 }, { "epoch": 0.3140587989410617, "grad_norm": 0.028184711944313823, "learning_rate": 0.00026003810333166574, "loss": 1.1369, "step": 1127 }, { "epoch": 0.31433746690817893, "grad_norm": 0.02834001960695853, "learning_rate": 0.00025993887143451535, "loss": 1.2169, "step": 1128 }, { "epoch": 0.3146161348752961, "grad_norm": 0.026042011435554135, "learning_rate": 0.0002598395354699192, "loss": 1.1414, "step": 1129 }, { "epoch": 0.3148948028424133, "grad_norm": 0.02934782344183197, "learning_rate": 0.000259740095531908, "loss": 1.1553, "step": 1130 }, { "epoch": 0.31517347080953045, "grad_norm": 0.026886256861881244, "learning_rate": 0.00025964055171461106, "loss": 1.1699, "step": 1131 }, { "epoch": 0.31545213877664763, "grad_norm": 0.02566502456156286, "learning_rate": 0.00025954090411225587, "loss": 1.2165, "step": 1132 }, { "epoch": 0.3157308067437648, "grad_norm": 0.0270692613964059, "learning_rate": 0.00025944115281916825, "loss": 1.1982, "step": 1133 }, { "epoch": 0.316009474710882, "grad_norm": 0.03296409226740868, "learning_rate": 0.0002593412979297721, "loss": 1.2164, "step": 1134 }, { "epoch": 0.31628814267799915, "grad_norm": 0.03362938080061514, "learning_rate": 0.0002592413395385895, "loss": 1.2023, "step": 1135 }, { "epoch": 0.3165668106451163, "grad_norm": 0.026033775969653672, "learning_rate": 0.00025914127774024037, "loss": 1.1312, "step": 1136 }, { "epoch": 0.3168454786122335, "grad_norm": 0.028415597547764914, "learning_rate": 0.00025904111262944267, "loss": 1.2757, "step": 1137 }, { "epoch": 0.31712414657935073, "grad_norm": 0.027947521390718107, "learning_rate": 0.00025894084430101195, "loss": 1.1721, "step": 1138 }, { "epoch": 0.3174028145464679, "grad_norm": 0.03292795460014622, "learning_rate": 0.0002588404728498617, "loss": 1.2272, "step": 1139 }, { "epoch": 0.3176814825135851, "grad_norm": 0.027692578163460095, "learning_rate": 0.0002587399983710028, "loss": 1.2492, "step": 1140 }, { "epoch": 0.31796015048070225, "grad_norm": 0.027743623259755264, "learning_rate": 0.0002586394209595438, "loss": 1.1791, "step": 1141 }, { "epoch": 0.3182388184478194, "grad_norm": 0.029460949964223448, "learning_rate": 0.0002585387407106907, "loss": 1.1502, "step": 1142 }, { "epoch": 0.3185174864149366, "grad_norm": 0.026608604277111144, "learning_rate": 0.0002584379577197467, "loss": 1.2039, "step": 1143 }, { "epoch": 0.3187961543820538, "grad_norm": 0.02657757983609727, "learning_rate": 0.00025833707208211244, "loss": 1.1834, "step": 1144 }, { "epoch": 0.31907482234917095, "grad_norm": 0.02375526713109457, "learning_rate": 0.00025823608389328556, "loss": 1.1484, "step": 1145 }, { "epoch": 0.3193534903162881, "grad_norm": 0.026876559415468806, "learning_rate": 0.00025813499324886085, "loss": 1.2871, "step": 1146 }, { "epoch": 0.3196321582834053, "grad_norm": 0.02865945741999037, "learning_rate": 0.0002580338002445301, "loss": 1.1431, "step": 1147 }, { "epoch": 0.31991082625052253, "grad_norm": 0.026407292929078917, "learning_rate": 0.00025793250497608205, "loss": 1.181, "step": 1148 }, { "epoch": 0.3201894942176397, "grad_norm": 0.028192951818898963, "learning_rate": 0.000257831107539402, "loss": 1.1615, "step": 1149 }, { "epoch": 0.3204681621847569, "grad_norm": 0.02651905873431019, "learning_rate": 0.00025772960803047227, "loss": 1.161, "step": 1150 }, { "epoch": 0.32074683015187405, "grad_norm": 0.02623316451438844, "learning_rate": 0.0002576280065453716, "loss": 1.2041, "step": 1151 }, { "epoch": 0.3210254981189912, "grad_norm": 0.026072342200468147, "learning_rate": 0.00025752630318027527, "loss": 1.1295, "step": 1152 }, { "epoch": 0.3213041660861084, "grad_norm": 0.027763748648163285, "learning_rate": 0.0002574244980314551, "loss": 1.1679, "step": 1153 }, { "epoch": 0.3215828340532256, "grad_norm": 0.02603945411098843, "learning_rate": 0.0002573225911952793, "loss": 1.1635, "step": 1154 }, { "epoch": 0.32186150202034275, "grad_norm": 0.025954294415624652, "learning_rate": 0.00025722058276821206, "loss": 1.1981, "step": 1155 }, { "epoch": 0.3221401699874599, "grad_norm": 0.025933302811104664, "learning_rate": 0.00025711847284681403, "loss": 1.1764, "step": 1156 }, { "epoch": 0.3224188379545771, "grad_norm": 0.026012083583245513, "learning_rate": 0.0002570162615277418, "loss": 1.1902, "step": 1157 }, { "epoch": 0.3226975059216943, "grad_norm": 0.02655236852551799, "learning_rate": 0.00025691394890774795, "loss": 1.1964, "step": 1158 }, { "epoch": 0.3229761738888115, "grad_norm": 0.029622681686559637, "learning_rate": 0.000256811535083681, "loss": 1.1545, "step": 1159 }, { "epoch": 0.3232548418559287, "grad_norm": 0.03189708644052946, "learning_rate": 0.00025670902015248513, "loss": 1.178, "step": 1160 }, { "epoch": 0.32353350982304585, "grad_norm": 0.027253226629113045, "learning_rate": 0.00025660640421120047, "loss": 1.2245, "step": 1161 }, { "epoch": 0.323812177790163, "grad_norm": 0.026705581159381053, "learning_rate": 0.0002565036873569625, "loss": 1.1098, "step": 1162 }, { "epoch": 0.3240908457572802, "grad_norm": 0.03822618869384687, "learning_rate": 0.0002564008696870025, "loss": 1.2074, "step": 1163 }, { "epoch": 0.3243695137243974, "grad_norm": 0.032169070680800986, "learning_rate": 0.00025629795129864685, "loss": 1.1963, "step": 1164 }, { "epoch": 0.32464818169151455, "grad_norm": 0.028352281201035185, "learning_rate": 0.0002561949322893176, "loss": 1.1757, "step": 1165 }, { "epoch": 0.3249268496586317, "grad_norm": 0.02541456746791608, "learning_rate": 0.0002560918127565318, "loss": 1.1084, "step": 1166 }, { "epoch": 0.3252055176257489, "grad_norm": 0.030048323742358954, "learning_rate": 0.00025598859279790177, "loss": 1.1536, "step": 1167 }, { "epoch": 0.3254841855928661, "grad_norm": 0.028175308717411047, "learning_rate": 0.000255885272511135, "loss": 1.1929, "step": 1168 }, { "epoch": 0.3257628535599833, "grad_norm": 0.024811996961344927, "learning_rate": 0.0002557818519940336, "loss": 1.2202, "step": 1169 }, { "epoch": 0.3260415215271005, "grad_norm": 0.029269162161675185, "learning_rate": 0.000255678331344495, "loss": 1.2487, "step": 1170 }, { "epoch": 0.32632018949421765, "grad_norm": 0.02632871261807094, "learning_rate": 0.0002555747106605111, "loss": 1.1392, "step": 1171 }, { "epoch": 0.3265988574613348, "grad_norm": 0.024989422305669473, "learning_rate": 0.00025547099004016855, "loss": 1.2056, "step": 1172 }, { "epoch": 0.326877525428452, "grad_norm": 0.024978762680764825, "learning_rate": 0.0002553671695816487, "loss": 1.1682, "step": 1173 }, { "epoch": 0.3271561933955692, "grad_norm": 0.02556089646857691, "learning_rate": 0.00025526324938322727, "loss": 1.1788, "step": 1174 }, { "epoch": 0.32743486136268635, "grad_norm": 0.02501941632757472, "learning_rate": 0.00025515922954327455, "loss": 1.1614, "step": 1175 }, { "epoch": 0.3277135293298035, "grad_norm": 0.025914741925286842, "learning_rate": 0.00025505511016025507, "loss": 1.2097, "step": 1176 }, { "epoch": 0.3279921972969207, "grad_norm": 0.02604809485953847, "learning_rate": 0.00025495089133272756, "loss": 1.1677, "step": 1177 }, { "epoch": 0.3282708652640379, "grad_norm": 0.02665786421256033, "learning_rate": 0.0002548465731593449, "loss": 1.2348, "step": 1178 }, { "epoch": 0.3285495332311551, "grad_norm": 0.026831924123172868, "learning_rate": 0.000254742155738854, "loss": 1.1633, "step": 1179 }, { "epoch": 0.3288282011982723, "grad_norm": 0.025247018712768698, "learning_rate": 0.00025463763917009575, "loss": 1.1671, "step": 1180 }, { "epoch": 0.32910686916538945, "grad_norm": 0.027237658587235078, "learning_rate": 0.0002545330235520049, "loss": 1.1921, "step": 1181 }, { "epoch": 0.3293855371325066, "grad_norm": 0.027414324617426118, "learning_rate": 0.00025442830898361, "loss": 1.2056, "step": 1182 }, { "epoch": 0.3296642050996238, "grad_norm": 0.025759287829976805, "learning_rate": 0.000254323495564033, "loss": 1.1711, "step": 1183 }, { "epoch": 0.32994287306674097, "grad_norm": 0.0271844498769646, "learning_rate": 0.00025421858339248976, "loss": 1.2059, "step": 1184 }, { "epoch": 0.33022154103385815, "grad_norm": 0.02766460317561159, "learning_rate": 0.0002541135725682895, "loss": 1.1756, "step": 1185 }, { "epoch": 0.3305002090009753, "grad_norm": 0.026529616243204865, "learning_rate": 0.00025400846319083477, "loss": 1.2061, "step": 1186 }, { "epoch": 0.3307788769680925, "grad_norm": 0.024154915325164866, "learning_rate": 0.0002539032553596215, "loss": 1.2164, "step": 1187 }, { "epoch": 0.3310575449352097, "grad_norm": 0.027141767977797923, "learning_rate": 0.0002537979491742387, "loss": 1.1923, "step": 1188 }, { "epoch": 0.3313362129023269, "grad_norm": 0.02996622925353945, "learning_rate": 0.0002536925447343686, "loss": 1.21, "step": 1189 }, { "epoch": 0.3316148808694441, "grad_norm": 0.027020574705353708, "learning_rate": 0.0002535870421397863, "loss": 1.1467, "step": 1190 }, { "epoch": 0.33189354883656125, "grad_norm": 0.02884628302556411, "learning_rate": 0.0002534814414903599, "loss": 1.2137, "step": 1191 }, { "epoch": 0.3321722168036784, "grad_norm": 0.027123446380337896, "learning_rate": 0.00025337574288605044, "loss": 1.1941, "step": 1192 }, { "epoch": 0.3324508847707956, "grad_norm": 0.029821110662198128, "learning_rate": 0.00025326994642691143, "loss": 1.1882, "step": 1193 }, { "epoch": 0.33272955273791277, "grad_norm": 0.028609952233325863, "learning_rate": 0.00025316405221308916, "loss": 1.1887, "step": 1194 }, { "epoch": 0.33300822070502994, "grad_norm": 0.025973554284255223, "learning_rate": 0.00025305806034482247, "loss": 1.1513, "step": 1195 }, { "epoch": 0.3332868886721471, "grad_norm": 0.026387201919282576, "learning_rate": 0.00025295197092244255, "loss": 1.0912, "step": 1196 }, { "epoch": 0.3335655566392643, "grad_norm": 0.02680739827173711, "learning_rate": 0.000252845784046373, "loss": 1.1488, "step": 1197 }, { "epoch": 0.3338442246063815, "grad_norm": 0.026986110878546587, "learning_rate": 0.00025273949981712967, "loss": 1.2997, "step": 1198 }, { "epoch": 0.3341228925734987, "grad_norm": 0.02957001913453815, "learning_rate": 0.0002526331183353205, "loss": 1.1816, "step": 1199 }, { "epoch": 0.33440156054061587, "grad_norm": 0.025242953115745904, "learning_rate": 0.0002525266397016456, "loss": 1.1525, "step": 1200 }, { "epoch": 0.33468022850773305, "grad_norm": 0.027154950227969616, "learning_rate": 0.00025242006401689687, "loss": 1.2575, "step": 1201 }, { "epoch": 0.3349588964748502, "grad_norm": 0.027862995534221616, "learning_rate": 0.00025231339138195825, "loss": 1.1557, "step": 1202 }, { "epoch": 0.3352375644419674, "grad_norm": 0.025667957663896856, "learning_rate": 0.0002522066218978054, "loss": 1.1546, "step": 1203 }, { "epoch": 0.33551623240908457, "grad_norm": 0.027454677574200323, "learning_rate": 0.00025209975566550563, "loss": 1.1729, "step": 1204 }, { "epoch": 0.33579490037620174, "grad_norm": 0.027905468784740563, "learning_rate": 0.0002519927927862179, "loss": 1.1754, "step": 1205 }, { "epoch": 0.3360735683433189, "grad_norm": 0.026444895008419404, "learning_rate": 0.0002518857333611925, "loss": 1.2001, "step": 1206 }, { "epoch": 0.3363522363104361, "grad_norm": 0.02556413002293826, "learning_rate": 0.00025177857749177123, "loss": 1.0945, "step": 1207 }, { "epoch": 0.3366309042775533, "grad_norm": 0.028348713617129654, "learning_rate": 0.0002516713252793872, "loss": 1.1462, "step": 1208 }, { "epoch": 0.3369095722446705, "grad_norm": 0.027645269587359522, "learning_rate": 0.0002515639768255647, "loss": 1.1343, "step": 1209 }, { "epoch": 0.33718824021178767, "grad_norm": 0.029729507131085887, "learning_rate": 0.000251456532231919, "loss": 1.2275, "step": 1210 }, { "epoch": 0.33746690817890485, "grad_norm": 0.03034803354390931, "learning_rate": 0.0002513489916001566, "loss": 1.2013, "step": 1211 }, { "epoch": 0.337745576146022, "grad_norm": 0.03242317923275021, "learning_rate": 0.0002512413550320748, "loss": 1.1892, "step": 1212 }, { "epoch": 0.3380242441131392, "grad_norm": 0.027237485657779427, "learning_rate": 0.0002511336226295616, "loss": 1.1796, "step": 1213 }, { "epoch": 0.33830291208025637, "grad_norm": 0.027294005580943737, "learning_rate": 0.0002510257944945958, "loss": 1.1429, "step": 1214 }, { "epoch": 0.33858158004737354, "grad_norm": 0.02770788879216525, "learning_rate": 0.00025091787072924687, "loss": 1.2451, "step": 1215 }, { "epoch": 0.3388602480144907, "grad_norm": 0.026239414297351465, "learning_rate": 0.00025080985143567484, "loss": 1.1074, "step": 1216 }, { "epoch": 0.3391389159816079, "grad_norm": 0.02975469055628978, "learning_rate": 0.00025070173671613003, "loss": 1.1795, "step": 1217 }, { "epoch": 0.3394175839487251, "grad_norm": 0.027918124130413283, "learning_rate": 0.0002505935266729532, "loss": 1.1704, "step": 1218 }, { "epoch": 0.3396962519158423, "grad_norm": 0.025755198248822973, "learning_rate": 0.00025048522140857523, "loss": 1.1479, "step": 1219 }, { "epoch": 0.33997491988295947, "grad_norm": 0.030566578835397287, "learning_rate": 0.0002503768210255173, "loss": 1.1978, "step": 1220 }, { "epoch": 0.34025358785007664, "grad_norm": 0.02638484834917839, "learning_rate": 0.0002502683256263904, "loss": 1.1729, "step": 1221 }, { "epoch": 0.3405322558171938, "grad_norm": 0.02714693754935073, "learning_rate": 0.0002501597353138957, "loss": 1.1938, "step": 1222 }, { "epoch": 0.340810923784311, "grad_norm": 0.02619134891720077, "learning_rate": 0.00025005105019082404, "loss": 1.1608, "step": 1223 }, { "epoch": 0.34108959175142817, "grad_norm": 0.029287248552578707, "learning_rate": 0.00024994227036005613, "loss": 1.2625, "step": 1224 }, { "epoch": 0.34136825971854534, "grad_norm": 0.02555862968977744, "learning_rate": 0.00024983339592456226, "loss": 1.2416, "step": 1225 }, { "epoch": 0.3416469276856625, "grad_norm": 0.026798062669092154, "learning_rate": 0.0002497244269874023, "loss": 1.1838, "step": 1226 }, { "epoch": 0.3419255956527797, "grad_norm": 0.027707777371291018, "learning_rate": 0.00024961536365172555, "loss": 1.2028, "step": 1227 }, { "epoch": 0.3422042636198969, "grad_norm": 0.024508149636616648, "learning_rate": 0.0002495062060207706, "loss": 1.1723, "step": 1228 }, { "epoch": 0.3424829315870141, "grad_norm": 0.028213228039885824, "learning_rate": 0.0002493969541978655, "loss": 1.1473, "step": 1229 }, { "epoch": 0.34276159955413127, "grad_norm": 0.024838377592872295, "learning_rate": 0.00024928760828642736, "loss": 1.192, "step": 1230 }, { "epoch": 0.34304026752124844, "grad_norm": 0.02657367899051005, "learning_rate": 0.0002491781683899622, "loss": 1.161, "step": 1231 }, { "epoch": 0.3433189354883656, "grad_norm": 0.025275941161473133, "learning_rate": 0.00024906863461206523, "loss": 1.129, "step": 1232 }, { "epoch": 0.3435976034554828, "grad_norm": 0.02518508628346297, "learning_rate": 0.0002489590070564204, "loss": 1.2086, "step": 1233 }, { "epoch": 0.34387627142259997, "grad_norm": 0.026489454789510435, "learning_rate": 0.0002488492858268004, "loss": 1.1797, "step": 1234 }, { "epoch": 0.34415493938971714, "grad_norm": 0.02753010633136901, "learning_rate": 0.00024873947102706684, "loss": 1.1531, "step": 1235 }, { "epoch": 0.3444336073568343, "grad_norm": 0.02645062625539858, "learning_rate": 0.0002486295627611696, "loss": 1.1536, "step": 1236 }, { "epoch": 0.3447122753239515, "grad_norm": 0.03048530998898532, "learning_rate": 0.0002485195611331471, "loss": 1.3358, "step": 1237 }, { "epoch": 0.3449909432910687, "grad_norm": 0.02504201017127965, "learning_rate": 0.00024840946624712636, "loss": 1.2022, "step": 1238 }, { "epoch": 0.3452696112581859, "grad_norm": 0.029531895310768663, "learning_rate": 0.00024829927820732236, "loss": 1.175, "step": 1239 }, { "epoch": 0.34554827922530307, "grad_norm": 0.02593715602043461, "learning_rate": 0.00024818899711803846, "loss": 1.1969, "step": 1240 }, { "epoch": 0.34582694719242024, "grad_norm": 0.025515769042100716, "learning_rate": 0.00024807862308366603, "loss": 1.2083, "step": 1241 }, { "epoch": 0.3461056151595374, "grad_norm": 0.026873436110842218, "learning_rate": 0.0002479681562086845, "loss": 1.1533, "step": 1242 }, { "epoch": 0.3463842831266546, "grad_norm": 0.02793592282256251, "learning_rate": 0.00024785759659766107, "loss": 1.1962, "step": 1243 }, { "epoch": 0.34666295109377177, "grad_norm": 0.028657786925261584, "learning_rate": 0.0002477469443552507, "loss": 1.1662, "step": 1244 }, { "epoch": 0.34694161906088894, "grad_norm": 0.028855727016098193, "learning_rate": 0.00024763619958619623, "loss": 1.1744, "step": 1245 }, { "epoch": 0.3472202870280061, "grad_norm": 0.025791251520888694, "learning_rate": 0.00024752536239532795, "loss": 1.1426, "step": 1246 }, { "epoch": 0.3474989549951233, "grad_norm": 0.026305914239996474, "learning_rate": 0.0002474144328875636, "loss": 1.1244, "step": 1247 }, { "epoch": 0.3477776229622405, "grad_norm": 0.02743165683006253, "learning_rate": 0.0002473034111679083, "loss": 1.2155, "step": 1248 }, { "epoch": 0.3480562909293577, "grad_norm": 0.027348656954161006, "learning_rate": 0.0002471922973414547, "loss": 1.1867, "step": 1249 }, { "epoch": 0.34833495889647487, "grad_norm": 0.024462383426835453, "learning_rate": 0.0002470810915133823, "loss": 1.0898, "step": 1250 }, { "epoch": 0.34861362686359204, "grad_norm": 0.02794548290431838, "learning_rate": 0.00024696979378895784, "loss": 1.1406, "step": 1251 }, { "epoch": 0.3488922948307092, "grad_norm": 0.027659285070464435, "learning_rate": 0.00024685840427353514, "loss": 1.2295, "step": 1252 }, { "epoch": 0.3491709627978264, "grad_norm": 0.028414819815917758, "learning_rate": 0.00024674692307255475, "loss": 1.2729, "step": 1253 }, { "epoch": 0.34944963076494356, "grad_norm": 0.025200870596907645, "learning_rate": 0.0002466353502915441, "loss": 1.1473, "step": 1254 }, { "epoch": 0.34972829873206074, "grad_norm": 0.025983108413851014, "learning_rate": 0.0002465236860361172, "loss": 1.1711, "step": 1255 }, { "epoch": 0.3500069666991779, "grad_norm": 0.027101578505669716, "learning_rate": 0.0002464119304119748, "loss": 1.1743, "step": 1256 }, { "epoch": 0.3502856346662951, "grad_norm": 0.03131612696110953, "learning_rate": 0.00024630008352490414, "loss": 1.1811, "step": 1257 }, { "epoch": 0.3505643026334123, "grad_norm": 0.02515642158757001, "learning_rate": 0.00024618814548077873, "loss": 1.2134, "step": 1258 }, { "epoch": 0.3508429706005295, "grad_norm": 0.025302843401473908, "learning_rate": 0.00024607611638555833, "loss": 1.2148, "step": 1259 }, { "epoch": 0.35112163856764667, "grad_norm": 0.027536337543600027, "learning_rate": 0.0002459639963452892, "loss": 1.1835, "step": 1260 }, { "epoch": 0.35140030653476384, "grad_norm": 0.02789641194778509, "learning_rate": 0.0002458517854661032, "loss": 1.2051, "step": 1261 }, { "epoch": 0.351678974501881, "grad_norm": 0.02761636108596932, "learning_rate": 0.0002457394838542186, "loss": 1.1018, "step": 1262 }, { "epoch": 0.3519576424689982, "grad_norm": 0.02502560346247, "learning_rate": 0.00024562709161593933, "loss": 1.1841, "step": 1263 }, { "epoch": 0.35223631043611536, "grad_norm": 0.026892330566289688, "learning_rate": 0.0002455146088576552, "loss": 1.146, "step": 1264 }, { "epoch": 0.35251497840323254, "grad_norm": 0.027585927570560952, "learning_rate": 0.00024540203568584185, "loss": 1.1552, "step": 1265 }, { "epoch": 0.3527936463703497, "grad_norm": 0.025855921416639664, "learning_rate": 0.00024528937220706003, "loss": 1.0908, "step": 1266 }, { "epoch": 0.3530723143374669, "grad_norm": 0.02667226667878841, "learning_rate": 0.0002451766185279565, "loss": 1.1496, "step": 1267 }, { "epoch": 0.35335098230458406, "grad_norm": 0.026581877578535133, "learning_rate": 0.00024506377475526315, "loss": 1.1759, "step": 1268 }, { "epoch": 0.3536296502717013, "grad_norm": 0.03091343696528204, "learning_rate": 0.0002449508409957971, "loss": 1.2035, "step": 1269 }, { "epoch": 0.35390831823881846, "grad_norm": 0.023946010271258984, "learning_rate": 0.0002448378173564608, "loss": 1.1512, "step": 1270 }, { "epoch": 0.35418698620593564, "grad_norm": 0.02758537088555505, "learning_rate": 0.0002447247039442418, "loss": 1.1448, "step": 1271 }, { "epoch": 0.3544656541730528, "grad_norm": 0.03012086967187611, "learning_rate": 0.0002446115008662124, "loss": 1.1408, "step": 1272 }, { "epoch": 0.35474432214017, "grad_norm": 0.027622367887265208, "learning_rate": 0.00024449820822953, "loss": 1.2024, "step": 1273 }, { "epoch": 0.35502299010728716, "grad_norm": 0.027470647773414028, "learning_rate": 0.00024438482614143667, "loss": 1.1701, "step": 1274 }, { "epoch": 0.35530165807440434, "grad_norm": 0.024311933678894653, "learning_rate": 0.0002442713547092592, "loss": 1.0981, "step": 1275 }, { "epoch": 0.3555803260415215, "grad_norm": 0.025004900202295666, "learning_rate": 0.00024415779404040895, "loss": 1.1311, "step": 1276 }, { "epoch": 0.3558589940086387, "grad_norm": 0.027397524362180535, "learning_rate": 0.00024404414424238175, "loss": 1.1917, "step": 1277 }, { "epoch": 0.35613766197575586, "grad_norm": 0.02974489451242068, "learning_rate": 0.00024393040542275768, "loss": 1.1918, "step": 1278 }, { "epoch": 0.3564163299428731, "grad_norm": 0.029979797852580713, "learning_rate": 0.00024381657768920135, "loss": 1.2198, "step": 1279 }, { "epoch": 0.35669499790999026, "grad_norm": 0.027241618448900343, "learning_rate": 0.00024370266114946123, "loss": 1.1626, "step": 1280 }, { "epoch": 0.35697366587710744, "grad_norm": 0.02568399796756098, "learning_rate": 0.0002435886559113701, "loss": 1.2758, "step": 1281 }, { "epoch": 0.3572523338442246, "grad_norm": 0.027560332638693025, "learning_rate": 0.00024347456208284458, "loss": 1.1938, "step": 1282 }, { "epoch": 0.3575310018113418, "grad_norm": 0.02608089655851119, "learning_rate": 0.00024336037977188516, "loss": 1.1645, "step": 1283 }, { "epoch": 0.35780966977845896, "grad_norm": 0.03214771081594851, "learning_rate": 0.0002432461090865761, "loss": 1.1696, "step": 1284 }, { "epoch": 0.35808833774557614, "grad_norm": 0.025834261255856753, "learning_rate": 0.00024313175013508531, "loss": 1.2165, "step": 1285 }, { "epoch": 0.3583670057126933, "grad_norm": 0.024406309094884438, "learning_rate": 0.00024301730302566426, "loss": 1.2151, "step": 1286 }, { "epoch": 0.3586456736798105, "grad_norm": 0.025277762162028216, "learning_rate": 0.0002429027678666479, "loss": 1.169, "step": 1287 }, { "epoch": 0.35892434164692766, "grad_norm": 0.03177636433678779, "learning_rate": 0.00024278814476645443, "loss": 1.1789, "step": 1288 }, { "epoch": 0.3592030096140449, "grad_norm": 0.027997720663773793, "learning_rate": 0.00024267343383358537, "loss": 1.1891, "step": 1289 }, { "epoch": 0.35948167758116206, "grad_norm": 0.02562483689556478, "learning_rate": 0.00024255863517662544, "loss": 1.1961, "step": 1290 }, { "epoch": 0.35976034554827924, "grad_norm": 0.02582361595694272, "learning_rate": 0.00024244374890424223, "loss": 1.1103, "step": 1291 }, { "epoch": 0.3600390135153964, "grad_norm": 0.02507785235108137, "learning_rate": 0.00024232877512518646, "loss": 1.1643, "step": 1292 }, { "epoch": 0.3603176814825136, "grad_norm": 0.025182245736075904, "learning_rate": 0.00024221371394829148, "loss": 1.1577, "step": 1293 }, { "epoch": 0.36059634944963076, "grad_norm": 0.026932869609571666, "learning_rate": 0.0002420985654824736, "loss": 1.1549, "step": 1294 }, { "epoch": 0.36087501741674793, "grad_norm": 0.02699703484424311, "learning_rate": 0.00024198332983673153, "loss": 1.1953, "step": 1295 }, { "epoch": 0.3611536853838651, "grad_norm": 0.026433493608989226, "learning_rate": 0.00024186800712014666, "loss": 1.1996, "step": 1296 }, { "epoch": 0.3614323533509823, "grad_norm": 0.02751586458722841, "learning_rate": 0.00024175259744188275, "loss": 1.1413, "step": 1297 }, { "epoch": 0.36171102131809946, "grad_norm": 0.02836457443472876, "learning_rate": 0.00024163710091118588, "loss": 1.2198, "step": 1298 }, { "epoch": 0.3619896892852167, "grad_norm": 0.024925209958313155, "learning_rate": 0.00024152151763738426, "loss": 1.1106, "step": 1299 }, { "epoch": 0.36226835725233386, "grad_norm": 0.025183164683313865, "learning_rate": 0.0002414058477298884, "loss": 1.1455, "step": 1300 }, { "epoch": 0.36254702521945104, "grad_norm": 0.026058479808329503, "learning_rate": 0.00024129009129819073, "loss": 1.2091, "step": 1301 }, { "epoch": 0.3628256931865682, "grad_norm": 0.02577636074948259, "learning_rate": 0.00024117424845186545, "loss": 1.2656, "step": 1302 }, { "epoch": 0.3631043611536854, "grad_norm": 0.02469534653864619, "learning_rate": 0.00024105831930056873, "loss": 1.1762, "step": 1303 }, { "epoch": 0.36338302912080256, "grad_norm": 0.025206010922912955, "learning_rate": 0.00024094230395403833, "loss": 1.2031, "step": 1304 }, { "epoch": 0.36366169708791973, "grad_norm": 0.029165309755209298, "learning_rate": 0.00024082620252209371, "loss": 1.1782, "step": 1305 }, { "epoch": 0.3639403650550369, "grad_norm": 0.025989463918134892, "learning_rate": 0.00024071001511463574, "loss": 1.1834, "step": 1306 }, { "epoch": 0.3642190330221541, "grad_norm": 0.02721137592312575, "learning_rate": 0.00024059374184164672, "loss": 1.1294, "step": 1307 }, { "epoch": 0.36449770098927126, "grad_norm": 0.02554844825180305, "learning_rate": 0.00024047738281319015, "loss": 1.1592, "step": 1308 }, { "epoch": 0.3647763689563885, "grad_norm": 0.0281396558237999, "learning_rate": 0.0002403609381394107, "loss": 1.182, "step": 1309 }, { "epoch": 0.36505503692350566, "grad_norm": 0.030102289268151988, "learning_rate": 0.00024024440793053435, "loss": 1.1844, "step": 1310 }, { "epoch": 0.36533370489062283, "grad_norm": 0.026154649165174685, "learning_rate": 0.00024012779229686768, "loss": 1.1162, "step": 1311 }, { "epoch": 0.36561237285774, "grad_norm": 0.024606207294384584, "learning_rate": 0.00024001109134879842, "loss": 1.2021, "step": 1312 }, { "epoch": 0.3658910408248572, "grad_norm": 0.024089964936983038, "learning_rate": 0.00023989430519679494, "loss": 1.1595, "step": 1313 }, { "epoch": 0.36616970879197436, "grad_norm": 0.024127478501408006, "learning_rate": 0.00023977743395140624, "loss": 1.1249, "step": 1314 }, { "epoch": 0.36644837675909153, "grad_norm": 0.026811060037940922, "learning_rate": 0.00023966047772326198, "loss": 1.173, "step": 1315 }, { "epoch": 0.3667270447262087, "grad_norm": 0.027690935224980243, "learning_rate": 0.00023954343662307208, "loss": 1.1932, "step": 1316 }, { "epoch": 0.3670057126933259, "grad_norm": 0.027299589159153145, "learning_rate": 0.00023942631076162704, "loss": 1.158, "step": 1317 }, { "epoch": 0.36728438066044305, "grad_norm": 0.02741392576352588, "learning_rate": 0.00023930910024979734, "loss": 1.2855, "step": 1318 }, { "epoch": 0.3675630486275603, "grad_norm": 0.02475252614427713, "learning_rate": 0.0002391918051985338, "loss": 1.1712, "step": 1319 }, { "epoch": 0.36784171659467746, "grad_norm": 0.025383418086920646, "learning_rate": 0.0002390744257188672, "loss": 1.1637, "step": 1320 }, { "epoch": 0.36812038456179463, "grad_norm": 0.0296934071461273, "learning_rate": 0.00023895696192190811, "loss": 1.1741, "step": 1321 }, { "epoch": 0.3683990525289118, "grad_norm": 0.025311868312886187, "learning_rate": 0.0002388394139188471, "loss": 1.2301, "step": 1322 }, { "epoch": 0.368677720496029, "grad_norm": 0.025633847908541323, "learning_rate": 0.0002387217818209544, "loss": 1.1094, "step": 1323 }, { "epoch": 0.36895638846314616, "grad_norm": 0.02607106413015197, "learning_rate": 0.00023860406573957975, "loss": 1.157, "step": 1324 }, { "epoch": 0.36923505643026333, "grad_norm": 0.02692300385746753, "learning_rate": 0.0002384862657861525, "loss": 1.1534, "step": 1325 }, { "epoch": 0.3695137243973805, "grad_norm": 0.02651063715965926, "learning_rate": 0.0002383683820721813, "loss": 1.1596, "step": 1326 }, { "epoch": 0.3697923923644977, "grad_norm": 0.025927549167871138, "learning_rate": 0.00023825041470925412, "loss": 1.0929, "step": 1327 }, { "epoch": 0.37007106033161485, "grad_norm": 0.025698431565413665, "learning_rate": 0.00023813236380903824, "loss": 1.1506, "step": 1328 }, { "epoch": 0.3703497282987321, "grad_norm": 0.027361612397104836, "learning_rate": 0.0002380142294832798, "loss": 1.135, "step": 1329 }, { "epoch": 0.37062839626584926, "grad_norm": 0.02747756505673088, "learning_rate": 0.00023789601184380404, "loss": 1.1828, "step": 1330 }, { "epoch": 0.37090706423296643, "grad_norm": 0.024816208064978224, "learning_rate": 0.00023777771100251503, "loss": 1.1926, "step": 1331 }, { "epoch": 0.3711857322000836, "grad_norm": 0.02774245077198858, "learning_rate": 0.0002376593270713956, "loss": 1.228, "step": 1332 }, { "epoch": 0.3714644001672008, "grad_norm": 0.02698263257946628, "learning_rate": 0.00023754086016250723, "loss": 1.1785, "step": 1333 }, { "epoch": 0.37174306813431796, "grad_norm": 0.02808736941694947, "learning_rate": 0.00023742231038799, "loss": 1.1859, "step": 1334 }, { "epoch": 0.37202173610143513, "grad_norm": 0.026160852011237006, "learning_rate": 0.00023730367786006237, "loss": 1.1784, "step": 1335 }, { "epoch": 0.3723004040685523, "grad_norm": 0.029600047537774857, "learning_rate": 0.00023718496269102107, "loss": 1.1508, "step": 1336 }, { "epoch": 0.3725790720356695, "grad_norm": 0.027266936721573518, "learning_rate": 0.00023706616499324123, "loss": 1.1602, "step": 1337 }, { "epoch": 0.37285774000278665, "grad_norm": 0.027566101451457915, "learning_rate": 0.00023694728487917596, "loss": 1.1987, "step": 1338 }, { "epoch": 0.3731364079699039, "grad_norm": 0.02598566445433983, "learning_rate": 0.0002368283224613564, "loss": 1.1981, "step": 1339 }, { "epoch": 0.37341507593702106, "grad_norm": 0.029418051926482714, "learning_rate": 0.00023670927785239172, "loss": 1.1476, "step": 1340 }, { "epoch": 0.37369374390413823, "grad_norm": 0.031177196442980355, "learning_rate": 0.0002365901511649687, "loss": 1.1772, "step": 1341 }, { "epoch": 0.3739724118712554, "grad_norm": 0.026983992307147802, "learning_rate": 0.00023647094251185195, "loss": 1.1247, "step": 1342 }, { "epoch": 0.3742510798383726, "grad_norm": 0.02908936335819354, "learning_rate": 0.00023635165200588368, "loss": 1.1331, "step": 1343 }, { "epoch": 0.37452974780548975, "grad_norm": 0.029312591019817463, "learning_rate": 0.00023623227975998347, "loss": 1.1902, "step": 1344 }, { "epoch": 0.37480841577260693, "grad_norm": 0.03000677760081856, "learning_rate": 0.00023611282588714838, "loss": 1.2106, "step": 1345 }, { "epoch": 0.3750870837397241, "grad_norm": 0.027174615904211784, "learning_rate": 0.00023599329050045267, "loss": 1.1627, "step": 1346 }, { "epoch": 0.3753657517068413, "grad_norm": 0.025842534429749087, "learning_rate": 0.00023587367371304783, "loss": 1.1605, "step": 1347 }, { "epoch": 0.37564441967395845, "grad_norm": 0.027356924830884918, "learning_rate": 0.00023575397563816228, "loss": 1.2012, "step": 1348 }, { "epoch": 0.3759230876410757, "grad_norm": 0.02528068996245779, "learning_rate": 0.00023563419638910154, "loss": 1.2073, "step": 1349 }, { "epoch": 0.37620175560819286, "grad_norm": 0.029088621964515907, "learning_rate": 0.00023551433607924788, "loss": 1.2229, "step": 1350 }, { "epoch": 0.37648042357531003, "grad_norm": 0.02812987393254225, "learning_rate": 0.00023539439482206026, "loss": 1.1419, "step": 1351 }, { "epoch": 0.3767590915424272, "grad_norm": 0.0249869680487412, "learning_rate": 0.0002352743727310744, "loss": 1.1976, "step": 1352 }, { "epoch": 0.3770377595095444, "grad_norm": 0.027131422606277975, "learning_rate": 0.00023515426991990246, "loss": 1.1422, "step": 1353 }, { "epoch": 0.37731642747666155, "grad_norm": 0.02856579477479852, "learning_rate": 0.00023503408650223293, "loss": 1.2505, "step": 1354 }, { "epoch": 0.3775950954437787, "grad_norm": 0.025133937917527407, "learning_rate": 0.00023491382259183073, "loss": 1.1424, "step": 1355 }, { "epoch": 0.3778737634108959, "grad_norm": 0.027242165525072328, "learning_rate": 0.00023479347830253694, "loss": 1.1161, "step": 1356 }, { "epoch": 0.3781524313780131, "grad_norm": 0.02677318843194074, "learning_rate": 0.00023467305374826863, "loss": 1.1427, "step": 1357 }, { "epoch": 0.37843109934513025, "grad_norm": 0.026744596791061476, "learning_rate": 0.00023455254904301904, "loss": 1.1541, "step": 1358 }, { "epoch": 0.3787097673122475, "grad_norm": 0.0280840558018753, "learning_rate": 0.0002344319643008571, "loss": 1.1778, "step": 1359 }, { "epoch": 0.37898843527936465, "grad_norm": 0.02464101798189147, "learning_rate": 0.00023431129963592757, "loss": 1.1988, "step": 1360 }, { "epoch": 0.37926710324648183, "grad_norm": 0.02815132803150877, "learning_rate": 0.00023419055516245085, "loss": 1.1719, "step": 1361 }, { "epoch": 0.379545771213599, "grad_norm": 0.025021418790123262, "learning_rate": 0.000234069730994723, "loss": 1.1643, "step": 1362 }, { "epoch": 0.3798244391807162, "grad_norm": 0.02785227371944514, "learning_rate": 0.00023394882724711525, "loss": 1.1377, "step": 1363 }, { "epoch": 0.38010310714783335, "grad_norm": 0.026670361982889454, "learning_rate": 0.0002338278440340745, "loss": 1.2519, "step": 1364 }, { "epoch": 0.3803817751149505, "grad_norm": 0.027278269667909295, "learning_rate": 0.0002337067814701226, "loss": 1.1406, "step": 1365 }, { "epoch": 0.3806604430820677, "grad_norm": 0.02538471067650925, "learning_rate": 0.00023358563966985663, "loss": 1.1948, "step": 1366 }, { "epoch": 0.3809391110491849, "grad_norm": 0.027879372443695476, "learning_rate": 0.00023346441874794866, "loss": 1.2009, "step": 1367 }, { "epoch": 0.38121777901630205, "grad_norm": 0.02694013945571788, "learning_rate": 0.00023334311881914566, "loss": 1.2176, "step": 1368 }, { "epoch": 0.3814964469834193, "grad_norm": 0.02581749203584238, "learning_rate": 0.00023322173999826938, "loss": 1.2417, "step": 1369 }, { "epoch": 0.38177511495053645, "grad_norm": 0.025021537923017128, "learning_rate": 0.00023310028240021628, "loss": 1.2354, "step": 1370 }, { "epoch": 0.38205378291765363, "grad_norm": 0.026398578213839292, "learning_rate": 0.0002329787461399573, "loss": 1.1501, "step": 1371 }, { "epoch": 0.3823324508847708, "grad_norm": 0.029861304003231837, "learning_rate": 0.00023285713133253794, "loss": 1.1624, "step": 1372 }, { "epoch": 0.382611118851888, "grad_norm": 0.026789306092017937, "learning_rate": 0.00023273543809307804, "loss": 1.1962, "step": 1373 }, { "epoch": 0.38288978681900515, "grad_norm": 0.025632382448008438, "learning_rate": 0.0002326136665367716, "loss": 1.1719, "step": 1374 }, { "epoch": 0.3831684547861223, "grad_norm": 0.02497096382924006, "learning_rate": 0.00023249181677888677, "loss": 1.1545, "step": 1375 }, { "epoch": 0.3834471227532395, "grad_norm": 0.02678238045565379, "learning_rate": 0.00023236988893476592, "loss": 1.243, "step": 1376 }, { "epoch": 0.3837257907203567, "grad_norm": 0.02407153760058344, "learning_rate": 0.00023224788311982506, "loss": 1.1568, "step": 1377 }, { "epoch": 0.38400445868747385, "grad_norm": 0.024390798480229584, "learning_rate": 0.00023212579944955415, "loss": 1.1874, "step": 1378 }, { "epoch": 0.3842831266545911, "grad_norm": 0.026125463770480365, "learning_rate": 0.00023200363803951686, "loss": 1.1199, "step": 1379 }, { "epoch": 0.38456179462170825, "grad_norm": 0.02783118691539508, "learning_rate": 0.00023188139900535038, "loss": 1.2363, "step": 1380 }, { "epoch": 0.3848404625888254, "grad_norm": 0.028911460664556802, "learning_rate": 0.00023175908246276543, "loss": 1.1682, "step": 1381 }, { "epoch": 0.3851191305559426, "grad_norm": 0.08818255779294254, "learning_rate": 0.00023163668852754606, "loss": 1.1772, "step": 1382 }, { "epoch": 0.3853977985230598, "grad_norm": 0.025753531474619826, "learning_rate": 0.00023151421731554964, "loss": 1.1332, "step": 1383 }, { "epoch": 0.38567646649017695, "grad_norm": 0.027013275586429217, "learning_rate": 0.0002313916689427066, "loss": 1.1564, "step": 1384 }, { "epoch": 0.3859551344572941, "grad_norm": 0.02627857902706677, "learning_rate": 0.00023126904352502046, "loss": 1.1398, "step": 1385 }, { "epoch": 0.3862338024244113, "grad_norm": 0.026467767717596805, "learning_rate": 0.00023114634117856776, "loss": 1.2259, "step": 1386 }, { "epoch": 0.3865124703915285, "grad_norm": 0.025414961802994903, "learning_rate": 0.00023102356201949774, "loss": 1.1892, "step": 1387 }, { "epoch": 0.38679113835864565, "grad_norm": 0.026600115848812188, "learning_rate": 0.00023090070616403225, "loss": 1.1595, "step": 1388 }, { "epoch": 0.3870698063257629, "grad_norm": 0.02451979597624931, "learning_rate": 0.00023077777372846605, "loss": 1.1283, "step": 1389 }, { "epoch": 0.38734847429288005, "grad_norm": 0.02420460646737474, "learning_rate": 0.00023065476482916612, "loss": 1.1434, "step": 1390 }, { "epoch": 0.3876271422599972, "grad_norm": 0.0251424080297605, "learning_rate": 0.00023053167958257193, "loss": 1.2141, "step": 1391 }, { "epoch": 0.3879058102271144, "grad_norm": 0.02436522399766943, "learning_rate": 0.00023040851810519517, "loss": 1.197, "step": 1392 }, { "epoch": 0.3881844781942316, "grad_norm": 0.026944389810562023, "learning_rate": 0.00023028528051361976, "loss": 1.1229, "step": 1393 }, { "epoch": 0.38846314616134875, "grad_norm": 0.02710890209519847, "learning_rate": 0.00023016196692450164, "loss": 1.1694, "step": 1394 }, { "epoch": 0.3887418141284659, "grad_norm": 0.028710237104119084, "learning_rate": 0.00023003857745456868, "loss": 1.1983, "step": 1395 }, { "epoch": 0.3890204820955831, "grad_norm": 0.027073438635016274, "learning_rate": 0.00022991511222062054, "loss": 1.11, "step": 1396 }, { "epoch": 0.38929915006270027, "grad_norm": 0.023433079288590557, "learning_rate": 0.0002297915713395286, "loss": 1.2168, "step": 1397 }, { "epoch": 0.38957781802981745, "grad_norm": 0.02623357677417451, "learning_rate": 0.00022966795492823604, "loss": 1.134, "step": 1398 }, { "epoch": 0.3898564859969347, "grad_norm": 0.025568075187115093, "learning_rate": 0.00022954426310375722, "loss": 1.1801, "step": 1399 }, { "epoch": 0.39013515396405185, "grad_norm": 0.026254629383334213, "learning_rate": 0.0002294204959831781, "loss": 1.1655, "step": 1400 }, { "epoch": 0.390413821931169, "grad_norm": 0.024705770076245408, "learning_rate": 0.00022929665368365588, "loss": 1.2243, "step": 1401 }, { "epoch": 0.3906924898982862, "grad_norm": 0.023590725561671737, "learning_rate": 0.0002291727363224189, "loss": 1.1467, "step": 1402 }, { "epoch": 0.3909711578654034, "grad_norm": 0.027607689306426705, "learning_rate": 0.00022904874401676647, "loss": 1.1728, "step": 1403 }, { "epoch": 0.39124982583252055, "grad_norm": 0.02721749875909265, "learning_rate": 0.0002289246768840691, "loss": 1.2604, "step": 1404 }, { "epoch": 0.3915284937996377, "grad_norm": 0.025354176263978803, "learning_rate": 0.00022880053504176779, "loss": 1.176, "step": 1405 }, { "epoch": 0.3918071617667549, "grad_norm": 0.02573936277579562, "learning_rate": 0.0002286763186073745, "loss": 1.1651, "step": 1406 }, { "epoch": 0.39208582973387207, "grad_norm": 0.02385295070464909, "learning_rate": 0.00022855202769847177, "loss": 1.1469, "step": 1407 }, { "epoch": 0.39236449770098925, "grad_norm": 0.025779431532366753, "learning_rate": 0.00022842766243271254, "loss": 1.1802, "step": 1408 }, { "epoch": 0.3926431656681065, "grad_norm": 0.02409256902632266, "learning_rate": 0.00022830322292782018, "loss": 1.1819, "step": 1409 }, { "epoch": 0.39292183363522365, "grad_norm": 0.02521191025587221, "learning_rate": 0.00022817870930158835, "loss": 1.2199, "step": 1410 }, { "epoch": 0.3932005016023408, "grad_norm": 0.0279183669562282, "learning_rate": 0.0002280541216718809, "loss": 1.1858, "step": 1411 }, { "epoch": 0.393479169569458, "grad_norm": 0.025947546654096966, "learning_rate": 0.00022792946015663172, "loss": 1.1536, "step": 1412 }, { "epoch": 0.3937578375365752, "grad_norm": 0.027214167274323388, "learning_rate": 0.0002278047248738445, "loss": 1.1391, "step": 1413 }, { "epoch": 0.39403650550369235, "grad_norm": 0.02443633890762942, "learning_rate": 0.00022767991594159302, "loss": 1.1534, "step": 1414 }, { "epoch": 0.3943151734708095, "grad_norm": 0.02737960700046622, "learning_rate": 0.00022755503347802057, "loss": 1.1634, "step": 1415 }, { "epoch": 0.3945938414379267, "grad_norm": 0.026726331609955054, "learning_rate": 0.00022743007760134008, "loss": 1.1548, "step": 1416 }, { "epoch": 0.39487250940504387, "grad_norm": 0.02573208570220635, "learning_rate": 0.000227305048429834, "loss": 1.2301, "step": 1417 }, { "epoch": 0.39515117737216104, "grad_norm": 0.026222495448760756, "learning_rate": 0.0002271799460818542, "loss": 1.1075, "step": 1418 }, { "epoch": 0.3954298453392783, "grad_norm": 0.02707162517777988, "learning_rate": 0.00022705477067582177, "loss": 1.1625, "step": 1419 }, { "epoch": 0.39570851330639545, "grad_norm": 0.02598671356787567, "learning_rate": 0.00022692952233022696, "loss": 1.2129, "step": 1420 }, { "epoch": 0.3959871812735126, "grad_norm": 0.026081504218733745, "learning_rate": 0.00022680420116362904, "loss": 1.2241, "step": 1421 }, { "epoch": 0.3962658492406298, "grad_norm": 0.025422448295080474, "learning_rate": 0.00022667880729465624, "loss": 1.1947, "step": 1422 }, { "epoch": 0.39654451720774697, "grad_norm": 0.024251246523210822, "learning_rate": 0.0002265533408420057, "loss": 1.1767, "step": 1423 }, { "epoch": 0.39682318517486415, "grad_norm": 0.024493724943410535, "learning_rate": 0.00022642780192444305, "loss": 1.0968, "step": 1424 }, { "epoch": 0.3971018531419813, "grad_norm": 0.024412731012216655, "learning_rate": 0.00022630219066080274, "loss": 1.1636, "step": 1425 }, { "epoch": 0.3973805211090985, "grad_norm": 0.027202120635981705, "learning_rate": 0.0002261765071699875, "loss": 1.1785, "step": 1426 }, { "epoch": 0.39765918907621567, "grad_norm": 0.02511523695199156, "learning_rate": 0.00022605075157096862, "loss": 1.1754, "step": 1427 }, { "epoch": 0.39793785704333284, "grad_norm": 0.02321005402717016, "learning_rate": 0.0002259249239827856, "loss": 1.143, "step": 1428 }, { "epoch": 0.3982165250104501, "grad_norm": 0.024753235007338836, "learning_rate": 0.0002257990245245459, "loss": 1.1673, "step": 1429 }, { "epoch": 0.39849519297756725, "grad_norm": 0.030944292201288782, "learning_rate": 0.00022567305331542525, "loss": 1.1698, "step": 1430 }, { "epoch": 0.3987738609446844, "grad_norm": 0.02468828146458426, "learning_rate": 0.00022554701047466724, "loss": 1.213, "step": 1431 }, { "epoch": 0.3990525289118016, "grad_norm": 0.02621019480891229, "learning_rate": 0.00022542089612158318, "loss": 1.1221, "step": 1432 }, { "epoch": 0.39933119687891877, "grad_norm": 0.027587461839104155, "learning_rate": 0.0002252947103755521, "loss": 1.1652, "step": 1433 }, { "epoch": 0.39960986484603594, "grad_norm": 0.026354905575872604, "learning_rate": 0.00022516845335602068, "loss": 1.1703, "step": 1434 }, { "epoch": 0.3998885328131531, "grad_norm": 0.02817083104981065, "learning_rate": 0.000225042125182503, "loss": 1.1983, "step": 1435 }, { "epoch": 0.4001672007802703, "grad_norm": 0.0259402320390469, "learning_rate": 0.00022491572597458054, "loss": 1.1708, "step": 1436 }, { "epoch": 0.40044586874738747, "grad_norm": 0.02783238929468636, "learning_rate": 0.000224789255851902, "loss": 1.1464, "step": 1437 }, { "epoch": 0.40072453671450464, "grad_norm": 0.030158039173344846, "learning_rate": 0.00022466271493418318, "loss": 1.2077, "step": 1438 }, { "epoch": 0.40100320468162187, "grad_norm": 0.029373179592388724, "learning_rate": 0.00022453610334120694, "loss": 1.1122, "step": 1439 }, { "epoch": 0.40128187264873905, "grad_norm": 0.02565851001603647, "learning_rate": 0.00022440942119282298, "loss": 1.2312, "step": 1440 }, { "epoch": 0.4015605406158562, "grad_norm": 0.027489866950531038, "learning_rate": 0.00022428266860894788, "loss": 1.1494, "step": 1441 }, { "epoch": 0.4018392085829734, "grad_norm": 0.02644260261165752, "learning_rate": 0.00022415584570956474, "loss": 1.1694, "step": 1442 }, { "epoch": 0.40211787655009057, "grad_norm": 0.024923958712136974, "learning_rate": 0.0002240289526147235, "loss": 1.1574, "step": 1443 }, { "epoch": 0.40239654451720774, "grad_norm": 0.024625499062450557, "learning_rate": 0.0002239019894445402, "loss": 1.1384, "step": 1444 }, { "epoch": 0.4026752124843249, "grad_norm": 0.02726437837871602, "learning_rate": 0.00022377495631919743, "loss": 1.174, "step": 1445 }, { "epoch": 0.4029538804514421, "grad_norm": 0.026119177711174456, "learning_rate": 0.00022364785335894402, "loss": 1.1514, "step": 1446 }, { "epoch": 0.40323254841855927, "grad_norm": 0.028405995418160596, "learning_rate": 0.00022352068068409473, "loss": 1.1479, "step": 1447 }, { "epoch": 0.40351121638567644, "grad_norm": 0.026332266454557024, "learning_rate": 0.0002233934384150305, "loss": 1.2185, "step": 1448 }, { "epoch": 0.40378988435279367, "grad_norm": 0.025271596831551415, "learning_rate": 0.00022326612667219797, "loss": 1.2136, "step": 1449 }, { "epoch": 0.40406855231991085, "grad_norm": 0.028186720317634074, "learning_rate": 0.00022313874557610973, "loss": 1.2287, "step": 1450 }, { "epoch": 0.404347220287028, "grad_norm": 0.027428219737632077, "learning_rate": 0.0002230112952473439, "loss": 1.1298, "step": 1451 }, { "epoch": 0.4046258882541452, "grad_norm": 0.0285221367892951, "learning_rate": 0.00022288377580654412, "loss": 1.2232, "step": 1452 }, { "epoch": 0.40490455622126237, "grad_norm": 0.027343626962678766, "learning_rate": 0.0002227561873744195, "loss": 1.2682, "step": 1453 }, { "epoch": 0.40518322418837954, "grad_norm": 0.026578162891410947, "learning_rate": 0.00022262853007174452, "loss": 1.1314, "step": 1454 }, { "epoch": 0.4054618921554967, "grad_norm": 0.026155791841490292, "learning_rate": 0.0002225008040193587, "loss": 1.1192, "step": 1455 }, { "epoch": 0.4057405601226139, "grad_norm": 0.027015698513126563, "learning_rate": 0.0002223730093381668, "loss": 1.1084, "step": 1456 }, { "epoch": 0.40601922808973107, "grad_norm": 0.026768620023543632, "learning_rate": 0.00022224514614913835, "loss": 1.1281, "step": 1457 }, { "epoch": 0.40629789605684824, "grad_norm": 0.02952526278712422, "learning_rate": 0.00022211721457330793, "loss": 1.134, "step": 1458 }, { "epoch": 0.40657656402396547, "grad_norm": 0.02978365552382484, "learning_rate": 0.00022198921473177473, "loss": 1.2419, "step": 1459 }, { "epoch": 0.40685523199108264, "grad_norm": 0.02550098355111874, "learning_rate": 0.00022186114674570265, "loss": 1.1316, "step": 1460 }, { "epoch": 0.4071338999581998, "grad_norm": 0.027461971947188697, "learning_rate": 0.00022173301073632004, "loss": 1.1873, "step": 1461 }, { "epoch": 0.407412567925317, "grad_norm": 0.027524888944190872, "learning_rate": 0.0002216048068249195, "loss": 1.1311, "step": 1462 }, { "epoch": 0.40769123589243417, "grad_norm": 0.024719202663679656, "learning_rate": 0.00022147653513285824, "loss": 1.1924, "step": 1463 }, { "epoch": 0.40796990385955134, "grad_norm": 0.026432681320928814, "learning_rate": 0.0002213481957815573, "loss": 1.1054, "step": 1464 }, { "epoch": 0.4082485718266685, "grad_norm": 0.02726060437065479, "learning_rate": 0.000221219788892502, "loss": 1.146, "step": 1465 }, { "epoch": 0.4085272397937857, "grad_norm": 0.03024411090087629, "learning_rate": 0.00022109131458724143, "loss": 1.1904, "step": 1466 }, { "epoch": 0.40880590776090286, "grad_norm": 0.028947405393759448, "learning_rate": 0.0002209627729873886, "loss": 1.1967, "step": 1467 }, { "epoch": 0.40908457572802004, "grad_norm": 0.02335248866436576, "learning_rate": 0.00022083416421462017, "loss": 1.1607, "step": 1468 }, { "epoch": 0.40936324369513727, "grad_norm": 0.025872474957568562, "learning_rate": 0.0002207054883906764, "loss": 1.2105, "step": 1469 }, { "epoch": 0.40964191166225444, "grad_norm": 0.02521317862827763, "learning_rate": 0.00022057674563736096, "loss": 1.1718, "step": 1470 }, { "epoch": 0.4099205796293716, "grad_norm": 0.024829911517630057, "learning_rate": 0.000220447936076541, "loss": 1.1789, "step": 1471 }, { "epoch": 0.4101992475964888, "grad_norm": 0.026021988792070516, "learning_rate": 0.00022031905983014685, "loss": 1.1624, "step": 1472 }, { "epoch": 0.41047791556360597, "grad_norm": 0.025328802538838794, "learning_rate": 0.00022019011702017192, "loss": 1.1942, "step": 1473 }, { "epoch": 0.41075658353072314, "grad_norm": 0.027841072526629485, "learning_rate": 0.0002200611077686727, "loss": 1.184, "step": 1474 }, { "epoch": 0.4110352514978403, "grad_norm": 0.024488883061644103, "learning_rate": 0.00021993203219776848, "loss": 1.147, "step": 1475 }, { "epoch": 0.4113139194649575, "grad_norm": 0.024721365357318986, "learning_rate": 0.00021980289042964143, "loss": 1.1557, "step": 1476 }, { "epoch": 0.41159258743207466, "grad_norm": 0.02493936658491961, "learning_rate": 0.00021967368258653635, "loss": 1.1847, "step": 1477 }, { "epoch": 0.41187125539919184, "grad_norm": 0.024977367165719834, "learning_rate": 0.00021954440879076053, "loss": 1.1078, "step": 1478 }, { "epoch": 0.41214992336630907, "grad_norm": 0.0286447580727617, "learning_rate": 0.0002194150691646837, "loss": 1.1377, "step": 1479 }, { "epoch": 0.41242859133342624, "grad_norm": 0.024556837663169337, "learning_rate": 0.0002192856638307381, "loss": 1.1438, "step": 1480 }, { "epoch": 0.4127072593005434, "grad_norm": 0.026844841365269694, "learning_rate": 0.00021915619291141785, "loss": 1.1915, "step": 1481 }, { "epoch": 0.4129859272676606, "grad_norm": 0.026880030493037776, "learning_rate": 0.00021902665652927933, "loss": 1.211, "step": 1482 }, { "epoch": 0.41326459523477777, "grad_norm": 0.026517975687018712, "learning_rate": 0.00021889705480694093, "loss": 1.177, "step": 1483 }, { "epoch": 0.41354326320189494, "grad_norm": 0.027089338022601172, "learning_rate": 0.00021876738786708277, "loss": 1.1173, "step": 1484 }, { "epoch": 0.4138219311690121, "grad_norm": 0.02689244922456818, "learning_rate": 0.00021863765583244677, "loss": 1.1742, "step": 1485 }, { "epoch": 0.4141005991361293, "grad_norm": 0.028624235868822605, "learning_rate": 0.0002185078588258365, "loss": 1.1402, "step": 1486 }, { "epoch": 0.41437926710324646, "grad_norm": 0.025584849620533166, "learning_rate": 0.000218377996970117, "loss": 1.273, "step": 1487 }, { "epoch": 0.41465793507036364, "grad_norm": 0.023754905722009338, "learning_rate": 0.0002182480703882146, "loss": 1.2391, "step": 1488 }, { "epoch": 0.41493660303748087, "grad_norm": 0.025853387808288907, "learning_rate": 0.00021811807920311705, "loss": 1.2595, "step": 1489 }, { "epoch": 0.41521527100459804, "grad_norm": 0.024137911529916168, "learning_rate": 0.00021798802353787322, "loss": 1.2132, "step": 1490 }, { "epoch": 0.4154939389717152, "grad_norm": 0.02692165599919016, "learning_rate": 0.0002178579035155929, "loss": 1.1559, "step": 1491 }, { "epoch": 0.4157726069388324, "grad_norm": 0.02572837766362873, "learning_rate": 0.00021772771925944698, "loss": 1.2274, "step": 1492 }, { "epoch": 0.41605127490594956, "grad_norm": 0.02559287313790339, "learning_rate": 0.00021759747089266697, "loss": 1.1622, "step": 1493 }, { "epoch": 0.41632994287306674, "grad_norm": 0.026430688815555013, "learning_rate": 0.00021746715853854522, "loss": 1.1796, "step": 1494 }, { "epoch": 0.4166086108401839, "grad_norm": 0.02648812179954875, "learning_rate": 0.00021733678232043456, "loss": 1.1445, "step": 1495 }, { "epoch": 0.4168872788073011, "grad_norm": 0.025008772563271553, "learning_rate": 0.00021720634236174833, "loss": 1.1854, "step": 1496 }, { "epoch": 0.41716594677441826, "grad_norm": 0.024644509020272615, "learning_rate": 0.00021707583878596015, "loss": 1.1933, "step": 1497 }, { "epoch": 0.41744461474153544, "grad_norm": 0.024342379917617025, "learning_rate": 0.00021694527171660388, "loss": 1.1311, "step": 1498 }, { "epoch": 0.41772328270865267, "grad_norm": 0.024158540848807745, "learning_rate": 0.00021681464127727349, "loss": 1.2068, "step": 1499 }, { "epoch": 0.41800195067576984, "grad_norm": 0.023177826306738116, "learning_rate": 0.00021668394759162297, "loss": 1.1187, "step": 1500 }, { "epoch": 0.418280618642887, "grad_norm": 0.025724802920954404, "learning_rate": 0.00021655319078336607, "loss": 1.1895, "step": 1501 }, { "epoch": 0.4185592866100042, "grad_norm": 0.024955704589719938, "learning_rate": 0.00021642237097627644, "loss": 1.2003, "step": 1502 }, { "epoch": 0.41883795457712136, "grad_norm": 0.023476870828693413, "learning_rate": 0.00021629148829418722, "loss": 1.2307, "step": 1503 }, { "epoch": 0.41911662254423854, "grad_norm": 0.025948535757235797, "learning_rate": 0.00021616054286099126, "loss": 1.2498, "step": 1504 }, { "epoch": 0.4193952905113557, "grad_norm": 0.024013697343135538, "learning_rate": 0.0002160295348006405, "loss": 1.1337, "step": 1505 }, { "epoch": 0.4196739584784729, "grad_norm": 0.025157166158807445, "learning_rate": 0.00021589846423714649, "loss": 1.1633, "step": 1506 }, { "epoch": 0.41995262644559006, "grad_norm": 0.026103954988014235, "learning_rate": 0.00021576733129457975, "loss": 1.2328, "step": 1507 }, { "epoch": 0.42023129441270723, "grad_norm": 0.02389124813219406, "learning_rate": 0.00021563613609706993, "loss": 1.23, "step": 1508 }, { "epoch": 0.42050996237982446, "grad_norm": 0.02314427168053699, "learning_rate": 0.00021550487876880557, "loss": 1.1283, "step": 1509 }, { "epoch": 0.42078863034694164, "grad_norm": 0.026247450888418344, "learning_rate": 0.000215373559434034, "loss": 1.2266, "step": 1510 }, { "epoch": 0.4210672983140588, "grad_norm": 0.02743077099802653, "learning_rate": 0.00021524217821706132, "loss": 1.1924, "step": 1511 }, { "epoch": 0.421345966281176, "grad_norm": 0.023172562122626725, "learning_rate": 0.00021511073524225208, "loss": 1.1979, "step": 1512 }, { "epoch": 0.42162463424829316, "grad_norm": 0.025871933581717597, "learning_rate": 0.00021497923063402955, "loss": 1.187, "step": 1513 }, { "epoch": 0.42190330221541034, "grad_norm": 0.026316725905149444, "learning_rate": 0.000214847664516875, "loss": 1.1418, "step": 1514 }, { "epoch": 0.4221819701825275, "grad_norm": 0.02560074237703529, "learning_rate": 0.00021471603701532816, "loss": 1.1438, "step": 1515 }, { "epoch": 0.4224606381496447, "grad_norm": 0.026013012482143345, "learning_rate": 0.00021458434825398677, "loss": 1.189, "step": 1516 }, { "epoch": 0.42273930611676186, "grad_norm": 0.023724784055177934, "learning_rate": 0.00021445259835750662, "loss": 1.077, "step": 1517 }, { "epoch": 0.42301797408387903, "grad_norm": 0.026456317741172672, "learning_rate": 0.00021432078745060136, "loss": 1.2273, "step": 1518 }, { "epoch": 0.42329664205099626, "grad_norm": 0.02639694768711205, "learning_rate": 0.00021418891565804226, "loss": 1.1096, "step": 1519 }, { "epoch": 0.42357531001811344, "grad_norm": 0.023861460229380606, "learning_rate": 0.00021405698310465843, "loss": 1.1521, "step": 1520 }, { "epoch": 0.4238539779852306, "grad_norm": 0.026621256257799965, "learning_rate": 0.00021392498991533635, "loss": 1.1108, "step": 1521 }, { "epoch": 0.4241326459523478, "grad_norm": 0.025226956305437767, "learning_rate": 0.00021379293621501993, "loss": 1.1844, "step": 1522 }, { "epoch": 0.42441131391946496, "grad_norm": 0.025922547616678244, "learning_rate": 0.00021366082212871047, "loss": 1.2084, "step": 1523 }, { "epoch": 0.42468998188658214, "grad_norm": 0.02563644656051483, "learning_rate": 0.00021352864778146624, "loss": 1.1934, "step": 1524 }, { "epoch": 0.4249686498536993, "grad_norm": 0.02785798996099577, "learning_rate": 0.00021339641329840264, "loss": 1.1361, "step": 1525 }, { "epoch": 0.4252473178208165, "grad_norm": 0.02540434136495641, "learning_rate": 0.0002132641188046921, "loss": 1.2281, "step": 1526 }, { "epoch": 0.42552598578793366, "grad_norm": 0.02512418114259579, "learning_rate": 0.00021313176442556365, "loss": 1.1704, "step": 1527 }, { "epoch": 0.42580465375505083, "grad_norm": 0.027149177615493892, "learning_rate": 0.00021299935028630315, "loss": 1.1627, "step": 1528 }, { "epoch": 0.42608332172216806, "grad_norm": 0.02443636828597061, "learning_rate": 0.00021286687651225303, "loss": 1.1976, "step": 1529 }, { "epoch": 0.42636198968928524, "grad_norm": 0.02508000855268363, "learning_rate": 0.0002127343432288121, "loss": 1.1648, "step": 1530 }, { "epoch": 0.4266406576564024, "grad_norm": 0.028191888728086454, "learning_rate": 0.00021260175056143552, "loss": 1.1647, "step": 1531 }, { "epoch": 0.4269193256235196, "grad_norm": 0.02616077954427935, "learning_rate": 0.00021246909863563475, "loss": 1.1967, "step": 1532 }, { "epoch": 0.42719799359063676, "grad_norm": 0.02627620484934783, "learning_rate": 0.0002123363875769772, "loss": 1.2001, "step": 1533 }, { "epoch": 0.42747666155775393, "grad_norm": 0.023752807918112965, "learning_rate": 0.00021220361751108627, "loss": 1.1543, "step": 1534 }, { "epoch": 0.4277553295248711, "grad_norm": 0.027619836716061677, "learning_rate": 0.00021207078856364144, "loss": 1.1964, "step": 1535 }, { "epoch": 0.4280339974919883, "grad_norm": 0.025124291118004162, "learning_rate": 0.00021193790086037763, "loss": 1.1845, "step": 1536 }, { "epoch": 0.42831266545910546, "grad_norm": 0.02730414618139301, "learning_rate": 0.0002118049545270855, "loss": 1.1303, "step": 1537 }, { "epoch": 0.42859133342622263, "grad_norm": 0.023300090895176696, "learning_rate": 0.00021167194968961132, "loss": 1.1388, "step": 1538 }, { "epoch": 0.42887000139333986, "grad_norm": 0.02496628894397982, "learning_rate": 0.0002115388864738565, "loss": 1.2003, "step": 1539 }, { "epoch": 0.42914866936045704, "grad_norm": 0.025214385850723397, "learning_rate": 0.00021140576500577798, "loss": 1.1198, "step": 1540 }, { "epoch": 0.4294273373275742, "grad_norm": 0.0264454389734036, "learning_rate": 0.0002112725854113876, "loss": 1.2065, "step": 1541 }, { "epoch": 0.4297060052946914, "grad_norm": 0.02482182768369133, "learning_rate": 0.00021113934781675237, "loss": 1.1572, "step": 1542 }, { "epoch": 0.42998467326180856, "grad_norm": 0.027765280452560465, "learning_rate": 0.00021100605234799415, "loss": 1.1525, "step": 1543 }, { "epoch": 0.43026334122892573, "grad_norm": 0.022412816796622114, "learning_rate": 0.00021087269913128961, "loss": 1.1876, "step": 1544 }, { "epoch": 0.4305420091960429, "grad_norm": 0.02408802190810017, "learning_rate": 0.0002107392882928701, "loss": 1.1038, "step": 1545 }, { "epoch": 0.4308206771631601, "grad_norm": 0.02510262446102031, "learning_rate": 0.00021060581995902138, "loss": 1.1906, "step": 1546 }, { "epoch": 0.43109934513027726, "grad_norm": 0.026130182949791952, "learning_rate": 0.00021047229425608384, "loss": 1.1452, "step": 1547 }, { "epoch": 0.43137801309739443, "grad_norm": 0.026446422614163183, "learning_rate": 0.00021033871131045202, "loss": 1.2137, "step": 1548 }, { "epoch": 0.43165668106451166, "grad_norm": 0.027739818736679276, "learning_rate": 0.00021020507124857473, "loss": 1.2171, "step": 1549 }, { "epoch": 0.43193534903162883, "grad_norm": 0.023943150372433004, "learning_rate": 0.0002100713741969548, "loss": 1.1968, "step": 1550 }, { "epoch": 0.432214016998746, "grad_norm": 0.025029595713515537, "learning_rate": 0.00020993762028214893, "loss": 1.1394, "step": 1551 }, { "epoch": 0.4324926849658632, "grad_norm": 0.027551701861897284, "learning_rate": 0.00020980380963076792, "loss": 1.233, "step": 1552 }, { "epoch": 0.43277135293298036, "grad_norm": 0.02575331409196783, "learning_rate": 0.0002096699423694759, "loss": 1.1496, "step": 1553 }, { "epoch": 0.43305002090009753, "grad_norm": 0.025465664969122732, "learning_rate": 0.00020953601862499098, "loss": 1.1279, "step": 1554 }, { "epoch": 0.4333286888672147, "grad_norm": 0.03084606469013556, "learning_rate": 0.0002094020385240844, "loss": 1.1614, "step": 1555 }, { "epoch": 0.4336073568343319, "grad_norm": 0.027414470297654, "learning_rate": 0.00020926800219358088, "loss": 1.1672, "step": 1556 }, { "epoch": 0.43388602480144906, "grad_norm": 0.027031549132788185, "learning_rate": 0.00020913390976035843, "loss": 1.1171, "step": 1557 }, { "epoch": 0.43416469276856623, "grad_norm": 0.024829424262171128, "learning_rate": 0.00020899976135134804, "loss": 1.1567, "step": 1558 }, { "epoch": 0.43444336073568346, "grad_norm": 0.027739635408528786, "learning_rate": 0.00020886555709353382, "loss": 1.1857, "step": 1559 }, { "epoch": 0.43472202870280063, "grad_norm": 0.026158305847549034, "learning_rate": 0.0002087312971139527, "loss": 1.115, "step": 1560 }, { "epoch": 0.4350006966699178, "grad_norm": 0.027126518426178182, "learning_rate": 0.0002085969815396942, "loss": 1.1866, "step": 1561 }, { "epoch": 0.435279364637035, "grad_norm": 0.026890997880074506, "learning_rate": 0.0002084626104979007, "loss": 1.1889, "step": 1562 }, { "epoch": 0.43555803260415216, "grad_norm": 0.025299926215703913, "learning_rate": 0.000208328184115767, "loss": 1.1793, "step": 1563 }, { "epoch": 0.43583670057126933, "grad_norm": 0.023945857642281108, "learning_rate": 0.00020819370252054019, "loss": 1.1516, "step": 1564 }, { "epoch": 0.4361153685383865, "grad_norm": 0.024101136866942395, "learning_rate": 0.00020805916583951982, "loss": 1.1796, "step": 1565 }, { "epoch": 0.4363940365055037, "grad_norm": 0.025905967224530545, "learning_rate": 0.00020792457420005736, "loss": 1.1932, "step": 1566 }, { "epoch": 0.43667270447262085, "grad_norm": 0.025578161011615787, "learning_rate": 0.00020778992772955645, "loss": 1.128, "step": 1567 }, { "epoch": 0.43695137243973803, "grad_norm": 0.024843470643754452, "learning_rate": 0.00020765522655547257, "loss": 1.152, "step": 1568 }, { "epoch": 0.43723004040685526, "grad_norm": 0.023726542227700518, "learning_rate": 0.00020752047080531308, "loss": 1.1774, "step": 1569 }, { "epoch": 0.43750870837397243, "grad_norm": 0.02415820373618974, "learning_rate": 0.00020738566060663684, "loss": 1.2491, "step": 1570 }, { "epoch": 0.4377873763410896, "grad_norm": 0.02723929880385772, "learning_rate": 0.00020725079608705443, "loss": 1.2112, "step": 1571 }, { "epoch": 0.4380660443082068, "grad_norm": 0.026402121561320924, "learning_rate": 0.00020711587737422764, "loss": 1.0867, "step": 1572 }, { "epoch": 0.43834471227532396, "grad_norm": 0.02243030918214032, "learning_rate": 0.00020698090459586978, "loss": 1.1133, "step": 1573 }, { "epoch": 0.43862338024244113, "grad_norm": 0.024005617195761757, "learning_rate": 0.00020684587787974518, "loss": 1.0913, "step": 1574 }, { "epoch": 0.4389020482095583, "grad_norm": 0.023923351296945082, "learning_rate": 0.0002067107973536693, "loss": 1.1557, "step": 1575 }, { "epoch": 0.4391807161766755, "grad_norm": 0.024349592958104852, "learning_rate": 0.00020657566314550852, "loss": 1.1502, "step": 1576 }, { "epoch": 0.43945938414379265, "grad_norm": 0.025902789465568694, "learning_rate": 0.00020644047538318001, "loss": 1.1233, "step": 1577 }, { "epoch": 0.4397380521109098, "grad_norm": 0.024404764613513732, "learning_rate": 0.0002063052341946517, "loss": 1.1936, "step": 1578 }, { "epoch": 0.44001672007802706, "grad_norm": 0.024007668543485523, "learning_rate": 0.00020616993970794205, "loss": 1.1976, "step": 1579 }, { "epoch": 0.44029538804514423, "grad_norm": 0.023620581112956302, "learning_rate": 0.00020603459205111992, "loss": 1.107, "step": 1580 }, { "epoch": 0.4405740560122614, "grad_norm": 0.02593183754175461, "learning_rate": 0.0002058991913523046, "loss": 1.1256, "step": 1581 }, { "epoch": 0.4408527239793786, "grad_norm": 0.023973874172034226, "learning_rate": 0.0002057637377396655, "loss": 1.1459, "step": 1582 }, { "epoch": 0.44113139194649575, "grad_norm": 0.023230320506796572, "learning_rate": 0.0002056282313414222, "loss": 1.1238, "step": 1583 }, { "epoch": 0.44141005991361293, "grad_norm": 0.02659996961137214, "learning_rate": 0.0002054926722858442, "loss": 1.1541, "step": 1584 }, { "epoch": 0.4416887278807301, "grad_norm": 0.023945965039349253, "learning_rate": 0.00020535706070125086, "loss": 1.1623, "step": 1585 }, { "epoch": 0.4419673958478473, "grad_norm": 0.024923797032605777, "learning_rate": 0.00020522139671601118, "loss": 1.1581, "step": 1586 }, { "epoch": 0.44224606381496445, "grad_norm": 0.025738463184374862, "learning_rate": 0.00020508568045854393, "loss": 1.2037, "step": 1587 }, { "epoch": 0.4425247317820816, "grad_norm": 0.024545439788699913, "learning_rate": 0.00020494991205731724, "loss": 1.1577, "step": 1588 }, { "epoch": 0.44280339974919886, "grad_norm": 0.024615950063866684, "learning_rate": 0.00020481409164084858, "loss": 1.1318, "step": 1589 }, { "epoch": 0.44308206771631603, "grad_norm": 0.025696025114572023, "learning_rate": 0.00020467821933770479, "loss": 1.1811, "step": 1590 }, { "epoch": 0.4433607356834332, "grad_norm": 0.025882213101516307, "learning_rate": 0.00020454229527650168, "loss": 1.1764, "step": 1591 }, { "epoch": 0.4436394036505504, "grad_norm": 0.024656030273283426, "learning_rate": 0.00020440631958590407, "loss": 1.1727, "step": 1592 }, { "epoch": 0.44391807161766755, "grad_norm": 0.023470017761744122, "learning_rate": 0.0002042702923946258, "loss": 1.2267, "step": 1593 }, { "epoch": 0.4441967395847847, "grad_norm": 0.024288949524878922, "learning_rate": 0.0002041342138314293, "loss": 1.1428, "step": 1594 }, { "epoch": 0.4444754075519019, "grad_norm": 0.025969083168423824, "learning_rate": 0.00020399808402512566, "loss": 1.1987, "step": 1595 }, { "epoch": 0.4447540755190191, "grad_norm": 0.02562227271106915, "learning_rate": 0.0002038619031045746, "loss": 1.1331, "step": 1596 }, { "epoch": 0.44503274348613625, "grad_norm": 0.023556914273332473, "learning_rate": 0.00020372567119868399, "loss": 1.1691, "step": 1597 }, { "epoch": 0.4453114114532534, "grad_norm": 0.023636203470480486, "learning_rate": 0.0002035893884364102, "loss": 1.1176, "step": 1598 }, { "epoch": 0.44559007942037065, "grad_norm": 0.02537603664375156, "learning_rate": 0.0002034530549467576, "loss": 1.1749, "step": 1599 }, { "epoch": 0.44586874738748783, "grad_norm": 0.024926774373441204, "learning_rate": 0.00020331667085877862, "loss": 1.2075, "step": 1600 }, { "epoch": 0.446147415354605, "grad_norm": 0.02430315800871197, "learning_rate": 0.00020318023630157357, "loss": 1.1205, "step": 1601 }, { "epoch": 0.4464260833217222, "grad_norm": 0.024468278415437347, "learning_rate": 0.00020304375140429051, "loss": 1.2412, "step": 1602 }, { "epoch": 0.44670475128883935, "grad_norm": 0.02576681722165859, "learning_rate": 0.00020290721629612527, "loss": 1.1997, "step": 1603 }, { "epoch": 0.4469834192559565, "grad_norm": 0.02382789517581475, "learning_rate": 0.00020277063110632108, "loss": 1.1265, "step": 1604 }, { "epoch": 0.4472620872230737, "grad_norm": 0.0239631991140649, "learning_rate": 0.00020263399596416864, "loss": 1.178, "step": 1605 }, { "epoch": 0.4475407551901909, "grad_norm": 0.02519073680896622, "learning_rate": 0.00020249731099900592, "loss": 1.1365, "step": 1606 }, { "epoch": 0.44781942315730805, "grad_norm": 0.02236555321394603, "learning_rate": 0.00020236057634021802, "loss": 1.1563, "step": 1607 }, { "epoch": 0.4480980911244252, "grad_norm": 0.02409935835933986, "learning_rate": 0.00020222379211723714, "loss": 1.1376, "step": 1608 }, { "epoch": 0.44837675909154245, "grad_norm": 0.02346207369159516, "learning_rate": 0.00020208695845954234, "loss": 1.164, "step": 1609 }, { "epoch": 0.44865542705865963, "grad_norm": 0.028084306151963327, "learning_rate": 0.00020195007549665955, "loss": 1.1595, "step": 1610 }, { "epoch": 0.4489340950257768, "grad_norm": 0.023602371165663803, "learning_rate": 0.00020181314335816127, "loss": 1.1651, "step": 1611 }, { "epoch": 0.449212762992894, "grad_norm": 0.02483033669692067, "learning_rate": 0.0002016761621736666, "loss": 1.1692, "step": 1612 }, { "epoch": 0.44949143096001115, "grad_norm": 0.025062099636604586, "learning_rate": 0.00020153913207284117, "loss": 1.1167, "step": 1613 }, { "epoch": 0.4497700989271283, "grad_norm": 0.02656390635296763, "learning_rate": 0.00020140205318539667, "loss": 1.1741, "step": 1614 }, { "epoch": 0.4500487668942455, "grad_norm": 0.024546842860524214, "learning_rate": 0.00020126492564109124, "loss": 1.1937, "step": 1615 }, { "epoch": 0.4503274348613627, "grad_norm": 0.02408396163438303, "learning_rate": 0.00020112774956972882, "loss": 1.0799, "step": 1616 }, { "epoch": 0.45060610282847985, "grad_norm": 0.026071588734665294, "learning_rate": 0.00020099052510115956, "loss": 1.0959, "step": 1617 }, { "epoch": 0.450884770795597, "grad_norm": 0.024702320239948155, "learning_rate": 0.0002008532523652792, "loss": 1.1202, "step": 1618 }, { "epoch": 0.45116343876271425, "grad_norm": 0.02273290331824224, "learning_rate": 0.00020071593149202923, "loss": 1.1832, "step": 1619 }, { "epoch": 0.4514421067298314, "grad_norm": 0.024538384441338188, "learning_rate": 0.0002005785626113968, "loss": 1.1723, "step": 1620 }, { "epoch": 0.4517207746969486, "grad_norm": 0.024850539744582983, "learning_rate": 0.0002004411458534144, "loss": 1.148, "step": 1621 }, { "epoch": 0.4519994426640658, "grad_norm": 0.02562314583081585, "learning_rate": 0.00020030368134815981, "loss": 1.1258, "step": 1622 }, { "epoch": 0.45227811063118295, "grad_norm": 0.026299270022211874, "learning_rate": 0.0002001661692257562, "loss": 1.1531, "step": 1623 }, { "epoch": 0.4525567785983001, "grad_norm": 0.024635262052043343, "learning_rate": 0.00020002860961637156, "loss": 1.0676, "step": 1624 }, { "epoch": 0.4528354465654173, "grad_norm": 0.02637802954350839, "learning_rate": 0.000199891002650219, "loss": 1.1786, "step": 1625 }, { "epoch": 0.4531141145325345, "grad_norm": 0.025006764358784746, "learning_rate": 0.0001997533484575564, "loss": 1.2232, "step": 1626 }, { "epoch": 0.45339278249965165, "grad_norm": 0.027679535108097945, "learning_rate": 0.0001996156471686864, "loss": 1.1124, "step": 1627 }, { "epoch": 0.4536714504667688, "grad_norm": 0.024842009094251365, "learning_rate": 0.0001994778989139561, "loss": 1.2171, "step": 1628 }, { "epoch": 0.45395011843388605, "grad_norm": 0.025159659380485363, "learning_rate": 0.00019934010382375718, "loss": 1.1538, "step": 1629 }, { "epoch": 0.4542287864010032, "grad_norm": 0.024858357837689065, "learning_rate": 0.00019920226202852558, "loss": 1.1747, "step": 1630 }, { "epoch": 0.4545074543681204, "grad_norm": 0.024120463057778155, "learning_rate": 0.00019906437365874153, "loss": 1.1099, "step": 1631 }, { "epoch": 0.4547861223352376, "grad_norm": 0.02262844555002272, "learning_rate": 0.0001989264388449292, "loss": 1.2386, "step": 1632 }, { "epoch": 0.45506479030235475, "grad_norm": 0.02403408729565159, "learning_rate": 0.00019878845771765697, "loss": 1.1487, "step": 1633 }, { "epoch": 0.4553434582694719, "grad_norm": 0.02519307640407194, "learning_rate": 0.00019865043040753676, "loss": 1.2241, "step": 1634 }, { "epoch": 0.4556221262365891, "grad_norm": 0.029847115474303302, "learning_rate": 0.00019851235704522447, "loss": 1.1145, "step": 1635 }, { "epoch": 0.45590079420370627, "grad_norm": 0.024418316719218326, "learning_rate": 0.00019837423776141943, "loss": 1.2378, "step": 1636 }, { "epoch": 0.45617946217082345, "grad_norm": 0.026084926662496136, "learning_rate": 0.00019823607268686444, "loss": 1.1675, "step": 1637 }, { "epoch": 0.4564581301379406, "grad_norm": 0.025873445138592535, "learning_rate": 0.00019809786195234576, "loss": 1.1857, "step": 1638 }, { "epoch": 0.45673679810505785, "grad_norm": 0.024019046348472226, "learning_rate": 0.00019795960568869277, "loss": 1.1923, "step": 1639 }, { "epoch": 0.457015466072175, "grad_norm": 0.024958290887717522, "learning_rate": 0.00019782130402677797, "loss": 1.1934, "step": 1640 }, { "epoch": 0.4572941340392922, "grad_norm": 0.024075322222405906, "learning_rate": 0.00019768295709751688, "loss": 1.2103, "step": 1641 }, { "epoch": 0.4575728020064094, "grad_norm": 0.02494283230802397, "learning_rate": 0.00019754456503186786, "loss": 1.2107, "step": 1642 }, { "epoch": 0.45785146997352655, "grad_norm": 0.02845423481528982, "learning_rate": 0.00019740612796083186, "loss": 1.1557, "step": 1643 }, { "epoch": 0.4581301379406437, "grad_norm": 0.024170537967059376, "learning_rate": 0.0001972676460154527, "loss": 1.1765, "step": 1644 }, { "epoch": 0.4584088059077609, "grad_norm": 0.026344058218322888, "learning_rate": 0.00019712911932681635, "loss": 1.1332, "step": 1645 }, { "epoch": 0.45868747387487807, "grad_norm": 0.024986030682437023, "learning_rate": 0.00019699054802605146, "loss": 1.1251, "step": 1646 }, { "epoch": 0.45896614184199525, "grad_norm": 0.025162941054403688, "learning_rate": 0.00019685193224432867, "loss": 1.1545, "step": 1647 }, { "epoch": 0.4592448098091124, "grad_norm": 0.02455913493020572, "learning_rate": 0.00019671327211286075, "loss": 1.1369, "step": 1648 }, { "epoch": 0.45952347777622965, "grad_norm": 0.02566627993103397, "learning_rate": 0.00019657456776290267, "loss": 1.2063, "step": 1649 }, { "epoch": 0.4598021457433468, "grad_norm": 0.027540030870801466, "learning_rate": 0.00019643581932575097, "loss": 1.1978, "step": 1650 }, { "epoch": 0.460080813710464, "grad_norm": 0.024154875522037364, "learning_rate": 0.00019629702693274413, "loss": 1.1546, "step": 1651 }, { "epoch": 0.4603594816775812, "grad_norm": 0.023745385360663538, "learning_rate": 0.0001961581907152621, "loss": 1.1719, "step": 1652 }, { "epoch": 0.46063814964469835, "grad_norm": 0.023504295526674004, "learning_rate": 0.00019601931080472635, "loss": 1.1957, "step": 1653 }, { "epoch": 0.4609168176118155, "grad_norm": 0.027608405723115542, "learning_rate": 0.0001958803873325998, "loss": 1.129, "step": 1654 }, { "epoch": 0.4611954855789327, "grad_norm": 0.026627704978155305, "learning_rate": 0.00019574142043038656, "loss": 1.1716, "step": 1655 }, { "epoch": 0.46147415354604987, "grad_norm": 0.024262069077542366, "learning_rate": 0.0001956024102296317, "loss": 1.1188, "step": 1656 }, { "epoch": 0.46175282151316704, "grad_norm": 0.02507359078580312, "learning_rate": 0.00019546335686192155, "loss": 1.193, "step": 1657 }, { "epoch": 0.4620314894802842, "grad_norm": 0.025896144541302804, "learning_rate": 0.00019532426045888303, "loss": 1.1684, "step": 1658 }, { "epoch": 0.46231015744740145, "grad_norm": 0.025675400596120713, "learning_rate": 0.000195185121152184, "loss": 1.0934, "step": 1659 }, { "epoch": 0.4625888254145186, "grad_norm": 0.02441024386370666, "learning_rate": 0.00019504593907353286, "loss": 1.2109, "step": 1660 }, { "epoch": 0.4628674933816358, "grad_norm": 0.022318674042971467, "learning_rate": 0.00019490671435467842, "loss": 1.1828, "step": 1661 }, { "epoch": 0.46314616134875297, "grad_norm": 0.026232005951876462, "learning_rate": 0.00019476744712740998, "loss": 1.1105, "step": 1662 }, { "epoch": 0.46342482931587015, "grad_norm": 0.026422666039551884, "learning_rate": 0.00019462813752355702, "loss": 1.1029, "step": 1663 }, { "epoch": 0.4637034972829873, "grad_norm": 0.02576897107289728, "learning_rate": 0.00019448878567498916, "loss": 1.144, "step": 1664 }, { "epoch": 0.4639821652501045, "grad_norm": 0.025174010735518206, "learning_rate": 0.0001943493917136159, "loss": 1.1587, "step": 1665 }, { "epoch": 0.46426083321722167, "grad_norm": 0.024251179100672364, "learning_rate": 0.0001942099557713868, "loss": 1.1423, "step": 1666 }, { "epoch": 0.46453950118433884, "grad_norm": 0.023603928497201755, "learning_rate": 0.00019407047798029097, "loss": 1.1714, "step": 1667 }, { "epoch": 0.464818169151456, "grad_norm": 0.025881531659357786, "learning_rate": 0.00019393095847235724, "loss": 1.1863, "step": 1668 }, { "epoch": 0.46509683711857325, "grad_norm": 0.025386795370514956, "learning_rate": 0.00019379139737965388, "loss": 1.1849, "step": 1669 }, { "epoch": 0.4653755050856904, "grad_norm": 0.02387904351094167, "learning_rate": 0.0001936517948342886, "loss": 1.1465, "step": 1670 }, { "epoch": 0.4656541730528076, "grad_norm": 0.02312944805541275, "learning_rate": 0.00019351215096840817, "loss": 1.1552, "step": 1671 }, { "epoch": 0.46593284101992477, "grad_norm": 0.025313099978710837, "learning_rate": 0.00019337246591419877, "loss": 1.1948, "step": 1672 }, { "epoch": 0.46621150898704194, "grad_norm": 0.02397066673699919, "learning_rate": 0.00019323273980388528, "loss": 1.1868, "step": 1673 }, { "epoch": 0.4664901769541591, "grad_norm": 0.02691693884050899, "learning_rate": 0.00019309297276973153, "loss": 1.2333, "step": 1674 }, { "epoch": 0.4667688449212763, "grad_norm": 0.02474281498291457, "learning_rate": 0.00019295316494404027, "loss": 1.1788, "step": 1675 }, { "epoch": 0.46704751288839347, "grad_norm": 0.024478112799942067, "learning_rate": 0.00019281331645915257, "loss": 1.0958, "step": 1676 }, { "epoch": 0.46732618085551064, "grad_norm": 0.028509933785414474, "learning_rate": 0.00019267342744744813, "loss": 1.2062, "step": 1677 }, { "epoch": 0.4676048488226278, "grad_norm": 0.026398542221035503, "learning_rate": 0.0001925334980413451, "loss": 1.1179, "step": 1678 }, { "epoch": 0.46788351678974505, "grad_norm": 0.025278238530110058, "learning_rate": 0.00019239352837329974, "loss": 1.1643, "step": 1679 }, { "epoch": 0.4681621847568622, "grad_norm": 0.02671616352647338, "learning_rate": 0.0001922535185758064, "loss": 1.1891, "step": 1680 }, { "epoch": 0.4684408527239794, "grad_norm": 0.025461010768876472, "learning_rate": 0.0001921134687813976, "loss": 1.1683, "step": 1681 }, { "epoch": 0.46871952069109657, "grad_norm": 0.025973015758874313, "learning_rate": 0.00019197337912264348, "loss": 1.1359, "step": 1682 }, { "epoch": 0.46899818865821374, "grad_norm": 0.02759731839216504, "learning_rate": 0.00019183324973215206, "loss": 1.1518, "step": 1683 }, { "epoch": 0.4692768566253309, "grad_norm": 0.024336820033485557, "learning_rate": 0.00019169308074256899, "loss": 1.1295, "step": 1684 }, { "epoch": 0.4695555245924481, "grad_norm": 0.029265092908958915, "learning_rate": 0.00019155287228657728, "loss": 1.2039, "step": 1685 }, { "epoch": 0.46983419255956527, "grad_norm": 0.025582133550280422, "learning_rate": 0.00019141262449689743, "loss": 1.1635, "step": 1686 }, { "epoch": 0.47011286052668244, "grad_norm": 0.025188635951905174, "learning_rate": 0.00019127233750628714, "loss": 1.1747, "step": 1687 }, { "epoch": 0.4703915284937996, "grad_norm": 0.02497043559261305, "learning_rate": 0.0001911320114475411, "loss": 1.1418, "step": 1688 }, { "epoch": 0.4706701964609168, "grad_norm": 0.022349178666751658, "learning_rate": 0.0001909916464534912, "loss": 1.1585, "step": 1689 }, { "epoch": 0.470948864428034, "grad_norm": 0.02441028723547429, "learning_rate": 0.00019085124265700595, "loss": 1.1651, "step": 1690 }, { "epoch": 0.4712275323951512, "grad_norm": 0.025690928237464664, "learning_rate": 0.00019071080019099072, "loss": 1.1296, "step": 1691 }, { "epoch": 0.47150620036226837, "grad_norm": 0.023321120649959826, "learning_rate": 0.00019057031918838757, "loss": 1.1671, "step": 1692 }, { "epoch": 0.47178486832938554, "grad_norm": 0.02408824027248116, "learning_rate": 0.00019042979978217483, "loss": 1.155, "step": 1693 }, { "epoch": 0.4720635362965027, "grad_norm": 0.029684862032866178, "learning_rate": 0.00019028924210536734, "loss": 1.2274, "step": 1694 }, { "epoch": 0.4723422042636199, "grad_norm": 0.024946204055924633, "learning_rate": 0.00019014864629101608, "loss": 1.1971, "step": 1695 }, { "epoch": 0.47262087223073707, "grad_norm": 0.024229534629543727, "learning_rate": 0.00019000801247220816, "loss": 1.1166, "step": 1696 }, { "epoch": 0.47289954019785424, "grad_norm": 0.02489676537552863, "learning_rate": 0.00018986734078206674, "loss": 1.0965, "step": 1697 }, { "epoch": 0.4731782081649714, "grad_norm": 0.025248835996780013, "learning_rate": 0.00018972663135375064, "loss": 1.2126, "step": 1698 }, { "epoch": 0.4734568761320886, "grad_norm": 0.024026138341258378, "learning_rate": 0.00018958588432045465, "loss": 1.113, "step": 1699 }, { "epoch": 0.4737355440992058, "grad_norm": 0.026555403716927144, "learning_rate": 0.000189445099815409, "loss": 1.1576, "step": 1700 }, { "epoch": 0.474014212066323, "grad_norm": 0.023952431459650078, "learning_rate": 0.00018930427797187932, "loss": 1.0947, "step": 1701 }, { "epoch": 0.47429288003344017, "grad_norm": 0.03106424139460973, "learning_rate": 0.0001891634189231668, "loss": 1.2103, "step": 1702 }, { "epoch": 0.47457154800055734, "grad_norm": 0.029686385569865907, "learning_rate": 0.0001890225228026077, "loss": 1.1397, "step": 1703 }, { "epoch": 0.4748502159676745, "grad_norm": 0.023939201977325554, "learning_rate": 0.00018888158974357335, "loss": 1.1877, "step": 1704 }, { "epoch": 0.4751288839347917, "grad_norm": 0.029466802757705617, "learning_rate": 0.00018874061987947014, "loss": 1.196, "step": 1705 }, { "epoch": 0.47540755190190886, "grad_norm": 0.02312349616374357, "learning_rate": 0.00018859961334373928, "loss": 1.1416, "step": 1706 }, { "epoch": 0.47568621986902604, "grad_norm": 0.023782440698915414, "learning_rate": 0.00018845857026985664, "loss": 1.2426, "step": 1707 }, { "epoch": 0.4759648878361432, "grad_norm": 0.028020196122402967, "learning_rate": 0.00018831749079133272, "loss": 1.1892, "step": 1708 }, { "epoch": 0.4762435558032604, "grad_norm": 0.028054365844238014, "learning_rate": 0.00018817637504171247, "loss": 1.1753, "step": 1709 }, { "epoch": 0.4765222237703776, "grad_norm": 0.025394648376487628, "learning_rate": 0.00018803522315457517, "loss": 1.0785, "step": 1710 }, { "epoch": 0.4768008917374948, "grad_norm": 0.02488151961425068, "learning_rate": 0.00018789403526353428, "loss": 1.0993, "step": 1711 }, { "epoch": 0.47707955970461197, "grad_norm": 0.02459577851517362, "learning_rate": 0.00018775281150223744, "loss": 1.1424, "step": 1712 }, { "epoch": 0.47735822767172914, "grad_norm": 0.023902253914266293, "learning_rate": 0.00018761155200436612, "loss": 1.0968, "step": 1713 }, { "epoch": 0.4776368956388463, "grad_norm": 0.02645375488896201, "learning_rate": 0.00018747025690363567, "loss": 1.1508, "step": 1714 }, { "epoch": 0.4779155636059635, "grad_norm": 0.02487641665381476, "learning_rate": 0.00018732892633379527, "loss": 1.1581, "step": 1715 }, { "epoch": 0.47819423157308066, "grad_norm": 0.0233682398663411, "learning_rate": 0.00018718756042862737, "loss": 1.1419, "step": 1716 }, { "epoch": 0.47847289954019784, "grad_norm": 0.02741201755085294, "learning_rate": 0.00018704615932194812, "loss": 1.1656, "step": 1717 }, { "epoch": 0.478751567507315, "grad_norm": 0.02677457211177057, "learning_rate": 0.00018690472314760693, "loss": 1.1658, "step": 1718 }, { "epoch": 0.4790302354744322, "grad_norm": 0.02425705884592184, "learning_rate": 0.00018676325203948642, "loss": 1.1907, "step": 1719 }, { "epoch": 0.4793089034415494, "grad_norm": 0.02454247319598098, "learning_rate": 0.0001866217461315022, "loss": 1.2071, "step": 1720 }, { "epoch": 0.4795875714086666, "grad_norm": 0.024686314042075493, "learning_rate": 0.0001864802055576029, "loss": 1.1471, "step": 1721 }, { "epoch": 0.47986623937578377, "grad_norm": 0.025039152291191247, "learning_rate": 0.00018633863045176998, "loss": 1.1532, "step": 1722 }, { "epoch": 0.48014490734290094, "grad_norm": 0.024828005331945027, "learning_rate": 0.00018619702094801747, "loss": 1.1377, "step": 1723 }, { "epoch": 0.4804235753100181, "grad_norm": 0.024689136603341755, "learning_rate": 0.00018605537718039208, "loss": 1.171, "step": 1724 }, { "epoch": 0.4807022432771353, "grad_norm": 0.02404098273206446, "learning_rate": 0.00018591369928297295, "loss": 1.1516, "step": 1725 }, { "epoch": 0.48098091124425246, "grad_norm": 0.025703631660318218, "learning_rate": 0.00018577198738987138, "loss": 1.1649, "step": 1726 }, { "epoch": 0.48125957921136964, "grad_norm": 0.02513758969385734, "learning_rate": 0.00018563024163523105, "loss": 1.1513, "step": 1727 }, { "epoch": 0.4815382471784868, "grad_norm": 0.02471052492705831, "learning_rate": 0.0001854884621532276, "loss": 1.0935, "step": 1728 }, { "epoch": 0.481816915145604, "grad_norm": 0.02328626925250442, "learning_rate": 0.0001853466490780685, "loss": 1.1074, "step": 1729 }, { "epoch": 0.4820955831127212, "grad_norm": 0.02516084968869626, "learning_rate": 0.00018520480254399327, "loss": 1.1635, "step": 1730 }, { "epoch": 0.4823742510798384, "grad_norm": 0.02424676864813352, "learning_rate": 0.00018506292268527283, "loss": 1.0945, "step": 1731 }, { "epoch": 0.48265291904695556, "grad_norm": 0.024969372714886598, "learning_rate": 0.00018492100963620977, "loss": 1.1709, "step": 1732 }, { "epoch": 0.48293158701407274, "grad_norm": 0.025982078153140555, "learning_rate": 0.00018477906353113818, "loss": 1.1846, "step": 1733 }, { "epoch": 0.4832102549811899, "grad_norm": 0.023103525421107055, "learning_rate": 0.00018463708450442332, "loss": 1.181, "step": 1734 }, { "epoch": 0.4834889229483071, "grad_norm": 0.02614277138074464, "learning_rate": 0.00018449507269046152, "loss": 1.1538, "step": 1735 }, { "epoch": 0.48376759091542426, "grad_norm": 0.02578878371141229, "learning_rate": 0.00018435302822368047, "loss": 1.195, "step": 1736 }, { "epoch": 0.48404625888254144, "grad_norm": 0.02446397660267788, "learning_rate": 0.0001842109512385384, "loss": 1.1672, "step": 1737 }, { "epoch": 0.4843249268496586, "grad_norm": 0.024492154012716195, "learning_rate": 0.00018406884186952465, "loss": 1.1564, "step": 1738 }, { "epoch": 0.4846035948167758, "grad_norm": 0.02355306907383123, "learning_rate": 0.00018392670025115895, "loss": 1.16, "step": 1739 }, { "epoch": 0.484882262783893, "grad_norm": 0.02382264315426165, "learning_rate": 0.00018378452651799166, "loss": 1.1648, "step": 1740 }, { "epoch": 0.4851609307510102, "grad_norm": 0.02511506246106325, "learning_rate": 0.00018364232080460356, "loss": 1.127, "step": 1741 }, { "epoch": 0.48543959871812736, "grad_norm": 0.026544896110418454, "learning_rate": 0.00018350008324560574, "loss": 1.1972, "step": 1742 }, { "epoch": 0.48571826668524454, "grad_norm": 0.02580010551042308, "learning_rate": 0.00018335781397563935, "loss": 1.1727, "step": 1743 }, { "epoch": 0.4859969346523617, "grad_norm": 0.02434460478743905, "learning_rate": 0.00018321551312937552, "loss": 1.1157, "step": 1744 }, { "epoch": 0.4862756026194789, "grad_norm": 0.028291779665673315, "learning_rate": 0.00018307318084151544, "loss": 1.1038, "step": 1745 }, { "epoch": 0.48655427058659606, "grad_norm": 0.026892136462868737, "learning_rate": 0.00018293081724678992, "loss": 1.1737, "step": 1746 }, { "epoch": 0.48683293855371323, "grad_norm": 0.024100538683870104, "learning_rate": 0.0001827884224799594, "loss": 1.1327, "step": 1747 }, { "epoch": 0.4871116065208304, "grad_norm": 0.02393567067393253, "learning_rate": 0.00018264599667581395, "loss": 1.1872, "step": 1748 }, { "epoch": 0.4873902744879476, "grad_norm": 0.024177891624378747, "learning_rate": 0.00018250353996917293, "loss": 1.2274, "step": 1749 }, { "epoch": 0.4876689424550648, "grad_norm": 0.026962230386852564, "learning_rate": 0.0001823610524948849, "loss": 1.1265, "step": 1750 }, { "epoch": 0.487947610422182, "grad_norm": 0.024531087169738983, "learning_rate": 0.00018221853438782769, "loss": 1.2404, "step": 1751 }, { "epoch": 0.48822627838929916, "grad_norm": 0.02628781475763451, "learning_rate": 0.000182075985782908, "loss": 1.187, "step": 1752 }, { "epoch": 0.48850494635641634, "grad_norm": 0.027612410810177193, "learning_rate": 0.00018193340681506138, "loss": 1.1684, "step": 1753 }, { "epoch": 0.4887836143235335, "grad_norm": 0.024350711809383106, "learning_rate": 0.00018179079761925234, "loss": 1.146, "step": 1754 }, { "epoch": 0.4890622822906507, "grad_norm": 0.022452102186426477, "learning_rate": 0.00018164815833047372, "loss": 1.1407, "step": 1755 }, { "epoch": 0.48934095025776786, "grad_norm": 0.025957161894894304, "learning_rate": 0.00018150548908374702, "loss": 1.2034, "step": 1756 }, { "epoch": 0.48961961822488503, "grad_norm": 0.025538459666406006, "learning_rate": 0.0001813627900141221, "loss": 1.1429, "step": 1757 }, { "epoch": 0.4898982861920022, "grad_norm": 0.025974264629916435, "learning_rate": 0.00018122006125667695, "loss": 1.1458, "step": 1758 }, { "epoch": 0.4901769541591194, "grad_norm": 0.028302981691349996, "learning_rate": 0.00018107730294651767, "loss": 1.2587, "step": 1759 }, { "epoch": 0.4904556221262366, "grad_norm": 0.023018193696695992, "learning_rate": 0.0001809345152187785, "loss": 1.1871, "step": 1760 }, { "epoch": 0.4907342900933538, "grad_norm": 0.02418031259298892, "learning_rate": 0.00018079169820862134, "loss": 1.2022, "step": 1761 }, { "epoch": 0.49101295806047096, "grad_norm": 0.021438966673991724, "learning_rate": 0.00018064885205123582, "loss": 1.1322, "step": 1762 }, { "epoch": 0.49129162602758814, "grad_norm": 0.025459142648308394, "learning_rate": 0.00018050597688183933, "loss": 1.153, "step": 1763 }, { "epoch": 0.4915702939947053, "grad_norm": 0.02353401508816481, "learning_rate": 0.00018036307283567657, "loss": 1.136, "step": 1764 }, { "epoch": 0.4918489619618225, "grad_norm": 0.026109163886593786, "learning_rate": 0.00018022014004801957, "loss": 1.1658, "step": 1765 }, { "epoch": 0.49212762992893966, "grad_norm": 0.023881705786080957, "learning_rate": 0.00018007717865416764, "loss": 1.1372, "step": 1766 }, { "epoch": 0.49240629789605683, "grad_norm": 0.025386770020225033, "learning_rate": 0.00017993418878944716, "loss": 1.1943, "step": 1767 }, { "epoch": 0.492684965863174, "grad_norm": 0.025622829211586047, "learning_rate": 0.00017979117058921138, "loss": 1.1744, "step": 1768 }, { "epoch": 0.4929636338302912, "grad_norm": 0.02551461982392515, "learning_rate": 0.00017964812418884047, "loss": 1.1004, "step": 1769 }, { "epoch": 0.4932423017974084, "grad_norm": 0.025631682749739947, "learning_rate": 0.0001795050497237413, "loss": 1.1037, "step": 1770 }, { "epoch": 0.4935209697645256, "grad_norm": 0.028851246266982634, "learning_rate": 0.00017936194732934713, "loss": 1.1083, "step": 1771 }, { "epoch": 0.49379963773164276, "grad_norm": 0.023657843217273357, "learning_rate": 0.00017921881714111793, "loss": 1.1678, "step": 1772 }, { "epoch": 0.49407830569875993, "grad_norm": 0.02463261688807563, "learning_rate": 0.00017907565929453978, "loss": 1.0915, "step": 1773 }, { "epoch": 0.4943569736658771, "grad_norm": 0.02461702773801821, "learning_rate": 0.00017893247392512495, "loss": 1.1966, "step": 1774 }, { "epoch": 0.4946356416329943, "grad_norm": 0.023434224233865507, "learning_rate": 0.0001787892611684119, "loss": 1.1705, "step": 1775 }, { "epoch": 0.49491430960011146, "grad_norm": 0.02557059267254964, "learning_rate": 0.0001786460211599649, "loss": 1.1421, "step": 1776 }, { "epoch": 0.49519297756722863, "grad_norm": 0.022899308206743806, "learning_rate": 0.000178502754035374, "loss": 1.1113, "step": 1777 }, { "epoch": 0.4954716455343458, "grad_norm": 0.02606570562824978, "learning_rate": 0.00017835945993025496, "loss": 1.1405, "step": 1778 }, { "epoch": 0.495750313501463, "grad_norm": 0.02518713186283713, "learning_rate": 0.00017821613898024916, "loss": 1.1904, "step": 1779 }, { "epoch": 0.4960289814685802, "grad_norm": 0.025657479609674627, "learning_rate": 0.0001780727913210233, "loss": 1.1199, "step": 1780 }, { "epoch": 0.4963076494356974, "grad_norm": 0.024032990605059635, "learning_rate": 0.00017792941708826923, "loss": 1.1113, "step": 1781 }, { "epoch": 0.49658631740281456, "grad_norm": 0.022670564758132707, "learning_rate": 0.00017778601641770428, "loss": 1.1769, "step": 1782 }, { "epoch": 0.49686498536993173, "grad_norm": 0.026157538464098837, "learning_rate": 0.00017764258944507052, "loss": 1.1365, "step": 1783 }, { "epoch": 0.4971436533370489, "grad_norm": 0.02598529587270922, "learning_rate": 0.0001774991363061351, "loss": 1.192, "step": 1784 }, { "epoch": 0.4974223213041661, "grad_norm": 0.02890486854836874, "learning_rate": 0.00017735565713668976, "loss": 1.1911, "step": 1785 }, { "epoch": 0.49770098927128326, "grad_norm": 0.025285914164277896, "learning_rate": 0.00017721215207255105, "loss": 1.174, "step": 1786 }, { "epoch": 0.49797965723840043, "grad_norm": 0.024143239993669696, "learning_rate": 0.00017706862124956, "loss": 1.1723, "step": 1787 }, { "epoch": 0.4982583252055176, "grad_norm": 0.02431211889312795, "learning_rate": 0.00017692506480358186, "loss": 1.165, "step": 1788 }, { "epoch": 0.4985369931726348, "grad_norm": 0.025016138275136, "learning_rate": 0.0001767814828705064, "loss": 1.1379, "step": 1789 }, { "epoch": 0.498815661139752, "grad_norm": 0.024952315663374202, "learning_rate": 0.00017663787558624727, "loss": 1.1453, "step": 1790 }, { "epoch": 0.4990943291068692, "grad_norm": 0.025249085957547122, "learning_rate": 0.0001764942430867423, "loss": 1.1485, "step": 1791 }, { "epoch": 0.49937299707398636, "grad_norm": 0.02559346198553103, "learning_rate": 0.00017635058550795308, "loss": 1.2545, "step": 1792 }, { "epoch": 0.49965166504110353, "grad_norm": 0.023438346219043473, "learning_rate": 0.00017620690298586491, "loss": 1.1948, "step": 1793 }, { "epoch": 0.4999303330082207, "grad_norm": 0.024528610448889737, "learning_rate": 0.0001760631956564869, "loss": 1.1744, "step": 1794 }, { "epoch": 0.5002090009753379, "grad_norm": 0.024014828099792735, "learning_rate": 0.00017591946365585145, "loss": 1.1841, "step": 1795 }, { "epoch": 0.5004876689424551, "grad_norm": 0.02592951542578914, "learning_rate": 0.0001757757071200143, "loss": 1.1715, "step": 1796 }, { "epoch": 0.5007663369095723, "grad_norm": 0.024425482067620352, "learning_rate": 0.00017563192618505455, "loss": 1.1925, "step": 1797 }, { "epoch": 0.5010450048766895, "grad_norm": 0.022572500054721938, "learning_rate": 0.00017548812098707432, "loss": 1.1856, "step": 1798 }, { "epoch": 0.5013236728438066, "grad_norm": 0.02432617189822781, "learning_rate": 0.0001753442916621987, "loss": 1.1447, "step": 1799 }, { "epoch": 0.5016023408109238, "grad_norm": 0.02308166058736092, "learning_rate": 0.00017520043834657565, "loss": 1.1563, "step": 1800 }, { "epoch": 0.501881008778041, "grad_norm": 0.0235133862563712, "learning_rate": 0.00017505656117637583, "loss": 1.1623, "step": 1801 }, { "epoch": 0.5021596767451582, "grad_norm": 0.023737936880592475, "learning_rate": 0.00017491266028779237, "loss": 1.1318, "step": 1802 }, { "epoch": 0.5024383447122753, "grad_norm": 0.023970472897052876, "learning_rate": 0.00017476873581704104, "loss": 1.1419, "step": 1803 }, { "epoch": 0.5027170126793925, "grad_norm": 0.022815568526976922, "learning_rate": 0.00017462478790035982, "loss": 1.1625, "step": 1804 }, { "epoch": 0.5029956806465097, "grad_norm": 0.02439694394776628, "learning_rate": 0.00017448081667400888, "loss": 1.1085, "step": 1805 }, { "epoch": 0.5032743486136269, "grad_norm": 0.026432247438822176, "learning_rate": 0.00017433682227427054, "loss": 1.169, "step": 1806 }, { "epoch": 0.503553016580744, "grad_norm": 0.024863673659109964, "learning_rate": 0.00017419280483744893, "loss": 1.1823, "step": 1807 }, { "epoch": 0.5038316845478612, "grad_norm": 0.02491397897612491, "learning_rate": 0.00017404876449987006, "loss": 1.2283, "step": 1808 }, { "epoch": 0.5041103525149784, "grad_norm": 0.025390143062776583, "learning_rate": 0.00017390470139788165, "loss": 1.2743, "step": 1809 }, { "epoch": 0.5043890204820956, "grad_norm": 0.023111132458329915, "learning_rate": 0.0001737606156678529, "loss": 1.1322, "step": 1810 }, { "epoch": 0.5046676884492127, "grad_norm": 0.029096896529517125, "learning_rate": 0.00017361650744617444, "loss": 1.138, "step": 1811 }, { "epoch": 0.5049463564163299, "grad_norm": 0.024116079029949396, "learning_rate": 0.00017347237686925825, "loss": 1.1632, "step": 1812 }, { "epoch": 0.5052250243834471, "grad_norm": 0.02384624763041423, "learning_rate": 0.0001733282240735374, "loss": 1.1431, "step": 1813 }, { "epoch": 0.5055036923505642, "grad_norm": 0.023940688237840783, "learning_rate": 0.00017318404919546603, "loss": 1.1542, "step": 1814 }, { "epoch": 0.5057823603176815, "grad_norm": 0.026757152342794084, "learning_rate": 0.0001730398523715192, "loss": 1.1103, "step": 1815 }, { "epoch": 0.5060610282847987, "grad_norm": 0.023863782148435677, "learning_rate": 0.00017289563373819268, "loss": 1.1858, "step": 1816 }, { "epoch": 0.5063396962519159, "grad_norm": 0.023923162156453938, "learning_rate": 0.0001727513934320029, "loss": 1.1218, "step": 1817 }, { "epoch": 0.5066183642190331, "grad_norm": 0.025070759728936673, "learning_rate": 0.00017260713158948692, "loss": 1.2453, "step": 1818 }, { "epoch": 0.5068970321861502, "grad_norm": 0.026662800190617245, "learning_rate": 0.00017246284834720205, "loss": 1.1354, "step": 1819 }, { "epoch": 0.5071757001532674, "grad_norm": 0.023545661138986825, "learning_rate": 0.00017231854384172583, "loss": 1.1451, "step": 1820 }, { "epoch": 0.5074543681203846, "grad_norm": 0.025659177074887365, "learning_rate": 0.0001721742182096561, "loss": 1.1586, "step": 1821 }, { "epoch": 0.5077330360875018, "grad_norm": 0.02541832620884695, "learning_rate": 0.00017202987158761057, "loss": 1.1639, "step": 1822 }, { "epoch": 0.5080117040546189, "grad_norm": 0.02374589056560375, "learning_rate": 0.00017188550411222683, "loss": 1.0897, "step": 1823 }, { "epoch": 0.5082903720217361, "grad_norm": 0.02280668413710658, "learning_rate": 0.00017174111592016224, "loss": 1.1126, "step": 1824 }, { "epoch": 0.5085690399888533, "grad_norm": 0.025019554048878956, "learning_rate": 0.00017159670714809373, "loss": 1.1568, "step": 1825 }, { "epoch": 0.5088477079559705, "grad_norm": 0.02301409759692741, "learning_rate": 0.00017145227793271775, "loss": 1.1732, "step": 1826 }, { "epoch": 0.5091263759230876, "grad_norm": 0.025474102295794263, "learning_rate": 0.00017130782841075015, "loss": 1.1659, "step": 1827 }, { "epoch": 0.5094050438902048, "grad_norm": 0.02361473800929889, "learning_rate": 0.00017116335871892585, "loss": 1.1445, "step": 1828 }, { "epoch": 0.509683711857322, "grad_norm": 0.023115137714946783, "learning_rate": 0.00017101886899399896, "loss": 1.1735, "step": 1829 }, { "epoch": 0.5099623798244391, "grad_norm": 0.023221674681276082, "learning_rate": 0.00017087435937274264, "loss": 1.1359, "step": 1830 }, { "epoch": 0.5102410477915563, "grad_norm": 0.025115643731132384, "learning_rate": 0.00017072982999194868, "loss": 1.1413, "step": 1831 }, { "epoch": 0.5105197157586735, "grad_norm": 0.024108524472041832, "learning_rate": 0.00017058528098842774, "loss": 1.2001, "step": 1832 }, { "epoch": 0.5107983837257907, "grad_norm": 0.023175911475369838, "learning_rate": 0.000170440712499009, "loss": 1.1513, "step": 1833 }, { "epoch": 0.5110770516929078, "grad_norm": 0.021974634733929674, "learning_rate": 0.00017029612466054007, "loss": 1.1593, "step": 1834 }, { "epoch": 0.5113557196600251, "grad_norm": 0.024668666089750074, "learning_rate": 0.00017015151760988695, "loss": 1.2006, "step": 1835 }, { "epoch": 0.5116343876271423, "grad_norm": 0.023219537371875928, "learning_rate": 0.00017000689148393366, "loss": 1.1246, "step": 1836 }, { "epoch": 0.5119130555942595, "grad_norm": 0.024075955313052004, "learning_rate": 0.00016986224641958245, "loss": 1.1832, "step": 1837 }, { "epoch": 0.5121917235613767, "grad_norm": 0.023506396621104108, "learning_rate": 0.00016971758255375342, "loss": 1.1669, "step": 1838 }, { "epoch": 0.5124703915284938, "grad_norm": 0.023199163233280792, "learning_rate": 0.00016957290002338448, "loss": 1.1261, "step": 1839 }, { "epoch": 0.512749059495611, "grad_norm": 0.02370397489402682, "learning_rate": 0.00016942819896543124, "loss": 1.1548, "step": 1840 }, { "epoch": 0.5130277274627282, "grad_norm": 0.022185210573773206, "learning_rate": 0.0001692834795168667, "loss": 1.1408, "step": 1841 }, { "epoch": 0.5133063954298454, "grad_norm": 0.022597591867991026, "learning_rate": 0.0001691387418146815, "loss": 1.1399, "step": 1842 }, { "epoch": 0.5135850633969625, "grad_norm": 0.022791668118878426, "learning_rate": 0.00016899398599588342, "loss": 1.1258, "step": 1843 }, { "epoch": 0.5138637313640797, "grad_norm": 0.02266984656584699, "learning_rate": 0.00016884921219749736, "loss": 1.1895, "step": 1844 }, { "epoch": 0.5141423993311969, "grad_norm": 0.023012428614018407, "learning_rate": 0.00016870442055656528, "loss": 1.139, "step": 1845 }, { "epoch": 0.514421067298314, "grad_norm": 0.0228339340649838, "learning_rate": 0.00016855961121014617, "loss": 1.1061, "step": 1846 }, { "epoch": 0.5146997352654312, "grad_norm": 0.023830124736004238, "learning_rate": 0.00016841478429531546, "loss": 1.1041, "step": 1847 }, { "epoch": 0.5149784032325484, "grad_norm": 0.02668421093549402, "learning_rate": 0.00016826993994916554, "loss": 1.1645, "step": 1848 }, { "epoch": 0.5152570711996656, "grad_norm": 0.024389101429260363, "learning_rate": 0.00016812507830880509, "loss": 1.2154, "step": 1849 }, { "epoch": 0.5155357391667827, "grad_norm": 0.02384567728774905, "learning_rate": 0.0001679801995113593, "loss": 1.1189, "step": 1850 }, { "epoch": 0.5158144071338999, "grad_norm": 0.02674525395183201, "learning_rate": 0.00016783530369396944, "loss": 1.1426, "step": 1851 }, { "epoch": 0.5160930751010171, "grad_norm": 0.02407103739004482, "learning_rate": 0.00016769039099379302, "loss": 1.154, "step": 1852 }, { "epoch": 0.5163717430681343, "grad_norm": 0.024921500992866778, "learning_rate": 0.0001675454615480035, "loss": 1.1481, "step": 1853 }, { "epoch": 0.5166504110352514, "grad_norm": 0.024202705674359164, "learning_rate": 0.00016740051549379015, "loss": 1.2002, "step": 1854 }, { "epoch": 0.5169290790023687, "grad_norm": 0.02535645277691854, "learning_rate": 0.00016725555296835808, "loss": 1.1523, "step": 1855 }, { "epoch": 0.5172077469694859, "grad_norm": 0.02374523570618612, "learning_rate": 0.00016711057410892787, "loss": 1.1521, "step": 1856 }, { "epoch": 0.5174864149366031, "grad_norm": 0.02485445383452194, "learning_rate": 0.00016696557905273551, "loss": 1.1552, "step": 1857 }, { "epoch": 0.5177650829037203, "grad_norm": 0.025977084602861758, "learning_rate": 0.00016682056793703253, "loss": 1.1489, "step": 1858 }, { "epoch": 0.5180437508708374, "grad_norm": 0.024815599510868938, "learning_rate": 0.00016667554089908548, "loss": 1.1269, "step": 1859 }, { "epoch": 0.5183224188379546, "grad_norm": 0.022926744574874557, "learning_rate": 0.00016653049807617603, "loss": 1.1164, "step": 1860 }, { "epoch": 0.5186010868050718, "grad_norm": 0.026207586308354184, "learning_rate": 0.00016638543960560088, "loss": 1.1264, "step": 1861 }, { "epoch": 0.518879754772189, "grad_norm": 0.022644501655401085, "learning_rate": 0.0001662403656246714, "loss": 1.1161, "step": 1862 }, { "epoch": 0.5191584227393061, "grad_norm": 0.024109136521076473, "learning_rate": 0.00016609527627071372, "loss": 1.1579, "step": 1863 }, { "epoch": 0.5194370907064233, "grad_norm": 0.02309009683275374, "learning_rate": 0.00016595017168106853, "loss": 1.1468, "step": 1864 }, { "epoch": 0.5197157586735405, "grad_norm": 0.02534576740830142, "learning_rate": 0.00016580505199309095, "loss": 1.1439, "step": 1865 }, { "epoch": 0.5199944266406576, "grad_norm": 0.023325204422963063, "learning_rate": 0.00016565991734415026, "loss": 1.1713, "step": 1866 }, { "epoch": 0.5202730946077748, "grad_norm": 0.025309628426561752, "learning_rate": 0.00016551476787163015, "loss": 1.1741, "step": 1867 }, { "epoch": 0.520551762574892, "grad_norm": 0.023319537595517872, "learning_rate": 0.00016536960371292813, "loss": 1.1883, "step": 1868 }, { "epoch": 0.5208304305420092, "grad_norm": 0.025354185503818, "learning_rate": 0.00016522442500545564, "loss": 1.1828, "step": 1869 }, { "epoch": 0.5211090985091263, "grad_norm": 0.022907484524358864, "learning_rate": 0.000165079231886638, "loss": 1.145, "step": 1870 }, { "epoch": 0.5213877664762435, "grad_norm": 0.02195598472147525, "learning_rate": 0.0001649340244939141, "loss": 1.1907, "step": 1871 }, { "epoch": 0.5216664344433607, "grad_norm": 0.023426612564890974, "learning_rate": 0.00016478880296473628, "loss": 1.1893, "step": 1872 }, { "epoch": 0.5219451024104779, "grad_norm": 0.02405975022582663, "learning_rate": 0.00016464356743657038, "loss": 1.1529, "step": 1873 }, { "epoch": 0.522223770377595, "grad_norm": 0.023674796881500586, "learning_rate": 0.0001644983180468955, "loss": 1.1387, "step": 1874 }, { "epoch": 0.5225024383447123, "grad_norm": 0.022898980424742185, "learning_rate": 0.0001643530549332037, "loss": 1.1792, "step": 1875 }, { "epoch": 0.5227811063118295, "grad_norm": 0.02311353984260154, "learning_rate": 0.00016420777823300013, "loss": 1.1463, "step": 1876 }, { "epoch": 0.5230597742789467, "grad_norm": 0.024537592792811112, "learning_rate": 0.00016406248808380287, "loss": 1.2018, "step": 1877 }, { "epoch": 0.5233384422460639, "grad_norm": 0.026840182362888258, "learning_rate": 0.00016391718462314264, "loss": 1.1968, "step": 1878 }, { "epoch": 0.523617110213181, "grad_norm": 0.024007775935981576, "learning_rate": 0.0001637718679885628, "loss": 1.1584, "step": 1879 }, { "epoch": 0.5238957781802982, "grad_norm": 0.02367511678595606, "learning_rate": 0.0001636265383176191, "loss": 1.1527, "step": 1880 }, { "epoch": 0.5241744461474154, "grad_norm": 0.02465321363657391, "learning_rate": 0.00016348119574787975, "loss": 1.106, "step": 1881 }, { "epoch": 0.5244531141145325, "grad_norm": 0.02322266282317088, "learning_rate": 0.0001633358404169251, "loss": 1.1648, "step": 1882 }, { "epoch": 0.5247317820816497, "grad_norm": 0.023453502069420623, "learning_rate": 0.0001631904724623476, "loss": 1.1017, "step": 1883 }, { "epoch": 0.5250104500487669, "grad_norm": 0.02459287118203962, "learning_rate": 0.00016304509202175164, "loss": 1.0888, "step": 1884 }, { "epoch": 0.5252891180158841, "grad_norm": 0.023962981808889238, "learning_rate": 0.00016289969923275344, "loss": 1.1413, "step": 1885 }, { "epoch": 0.5255677859830012, "grad_norm": 0.023676642553625966, "learning_rate": 0.00016275429423298092, "loss": 1.1626, "step": 1886 }, { "epoch": 0.5258464539501184, "grad_norm": 0.024299943440213998, "learning_rate": 0.00016260887716007348, "loss": 1.1421, "step": 1887 }, { "epoch": 0.5261251219172356, "grad_norm": 0.022135945088226087, "learning_rate": 0.00016246344815168206, "loss": 1.1552, "step": 1888 }, { "epoch": 0.5264037898843528, "grad_norm": 0.02334080498656536, "learning_rate": 0.00016231800734546885, "loss": 1.1952, "step": 1889 }, { "epoch": 0.5266824578514699, "grad_norm": 0.02363663788137995, "learning_rate": 0.00016217255487910713, "loss": 1.1428, "step": 1890 }, { "epoch": 0.5269611258185871, "grad_norm": 0.02620151539264208, "learning_rate": 0.00016202709089028138, "loss": 1.1723, "step": 1891 }, { "epoch": 0.5272397937857043, "grad_norm": 0.024724540542306862, "learning_rate": 0.00016188161551668686, "loss": 1.1356, "step": 1892 }, { "epoch": 0.5275184617528215, "grad_norm": 0.024065372823930613, "learning_rate": 0.00016173612889602964, "loss": 1.1098, "step": 1893 }, { "epoch": 0.5277971297199386, "grad_norm": 0.025188707501299742, "learning_rate": 0.00016159063116602648, "loss": 1.113, "step": 1894 }, { "epoch": 0.5280757976870559, "grad_norm": 0.023233349701176312, "learning_rate": 0.00016144512246440458, "loss": 1.1577, "step": 1895 }, { "epoch": 0.5283544656541731, "grad_norm": 0.02252376023773309, "learning_rate": 0.00016129960292890158, "loss": 1.1041, "step": 1896 }, { "epoch": 0.5286331336212903, "grad_norm": 0.02303050502043964, "learning_rate": 0.00016115407269726536, "loss": 1.1244, "step": 1897 }, { "epoch": 0.5289118015884074, "grad_norm": 0.02683739618381062, "learning_rate": 0.0001610085319072539, "loss": 1.1346, "step": 1898 }, { "epoch": 0.5291904695555246, "grad_norm": 0.024381080236659153, "learning_rate": 0.0001608629806966352, "loss": 1.1696, "step": 1899 }, { "epoch": 0.5294691375226418, "grad_norm": 0.025993417124853757, "learning_rate": 0.0001607174192031872, "loss": 1.1621, "step": 1900 }, { "epoch": 0.529747805489759, "grad_norm": 0.023207482644005503, "learning_rate": 0.00016057184756469739, "loss": 1.1769, "step": 1901 }, { "epoch": 0.5300264734568761, "grad_norm": 0.02248196751536383, "learning_rate": 0.00016042626591896304, "loss": 1.1721, "step": 1902 }, { "epoch": 0.5303051414239933, "grad_norm": 0.02641877156023248, "learning_rate": 0.00016028067440379076, "loss": 1.1482, "step": 1903 }, { "epoch": 0.5305838093911105, "grad_norm": 0.02641015744557696, "learning_rate": 0.00016013507315699665, "loss": 1.1389, "step": 1904 }, { "epoch": 0.5308624773582277, "grad_norm": 0.02516112743721901, "learning_rate": 0.00015998946231640584, "loss": 1.1053, "step": 1905 }, { "epoch": 0.5311411453253448, "grad_norm": 0.02331222996070048, "learning_rate": 0.00015984384201985272, "loss": 1.1775, "step": 1906 }, { "epoch": 0.531419813292462, "grad_norm": 0.023631516189438602, "learning_rate": 0.00015969821240518055, "loss": 1.1471, "step": 1907 }, { "epoch": 0.5316984812595792, "grad_norm": 0.026282753894319895, "learning_rate": 0.00015955257361024132, "loss": 1.145, "step": 1908 }, { "epoch": 0.5319771492266964, "grad_norm": 0.025241317438279925, "learning_rate": 0.00015940692577289586, "loss": 1.1606, "step": 1909 }, { "epoch": 0.5322558171938135, "grad_norm": 0.02451282961071583, "learning_rate": 0.0001592612690310135, "loss": 1.1877, "step": 1910 }, { "epoch": 0.5325344851609307, "grad_norm": 0.024089423293278824, "learning_rate": 0.00015911560352247203, "loss": 1.1683, "step": 1911 }, { "epoch": 0.5328131531280479, "grad_norm": 0.025673521695218294, "learning_rate": 0.0001589699293851574, "loss": 1.1473, "step": 1912 }, { "epoch": 0.5330918210951651, "grad_norm": 0.0245796563252376, "learning_rate": 0.00015882424675696397, "loss": 1.1388, "step": 1913 }, { "epoch": 0.5333704890622822, "grad_norm": 0.02467363837593647, "learning_rate": 0.0001586785557757939, "loss": 1.1382, "step": 1914 }, { "epoch": 0.5336491570293995, "grad_norm": 0.024800557363038635, "learning_rate": 0.00015853285657955739, "loss": 1.1674, "step": 1915 }, { "epoch": 0.5339278249965167, "grad_norm": 0.02457206205921836, "learning_rate": 0.00015838714930617237, "loss": 1.237, "step": 1916 }, { "epoch": 0.5342064929636339, "grad_norm": 0.02305075112981261, "learning_rate": 0.00015824143409356447, "loss": 1.1414, "step": 1917 }, { "epoch": 0.534485160930751, "grad_norm": 0.0231089877894376, "learning_rate": 0.00015809571107966677, "loss": 1.1161, "step": 1918 }, { "epoch": 0.5347638288978682, "grad_norm": 0.02309339248777379, "learning_rate": 0.00015794998040241979, "loss": 1.1854, "step": 1919 }, { "epoch": 0.5350424968649854, "grad_norm": 0.0233228647714231, "learning_rate": 0.00015780424219977122, "loss": 1.1301, "step": 1920 }, { "epoch": 0.5353211648321026, "grad_norm": 0.024352817624688057, "learning_rate": 0.00015765849660967594, "loss": 1.1292, "step": 1921 }, { "epoch": 0.5355998327992197, "grad_norm": 0.025321247234847688, "learning_rate": 0.00015751274377009584, "loss": 1.1692, "step": 1922 }, { "epoch": 0.5358785007663369, "grad_norm": 0.023117237419759386, "learning_rate": 0.00015736698381899964, "loss": 1.1525, "step": 1923 }, { "epoch": 0.5361571687334541, "grad_norm": 0.028628346093362345, "learning_rate": 0.0001572212168943628, "loss": 1.2659, "step": 1924 }, { "epoch": 0.5364358367005713, "grad_norm": 0.02584086989410823, "learning_rate": 0.00015707544313416736, "loss": 1.1665, "step": 1925 }, { "epoch": 0.5367145046676884, "grad_norm": 0.02693576818880191, "learning_rate": 0.0001569296626764018, "loss": 1.1127, "step": 1926 }, { "epoch": 0.5369931726348056, "grad_norm": 0.02411493066287168, "learning_rate": 0.0001567838756590611, "loss": 1.0745, "step": 1927 }, { "epoch": 0.5372718406019228, "grad_norm": 0.026171639246334477, "learning_rate": 0.00015663808222014623, "loss": 1.1658, "step": 1928 }, { "epoch": 0.53755050856904, "grad_norm": 0.02336315077517536, "learning_rate": 0.0001564922824976644, "loss": 1.1856, "step": 1929 }, { "epoch": 0.5378291765361571, "grad_norm": 0.025021524695063063, "learning_rate": 0.0001563464766296287, "loss": 1.1655, "step": 1930 }, { "epoch": 0.5381078445032743, "grad_norm": 0.02603277233439057, "learning_rate": 0.00015620066475405804, "loss": 1.1694, "step": 1931 }, { "epoch": 0.5383865124703915, "grad_norm": 0.02430422891075571, "learning_rate": 0.00015605484700897704, "loss": 1.1561, "step": 1932 }, { "epoch": 0.5386651804375087, "grad_norm": 0.02361203175280372, "learning_rate": 0.00015590902353241584, "loss": 1.1025, "step": 1933 }, { "epoch": 0.5389438484046258, "grad_norm": 0.02437244871128802, "learning_rate": 0.00015576319446241008, "loss": 1.1269, "step": 1934 }, { "epoch": 0.5392225163717431, "grad_norm": 0.02596959914528882, "learning_rate": 0.00015561735993700063, "loss": 1.1858, "step": 1935 }, { "epoch": 0.5395011843388603, "grad_norm": 0.02313136502744154, "learning_rate": 0.00015547152009423343, "loss": 1.1816, "step": 1936 }, { "epoch": 0.5397798523059775, "grad_norm": 0.02421874720048326, "learning_rate": 0.0001553256750721597, "loss": 1.1543, "step": 1937 }, { "epoch": 0.5400585202730946, "grad_norm": 0.02511933267142888, "learning_rate": 0.00015517982500883536, "loss": 1.1885, "step": 1938 }, { "epoch": 0.5403371882402118, "grad_norm": 0.02401699164716795, "learning_rate": 0.00015503397004232116, "loss": 1.2172, "step": 1939 }, { "epoch": 0.540615856207329, "grad_norm": 0.024232839845880427, "learning_rate": 0.0001548881103106825, "loss": 1.1039, "step": 1940 }, { "epoch": 0.5408945241744462, "grad_norm": 0.0231836116976465, "learning_rate": 0.00015474224595198933, "loss": 1.1628, "step": 1941 }, { "epoch": 0.5411731921415633, "grad_norm": 0.022708712108271252, "learning_rate": 0.00015459637710431583, "loss": 1.1689, "step": 1942 }, { "epoch": 0.5414518601086805, "grad_norm": 0.02339326063204592, "learning_rate": 0.00015445050390574066, "loss": 1.1382, "step": 1943 }, { "epoch": 0.5417305280757977, "grad_norm": 0.022797973779775712, "learning_rate": 0.00015430462649434644, "loss": 1.1103, "step": 1944 }, { "epoch": 0.5420091960429149, "grad_norm": 0.023898739325758315, "learning_rate": 0.00015415874500821972, "loss": 1.1338, "step": 1945 }, { "epoch": 0.542287864010032, "grad_norm": 0.023814536860968023, "learning_rate": 0.00015401285958545113, "loss": 1.1396, "step": 1946 }, { "epoch": 0.5425665319771492, "grad_norm": 0.026110188054705014, "learning_rate": 0.00015386697036413484, "loss": 1.1604, "step": 1947 }, { "epoch": 0.5428451999442664, "grad_norm": 0.02404607043500464, "learning_rate": 0.00015372107748236864, "loss": 1.1706, "step": 1948 }, { "epoch": 0.5431238679113836, "grad_norm": 0.023401467330312734, "learning_rate": 0.00015357518107825388, "loss": 1.0898, "step": 1949 }, { "epoch": 0.5434025358785007, "grad_norm": 0.025251706232968626, "learning_rate": 0.00015342928128989514, "loss": 1.1537, "step": 1950 }, { "epoch": 0.5436812038456179, "grad_norm": 0.023777870970899343, "learning_rate": 0.00015328337825540022, "loss": 1.1513, "step": 1951 }, { "epoch": 0.5439598718127351, "grad_norm": 0.02416596111004471, "learning_rate": 0.0001531374721128801, "loss": 1.1472, "step": 1952 }, { "epoch": 0.5442385397798523, "grad_norm": 0.02483243012441013, "learning_rate": 0.00015299156300044853, "loss": 1.1912, "step": 1953 }, { "epoch": 0.5445172077469694, "grad_norm": 0.024895544607675343, "learning_rate": 0.00015284565105622218, "loss": 1.2015, "step": 1954 }, { "epoch": 0.5447958757140867, "grad_norm": 0.026244093022188567, "learning_rate": 0.0001526997364183204, "loss": 1.1912, "step": 1955 }, { "epoch": 0.5450745436812039, "grad_norm": 0.022719426356797235, "learning_rate": 0.0001525538192248651, "loss": 1.2165, "step": 1956 }, { "epoch": 0.5453532116483211, "grad_norm": 0.02492833435859944, "learning_rate": 0.0001524078996139805, "loss": 1.1261, "step": 1957 }, { "epoch": 0.5456318796154382, "grad_norm": 0.023577964399250584, "learning_rate": 0.0001522619777237933, "loss": 1.1344, "step": 1958 }, { "epoch": 0.5459105475825554, "grad_norm": 0.02747173681431677, "learning_rate": 0.00015211605369243217, "loss": 1.1139, "step": 1959 }, { "epoch": 0.5461892155496726, "grad_norm": 0.02380272839989376, "learning_rate": 0.0001519701276580278, "loss": 1.1751, "step": 1960 }, { "epoch": 0.5464678835167898, "grad_norm": 0.024317378833747374, "learning_rate": 0.00015182419975871305, "loss": 1.1285, "step": 1961 }, { "epoch": 0.5467465514839069, "grad_norm": 0.025785717206885786, "learning_rate": 0.00015167827013262225, "loss": 1.1785, "step": 1962 }, { "epoch": 0.5470252194510241, "grad_norm": 0.024099717364357568, "learning_rate": 0.0001515323389178914, "loss": 1.1423, "step": 1963 }, { "epoch": 0.5473038874181413, "grad_norm": 0.02568491437587146, "learning_rate": 0.0001513864062526582, "loss": 1.1623, "step": 1964 }, { "epoch": 0.5475825553852585, "grad_norm": 0.022869871435069175, "learning_rate": 0.00015124047227506158, "loss": 1.1699, "step": 1965 }, { "epoch": 0.5478612233523756, "grad_norm": 0.023723345242457772, "learning_rate": 0.00015109453712324164, "loss": 1.1731, "step": 1966 }, { "epoch": 0.5481398913194928, "grad_norm": 0.02455695455689923, "learning_rate": 0.00015094860093533972, "loss": 1.1549, "step": 1967 }, { "epoch": 0.54841855928661, "grad_norm": 0.024735282130571003, "learning_rate": 0.00015080266384949814, "loss": 1.1595, "step": 1968 }, { "epoch": 0.5486972272537272, "grad_norm": 0.02265207334121425, "learning_rate": 0.00015065672600385996, "loss": 1.1562, "step": 1969 }, { "epoch": 0.5489758952208443, "grad_norm": 0.02438593650110052, "learning_rate": 0.0001505107875365691, "loss": 1.1707, "step": 1970 }, { "epoch": 0.5492545631879615, "grad_norm": 0.023151604263621565, "learning_rate": 0.00015036484858576996, "loss": 1.1485, "step": 1971 }, { "epoch": 0.5495332311550787, "grad_norm": 0.024314836610632554, "learning_rate": 0.0001502189092896074, "loss": 1.1784, "step": 1972 }, { "epoch": 0.5498118991221959, "grad_norm": 0.02625959956579492, "learning_rate": 0.00015007296978622673, "loss": 1.21, "step": 1973 }, { "epoch": 0.550090567089313, "grad_norm": 0.025325189355310664, "learning_rate": 0.00014992703021377327, "loss": 1.1389, "step": 1974 }, { "epoch": 0.5503692350564303, "grad_norm": 0.02291412379781951, "learning_rate": 0.00014978109071039257, "loss": 1.1625, "step": 1975 }, { "epoch": 0.5506479030235475, "grad_norm": 0.024169439271879957, "learning_rate": 0.00014963515141423007, "loss": 1.1003, "step": 1976 }, { "epoch": 0.5509265709906647, "grad_norm": 0.023440115963987786, "learning_rate": 0.00014948921246343087, "loss": 1.1323, "step": 1977 }, { "epoch": 0.5512052389577818, "grad_norm": 0.02284320100507274, "learning_rate": 0.00014934327399614, "loss": 1.1284, "step": 1978 }, { "epoch": 0.551483906924899, "grad_norm": 0.026112662294686704, "learning_rate": 0.00014919733615050183, "loss": 1.1158, "step": 1979 }, { "epoch": 0.5517625748920162, "grad_norm": 0.025766268733666695, "learning_rate": 0.00014905139906466026, "loss": 1.1862, "step": 1980 }, { "epoch": 0.5520412428591334, "grad_norm": 0.02534141901610206, "learning_rate": 0.00014890546287675836, "loss": 1.1935, "step": 1981 }, { "epoch": 0.5523199108262505, "grad_norm": 0.023982876515593658, "learning_rate": 0.00014875952772493845, "loss": 1.1734, "step": 1982 }, { "epoch": 0.5525985787933677, "grad_norm": 0.024693882262160245, "learning_rate": 0.0001486135937473417, "loss": 1.1737, "step": 1983 }, { "epoch": 0.5528772467604849, "grad_norm": 0.023238578193065546, "learning_rate": 0.00014846766108210854, "loss": 1.1688, "step": 1984 }, { "epoch": 0.5531559147276021, "grad_norm": 0.024297747041670495, "learning_rate": 0.00014832172986737775, "loss": 1.1602, "step": 1985 }, { "epoch": 0.5534345826947192, "grad_norm": 0.024187789394374704, "learning_rate": 0.00014817580024128693, "loss": 1.1658, "step": 1986 }, { "epoch": 0.5537132506618364, "grad_norm": 0.024960186295182818, "learning_rate": 0.00014802987234197217, "loss": 1.1377, "step": 1987 }, { "epoch": 0.5539919186289536, "grad_norm": 0.024040342863484766, "learning_rate": 0.0001478839463075679, "loss": 1.1534, "step": 1988 }, { "epoch": 0.5542705865960708, "grad_norm": 0.02322482634919462, "learning_rate": 0.00014773802227620666, "loss": 1.1675, "step": 1989 }, { "epoch": 0.5545492545631879, "grad_norm": 0.023903201861759475, "learning_rate": 0.00014759210038601945, "loss": 1.1429, "step": 1990 }, { "epoch": 0.5548279225303051, "grad_norm": 0.02702245936706675, "learning_rate": 0.00014744618077513488, "loss": 1.2182, "step": 1991 }, { "epoch": 0.5551065904974223, "grad_norm": 0.023442055342658263, "learning_rate": 0.0001473002635816796, "loss": 1.1636, "step": 1992 }, { "epoch": 0.5553852584645395, "grad_norm": 0.025146669592367197, "learning_rate": 0.00014715434894377782, "loss": 1.1305, "step": 1993 }, { "epoch": 0.5556639264316566, "grad_norm": 0.024151747072993158, "learning_rate": 0.00014700843699955152, "loss": 1.1691, "step": 1994 }, { "epoch": 0.5559425943987739, "grad_norm": 0.023638373347387456, "learning_rate": 0.00014686252788711989, "loss": 1.1888, "step": 1995 }, { "epoch": 0.5562212623658911, "grad_norm": 0.02344271495015451, "learning_rate": 0.00014671662174459976, "loss": 1.1522, "step": 1996 }, { "epoch": 0.5564999303330083, "grad_norm": 0.02510037445051649, "learning_rate": 0.00014657071871010483, "loss": 1.1407, "step": 1997 }, { "epoch": 0.5567785983001254, "grad_norm": 0.023725433737178923, "learning_rate": 0.0001464248189217461, "loss": 1.1269, "step": 1998 }, { "epoch": 0.5570572662672426, "grad_norm": 0.023339753425336493, "learning_rate": 0.00014627892251763133, "loss": 1.1102, "step": 1999 }, { "epoch": 0.5573359342343598, "grad_norm": 0.02217427837993501, "learning_rate": 0.00014613302963586516, "loss": 1.0622, "step": 2000 }, { "epoch": 0.557614602201477, "grad_norm": 0.02842942661864808, "learning_rate": 0.00014598714041454879, "loss": 1.1361, "step": 2001 }, { "epoch": 0.5578932701685941, "grad_norm": 0.026619070807369444, "learning_rate": 0.0001458412549917802, "loss": 1.1225, "step": 2002 }, { "epoch": 0.5581719381357113, "grad_norm": 0.023858559757661192, "learning_rate": 0.00014569537350565356, "loss": 1.1367, "step": 2003 }, { "epoch": 0.5584506061028285, "grad_norm": 0.022403995031804538, "learning_rate": 0.0001455494960942593, "loss": 1.0818, "step": 2004 }, { "epoch": 0.5587292740699457, "grad_norm": 0.023277361432448154, "learning_rate": 0.00014540362289568415, "loss": 1.1815, "step": 2005 }, { "epoch": 0.5590079420370628, "grad_norm": 0.024093655136769893, "learning_rate": 0.0001452577540480107, "loss": 1.1386, "step": 2006 }, { "epoch": 0.55928661000418, "grad_norm": 0.0240948562234233, "learning_rate": 0.0001451118896893175, "loss": 1.121, "step": 2007 }, { "epoch": 0.5595652779712972, "grad_norm": 0.022491163317258145, "learning_rate": 0.00014496602995767881, "loss": 1.1163, "step": 2008 }, { "epoch": 0.5598439459384144, "grad_norm": 0.025597721377535753, "learning_rate": 0.00014482017499116464, "loss": 1.176, "step": 2009 }, { "epoch": 0.5601226139055315, "grad_norm": 0.022955344548350416, "learning_rate": 0.0001446743249278403, "loss": 1.1477, "step": 2010 }, { "epoch": 0.5604012818726487, "grad_norm": 0.024700170770499977, "learning_rate": 0.00014452847990576657, "loss": 1.1939, "step": 2011 }, { "epoch": 0.5606799498397659, "grad_norm": 0.02243423806682218, "learning_rate": 0.00014438264006299943, "loss": 1.2203, "step": 2012 }, { "epoch": 0.560958617806883, "grad_norm": 0.024261994925777963, "learning_rate": 0.0001442368055375899, "loss": 1.14, "step": 2013 }, { "epoch": 0.5612372857740002, "grad_norm": 0.024835091072596807, "learning_rate": 0.0001440909764675841, "loss": 1.1551, "step": 2014 }, { "epoch": 0.5615159537411175, "grad_norm": 0.023210186065986866, "learning_rate": 0.00014394515299102293, "loss": 1.1418, "step": 2015 }, { "epoch": 0.5617946217082347, "grad_norm": 0.026793706073546756, "learning_rate": 0.00014379933524594197, "loss": 1.157, "step": 2016 }, { "epoch": 0.5620732896753519, "grad_norm": 0.024632548941317157, "learning_rate": 0.0001436535233703713, "loss": 1.1454, "step": 2017 }, { "epoch": 0.562351957642469, "grad_norm": 0.023737038170426692, "learning_rate": 0.0001435077175023356, "loss": 1.1938, "step": 2018 }, { "epoch": 0.5626306256095862, "grad_norm": 0.02447114803748899, "learning_rate": 0.00014336191777985377, "loss": 1.2124, "step": 2019 }, { "epoch": 0.5629092935767034, "grad_norm": 0.023927899085419483, "learning_rate": 0.00014321612434093888, "loss": 1.2196, "step": 2020 }, { "epoch": 0.5631879615438206, "grad_norm": 0.026234590280885317, "learning_rate": 0.00014307033732359817, "loss": 1.1776, "step": 2021 }, { "epoch": 0.5634666295109377, "grad_norm": 0.026412689244626877, "learning_rate": 0.00014292455686583264, "loss": 1.1598, "step": 2022 }, { "epoch": 0.5637452974780549, "grad_norm": 0.023674108019684195, "learning_rate": 0.0001427787831056372, "loss": 1.1485, "step": 2023 }, { "epoch": 0.5640239654451721, "grad_norm": 0.023098415992163227, "learning_rate": 0.00014263301618100036, "loss": 1.1345, "step": 2024 }, { "epoch": 0.5643026334122893, "grad_norm": 0.02628801191490978, "learning_rate": 0.00014248725622990416, "loss": 1.1941, "step": 2025 }, { "epoch": 0.5645813013794064, "grad_norm": 0.023017971767117725, "learning_rate": 0.00014234150339032404, "loss": 1.136, "step": 2026 }, { "epoch": 0.5648599693465236, "grad_norm": 0.024498181829313383, "learning_rate": 0.00014219575780022878, "loss": 1.1407, "step": 2027 }, { "epoch": 0.5651386373136408, "grad_norm": 0.024360873488150466, "learning_rate": 0.00014205001959758022, "loss": 1.1241, "step": 2028 }, { "epoch": 0.565417305280758, "grad_norm": 0.028777307717658563, "learning_rate": 0.00014190428892033323, "loss": 1.1695, "step": 2029 }, { "epoch": 0.5656959732478751, "grad_norm": 0.0246241589517885, "learning_rate": 0.00014175856590643556, "loss": 1.1368, "step": 2030 }, { "epoch": 0.5659746412149923, "grad_norm": 0.02963326550156894, "learning_rate": 0.00014161285069382766, "loss": 1.1668, "step": 2031 }, { "epoch": 0.5662533091821095, "grad_norm": 0.02593865828854836, "learning_rate": 0.0001414671434204426, "loss": 1.0725, "step": 2032 }, { "epoch": 0.5665319771492267, "grad_norm": 0.02589193998542985, "learning_rate": 0.0001413214442242061, "loss": 1.1486, "step": 2033 }, { "epoch": 0.5668106451163438, "grad_norm": 0.02506578066298172, "learning_rate": 0.00014117575324303604, "loss": 1.1755, "step": 2034 }, { "epoch": 0.5670893130834611, "grad_norm": 0.02977341793806403, "learning_rate": 0.00014103007061484258, "loss": 1.1493, "step": 2035 }, { "epoch": 0.5673679810505783, "grad_norm": 0.02526125135494907, "learning_rate": 0.000140884396477528, "loss": 1.2204, "step": 2036 }, { "epoch": 0.5676466490176955, "grad_norm": 0.027590203073304252, "learning_rate": 0.0001407387309689865, "loss": 1.1582, "step": 2037 }, { "epoch": 0.5679253169848126, "grad_norm": 0.026793486273860696, "learning_rate": 0.00014059307422710412, "loss": 1.1516, "step": 2038 }, { "epoch": 0.5682039849519298, "grad_norm": 0.025978942857631544, "learning_rate": 0.00014044742638975866, "loss": 1.1135, "step": 2039 }, { "epoch": 0.568482652919047, "grad_norm": 0.02693451048780322, "learning_rate": 0.00014030178759481945, "loss": 1.232, "step": 2040 }, { "epoch": 0.5687613208861642, "grad_norm": 0.024365943317937108, "learning_rate": 0.00014015615798014725, "loss": 1.1078, "step": 2041 }, { "epoch": 0.5690399888532813, "grad_norm": 0.02628011892597662, "learning_rate": 0.00014001053768359416, "loss": 1.1577, "step": 2042 }, { "epoch": 0.5693186568203985, "grad_norm": 0.023026697917854645, "learning_rate": 0.00013986492684300338, "loss": 1.1726, "step": 2043 }, { "epoch": 0.5695973247875157, "grad_norm": 0.023802967587965747, "learning_rate": 0.00013971932559620918, "loss": 1.1117, "step": 2044 }, { "epoch": 0.5698759927546329, "grad_norm": 0.022923874905683487, "learning_rate": 0.00013957373408103694, "loss": 1.1472, "step": 2045 }, { "epoch": 0.57015466072175, "grad_norm": 0.026072114312269867, "learning_rate": 0.0001394281524353026, "loss": 1.1871, "step": 2046 }, { "epoch": 0.5704333286888672, "grad_norm": 0.022610890741474855, "learning_rate": 0.0001392825807968128, "loss": 1.0682, "step": 2047 }, { "epoch": 0.5707119966559844, "grad_norm": 0.025520647428503854, "learning_rate": 0.0001391370193033648, "loss": 1.159, "step": 2048 }, { "epoch": 0.5709906646231016, "grad_norm": 0.025821292417474363, "learning_rate": 0.00013899146809274613, "loss": 1.1701, "step": 2049 }, { "epoch": 0.5712693325902187, "grad_norm": 0.022654740360132826, "learning_rate": 0.00013884592730273462, "loss": 1.0887, "step": 2050 }, { "epoch": 0.5715480005573359, "grad_norm": 0.0234980282809617, "learning_rate": 0.0001387003970710984, "loss": 1.153, "step": 2051 }, { "epoch": 0.5718266685244531, "grad_norm": 0.022761858428597133, "learning_rate": 0.0001385548775355954, "loss": 1.1515, "step": 2052 }, { "epoch": 0.5721053364915702, "grad_norm": 0.024328716408898922, "learning_rate": 0.00013840936883397352, "loss": 1.1399, "step": 2053 }, { "epoch": 0.5723840044586874, "grad_norm": 0.023737914362504334, "learning_rate": 0.00013826387110397033, "loss": 1.0735, "step": 2054 }, { "epoch": 0.5726626724258047, "grad_norm": 0.026464423662651684, "learning_rate": 0.00013811838448331314, "loss": 1.1701, "step": 2055 }, { "epoch": 0.5729413403929219, "grad_norm": 0.024633078455254497, "learning_rate": 0.0001379729091097186, "loss": 1.1621, "step": 2056 }, { "epoch": 0.5732200083600391, "grad_norm": 0.024852967816228606, "learning_rate": 0.00013782744512089284, "loss": 1.11, "step": 2057 }, { "epoch": 0.5734986763271562, "grad_norm": 0.025311239046146055, "learning_rate": 0.00013768199265453115, "loss": 1.1527, "step": 2058 }, { "epoch": 0.5737773442942734, "grad_norm": 0.024344181326609402, "learning_rate": 0.00013753655184831792, "loss": 1.1764, "step": 2059 }, { "epoch": 0.5740560122613906, "grad_norm": 0.025689929181804256, "learning_rate": 0.00013739112283992652, "loss": 1.1665, "step": 2060 }, { "epoch": 0.5743346802285078, "grad_norm": 0.023619936212580264, "learning_rate": 0.00013724570576701908, "loss": 1.1522, "step": 2061 }, { "epoch": 0.5746133481956249, "grad_norm": 0.026297769970312117, "learning_rate": 0.0001371003007672465, "loss": 1.1632, "step": 2062 }, { "epoch": 0.5748920161627421, "grad_norm": 0.025225271445996057, "learning_rate": 0.0001369549079782483, "loss": 1.1907, "step": 2063 }, { "epoch": 0.5751706841298593, "grad_norm": 0.025090292439281736, "learning_rate": 0.00013680952753765237, "loss": 1.1491, "step": 2064 }, { "epoch": 0.5754493520969765, "grad_norm": 0.022533826624692388, "learning_rate": 0.00013666415958307488, "loss": 1.1596, "step": 2065 }, { "epoch": 0.5757280200640936, "grad_norm": 0.02686324916325988, "learning_rate": 0.00013651880425212022, "loss": 1.1317, "step": 2066 }, { "epoch": 0.5760066880312108, "grad_norm": 0.023533415736697705, "learning_rate": 0.0001363734616823809, "loss": 1.1437, "step": 2067 }, { "epoch": 0.576285355998328, "grad_norm": 0.025795090209292344, "learning_rate": 0.00013622813201143722, "loss": 1.1519, "step": 2068 }, { "epoch": 0.5765640239654451, "grad_norm": 0.027059443415587282, "learning_rate": 0.00013608281537685736, "loss": 1.1833, "step": 2069 }, { "epoch": 0.5768426919325623, "grad_norm": 0.02330186532368797, "learning_rate": 0.00013593751191619713, "loss": 1.1718, "step": 2070 }, { "epoch": 0.5771213598996795, "grad_norm": 0.022939322544480188, "learning_rate": 0.00013579222176699984, "loss": 1.1566, "step": 2071 }, { "epoch": 0.5774000278667967, "grad_norm": 0.023691200108298813, "learning_rate": 0.00013564694506679634, "loss": 1.2204, "step": 2072 }, { "epoch": 0.5776786958339138, "grad_norm": 0.022493129219513634, "learning_rate": 0.00013550168195310453, "loss": 1.2029, "step": 2073 }, { "epoch": 0.577957363801031, "grad_norm": 0.025228183487298092, "learning_rate": 0.00013535643256342962, "loss": 1.2024, "step": 2074 }, { "epoch": 0.5782360317681483, "grad_norm": 0.024522593960065064, "learning_rate": 0.0001352111970352637, "loss": 1.1295, "step": 2075 }, { "epoch": 0.5785146997352655, "grad_norm": 0.023489253394216585, "learning_rate": 0.0001350659755060859, "loss": 1.1424, "step": 2076 }, { "epoch": 0.5787933677023827, "grad_norm": 0.02263641141303275, "learning_rate": 0.000134920768113362, "loss": 1.1403, "step": 2077 }, { "epoch": 0.5790720356694998, "grad_norm": 0.022697091183642253, "learning_rate": 0.00013477557499454437, "loss": 1.0915, "step": 2078 }, { "epoch": 0.579350703636617, "grad_norm": 0.025986656283625148, "learning_rate": 0.0001346303962870719, "loss": 1.1631, "step": 2079 }, { "epoch": 0.5796293716037342, "grad_norm": 0.022560476635575574, "learning_rate": 0.00013448523212836988, "loss": 1.0725, "step": 2080 }, { "epoch": 0.5799080395708514, "grad_norm": 0.022561313216733676, "learning_rate": 0.00013434008265584969, "loss": 1.1962, "step": 2081 }, { "epoch": 0.5801867075379685, "grad_norm": 0.024491702807388858, "learning_rate": 0.00013419494800690905, "loss": 1.1142, "step": 2082 }, { "epoch": 0.5804653755050857, "grad_norm": 0.022327430162535564, "learning_rate": 0.00013404982831893145, "loss": 1.128, "step": 2083 }, { "epoch": 0.5807440434722029, "grad_norm": 0.023872006617444572, "learning_rate": 0.00013390472372928628, "loss": 1.2034, "step": 2084 }, { "epoch": 0.58102271143932, "grad_norm": 0.023540262639327174, "learning_rate": 0.0001337596343753286, "loss": 1.164, "step": 2085 }, { "epoch": 0.5813013794064372, "grad_norm": 0.025633908097744188, "learning_rate": 0.00013361456039439915, "loss": 1.2115, "step": 2086 }, { "epoch": 0.5815800473735544, "grad_norm": 0.022534457233698395, "learning_rate": 0.00013346950192382392, "loss": 1.1347, "step": 2087 }, { "epoch": 0.5818587153406716, "grad_norm": 0.02525911969319084, "learning_rate": 0.0001333244591009145, "loss": 1.1835, "step": 2088 }, { "epoch": 0.5821373833077887, "grad_norm": 0.025645367397806363, "learning_rate": 0.00013317943206296747, "loss": 1.2029, "step": 2089 }, { "epoch": 0.5824160512749059, "grad_norm": 0.024702255882847312, "learning_rate": 0.0001330344209472645, "loss": 1.1226, "step": 2090 }, { "epoch": 0.5826947192420231, "grad_norm": 0.025437274921044934, "learning_rate": 0.00013288942589107219, "loss": 1.1839, "step": 2091 }, { "epoch": 0.5829733872091403, "grad_norm": 0.022266560566892125, "learning_rate": 0.00013274444703164194, "loss": 1.2269, "step": 2092 }, { "epoch": 0.5832520551762574, "grad_norm": 0.024379081802767544, "learning_rate": 0.0001325994845062098, "loss": 1.1006, "step": 2093 }, { "epoch": 0.5835307231433746, "grad_norm": 0.02505015404450068, "learning_rate": 0.00013245453845199648, "loss": 1.0752, "step": 2094 }, { "epoch": 0.5838093911104919, "grad_norm": 0.024885832512326553, "learning_rate": 0.00013230960900620698, "loss": 1.0663, "step": 2095 }, { "epoch": 0.5840880590776091, "grad_norm": 0.024379942662791382, "learning_rate": 0.0001321646963060306, "loss": 1.1805, "step": 2096 }, { "epoch": 0.5843667270447263, "grad_norm": 0.025099430138495915, "learning_rate": 0.00013201980048864074, "loss": 1.1451, "step": 2097 }, { "epoch": 0.5846453950118434, "grad_norm": 0.02626359494191788, "learning_rate": 0.00013187492169119492, "loss": 1.2182, "step": 2098 }, { "epoch": 0.5849240629789606, "grad_norm": 0.02407562311452213, "learning_rate": 0.00013173006005083444, "loss": 1.1406, "step": 2099 }, { "epoch": 0.5852027309460778, "grad_norm": 0.022573724971194264, "learning_rate": 0.0001315852157046845, "loss": 1.1541, "step": 2100 }, { "epoch": 0.585481398913195, "grad_norm": 0.022835747785540404, "learning_rate": 0.00013144038878985386, "loss": 1.1544, "step": 2101 }, { "epoch": 0.5857600668803121, "grad_norm": 0.026231251471019863, "learning_rate": 0.0001312955794434347, "loss": 1.1979, "step": 2102 }, { "epoch": 0.5860387348474293, "grad_norm": 0.023458434395587834, "learning_rate": 0.00013115078780250267, "loss": 1.1373, "step": 2103 }, { "epoch": 0.5863174028145465, "grad_norm": 0.024405759835861674, "learning_rate": 0.00013100601400411663, "loss": 1.1886, "step": 2104 }, { "epoch": 0.5865960707816636, "grad_norm": 0.023736106225814915, "learning_rate": 0.00013086125818531847, "loss": 1.1249, "step": 2105 }, { "epoch": 0.5868747387487808, "grad_norm": 0.02313483082620872, "learning_rate": 0.00013071652048313325, "loss": 1.1518, "step": 2106 }, { "epoch": 0.587153406715898, "grad_norm": 0.029135414542487605, "learning_rate": 0.0001305718010345688, "loss": 1.1649, "step": 2107 }, { "epoch": 0.5874320746830152, "grad_norm": 0.028538212439251254, "learning_rate": 0.00013042709997661552, "loss": 1.123, "step": 2108 }, { "epoch": 0.5877107426501323, "grad_norm": 0.023495127410256954, "learning_rate": 0.00013028241744624658, "loss": 1.1772, "step": 2109 }, { "epoch": 0.5879894106172495, "grad_norm": 0.022912802462741293, "learning_rate": 0.00013013775358041753, "loss": 1.1923, "step": 2110 }, { "epoch": 0.5882680785843667, "grad_norm": 0.0259658661596542, "learning_rate": 0.00012999310851606632, "loss": 1.1222, "step": 2111 }, { "epoch": 0.5885467465514839, "grad_norm": 0.028241759776285624, "learning_rate": 0.00012984848239011305, "loss": 1.1501, "step": 2112 }, { "epoch": 0.588825414518601, "grad_norm": 0.025172873370625324, "learning_rate": 0.0001297038753394599, "loss": 1.0829, "step": 2113 }, { "epoch": 0.5891040824857182, "grad_norm": 0.02396653705489149, "learning_rate": 0.00012955928750099096, "loss": 1.1998, "step": 2114 }, { "epoch": 0.5893827504528354, "grad_norm": 0.023313750937991158, "learning_rate": 0.00012941471901157224, "loss": 1.1038, "step": 2115 }, { "epoch": 0.5896614184199527, "grad_norm": 0.026214917989158034, "learning_rate": 0.00012927017000805133, "loss": 1.1156, "step": 2116 }, { "epoch": 0.5899400863870699, "grad_norm": 0.025127519660089772, "learning_rate": 0.00012912564062725736, "loss": 1.24, "step": 2117 }, { "epoch": 0.590218754354187, "grad_norm": 0.024538566234064897, "learning_rate": 0.00012898113100600101, "loss": 1.1762, "step": 2118 }, { "epoch": 0.5904974223213042, "grad_norm": 0.024488743524991953, "learning_rate": 0.00012883664128107413, "loss": 1.2035, "step": 2119 }, { "epoch": 0.5907760902884214, "grad_norm": 0.022510237835411877, "learning_rate": 0.00012869217158924986, "loss": 1.1429, "step": 2120 }, { "epoch": 0.5910547582555385, "grad_norm": 0.024250320009847583, "learning_rate": 0.00012854772206728223, "loss": 1.2263, "step": 2121 }, { "epoch": 0.5913334262226557, "grad_norm": 0.024264295957192886, "learning_rate": 0.00012840329285190627, "loss": 1.2203, "step": 2122 }, { "epoch": 0.5916120941897729, "grad_norm": 0.024425562468785827, "learning_rate": 0.00012825888407983776, "loss": 1.1828, "step": 2123 }, { "epoch": 0.5918907621568901, "grad_norm": 0.02287807307545837, "learning_rate": 0.00012811449588777315, "loss": 1.1216, "step": 2124 }, { "epoch": 0.5921694301240072, "grad_norm": 0.022716261568582675, "learning_rate": 0.0001279701284123894, "loss": 1.1761, "step": 2125 }, { "epoch": 0.5924480980911244, "grad_norm": 0.02370809951453613, "learning_rate": 0.0001278257817903439, "loss": 1.1417, "step": 2126 }, { "epoch": 0.5927267660582416, "grad_norm": 0.02349294988833293, "learning_rate": 0.00012768145615827417, "loss": 1.1534, "step": 2127 }, { "epoch": 0.5930054340253588, "grad_norm": 0.02327986276814826, "learning_rate": 0.000127537151652798, "loss": 1.1281, "step": 2128 }, { "epoch": 0.5932841019924759, "grad_norm": 0.025882553976016246, "learning_rate": 0.00012739286841051303, "loss": 1.1185, "step": 2129 }, { "epoch": 0.5935627699595931, "grad_norm": 0.0227529112423486, "learning_rate": 0.00012724860656799705, "loss": 1.1304, "step": 2130 }, { "epoch": 0.5938414379267103, "grad_norm": 0.027641235866859765, "learning_rate": 0.00012710436626180732, "loss": 1.1465, "step": 2131 }, { "epoch": 0.5941201058938275, "grad_norm": 0.024336026505443222, "learning_rate": 0.0001269601476284808, "loss": 1.0939, "step": 2132 }, { "epoch": 0.5943987738609446, "grad_norm": 0.027393971826289904, "learning_rate": 0.00012681595080453397, "loss": 1.1072, "step": 2133 }, { "epoch": 0.5946774418280618, "grad_norm": 0.023405569881242566, "learning_rate": 0.00012667177592646262, "loss": 1.0871, "step": 2134 }, { "epoch": 0.594956109795179, "grad_norm": 0.023457779362493714, "learning_rate": 0.00012652762313074173, "loss": 1.1461, "step": 2135 }, { "epoch": 0.5952347777622963, "grad_norm": 0.024575460906312508, "learning_rate": 0.00012638349255382554, "loss": 1.1471, "step": 2136 }, { "epoch": 0.5955134457294134, "grad_norm": 0.02505783121587812, "learning_rate": 0.0001262393843321471, "loss": 1.1663, "step": 2137 }, { "epoch": 0.5957921136965306, "grad_norm": 0.024541599177929856, "learning_rate": 0.00012609529860211835, "loss": 1.1218, "step": 2138 }, { "epoch": 0.5960707816636478, "grad_norm": 0.022939436323692962, "learning_rate": 0.00012595123550012994, "loss": 1.1659, "step": 2139 }, { "epoch": 0.596349449630765, "grad_norm": 0.02479862972181484, "learning_rate": 0.0001258071951625511, "loss": 1.1499, "step": 2140 }, { "epoch": 0.5966281175978821, "grad_norm": 0.023502558755613332, "learning_rate": 0.00012566317772572948, "loss": 1.0589, "step": 2141 }, { "epoch": 0.5969067855649993, "grad_norm": 0.0227700270853753, "learning_rate": 0.00012551918332599107, "loss": 1.1373, "step": 2142 }, { "epoch": 0.5971854535321165, "grad_norm": 0.023174857670901803, "learning_rate": 0.00012537521209964015, "loss": 1.1388, "step": 2143 }, { "epoch": 0.5974641214992337, "grad_norm": 0.024669122237599272, "learning_rate": 0.00012523126418295896, "loss": 1.1224, "step": 2144 }, { "epoch": 0.5977427894663508, "grad_norm": 0.021305678379937572, "learning_rate": 0.00012508733971220763, "loss": 1.1542, "step": 2145 }, { "epoch": 0.598021457433468, "grad_norm": 0.026071027827368986, "learning_rate": 0.00012494343882362422, "loss": 1.1119, "step": 2146 }, { "epoch": 0.5983001254005852, "grad_norm": 0.02292201008878646, "learning_rate": 0.00012479956165342435, "loss": 1.1155, "step": 2147 }, { "epoch": 0.5985787933677024, "grad_norm": 0.02493462891170348, "learning_rate": 0.00012465570833780127, "loss": 1.1932, "step": 2148 }, { "epoch": 0.5988574613348195, "grad_norm": 0.027407436751660737, "learning_rate": 0.00012451187901292565, "loss": 1.1495, "step": 2149 }, { "epoch": 0.5991361293019367, "grad_norm": 0.02189078934991882, "learning_rate": 0.00012436807381494545, "loss": 1.113, "step": 2150 }, { "epoch": 0.5994147972690539, "grad_norm": 0.025553094262166794, "learning_rate": 0.0001242242928799857, "loss": 1.1356, "step": 2151 }, { "epoch": 0.5996934652361711, "grad_norm": 0.02618404847654866, "learning_rate": 0.00012408053634414858, "loss": 1.1179, "step": 2152 }, { "epoch": 0.5999721332032882, "grad_norm": 0.022467633804396366, "learning_rate": 0.00012393680434351307, "loss": 1.1535, "step": 2153 }, { "epoch": 0.6002508011704054, "grad_norm": 0.02375808126317987, "learning_rate": 0.00012379309701413503, "loss": 1.1195, "step": 2154 }, { "epoch": 0.6005294691375226, "grad_norm": 0.023923742877326938, "learning_rate": 0.00012364941449204692, "loss": 1.1608, "step": 2155 }, { "epoch": 0.6008081371046399, "grad_norm": 0.023958294286948078, "learning_rate": 0.0001235057569132577, "loss": 1.142, "step": 2156 }, { "epoch": 0.601086805071757, "grad_norm": 0.022878672846642113, "learning_rate": 0.00012336212441375273, "loss": 1.1418, "step": 2157 }, { "epoch": 0.6013654730388742, "grad_norm": 0.027614272968969733, "learning_rate": 0.0001232185171294936, "loss": 1.1322, "step": 2158 }, { "epoch": 0.6016441410059914, "grad_norm": 0.024081207961268756, "learning_rate": 0.00012307493519641812, "loss": 1.1956, "step": 2159 }, { "epoch": 0.6019228089731086, "grad_norm": 0.0247062014724576, "learning_rate": 0.00012293137875044, "loss": 1.1737, "step": 2160 }, { "epoch": 0.6022014769402257, "grad_norm": 0.024308372250485894, "learning_rate": 0.00012278784792744892, "loss": 1.1685, "step": 2161 }, { "epoch": 0.6024801449073429, "grad_norm": 0.024922035375965074, "learning_rate": 0.0001226443428633102, "loss": 1.1791, "step": 2162 }, { "epoch": 0.6027588128744601, "grad_norm": 0.02500428979514193, "learning_rate": 0.0001225008636938649, "loss": 1.142, "step": 2163 }, { "epoch": 0.6030374808415773, "grad_norm": 0.023390338935030907, "learning_rate": 0.00012235741055492946, "loss": 1.1199, "step": 2164 }, { "epoch": 0.6033161488086944, "grad_norm": 0.02530369075129976, "learning_rate": 0.00012221398358229575, "loss": 1.1451, "step": 2165 }, { "epoch": 0.6035948167758116, "grad_norm": 0.023131193382726657, "learning_rate": 0.00012207058291173074, "loss": 1.1786, "step": 2166 }, { "epoch": 0.6038734847429288, "grad_norm": 0.023193692801083368, "learning_rate": 0.00012192720867897673, "loss": 1.1407, "step": 2167 }, { "epoch": 0.604152152710046, "grad_norm": 0.0237620310732553, "learning_rate": 0.00012178386101975081, "loss": 1.1367, "step": 2168 }, { "epoch": 0.6044308206771631, "grad_norm": 0.023357522379795357, "learning_rate": 0.00012164054006974502, "loss": 1.0864, "step": 2169 }, { "epoch": 0.6047094886442803, "grad_norm": 0.024657425072228574, "learning_rate": 0.00012149724596462603, "loss": 1.1505, "step": 2170 }, { "epoch": 0.6049881566113975, "grad_norm": 0.025285069487574526, "learning_rate": 0.00012135397884003514, "loss": 1.1711, "step": 2171 }, { "epoch": 0.6052668245785147, "grad_norm": 0.022866667974351614, "learning_rate": 0.00012121073883158807, "loss": 1.1108, "step": 2172 }, { "epoch": 0.6055454925456318, "grad_norm": 0.0255813893346765, "learning_rate": 0.00012106752607487503, "loss": 1.1422, "step": 2173 }, { "epoch": 0.605824160512749, "grad_norm": 0.024457415517827333, "learning_rate": 0.00012092434070546023, "loss": 1.1517, "step": 2174 }, { "epoch": 0.6061028284798662, "grad_norm": 0.022288187451288896, "learning_rate": 0.00012078118285888207, "loss": 1.1121, "step": 2175 }, { "epoch": 0.6063814964469835, "grad_norm": 0.02356748356706444, "learning_rate": 0.00012063805267065285, "loss": 1.158, "step": 2176 }, { "epoch": 0.6066601644141006, "grad_norm": 0.024908996340698274, "learning_rate": 0.00012049495027625875, "loss": 1.1485, "step": 2177 }, { "epoch": 0.6069388323812178, "grad_norm": 0.02628194895802667, "learning_rate": 0.00012035187581115948, "loss": 1.1869, "step": 2178 }, { "epoch": 0.607217500348335, "grad_norm": 0.02346932575424681, "learning_rate": 0.0001202088294107886, "loss": 1.1831, "step": 2179 }, { "epoch": 0.6074961683154522, "grad_norm": 0.02379207833895706, "learning_rate": 0.00012006581121055284, "loss": 1.1313, "step": 2180 }, { "epoch": 0.6077748362825693, "grad_norm": 0.027531802695095187, "learning_rate": 0.00011992282134583235, "loss": 1.2244, "step": 2181 }, { "epoch": 0.6080535042496865, "grad_norm": 0.025103987216685098, "learning_rate": 0.00011977985995198043, "loss": 1.101, "step": 2182 }, { "epoch": 0.6083321722168037, "grad_norm": 0.02657365743758555, "learning_rate": 0.00011963692716432344, "loss": 1.1873, "step": 2183 }, { "epoch": 0.6086108401839209, "grad_norm": 0.02387078134627269, "learning_rate": 0.0001194940231181606, "loss": 1.1343, "step": 2184 }, { "epoch": 0.608889508151038, "grad_norm": 0.02385828660781904, "learning_rate": 0.00011935114794876411, "loss": 1.2123, "step": 2185 }, { "epoch": 0.6091681761181552, "grad_norm": 0.02304879867704655, "learning_rate": 0.00011920830179137866, "loss": 1.1463, "step": 2186 }, { "epoch": 0.6094468440852724, "grad_norm": 0.023517970791449645, "learning_rate": 0.0001190654847812215, "loss": 1.1466, "step": 2187 }, { "epoch": 0.6097255120523896, "grad_norm": 0.024940886114168934, "learning_rate": 0.00011892269705348231, "loss": 1.1413, "step": 2188 }, { "epoch": 0.6100041800195067, "grad_norm": 0.02307956652710866, "learning_rate": 0.00011877993874332309, "loss": 1.1148, "step": 2189 }, { "epoch": 0.6102828479866239, "grad_norm": 0.02490178468941625, "learning_rate": 0.00011863720998587786, "loss": 1.2365, "step": 2190 }, { "epoch": 0.6105615159537411, "grad_norm": 0.024684731024896902, "learning_rate": 0.00011849451091625293, "loss": 1.242, "step": 2191 }, { "epoch": 0.6108401839208583, "grad_norm": 0.024046332502411352, "learning_rate": 0.00011835184166952626, "loss": 1.1464, "step": 2192 }, { "epoch": 0.6111188518879754, "grad_norm": 0.022560507006075484, "learning_rate": 0.00011820920238074768, "loss": 1.1802, "step": 2193 }, { "epoch": 0.6113975198550926, "grad_norm": 0.023296086993679035, "learning_rate": 0.00011806659318493862, "loss": 1.1814, "step": 2194 }, { "epoch": 0.6116761878222098, "grad_norm": 0.024371480324582532, "learning_rate": 0.00011792401421709205, "loss": 1.2186, "step": 2195 }, { "epoch": 0.6119548557893271, "grad_norm": 0.02173743363300079, "learning_rate": 0.0001177814656121723, "loss": 1.2166, "step": 2196 }, { "epoch": 0.6122335237564442, "grad_norm": 0.02248220693608942, "learning_rate": 0.00011763894750511506, "loss": 1.1373, "step": 2197 }, { "epoch": 0.6125121917235614, "grad_norm": 0.024365848663526944, "learning_rate": 0.00011749646003082706, "loss": 1.1207, "step": 2198 }, { "epoch": 0.6127908596906786, "grad_norm": 0.024315339099683163, "learning_rate": 0.00011735400332418602, "loss": 1.1419, "step": 2199 }, { "epoch": 0.6130695276577958, "grad_norm": 0.02565771424222138, "learning_rate": 0.00011721157752004058, "loss": 1.1599, "step": 2200 }, { "epoch": 0.6133481956249129, "grad_norm": 0.022835996007207145, "learning_rate": 0.00011706918275321008, "loss": 1.139, "step": 2201 }, { "epoch": 0.6136268635920301, "grad_norm": 0.023779607708786992, "learning_rate": 0.00011692681915848452, "loss": 1.1627, "step": 2202 }, { "epoch": 0.6139055315591473, "grad_norm": 0.022678668517749495, "learning_rate": 0.00011678448687062445, "loss": 1.1496, "step": 2203 }, { "epoch": 0.6141841995262645, "grad_norm": 0.023674544696680667, "learning_rate": 0.00011664218602436067, "loss": 1.1433, "step": 2204 }, { "epoch": 0.6144628674933816, "grad_norm": 0.02295918038279919, "learning_rate": 0.00011649991675439425, "loss": 1.1332, "step": 2205 }, { "epoch": 0.6147415354604988, "grad_norm": 0.023113850164709432, "learning_rate": 0.0001163576791953964, "loss": 1.1116, "step": 2206 }, { "epoch": 0.615020203427616, "grad_norm": 0.022307265245907468, "learning_rate": 0.00011621547348200835, "loss": 1.1651, "step": 2207 }, { "epoch": 0.6152988713947332, "grad_norm": 0.023833590075873278, "learning_rate": 0.00011607329974884108, "loss": 1.1944, "step": 2208 }, { "epoch": 0.6155775393618503, "grad_norm": 0.022453466167113318, "learning_rate": 0.00011593115813047535, "loss": 1.1304, "step": 2209 }, { "epoch": 0.6158562073289675, "grad_norm": 0.023776988840666664, "learning_rate": 0.00011578904876146157, "loss": 1.1776, "step": 2210 }, { "epoch": 0.6161348752960847, "grad_norm": 0.023943028711849564, "learning_rate": 0.00011564697177631953, "loss": 1.1067, "step": 2211 }, { "epoch": 0.6164135432632019, "grad_norm": 0.024726559462892293, "learning_rate": 0.00011550492730953845, "loss": 1.1834, "step": 2212 }, { "epoch": 0.616692211230319, "grad_norm": 0.02266159205203418, "learning_rate": 0.00011536291549557674, "loss": 1.1461, "step": 2213 }, { "epoch": 0.6169708791974362, "grad_norm": 0.024154912663275595, "learning_rate": 0.00011522093646886185, "loss": 1.1242, "step": 2214 }, { "epoch": 0.6172495471645534, "grad_norm": 0.0253198203831934, "learning_rate": 0.00011507899036379019, "loss": 1.1245, "step": 2215 }, { "epoch": 0.6175282151316707, "grad_norm": 0.023349474712901577, "learning_rate": 0.00011493707731472717, "loss": 1.1237, "step": 2216 }, { "epoch": 0.6178068830987878, "grad_norm": 0.022444135199625997, "learning_rate": 0.00011479519745600675, "loss": 1.0881, "step": 2217 }, { "epoch": 0.618085551065905, "grad_norm": 0.023547689218130188, "learning_rate": 0.00011465335092193149, "loss": 1.1184, "step": 2218 }, { "epoch": 0.6183642190330222, "grad_norm": 0.023546870012777874, "learning_rate": 0.00011451153784677244, "loss": 1.1018, "step": 2219 }, { "epoch": 0.6186428870001394, "grad_norm": 0.022819761736647983, "learning_rate": 0.00011436975836476899, "loss": 1.1754, "step": 2220 }, { "epoch": 0.6189215549672565, "grad_norm": 0.024061251153665163, "learning_rate": 0.00011422801261012859, "loss": 1.0952, "step": 2221 }, { "epoch": 0.6192002229343737, "grad_norm": 0.023326943030393628, "learning_rate": 0.00011408630071702706, "loss": 1.1549, "step": 2222 }, { "epoch": 0.6194788909014909, "grad_norm": 0.02322720642398628, "learning_rate": 0.00011394462281960792, "loss": 1.1033, "step": 2223 }, { "epoch": 0.6197575588686081, "grad_norm": 0.023182184414901233, "learning_rate": 0.00011380297905198253, "loss": 1.1194, "step": 2224 }, { "epoch": 0.6200362268357252, "grad_norm": 0.023538995700289642, "learning_rate": 0.00011366136954823004, "loss": 1.1141, "step": 2225 }, { "epoch": 0.6203148948028424, "grad_norm": 0.022335867946666744, "learning_rate": 0.0001135197944423971, "loss": 1.1323, "step": 2226 }, { "epoch": 0.6205935627699596, "grad_norm": 0.021914818144182793, "learning_rate": 0.00011337825386849777, "loss": 1.1006, "step": 2227 }, { "epoch": 0.6208722307370768, "grad_norm": 0.022877090855243507, "learning_rate": 0.00011323674796051358, "loss": 1.2307, "step": 2228 }, { "epoch": 0.6211508987041939, "grad_norm": 0.026001462577962868, "learning_rate": 0.00011309527685239305, "loss": 1.1595, "step": 2229 }, { "epoch": 0.6214295666713111, "grad_norm": 0.02424762290898992, "learning_rate": 0.00011295384067805189, "loss": 1.1836, "step": 2230 }, { "epoch": 0.6217082346384283, "grad_norm": 0.022948483400917496, "learning_rate": 0.00011281243957137267, "loss": 1.1641, "step": 2231 }, { "epoch": 0.6219869026055455, "grad_norm": 0.024373769522663048, "learning_rate": 0.00011267107366620477, "loss": 1.1519, "step": 2232 }, { "epoch": 0.6222655705726626, "grad_norm": 0.022670257111231207, "learning_rate": 0.00011252974309636426, "loss": 1.1631, "step": 2233 }, { "epoch": 0.6225442385397798, "grad_norm": 0.02293512933454902, "learning_rate": 0.00011238844799563384, "loss": 1.0573, "step": 2234 }, { "epoch": 0.622822906506897, "grad_norm": 0.023161527581993593, "learning_rate": 0.00011224718849776255, "loss": 1.179, "step": 2235 }, { "epoch": 0.6231015744740143, "grad_norm": 0.023937771673088724, "learning_rate": 0.00011210596473646569, "loss": 1.1549, "step": 2236 }, { "epoch": 0.6233802424411314, "grad_norm": 0.021560362217374774, "learning_rate": 0.00011196477684542485, "loss": 1.1395, "step": 2237 }, { "epoch": 0.6236589104082486, "grad_norm": 0.021961455013328822, "learning_rate": 0.00011182362495828756, "loss": 1.195, "step": 2238 }, { "epoch": 0.6239375783753658, "grad_norm": 0.022087451250064477, "learning_rate": 0.00011168250920866725, "loss": 1.1329, "step": 2239 }, { "epoch": 0.624216246342483, "grad_norm": 0.024469123550858656, "learning_rate": 0.00011154142973014334, "loss": 1.1035, "step": 2240 }, { "epoch": 0.6244949143096001, "grad_norm": 0.023431555556416128, "learning_rate": 0.0001114003866562607, "loss": 1.127, "step": 2241 }, { "epoch": 0.6247735822767173, "grad_norm": 0.023961188184823932, "learning_rate": 0.00011125938012052983, "loss": 1.1949, "step": 2242 }, { "epoch": 0.6250522502438345, "grad_norm": 0.02378803805581733, "learning_rate": 0.00011111841025642667, "loss": 1.154, "step": 2243 }, { "epoch": 0.6253309182109517, "grad_norm": 0.022018136009454486, "learning_rate": 0.00011097747719739234, "loss": 1.1411, "step": 2244 }, { "epoch": 0.6256095861780688, "grad_norm": 0.02466327167002604, "learning_rate": 0.00011083658107683317, "loss": 1.1095, "step": 2245 }, { "epoch": 0.625888254145186, "grad_norm": 0.02391224279652315, "learning_rate": 0.00011069572202812064, "loss": 1.2145, "step": 2246 }, { "epoch": 0.6261669221123032, "grad_norm": 0.023254900764429935, "learning_rate": 0.000110554900184591, "loss": 1.2103, "step": 2247 }, { "epoch": 0.6264455900794204, "grad_norm": 0.02352395289018378, "learning_rate": 0.00011041411567954532, "loss": 1.1842, "step": 2248 }, { "epoch": 0.6267242580465375, "grad_norm": 0.02178103865554573, "learning_rate": 0.00011027336864624934, "loss": 1.1806, "step": 2249 }, { "epoch": 0.6270029260136547, "grad_norm": 0.02350322973394884, "learning_rate": 0.00011013265921793327, "loss": 1.1196, "step": 2250 }, { "epoch": 0.6272815939807719, "grad_norm": 0.023246694879411053, "learning_rate": 0.00010999198752779181, "loss": 1.0819, "step": 2251 }, { "epoch": 0.627560261947889, "grad_norm": 0.023309927079667052, "learning_rate": 0.00010985135370898393, "loss": 1.1709, "step": 2252 }, { "epoch": 0.6278389299150062, "grad_norm": 0.02411106680396769, "learning_rate": 0.00010971075789463268, "loss": 1.1056, "step": 2253 }, { "epoch": 0.6281175978821234, "grad_norm": 0.024295482606174517, "learning_rate": 0.00010957020021782514, "loss": 1.2047, "step": 2254 }, { "epoch": 0.6283962658492406, "grad_norm": 0.02318979509232623, "learning_rate": 0.00010942968081161242, "loss": 1.1855, "step": 2255 }, { "epoch": 0.6286749338163579, "grad_norm": 0.022678600126151385, "learning_rate": 0.00010928919980900925, "loss": 1.141, "step": 2256 }, { "epoch": 0.628953601783475, "grad_norm": 0.02201580405876534, "learning_rate": 0.00010914875734299404, "loss": 1.1024, "step": 2257 }, { "epoch": 0.6292322697505922, "grad_norm": 0.022169740990272316, "learning_rate": 0.00010900835354650882, "loss": 1.1557, "step": 2258 }, { "epoch": 0.6295109377177094, "grad_norm": 0.022331366576228313, "learning_rate": 0.00010886798855245886, "loss": 1.0807, "step": 2259 }, { "epoch": 0.6297896056848266, "grad_norm": 0.02360490383946953, "learning_rate": 0.00010872766249371287, "loss": 1.103, "step": 2260 }, { "epoch": 0.6300682736519437, "grad_norm": 0.025345899500417914, "learning_rate": 0.00010858737550310257, "loss": 1.1724, "step": 2261 }, { "epoch": 0.6303469416190609, "grad_norm": 0.02277404312561677, "learning_rate": 0.00010844712771342272, "loss": 1.1183, "step": 2262 }, { "epoch": 0.6306256095861781, "grad_norm": 0.023179914281596922, "learning_rate": 0.00010830691925743102, "loss": 1.136, "step": 2263 }, { "epoch": 0.6309042775532953, "grad_norm": 0.0230062472418289, "learning_rate": 0.00010816675026784791, "loss": 1.1882, "step": 2264 }, { "epoch": 0.6311829455204124, "grad_norm": 0.021625875060027132, "learning_rate": 0.00010802662087735651, "loss": 1.152, "step": 2265 }, { "epoch": 0.6314616134875296, "grad_norm": 0.024032708778267842, "learning_rate": 0.00010788653121860241, "loss": 1.1733, "step": 2266 }, { "epoch": 0.6317402814546468, "grad_norm": 0.023376990005370357, "learning_rate": 0.00010774648142419357, "loss": 1.121, "step": 2267 }, { "epoch": 0.632018949421764, "grad_norm": 0.02218552352332347, "learning_rate": 0.00010760647162670026, "loss": 1.0983, "step": 2268 }, { "epoch": 0.6322976173888811, "grad_norm": 0.021241346645917442, "learning_rate": 0.00010746650195865485, "loss": 1.0844, "step": 2269 }, { "epoch": 0.6325762853559983, "grad_norm": 0.025288277155865548, "learning_rate": 0.0001073265725525518, "loss": 1.0928, "step": 2270 }, { "epoch": 0.6328549533231155, "grad_norm": 0.02436469630530094, "learning_rate": 0.00010718668354084742, "loss": 1.0743, "step": 2271 }, { "epoch": 0.6331336212902327, "grad_norm": 0.024649335413017278, "learning_rate": 0.00010704683505595973, "loss": 1.1635, "step": 2272 }, { "epoch": 0.6334122892573498, "grad_norm": 0.022160168560375903, "learning_rate": 0.00010690702723026843, "loss": 1.096, "step": 2273 }, { "epoch": 0.633690957224467, "grad_norm": 0.02422951411193821, "learning_rate": 0.00010676726019611474, "loss": 1.1878, "step": 2274 }, { "epoch": 0.6339696251915842, "grad_norm": 0.02420644970541911, "learning_rate": 0.00010662753408580125, "loss": 1.166, "step": 2275 }, { "epoch": 0.6342482931587015, "grad_norm": 0.02300599323912828, "learning_rate": 0.00010648784903159176, "loss": 1.1838, "step": 2276 }, { "epoch": 0.6345269611258186, "grad_norm": 0.024607739325769946, "learning_rate": 0.00010634820516571141, "loss": 1.1517, "step": 2277 }, { "epoch": 0.6348056290929358, "grad_norm": 0.021385284708004458, "learning_rate": 0.00010620860262034612, "loss": 1.1955, "step": 2278 }, { "epoch": 0.635084297060053, "grad_norm": 0.02299538153122964, "learning_rate": 0.00010606904152764276, "loss": 1.0881, "step": 2279 }, { "epoch": 0.6353629650271702, "grad_norm": 0.023561488227346467, "learning_rate": 0.00010592952201970903, "loss": 1.1141, "step": 2280 }, { "epoch": 0.6356416329942873, "grad_norm": 0.022348472916585895, "learning_rate": 0.00010579004422861323, "loss": 1.1071, "step": 2281 }, { "epoch": 0.6359203009614045, "grad_norm": 0.02197044254740872, "learning_rate": 0.00010565060828638404, "loss": 1.168, "step": 2282 }, { "epoch": 0.6361989689285217, "grad_norm": 0.022944998263396035, "learning_rate": 0.00010551121432501084, "loss": 1.1691, "step": 2283 }, { "epoch": 0.6364776368956389, "grad_norm": 0.02151821281790122, "learning_rate": 0.00010537186247644294, "loss": 1.1364, "step": 2284 }, { "epoch": 0.636756304862756, "grad_norm": 0.022708766537366715, "learning_rate": 0.00010523255287259002, "loss": 1.1824, "step": 2285 }, { "epoch": 0.6370349728298732, "grad_norm": 0.022913604494532815, "learning_rate": 0.0001050932856453216, "loss": 1.0942, "step": 2286 }, { "epoch": 0.6373136407969904, "grad_norm": 0.025907137409673462, "learning_rate": 0.00010495406092646718, "loss": 1.1919, "step": 2287 }, { "epoch": 0.6375923087641076, "grad_norm": 0.024943940037339785, "learning_rate": 0.00010481487884781596, "loss": 1.146, "step": 2288 }, { "epoch": 0.6378709767312247, "grad_norm": 0.025943138475647585, "learning_rate": 0.00010467573954111695, "loss": 1.1473, "step": 2289 }, { "epoch": 0.6381496446983419, "grad_norm": 0.025137499899100958, "learning_rate": 0.00010453664313807847, "loss": 1.1613, "step": 2290 }, { "epoch": 0.6384283126654591, "grad_norm": 0.025976393409897044, "learning_rate": 0.0001043975897703683, "loss": 1.1202, "step": 2291 }, { "epoch": 0.6387069806325762, "grad_norm": 0.02438090888063813, "learning_rate": 0.00010425857956961347, "loss": 1.2137, "step": 2292 }, { "epoch": 0.6389856485996934, "grad_norm": 0.023407187509009893, "learning_rate": 0.00010411961266740017, "loss": 1.1747, "step": 2293 }, { "epoch": 0.6392643165668106, "grad_norm": 0.037179451410039775, "learning_rate": 0.00010398068919527361, "loss": 1.1256, "step": 2294 }, { "epoch": 0.6395429845339278, "grad_norm": 0.024003282506103743, "learning_rate": 0.00010384180928473791, "loss": 1.1367, "step": 2295 }, { "epoch": 0.6398216525010451, "grad_norm": 0.02443590895209687, "learning_rate": 0.00010370297306725589, "loss": 1.2006, "step": 2296 }, { "epoch": 0.6401003204681622, "grad_norm": 0.023190400557648854, "learning_rate": 0.00010356418067424903, "loss": 1.1055, "step": 2297 }, { "epoch": 0.6403789884352794, "grad_norm": 0.02235470222882703, "learning_rate": 0.0001034254322370973, "loss": 1.1044, "step": 2298 }, { "epoch": 0.6406576564023966, "grad_norm": 0.022062611339175066, "learning_rate": 0.00010328672788713921, "loss": 1.1273, "step": 2299 }, { "epoch": 0.6409363243695138, "grad_norm": 0.02281146809381952, "learning_rate": 0.00010314806775567135, "loss": 1.0887, "step": 2300 }, { "epoch": 0.6412149923366309, "grad_norm": 0.024563679050238926, "learning_rate": 0.00010300945197394855, "loss": 1.2078, "step": 2301 }, { "epoch": 0.6414936603037481, "grad_norm": 0.02309776200738329, "learning_rate": 0.00010287088067318363, "loss": 1.1373, "step": 2302 }, { "epoch": 0.6417723282708653, "grad_norm": 0.023027608592196616, "learning_rate": 0.00010273235398454733, "loss": 1.2039, "step": 2303 }, { "epoch": 0.6420509962379825, "grad_norm": 0.022507524985242236, "learning_rate": 0.00010259387203916812, "loss": 1.1163, "step": 2304 }, { "epoch": 0.6423296642050996, "grad_norm": 0.022939185731031594, "learning_rate": 0.00010245543496813217, "loss": 1.1454, "step": 2305 }, { "epoch": 0.6426083321722168, "grad_norm": 0.02392180057975523, "learning_rate": 0.0001023170429024831, "loss": 1.1533, "step": 2306 }, { "epoch": 0.642887000139334, "grad_norm": 0.024520115019626663, "learning_rate": 0.000102178695973222, "loss": 1.135, "step": 2307 }, { "epoch": 0.6431656681064511, "grad_norm": 0.022375744580810188, "learning_rate": 0.00010204039431130722, "loss": 1.1632, "step": 2308 }, { "epoch": 0.6434443360735683, "grad_norm": 0.02331549809732229, "learning_rate": 0.00010190213804765424, "loss": 1.082, "step": 2309 }, { "epoch": 0.6437230040406855, "grad_norm": 0.02313955954544895, "learning_rate": 0.00010176392731313558, "loss": 1.1283, "step": 2310 }, { "epoch": 0.6440016720078027, "grad_norm": 0.023839668221481043, "learning_rate": 0.00010162576223858061, "loss": 1.2338, "step": 2311 }, { "epoch": 0.6442803399749198, "grad_norm": 0.024705601931399597, "learning_rate": 0.00010148764295477547, "loss": 1.154, "step": 2312 }, { "epoch": 0.644559007942037, "grad_norm": 0.024581170423769907, "learning_rate": 0.00010134956959246317, "loss": 1.2086, "step": 2313 }, { "epoch": 0.6448376759091542, "grad_norm": 0.025045770790246582, "learning_rate": 0.000101211542282343, "loss": 1.1559, "step": 2314 }, { "epoch": 0.6451163438762714, "grad_norm": 0.024557244718685402, "learning_rate": 0.00010107356115507075, "loss": 1.1638, "step": 2315 }, { "epoch": 0.6453950118433887, "grad_norm": 0.022910692002113504, "learning_rate": 0.00010093562634125849, "loss": 1.1376, "step": 2316 }, { "epoch": 0.6456736798105058, "grad_norm": 0.02351059781759277, "learning_rate": 0.00010079773797147442, "loss": 1.1616, "step": 2317 }, { "epoch": 0.645952347777623, "grad_norm": 0.022353415235503812, "learning_rate": 0.0001006598961762428, "loss": 1.1228, "step": 2318 }, { "epoch": 0.6462310157447402, "grad_norm": 0.021574973229009566, "learning_rate": 0.0001005221010860439, "loss": 1.0962, "step": 2319 }, { "epoch": 0.6465096837118574, "grad_norm": 0.024336682708611633, "learning_rate": 0.00010038435283131362, "loss": 1.0928, "step": 2320 }, { "epoch": 0.6467883516789745, "grad_norm": 0.023722229911115437, "learning_rate": 0.00010024665154244358, "loss": 1.1542, "step": 2321 }, { "epoch": 0.6470670196460917, "grad_norm": 0.022936911552075542, "learning_rate": 0.00010010899734978101, "loss": 1.1283, "step": 2322 }, { "epoch": 0.6473456876132089, "grad_norm": 0.02229719075582332, "learning_rate": 9.997139038362847e-05, "loss": 1.1271, "step": 2323 }, { "epoch": 0.647624355580326, "grad_norm": 0.02315807229183015, "learning_rate": 9.983383077424377e-05, "loss": 1.1904, "step": 2324 }, { "epoch": 0.6479030235474432, "grad_norm": 0.02225800506292388, "learning_rate": 9.969631865184012e-05, "loss": 1.1857, "step": 2325 }, { "epoch": 0.6481816915145604, "grad_norm": 0.02551047209503041, "learning_rate": 9.955885414658558e-05, "loss": 1.1289, "step": 2326 }, { "epoch": 0.6484603594816776, "grad_norm": 0.02205803099387086, "learning_rate": 9.942143738860317e-05, "loss": 1.0992, "step": 2327 }, { "epoch": 0.6487390274487947, "grad_norm": 0.024122965971023983, "learning_rate": 9.928406850797076e-05, "loss": 1.1421, "step": 2328 }, { "epoch": 0.6490176954159119, "grad_norm": 0.023914139609287427, "learning_rate": 9.914674763472082e-05, "loss": 1.1987, "step": 2329 }, { "epoch": 0.6492963633830291, "grad_norm": 0.02303678623897385, "learning_rate": 9.900947489884039e-05, "loss": 1.2561, "step": 2330 }, { "epoch": 0.6495750313501463, "grad_norm": 0.02460592977139982, "learning_rate": 9.887225043027112e-05, "loss": 1.1724, "step": 2331 }, { "epoch": 0.6498536993172634, "grad_norm": 0.023930516563163872, "learning_rate": 9.873507435890877e-05, "loss": 1.1452, "step": 2332 }, { "epoch": 0.6501323672843806, "grad_norm": 0.02275790598909307, "learning_rate": 9.85979468146033e-05, "loss": 1.1548, "step": 2333 }, { "epoch": 0.6504110352514978, "grad_norm": 0.02279355500623332, "learning_rate": 9.846086792715884e-05, "loss": 1.1022, "step": 2334 }, { "epoch": 0.650689703218615, "grad_norm": 0.024899592773300016, "learning_rate": 9.832383782633339e-05, "loss": 1.1417, "step": 2335 }, { "epoch": 0.6509683711857323, "grad_norm": 0.024290757872408766, "learning_rate": 9.818685664183873e-05, "loss": 1.1297, "step": 2336 }, { "epoch": 0.6512470391528494, "grad_norm": 0.023082580921366863, "learning_rate": 9.804992450334043e-05, "loss": 1.1693, "step": 2337 }, { "epoch": 0.6515257071199666, "grad_norm": 0.0237360541174853, "learning_rate": 9.791304154045762e-05, "loss": 1.1524, "step": 2338 }, { "epoch": 0.6518043750870838, "grad_norm": 0.022670026039783128, "learning_rate": 9.777620788276286e-05, "loss": 1.1573, "step": 2339 }, { "epoch": 0.652083043054201, "grad_norm": 0.02430812360826416, "learning_rate": 9.763942365978198e-05, "loss": 1.1914, "step": 2340 }, { "epoch": 0.6523617110213181, "grad_norm": 0.02274843017202355, "learning_rate": 9.75026890009941e-05, "loss": 1.0918, "step": 2341 }, { "epoch": 0.6526403789884353, "grad_norm": 0.02480651826301706, "learning_rate": 9.736600403583135e-05, "loss": 1.1536, "step": 2342 }, { "epoch": 0.6529190469555525, "grad_norm": 0.02471250171688361, "learning_rate": 9.722936889367888e-05, "loss": 1.1347, "step": 2343 }, { "epoch": 0.6531977149226696, "grad_norm": 0.021798021703986713, "learning_rate": 9.709278370387472e-05, "loss": 1.0782, "step": 2344 }, { "epoch": 0.6534763828897868, "grad_norm": 0.022798780891107925, "learning_rate": 9.69562485957095e-05, "loss": 1.2004, "step": 2345 }, { "epoch": 0.653755050856904, "grad_norm": 0.02290345000840701, "learning_rate": 9.681976369842645e-05, "loss": 1.1439, "step": 2346 }, { "epoch": 0.6540337188240212, "grad_norm": 0.023023645750344903, "learning_rate": 9.66833291412214e-05, "loss": 1.1248, "step": 2347 }, { "epoch": 0.6543123867911383, "grad_norm": 0.02470817819484524, "learning_rate": 9.654694505324242e-05, "loss": 1.1728, "step": 2348 }, { "epoch": 0.6545910547582555, "grad_norm": 0.022472307432138926, "learning_rate": 9.641061156358979e-05, "loss": 1.1478, "step": 2349 }, { "epoch": 0.6548697227253727, "grad_norm": 0.024405811400981785, "learning_rate": 9.6274328801316e-05, "loss": 1.1028, "step": 2350 }, { "epoch": 0.6551483906924899, "grad_norm": 0.022010536949566496, "learning_rate": 9.613809689542539e-05, "loss": 1.1316, "step": 2351 }, { "epoch": 0.655427058659607, "grad_norm": 0.02522031982525159, "learning_rate": 9.60019159748743e-05, "loss": 1.1574, "step": 2352 }, { "epoch": 0.6557057266267242, "grad_norm": 0.022732661419162838, "learning_rate": 9.58657861685707e-05, "loss": 1.0905, "step": 2353 }, { "epoch": 0.6559843945938414, "grad_norm": 0.023961193626335156, "learning_rate": 9.572970760537423e-05, "loss": 1.1145, "step": 2354 }, { "epoch": 0.6562630625609586, "grad_norm": 0.022206189249828902, "learning_rate": 9.559368041409592e-05, "loss": 1.1367, "step": 2355 }, { "epoch": 0.6565417305280759, "grad_norm": 0.02272530842586259, "learning_rate": 9.545770472349834e-05, "loss": 1.1277, "step": 2356 }, { "epoch": 0.656820398495193, "grad_norm": 0.021911282911446815, "learning_rate": 9.532178066229523e-05, "loss": 1.1146, "step": 2357 }, { "epoch": 0.6570990664623102, "grad_norm": 0.02255522843354455, "learning_rate": 9.518590835915141e-05, "loss": 1.0983, "step": 2358 }, { "epoch": 0.6573777344294274, "grad_norm": 0.022859697306702228, "learning_rate": 9.505008794268278e-05, "loss": 1.2003, "step": 2359 }, { "epoch": 0.6576564023965445, "grad_norm": 0.02280042603169705, "learning_rate": 9.491431954145609e-05, "loss": 1.1193, "step": 2360 }, { "epoch": 0.6579350703636617, "grad_norm": 0.02314622484176537, "learning_rate": 9.47786032839888e-05, "loss": 1.1359, "step": 2361 }, { "epoch": 0.6582137383307789, "grad_norm": 0.02329187969092124, "learning_rate": 9.464293929874915e-05, "loss": 1.126, "step": 2362 }, { "epoch": 0.6584924062978961, "grad_norm": 0.023365766815588197, "learning_rate": 9.45073277141558e-05, "loss": 1.1523, "step": 2363 }, { "epoch": 0.6587710742650132, "grad_norm": 0.023642192657561, "learning_rate": 9.437176865857778e-05, "loss": 1.1441, "step": 2364 }, { "epoch": 0.6590497422321304, "grad_norm": 0.025004960033940593, "learning_rate": 9.42362622603345e-05, "loss": 1.128, "step": 2365 }, { "epoch": 0.6593284101992476, "grad_norm": 0.023890354081577053, "learning_rate": 9.410080864769542e-05, "loss": 1.2085, "step": 2366 }, { "epoch": 0.6596070781663648, "grad_norm": 0.02364200256645901, "learning_rate": 9.396540794888004e-05, "loss": 1.165, "step": 2367 }, { "epoch": 0.6598857461334819, "grad_norm": 0.023201643934640498, "learning_rate": 9.383006029205793e-05, "loss": 1.1496, "step": 2368 }, { "epoch": 0.6601644141005991, "grad_norm": 0.023147094133209735, "learning_rate": 9.369476580534825e-05, "loss": 1.149, "step": 2369 }, { "epoch": 0.6604430820677163, "grad_norm": 0.022317463959542094, "learning_rate": 9.355952461681996e-05, "loss": 1.1216, "step": 2370 }, { "epoch": 0.6607217500348335, "grad_norm": 0.024269293259537226, "learning_rate": 9.342433685449149e-05, "loss": 1.1466, "step": 2371 }, { "epoch": 0.6610004180019506, "grad_norm": 0.024454487545281674, "learning_rate": 9.328920264633071e-05, "loss": 1.1255, "step": 2372 }, { "epoch": 0.6612790859690678, "grad_norm": 0.022559831596625037, "learning_rate": 9.31541221202548e-05, "loss": 1.1081, "step": 2373 }, { "epoch": 0.661557753936185, "grad_norm": 0.022067368489836106, "learning_rate": 9.301909540413019e-05, "loss": 1.1404, "step": 2374 }, { "epoch": 0.6618364219033022, "grad_norm": 0.02164086439698921, "learning_rate": 9.288412262577236e-05, "loss": 1.1321, "step": 2375 }, { "epoch": 0.6621150898704194, "grad_norm": 0.023445374654536316, "learning_rate": 9.274920391294559e-05, "loss": 1.1012, "step": 2376 }, { "epoch": 0.6623937578375366, "grad_norm": 0.02370127646508811, "learning_rate": 9.261433939336316e-05, "loss": 1.1855, "step": 2377 }, { "epoch": 0.6626724258046538, "grad_norm": 0.023745513885642205, "learning_rate": 9.247952919468693e-05, "loss": 1.133, "step": 2378 }, { "epoch": 0.662951093771771, "grad_norm": 0.022907862280553786, "learning_rate": 9.234477344452736e-05, "loss": 1.1327, "step": 2379 }, { "epoch": 0.6632297617388881, "grad_norm": 0.024759316916727576, "learning_rate": 9.221007227044352e-05, "loss": 1.1851, "step": 2380 }, { "epoch": 0.6635084297060053, "grad_norm": 0.02315730040894347, "learning_rate": 9.207542579994263e-05, "loss": 1.1342, "step": 2381 }, { "epoch": 0.6637870976731225, "grad_norm": 0.022436598799445772, "learning_rate": 9.194083416048018e-05, "loss": 1.119, "step": 2382 }, { "epoch": 0.6640657656402397, "grad_norm": 0.02398223708870105, "learning_rate": 9.180629747945977e-05, "loss": 1.1465, "step": 2383 }, { "epoch": 0.6643444336073568, "grad_norm": 0.02329136571657905, "learning_rate": 9.1671815884233e-05, "loss": 1.1303, "step": 2384 }, { "epoch": 0.664623101574474, "grad_norm": 0.026175842227724552, "learning_rate": 9.153738950209924e-05, "loss": 1.1964, "step": 2385 }, { "epoch": 0.6649017695415912, "grad_norm": 0.022601398557498355, "learning_rate": 9.140301846030577e-05, "loss": 1.0979, "step": 2386 }, { "epoch": 0.6651804375087084, "grad_norm": 0.023495981924521663, "learning_rate": 9.126870288604732e-05, "loss": 1.1245, "step": 2387 }, { "epoch": 0.6654591054758255, "grad_norm": 0.02179273198556142, "learning_rate": 9.113444290646614e-05, "loss": 1.1478, "step": 2388 }, { "epoch": 0.6657377734429427, "grad_norm": 0.021007729237226536, "learning_rate": 9.100023864865194e-05, "loss": 1.1319, "step": 2389 }, { "epoch": 0.6660164414100599, "grad_norm": 0.02237369062945818, "learning_rate": 9.086609023964156e-05, "loss": 1.1054, "step": 2390 }, { "epoch": 0.6662951093771771, "grad_norm": 0.02121405664929568, "learning_rate": 9.073199780641908e-05, "loss": 1.0829, "step": 2391 }, { "epoch": 0.6665737773442942, "grad_norm": 0.022260235328009085, "learning_rate": 9.059796147591561e-05, "loss": 1.1334, "step": 2392 }, { "epoch": 0.6668524453114114, "grad_norm": 0.02470220674356839, "learning_rate": 9.0463981375009e-05, "loss": 1.1484, "step": 2393 }, { "epoch": 0.6671311132785286, "grad_norm": 0.023294187657034032, "learning_rate": 9.033005763052406e-05, "loss": 1.1151, "step": 2394 }, { "epoch": 0.6674097812456458, "grad_norm": 0.021257596199432058, "learning_rate": 9.019619036923207e-05, "loss": 1.175, "step": 2395 }, { "epoch": 0.667688449212763, "grad_norm": 0.025524052644699133, "learning_rate": 9.006237971785104e-05, "loss": 1.1415, "step": 2396 }, { "epoch": 0.6679671171798802, "grad_norm": 0.02256012637177893, "learning_rate": 8.992862580304522e-05, "loss": 1.1398, "step": 2397 }, { "epoch": 0.6682457851469974, "grad_norm": 0.02182158185559625, "learning_rate": 8.979492875142528e-05, "loss": 1.1546, "step": 2398 }, { "epoch": 0.6685244531141146, "grad_norm": 0.022603546858456367, "learning_rate": 8.966128868954796e-05, "loss": 1.1043, "step": 2399 }, { "epoch": 0.6688031210812317, "grad_norm": 0.02287713623125166, "learning_rate": 8.952770574391615e-05, "loss": 1.1815, "step": 2400 }, { "epoch": 0.6690817890483489, "grad_norm": 0.021959173852625927, "learning_rate": 8.93941800409786e-05, "loss": 1.1573, "step": 2401 }, { "epoch": 0.6693604570154661, "grad_norm": 0.022496762180122146, "learning_rate": 8.926071170712994e-05, "loss": 1.0626, "step": 2402 }, { "epoch": 0.6696391249825833, "grad_norm": 0.023251662250934016, "learning_rate": 8.91273008687104e-05, "loss": 1.1905, "step": 2403 }, { "epoch": 0.6699177929497004, "grad_norm": 0.02485301215321533, "learning_rate": 8.89939476520058e-05, "loss": 1.1951, "step": 2404 }, { "epoch": 0.6701964609168176, "grad_norm": 0.023156899740136107, "learning_rate": 8.88606521832476e-05, "loss": 1.0823, "step": 2405 }, { "epoch": 0.6704751288839348, "grad_norm": 0.02263479243425421, "learning_rate": 8.87274145886124e-05, "loss": 1.0808, "step": 2406 }, { "epoch": 0.670753796851052, "grad_norm": 0.021386306565357168, "learning_rate": 8.859423499422204e-05, "loss": 1.0904, "step": 2407 }, { "epoch": 0.6710324648181691, "grad_norm": 0.02156998044980095, "learning_rate": 8.84611135261435e-05, "loss": 1.0828, "step": 2408 }, { "epoch": 0.6713111327852863, "grad_norm": 0.0234999695609239, "learning_rate": 8.832805031038872e-05, "loss": 1.2125, "step": 2409 }, { "epoch": 0.6715898007524035, "grad_norm": 0.023868823249426418, "learning_rate": 8.819504547291446e-05, "loss": 1.1254, "step": 2410 }, { "epoch": 0.6718684687195207, "grad_norm": 0.03067677321303241, "learning_rate": 8.806209913962237e-05, "loss": 1.2136, "step": 2411 }, { "epoch": 0.6721471366866378, "grad_norm": 0.025294713591541125, "learning_rate": 8.792921143635857e-05, "loss": 1.1379, "step": 2412 }, { "epoch": 0.672425804653755, "grad_norm": 0.02299676207952963, "learning_rate": 8.77963824889137e-05, "loss": 1.1452, "step": 2413 }, { "epoch": 0.6727044726208722, "grad_norm": 0.026191535754364066, "learning_rate": 8.766361242302285e-05, "loss": 1.1774, "step": 2414 }, { "epoch": 0.6729831405879894, "grad_norm": 0.022920425707628287, "learning_rate": 8.753090136436529e-05, "loss": 1.1618, "step": 2415 }, { "epoch": 0.6732618085551066, "grad_norm": 0.024722026385493465, "learning_rate": 8.739824943856445e-05, "loss": 1.205, "step": 2416 }, { "epoch": 0.6735404765222238, "grad_norm": 0.02252802244442811, "learning_rate": 8.726565677118788e-05, "loss": 1.1342, "step": 2417 }, { "epoch": 0.673819144489341, "grad_norm": 0.021823145129928168, "learning_rate": 8.713312348774699e-05, "loss": 1.1251, "step": 2418 }, { "epoch": 0.6740978124564582, "grad_norm": 0.02382175604868723, "learning_rate": 8.700064971369685e-05, "loss": 1.1657, "step": 2419 }, { "epoch": 0.6743764804235753, "grad_norm": 0.0243244534705506, "learning_rate": 8.686823557443632e-05, "loss": 1.1786, "step": 2420 }, { "epoch": 0.6746551483906925, "grad_norm": 0.021813046818546487, "learning_rate": 8.673588119530794e-05, "loss": 1.1321, "step": 2421 }, { "epoch": 0.6749338163578097, "grad_norm": 0.023549527509898607, "learning_rate": 8.660358670159734e-05, "loss": 1.1176, "step": 2422 }, { "epoch": 0.6752124843249269, "grad_norm": 0.02237011466192232, "learning_rate": 8.647135221853375e-05, "loss": 1.0696, "step": 2423 }, { "epoch": 0.675491152292044, "grad_norm": 0.022281854509367913, "learning_rate": 8.63391778712895e-05, "loss": 1.2188, "step": 2424 }, { "epoch": 0.6757698202591612, "grad_norm": 0.022989733977463775, "learning_rate": 8.620706378498003e-05, "loss": 1.1318, "step": 2425 }, { "epoch": 0.6760484882262784, "grad_norm": 0.022754857628362646, "learning_rate": 8.607501008466365e-05, "loss": 1.153, "step": 2426 }, { "epoch": 0.6763271561933956, "grad_norm": 0.023588189437868237, "learning_rate": 8.59430168953416e-05, "loss": 1.1577, "step": 2427 }, { "epoch": 0.6766058241605127, "grad_norm": 0.02415735428559928, "learning_rate": 8.581108434195768e-05, "loss": 1.1679, "step": 2428 }, { "epoch": 0.6768844921276299, "grad_norm": 0.022682097769738306, "learning_rate": 8.567921254939865e-05, "loss": 1.1225, "step": 2429 }, { "epoch": 0.6771631600947471, "grad_norm": 0.023357403790050257, "learning_rate": 8.554740164249331e-05, "loss": 1.1143, "step": 2430 }, { "epoch": 0.6774418280618643, "grad_norm": 0.02325953918422997, "learning_rate": 8.54156517460132e-05, "loss": 1.1787, "step": 2431 }, { "epoch": 0.6777204960289814, "grad_norm": 0.022926447129331604, "learning_rate": 8.528396298467181e-05, "loss": 1.1321, "step": 2432 }, { "epoch": 0.6779991639960986, "grad_norm": 0.021171450420683906, "learning_rate": 8.515233548312502e-05, "loss": 1.1368, "step": 2433 }, { "epoch": 0.6782778319632158, "grad_norm": 0.02157342290207039, "learning_rate": 8.502076936597038e-05, "loss": 1.2297, "step": 2434 }, { "epoch": 0.678556499930333, "grad_norm": 0.027206524269361227, "learning_rate": 8.488926475774785e-05, "loss": 1.1849, "step": 2435 }, { "epoch": 0.6788351678974502, "grad_norm": 0.024507467340554933, "learning_rate": 8.475782178293866e-05, "loss": 1.1317, "step": 2436 }, { "epoch": 0.6791138358645674, "grad_norm": 0.02445340426564814, "learning_rate": 8.462644056596599e-05, "loss": 1.1566, "step": 2437 }, { "epoch": 0.6793925038316846, "grad_norm": 0.022895449760898082, "learning_rate": 8.449512123119442e-05, "loss": 1.1989, "step": 2438 }, { "epoch": 0.6796711717988018, "grad_norm": 0.02455742372644942, "learning_rate": 8.436386390293007e-05, "loss": 1.1846, "step": 2439 }, { "epoch": 0.6799498397659189, "grad_norm": 0.023214679192174924, "learning_rate": 8.423266870542015e-05, "loss": 1.1473, "step": 2440 }, { "epoch": 0.6802285077330361, "grad_norm": 0.02392547111468261, "learning_rate": 8.410153576285347e-05, "loss": 1.1623, "step": 2441 }, { "epoch": 0.6805071757001533, "grad_norm": 0.02486059134534761, "learning_rate": 8.397046519935946e-05, "loss": 1.1881, "step": 2442 }, { "epoch": 0.6807858436672705, "grad_norm": 0.023597159830475823, "learning_rate": 8.383945713900877e-05, "loss": 1.0719, "step": 2443 }, { "epoch": 0.6810645116343876, "grad_norm": 0.02326859900919008, "learning_rate": 8.370851170581274e-05, "loss": 1.153, "step": 2444 }, { "epoch": 0.6813431796015048, "grad_norm": 0.021845649370867245, "learning_rate": 8.35776290237236e-05, "loss": 1.106, "step": 2445 }, { "epoch": 0.681621847568622, "grad_norm": 0.023120831829528576, "learning_rate": 8.344680921663387e-05, "loss": 1.1388, "step": 2446 }, { "epoch": 0.6819005155357392, "grad_norm": 0.025177672564154787, "learning_rate": 8.331605240837705e-05, "loss": 1.1777, "step": 2447 }, { "epoch": 0.6821791835028563, "grad_norm": 0.024089725467592638, "learning_rate": 8.318535872272647e-05, "loss": 1.2143, "step": 2448 }, { "epoch": 0.6824578514699735, "grad_norm": 0.026182900737876647, "learning_rate": 8.305472828339615e-05, "loss": 1.161, "step": 2449 }, { "epoch": 0.6827365194370907, "grad_norm": 0.021740049085443712, "learning_rate": 8.292416121403986e-05, "loss": 1.0737, "step": 2450 }, { "epoch": 0.6830151874042079, "grad_norm": 0.021624468973849897, "learning_rate": 8.279365763825171e-05, "loss": 1.1153, "step": 2451 }, { "epoch": 0.683293855371325, "grad_norm": 0.022911228409059903, "learning_rate": 8.266321767956538e-05, "loss": 1.1256, "step": 2452 }, { "epoch": 0.6835725233384422, "grad_norm": 0.023944523209982447, "learning_rate": 8.253284146145477e-05, "loss": 1.1582, "step": 2453 }, { "epoch": 0.6838511913055594, "grad_norm": 0.024601809432827724, "learning_rate": 8.240252910733301e-05, "loss": 1.1589, "step": 2454 }, { "epoch": 0.6841298592726766, "grad_norm": 0.023355678687708985, "learning_rate": 8.227228074055304e-05, "loss": 1.1243, "step": 2455 }, { "epoch": 0.6844085272397938, "grad_norm": 0.02316748605192898, "learning_rate": 8.214209648440708e-05, "loss": 1.1066, "step": 2456 }, { "epoch": 0.684687195206911, "grad_norm": 0.024058052322912152, "learning_rate": 8.201197646212684e-05, "loss": 1.2006, "step": 2457 }, { "epoch": 0.6849658631740282, "grad_norm": 0.023866025704515306, "learning_rate": 8.188192079688289e-05, "loss": 1.156, "step": 2458 }, { "epoch": 0.6852445311411454, "grad_norm": 0.02199258929940852, "learning_rate": 8.17519296117854e-05, "loss": 1.1104, "step": 2459 }, { "epoch": 0.6855231991082625, "grad_norm": 0.02451764521772099, "learning_rate": 8.162200302988298e-05, "loss": 1.1457, "step": 2460 }, { "epoch": 0.6858018670753797, "grad_norm": 0.02486902471339864, "learning_rate": 8.14921411741635e-05, "loss": 1.1708, "step": 2461 }, { "epoch": 0.6860805350424969, "grad_norm": 0.021152912756297083, "learning_rate": 8.13623441675532e-05, "loss": 1.1298, "step": 2462 }, { "epoch": 0.6863592030096141, "grad_norm": 0.022046685877350005, "learning_rate": 8.123261213291723e-05, "loss": 1.122, "step": 2463 }, { "epoch": 0.6866378709767312, "grad_norm": 0.020851392954539052, "learning_rate": 8.110294519305906e-05, "loss": 1.1181, "step": 2464 }, { "epoch": 0.6869165389438484, "grad_norm": 0.02319390679467996, "learning_rate": 8.097334347072068e-05, "loss": 1.1763, "step": 2465 }, { "epoch": 0.6871952069109656, "grad_norm": 0.022335919769829883, "learning_rate": 8.084380708858216e-05, "loss": 1.1334, "step": 2466 }, { "epoch": 0.6874738748780828, "grad_norm": 0.024161619476602034, "learning_rate": 8.071433616926189e-05, "loss": 1.1577, "step": 2467 }, { "epoch": 0.6877525428451999, "grad_norm": 0.023102395314946473, "learning_rate": 8.058493083531627e-05, "loss": 1.2356, "step": 2468 }, { "epoch": 0.6880312108123171, "grad_norm": 0.023262269113093655, "learning_rate": 8.045559120923947e-05, "loss": 1.1445, "step": 2469 }, { "epoch": 0.6883098787794343, "grad_norm": 0.024156101880678307, "learning_rate": 8.03263174134637e-05, "loss": 1.1364, "step": 2470 }, { "epoch": 0.6885885467465515, "grad_norm": 0.02228032412653933, "learning_rate": 8.019710957035857e-05, "loss": 1.1379, "step": 2471 }, { "epoch": 0.6888672147136686, "grad_norm": 0.022321965736314744, "learning_rate": 8.006796780223151e-05, "loss": 1.1045, "step": 2472 }, { "epoch": 0.6891458826807858, "grad_norm": 0.026096096722124564, "learning_rate": 7.993889223132728e-05, "loss": 1.1503, "step": 2473 }, { "epoch": 0.689424550647903, "grad_norm": 0.022194516421039687, "learning_rate": 7.980988297982808e-05, "loss": 1.174, "step": 2474 }, { "epoch": 0.6897032186150202, "grad_norm": 0.024537804160735283, "learning_rate": 7.968094016985311e-05, "loss": 1.1868, "step": 2475 }, { "epoch": 0.6899818865821374, "grad_norm": 0.022597188676594864, "learning_rate": 7.9552063923459e-05, "loss": 1.059, "step": 2476 }, { "epoch": 0.6902605545492546, "grad_norm": 0.021936039272359355, "learning_rate": 7.942325436263897e-05, "loss": 1.0949, "step": 2477 }, { "epoch": 0.6905392225163718, "grad_norm": 0.022826998067351518, "learning_rate": 7.929451160932362e-05, "loss": 1.2097, "step": 2478 }, { "epoch": 0.690817890483489, "grad_norm": 0.022336964343370694, "learning_rate": 7.916583578537981e-05, "loss": 1.2002, "step": 2479 }, { "epoch": 0.6910965584506061, "grad_norm": 0.022551419034432633, "learning_rate": 7.903722701261141e-05, "loss": 1.1616, "step": 2480 }, { "epoch": 0.6913752264177233, "grad_norm": 0.02503991209503786, "learning_rate": 7.890868541275853e-05, "loss": 1.1262, "step": 2481 }, { "epoch": 0.6916538943848405, "grad_norm": 0.023870869504969545, "learning_rate": 7.878021110749802e-05, "loss": 1.1332, "step": 2482 }, { "epoch": 0.6919325623519577, "grad_norm": 0.022165125354595347, "learning_rate": 7.865180421844262e-05, "loss": 1.1616, "step": 2483 }, { "epoch": 0.6922112303190748, "grad_norm": 0.023197023843744412, "learning_rate": 7.852346486714174e-05, "loss": 1.0879, "step": 2484 }, { "epoch": 0.692489898286192, "grad_norm": 0.022858740716002607, "learning_rate": 7.839519317508045e-05, "loss": 1.1231, "step": 2485 }, { "epoch": 0.6927685662533092, "grad_norm": 0.021650343305296606, "learning_rate": 7.826698926367999e-05, "loss": 1.0915, "step": 2486 }, { "epoch": 0.6930472342204264, "grad_norm": 0.023926036731604226, "learning_rate": 7.813885325429731e-05, "loss": 1.0918, "step": 2487 }, { "epoch": 0.6933259021875435, "grad_norm": 0.02334941857124673, "learning_rate": 7.801078526822528e-05, "loss": 1.145, "step": 2488 }, { "epoch": 0.6936045701546607, "grad_norm": 0.021878926072673503, "learning_rate": 7.788278542669201e-05, "loss": 1.1447, "step": 2489 }, { "epoch": 0.6938832381217779, "grad_norm": 0.022996775353206628, "learning_rate": 7.775485385086165e-05, "loss": 1.1018, "step": 2490 }, { "epoch": 0.694161906088895, "grad_norm": 0.024383885449505604, "learning_rate": 7.762699066183321e-05, "loss": 1.1145, "step": 2491 }, { "epoch": 0.6944405740560122, "grad_norm": 0.0237203427565341, "learning_rate": 7.749919598064133e-05, "loss": 1.2659, "step": 2492 }, { "epoch": 0.6947192420231294, "grad_norm": 0.022636565382053705, "learning_rate": 7.737146992825548e-05, "loss": 1.1472, "step": 2493 }, { "epoch": 0.6949979099902466, "grad_norm": 0.025232407653600058, "learning_rate": 7.724381262558052e-05, "loss": 1.1988, "step": 2494 }, { "epoch": 0.6952765779573638, "grad_norm": 0.022044782255912585, "learning_rate": 7.711622419345587e-05, "loss": 1.0408, "step": 2495 }, { "epoch": 0.695555245924481, "grad_norm": 0.02405980801224026, "learning_rate": 7.698870475265612e-05, "loss": 1.0905, "step": 2496 }, { "epoch": 0.6958339138915982, "grad_norm": 0.023853652725027437, "learning_rate": 7.686125442389025e-05, "loss": 1.1719, "step": 2497 }, { "epoch": 0.6961125818587154, "grad_norm": 0.022761266247656585, "learning_rate": 7.673387332780204e-05, "loss": 1.1741, "step": 2498 }, { "epoch": 0.6963912498258326, "grad_norm": 0.02425492349374519, "learning_rate": 7.660656158496952e-05, "loss": 1.1108, "step": 2499 }, { "epoch": 0.6966699177929497, "grad_norm": 0.023136806826857675, "learning_rate": 7.647931931590531e-05, "loss": 1.1887, "step": 2500 }, { "epoch": 0.6969485857600669, "grad_norm": 0.02292544741174742, "learning_rate": 7.635214664105595e-05, "loss": 1.1189, "step": 2501 }, { "epoch": 0.6972272537271841, "grad_norm": 0.0216405030436629, "learning_rate": 7.622504368080256e-05, "loss": 1.0784, "step": 2502 }, { "epoch": 0.6975059216943013, "grad_norm": 0.022966165870419424, "learning_rate": 7.609801055545979e-05, "loss": 1.1629, "step": 2503 }, { "epoch": 0.6977845896614184, "grad_norm": 0.022386859028974675, "learning_rate": 7.597104738527652e-05, "loss": 1.1145, "step": 2504 }, { "epoch": 0.6980632576285356, "grad_norm": 0.02364572238134356, "learning_rate": 7.584415429043522e-05, "loss": 1.1456, "step": 2505 }, { "epoch": 0.6983419255956528, "grad_norm": 0.022202234207159853, "learning_rate": 7.571733139105213e-05, "loss": 1.1387, "step": 2506 }, { "epoch": 0.69862059356277, "grad_norm": 0.02235238086126314, "learning_rate": 7.559057880717698e-05, "loss": 1.1239, "step": 2507 }, { "epoch": 0.6988992615298871, "grad_norm": 0.02199811110618228, "learning_rate": 7.546389665879308e-05, "loss": 1.0962, "step": 2508 }, { "epoch": 0.6991779294970043, "grad_norm": 0.02313681875853379, "learning_rate": 7.533728506581679e-05, "loss": 1.1803, "step": 2509 }, { "epoch": 0.6994565974641215, "grad_norm": 0.022525904930456177, "learning_rate": 7.521074414809801e-05, "loss": 1.1845, "step": 2510 }, { "epoch": 0.6997352654312387, "grad_norm": 0.023074170745320265, "learning_rate": 7.508427402541944e-05, "loss": 1.0788, "step": 2511 }, { "epoch": 0.7000139333983558, "grad_norm": 0.023651679783490184, "learning_rate": 7.495787481749696e-05, "loss": 1.0922, "step": 2512 }, { "epoch": 0.700292601365473, "grad_norm": 0.025818345273812104, "learning_rate": 7.483154664397928e-05, "loss": 1.1436, "step": 2513 }, { "epoch": 0.7005712693325902, "grad_norm": 0.022701229857907294, "learning_rate": 7.47052896244479e-05, "loss": 1.045, "step": 2514 }, { "epoch": 0.7008499372997073, "grad_norm": 0.022865289193205005, "learning_rate": 7.457910387841682e-05, "loss": 1.1658, "step": 2515 }, { "epoch": 0.7011286052668246, "grad_norm": 0.02327778445176115, "learning_rate": 7.445298952533272e-05, "loss": 1.129, "step": 2516 }, { "epoch": 0.7014072732339418, "grad_norm": 0.021525589350867416, "learning_rate": 7.432694668457472e-05, "loss": 1.2038, "step": 2517 }, { "epoch": 0.701685941201059, "grad_norm": 0.024475996965397283, "learning_rate": 7.420097547545408e-05, "loss": 1.144, "step": 2518 }, { "epoch": 0.7019646091681762, "grad_norm": 0.02368483887885525, "learning_rate": 7.407507601721439e-05, "loss": 1.1574, "step": 2519 }, { "epoch": 0.7022432771352933, "grad_norm": 0.02603947371172219, "learning_rate": 7.39492484290313e-05, "loss": 1.1678, "step": 2520 }, { "epoch": 0.7025219451024105, "grad_norm": 0.0228078898864621, "learning_rate": 7.382349283001248e-05, "loss": 1.1198, "step": 2521 }, { "epoch": 0.7028006130695277, "grad_norm": 0.0233956141284736, "learning_rate": 7.369780933919725e-05, "loss": 1.1815, "step": 2522 }, { "epoch": 0.7030792810366449, "grad_norm": 0.02201422075580679, "learning_rate": 7.357219807555695e-05, "loss": 1.1027, "step": 2523 }, { "epoch": 0.703357949003762, "grad_norm": 0.022780430836709727, "learning_rate": 7.344665915799429e-05, "loss": 1.1459, "step": 2524 }, { "epoch": 0.7036366169708792, "grad_norm": 0.022019689822933466, "learning_rate": 7.33211927053437e-05, "loss": 1.1273, "step": 2525 }, { "epoch": 0.7039152849379964, "grad_norm": 0.024993719719648394, "learning_rate": 7.31957988363709e-05, "loss": 1.1614, "step": 2526 }, { "epoch": 0.7041939529051136, "grad_norm": 0.023566552964594426, "learning_rate": 7.307047766977303e-05, "loss": 1.1272, "step": 2527 }, { "epoch": 0.7044726208722307, "grad_norm": 0.02312538051822837, "learning_rate": 7.294522932417818e-05, "loss": 1.1624, "step": 2528 }, { "epoch": 0.7047512888393479, "grad_norm": 0.022386456621894316, "learning_rate": 7.28200539181458e-05, "loss": 1.1937, "step": 2529 }, { "epoch": 0.7050299568064651, "grad_norm": 0.02354378512680468, "learning_rate": 7.269495157016599e-05, "loss": 1.1768, "step": 2530 }, { "epoch": 0.7053086247735822, "grad_norm": 0.024957105031091234, "learning_rate": 7.256992239865991e-05, "loss": 1.1415, "step": 2531 }, { "epoch": 0.7055872927406994, "grad_norm": 0.023096051014604976, "learning_rate": 7.244496652197941e-05, "loss": 1.173, "step": 2532 }, { "epoch": 0.7058659607078166, "grad_norm": 0.021872632024829742, "learning_rate": 7.232008405840696e-05, "loss": 1.0733, "step": 2533 }, { "epoch": 0.7061446286749338, "grad_norm": 0.02332400232110203, "learning_rate": 7.219527512615544e-05, "loss": 1.1427, "step": 2534 }, { "epoch": 0.706423296642051, "grad_norm": 0.022920928200629517, "learning_rate": 7.207053984336831e-05, "loss": 1.1742, "step": 2535 }, { "epoch": 0.7067019646091681, "grad_norm": 0.02301055824285117, "learning_rate": 7.194587832811909e-05, "loss": 1.1191, "step": 2536 }, { "epoch": 0.7069806325762854, "grad_norm": 0.022822579268391912, "learning_rate": 7.182129069841167e-05, "loss": 1.0901, "step": 2537 }, { "epoch": 0.7072593005434026, "grad_norm": 0.024528123008961684, "learning_rate": 7.16967770721798e-05, "loss": 1.1241, "step": 2538 }, { "epoch": 0.7075379685105198, "grad_norm": 0.022854797890407303, "learning_rate": 7.157233756728748e-05, "loss": 1.0702, "step": 2539 }, { "epoch": 0.7078166364776369, "grad_norm": 0.02410486094215057, "learning_rate": 7.144797230152823e-05, "loss": 1.0841, "step": 2540 }, { "epoch": 0.7080953044447541, "grad_norm": 0.022156147014005666, "learning_rate": 7.13236813926255e-05, "loss": 1.1388, "step": 2541 }, { "epoch": 0.7083739724118713, "grad_norm": 0.02486839854805072, "learning_rate": 7.119946495823223e-05, "loss": 1.2365, "step": 2542 }, { "epoch": 0.7086526403789885, "grad_norm": 0.02382781836544741, "learning_rate": 7.107532311593098e-05, "loss": 1.1608, "step": 2543 }, { "epoch": 0.7089313083461056, "grad_norm": 0.024168605623370424, "learning_rate": 7.095125598323348e-05, "loss": 1.1528, "step": 2544 }, { "epoch": 0.7092099763132228, "grad_norm": 0.022978217219455605, "learning_rate": 7.082726367758113e-05, "loss": 1.1518, "step": 2545 }, { "epoch": 0.70948864428034, "grad_norm": 0.021557052725701346, "learning_rate": 7.070334631634411e-05, "loss": 1.1192, "step": 2546 }, { "epoch": 0.7097673122474571, "grad_norm": 0.029824760221313886, "learning_rate": 7.057950401682192e-05, "loss": 1.1134, "step": 2547 }, { "epoch": 0.7100459802145743, "grad_norm": 0.022414267343869122, "learning_rate": 7.045573689624279e-05, "loss": 1.1093, "step": 2548 }, { "epoch": 0.7103246481816915, "grad_norm": 0.024220491788287602, "learning_rate": 7.033204507176402e-05, "loss": 1.1285, "step": 2549 }, { "epoch": 0.7106033161488087, "grad_norm": 0.022201659954700585, "learning_rate": 7.020842866047133e-05, "loss": 1.1499, "step": 2550 }, { "epoch": 0.7108819841159258, "grad_norm": 0.023167406403368413, "learning_rate": 7.008488777937949e-05, "loss": 1.1752, "step": 2551 }, { "epoch": 0.711160652083043, "grad_norm": 0.02200801896907373, "learning_rate": 6.996142254543132e-05, "loss": 1.0703, "step": 2552 }, { "epoch": 0.7114393200501602, "grad_norm": 0.024749927663073844, "learning_rate": 6.983803307549837e-05, "loss": 1.1665, "step": 2553 }, { "epoch": 0.7117179880172774, "grad_norm": 0.022861514191122194, "learning_rate": 6.971471948638023e-05, "loss": 1.184, "step": 2554 }, { "epoch": 0.7119966559843945, "grad_norm": 0.02272370342769268, "learning_rate": 6.959148189480481e-05, "loss": 1.1314, "step": 2555 }, { "epoch": 0.7122753239515117, "grad_norm": 0.021527429146150627, "learning_rate": 6.946832041742804e-05, "loss": 1.1163, "step": 2556 }, { "epoch": 0.712553991918629, "grad_norm": 0.02203542739379397, "learning_rate": 6.934523517083389e-05, "loss": 1.0754, "step": 2557 }, { "epoch": 0.7128326598857462, "grad_norm": 0.022912679013789364, "learning_rate": 6.922222627153394e-05, "loss": 1.1089, "step": 2558 }, { "epoch": 0.7131113278528634, "grad_norm": 0.022465674892327547, "learning_rate": 6.90992938359677e-05, "loss": 1.1228, "step": 2559 }, { "epoch": 0.7133899958199805, "grad_norm": 0.02305280844460341, "learning_rate": 6.89764379805023e-05, "loss": 1.1165, "step": 2560 }, { "epoch": 0.7136686637870977, "grad_norm": 0.02233027861447786, "learning_rate": 6.88536588214322e-05, "loss": 1.1018, "step": 2561 }, { "epoch": 0.7139473317542149, "grad_norm": 0.021948029801482503, "learning_rate": 6.873095647497947e-05, "loss": 1.1905, "step": 2562 }, { "epoch": 0.714225999721332, "grad_norm": 0.022730303324687454, "learning_rate": 6.860833105729341e-05, "loss": 1.1449, "step": 2563 }, { "epoch": 0.7145046676884492, "grad_norm": 0.023166501949394857, "learning_rate": 6.848578268445037e-05, "loss": 1.1847, "step": 2564 }, { "epoch": 0.7147833356555664, "grad_norm": 0.021949285710131206, "learning_rate": 6.83633114724539e-05, "loss": 1.1692, "step": 2565 }, { "epoch": 0.7150620036226836, "grad_norm": 0.022684400244839798, "learning_rate": 6.82409175372346e-05, "loss": 1.2029, "step": 2566 }, { "epoch": 0.7153406715898007, "grad_norm": 0.022860148288917515, "learning_rate": 6.811860099464962e-05, "loss": 1.1516, "step": 2567 }, { "epoch": 0.7156193395569179, "grad_norm": 0.028379691414117, "learning_rate": 6.799636196048312e-05, "loss": 1.1297, "step": 2568 }, { "epoch": 0.7158980075240351, "grad_norm": 0.023977118138770247, "learning_rate": 6.787420055044579e-05, "loss": 1.1259, "step": 2569 }, { "epoch": 0.7161766754911523, "grad_norm": 0.022384715259903862, "learning_rate": 6.775211688017493e-05, "loss": 1.1384, "step": 2570 }, { "epoch": 0.7164553434582694, "grad_norm": 0.02256855192508628, "learning_rate": 6.763011106523405e-05, "loss": 1.14, "step": 2571 }, { "epoch": 0.7167340114253866, "grad_norm": 0.024871184090839125, "learning_rate": 6.75081832211132e-05, "loss": 1.1037, "step": 2572 }, { "epoch": 0.7170126793925038, "grad_norm": 0.023633041008520546, "learning_rate": 6.738633346322842e-05, "loss": 1.1003, "step": 2573 }, { "epoch": 0.717291347359621, "grad_norm": 0.021396946870557026, "learning_rate": 6.726456190692195e-05, "loss": 1.1462, "step": 2574 }, { "epoch": 0.7175700153267381, "grad_norm": 0.02393407483863036, "learning_rate": 6.7142868667462e-05, "loss": 1.1728, "step": 2575 }, { "epoch": 0.7178486832938553, "grad_norm": 0.025247407435287323, "learning_rate": 6.702125386004268e-05, "loss": 1.1556, "step": 2576 }, { "epoch": 0.7181273512609726, "grad_norm": 0.022186237415983977, "learning_rate": 6.689971759978371e-05, "loss": 1.1503, "step": 2577 }, { "epoch": 0.7184060192280898, "grad_norm": 0.02345473355962643, "learning_rate": 6.677826000173061e-05, "loss": 1.1772, "step": 2578 }, { "epoch": 0.718684687195207, "grad_norm": 0.023019688136789257, "learning_rate": 6.665688118085434e-05, "loss": 1.1287, "step": 2579 }, { "epoch": 0.7189633551623241, "grad_norm": 0.023984210281780344, "learning_rate": 6.653558125205132e-05, "loss": 1.1008, "step": 2580 }, { "epoch": 0.7192420231294413, "grad_norm": 0.02359499771669309, "learning_rate": 6.641436033014335e-05, "loss": 1.1555, "step": 2581 }, { "epoch": 0.7195206910965585, "grad_norm": 0.02139761496269419, "learning_rate": 6.629321852987742e-05, "loss": 1.1542, "step": 2582 }, { "epoch": 0.7197993590636756, "grad_norm": 0.023003709900292386, "learning_rate": 6.61721559659255e-05, "loss": 1.1019, "step": 2583 }, { "epoch": 0.7200780270307928, "grad_norm": 0.02246831375528147, "learning_rate": 6.605117275288475e-05, "loss": 1.1576, "step": 2584 }, { "epoch": 0.72035669499791, "grad_norm": 0.02337691214834454, "learning_rate": 6.593026900527703e-05, "loss": 1.1851, "step": 2585 }, { "epoch": 0.7206353629650272, "grad_norm": 0.02208866850536832, "learning_rate": 6.580944483754911e-05, "loss": 1.1289, "step": 2586 }, { "epoch": 0.7209140309321443, "grad_norm": 0.021816480823587836, "learning_rate": 6.56887003640724e-05, "loss": 1.1047, "step": 2587 }, { "epoch": 0.7211926988992615, "grad_norm": 0.02165593625943852, "learning_rate": 6.556803569914291e-05, "loss": 1.0999, "step": 2588 }, { "epoch": 0.7214713668663787, "grad_norm": 0.022656736349159802, "learning_rate": 6.544745095698093e-05, "loss": 1.1203, "step": 2589 }, { "epoch": 0.7217500348334959, "grad_norm": 0.02666167650081091, "learning_rate": 6.532694625173137e-05, "loss": 1.0956, "step": 2590 }, { "epoch": 0.722028702800613, "grad_norm": 0.02163444648516206, "learning_rate": 6.520652169746307e-05, "loss": 1.1677, "step": 2591 }, { "epoch": 0.7223073707677302, "grad_norm": 0.022024701476811973, "learning_rate": 6.508617740816924e-05, "loss": 1.1191, "step": 2592 }, { "epoch": 0.7225860387348474, "grad_norm": 0.022109497103813987, "learning_rate": 6.496591349776705e-05, "loss": 1.1516, "step": 2593 }, { "epoch": 0.7228647067019646, "grad_norm": 0.022028876502995884, "learning_rate": 6.484573008009755e-05, "loss": 1.077, "step": 2594 }, { "epoch": 0.7231433746690817, "grad_norm": 0.021762174258580822, "learning_rate": 6.472562726892556e-05, "loss": 1.1598, "step": 2595 }, { "epoch": 0.7234220426361989, "grad_norm": 0.022210115558013298, "learning_rate": 6.460560517793975e-05, "loss": 1.0974, "step": 2596 }, { "epoch": 0.7237007106033162, "grad_norm": 0.02273472786089935, "learning_rate": 6.448566392075215e-05, "loss": 1.1298, "step": 2597 }, { "epoch": 0.7239793785704334, "grad_norm": 0.023261162291337244, "learning_rate": 6.436580361089845e-05, "loss": 1.1935, "step": 2598 }, { "epoch": 0.7242580465375505, "grad_norm": 0.022257015939408507, "learning_rate": 6.424602436183768e-05, "loss": 1.1576, "step": 2599 }, { "epoch": 0.7245367145046677, "grad_norm": 0.02337231294482713, "learning_rate": 6.412632628695218e-05, "loss": 1.1336, "step": 2600 }, { "epoch": 0.7248153824717849, "grad_norm": 0.02119649529585324, "learning_rate": 6.400670949954731e-05, "loss": 1.2101, "step": 2601 }, { "epoch": 0.7250940504389021, "grad_norm": 0.02399957619186805, "learning_rate": 6.388717411285164e-05, "loss": 1.1726, "step": 2602 }, { "epoch": 0.7253727184060192, "grad_norm": 0.02200295588461916, "learning_rate": 6.376772024001653e-05, "loss": 1.0668, "step": 2603 }, { "epoch": 0.7256513863731364, "grad_norm": 0.023214268148394827, "learning_rate": 6.364834799411631e-05, "loss": 1.0927, "step": 2604 }, { "epoch": 0.7259300543402536, "grad_norm": 0.021470044587895094, "learning_rate": 6.3529057488148e-05, "loss": 1.1075, "step": 2605 }, { "epoch": 0.7262087223073708, "grad_norm": 0.022911578592240696, "learning_rate": 6.340984883503132e-05, "loss": 1.1456, "step": 2606 }, { "epoch": 0.7264873902744879, "grad_norm": 0.02306408184627339, "learning_rate": 6.329072214760829e-05, "loss": 1.0989, "step": 2607 }, { "epoch": 0.7267660582416051, "grad_norm": 0.022568843852126576, "learning_rate": 6.317167753864357e-05, "loss": 1.0889, "step": 2608 }, { "epoch": 0.7270447262087223, "grad_norm": 0.022270770986498267, "learning_rate": 6.305271512082407e-05, "loss": 1.1897, "step": 2609 }, { "epoch": 0.7273233941758395, "grad_norm": 0.020992273102673733, "learning_rate": 6.293383500675877e-05, "loss": 1.1318, "step": 2610 }, { "epoch": 0.7276020621429566, "grad_norm": 0.021538897880882486, "learning_rate": 6.281503730897889e-05, "loss": 1.1274, "step": 2611 }, { "epoch": 0.7278807301100738, "grad_norm": 0.022606922899304063, "learning_rate": 6.269632213993759e-05, "loss": 1.1549, "step": 2612 }, { "epoch": 0.728159398077191, "grad_norm": 0.025322507367546654, "learning_rate": 6.257768961200997e-05, "loss": 1.1611, "step": 2613 }, { "epoch": 0.7284380660443082, "grad_norm": 0.024636373473464358, "learning_rate": 6.245913983749272e-05, "loss": 1.1758, "step": 2614 }, { "epoch": 0.7287167340114253, "grad_norm": 0.022103413016017735, "learning_rate": 6.234067292860442e-05, "loss": 1.1229, "step": 2615 }, { "epoch": 0.7289954019785425, "grad_norm": 0.025184438290591263, "learning_rate": 6.222228899748497e-05, "loss": 1.1132, "step": 2616 }, { "epoch": 0.7292740699456598, "grad_norm": 0.021818872417873888, "learning_rate": 6.210398815619596e-05, "loss": 1.0917, "step": 2617 }, { "epoch": 0.729552737912777, "grad_norm": 0.022234928332342585, "learning_rate": 6.198577051672017e-05, "loss": 1.105, "step": 2618 }, { "epoch": 0.7298314058798941, "grad_norm": 0.023812905523002106, "learning_rate": 6.186763619096176e-05, "loss": 1.1965, "step": 2619 }, { "epoch": 0.7301100738470113, "grad_norm": 0.022486673266406826, "learning_rate": 6.174958529074584e-05, "loss": 1.0987, "step": 2620 }, { "epoch": 0.7303887418141285, "grad_norm": 0.024064766955427187, "learning_rate": 6.163161792781874e-05, "loss": 1.1493, "step": 2621 }, { "epoch": 0.7306674097812457, "grad_norm": 0.0228956144496198, "learning_rate": 6.151373421384752e-05, "loss": 1.1899, "step": 2622 }, { "epoch": 0.7309460777483628, "grad_norm": 0.022654190491203857, "learning_rate": 6.139593426042024e-05, "loss": 1.0734, "step": 2623 }, { "epoch": 0.73122474571548, "grad_norm": 0.021867992240178418, "learning_rate": 6.127821817904557e-05, "loss": 1.1049, "step": 2624 }, { "epoch": 0.7315034136825972, "grad_norm": 0.02300851800704764, "learning_rate": 6.116058608115288e-05, "loss": 1.1842, "step": 2625 }, { "epoch": 0.7317820816497144, "grad_norm": 0.02257627676693218, "learning_rate": 6.104303807809187e-05, "loss": 1.1048, "step": 2626 }, { "epoch": 0.7320607496168315, "grad_norm": 0.023236550579390818, "learning_rate": 6.0925574281132845e-05, "loss": 1.146, "step": 2627 }, { "epoch": 0.7323394175839487, "grad_norm": 0.02276535143860846, "learning_rate": 6.080819480146619e-05, "loss": 1.1546, "step": 2628 }, { "epoch": 0.7326180855510659, "grad_norm": 0.02313974056659426, "learning_rate": 6.069089975020263e-05, "loss": 1.1037, "step": 2629 }, { "epoch": 0.7328967535181831, "grad_norm": 0.02200452446749388, "learning_rate": 6.057368923837295e-05, "loss": 1.1154, "step": 2630 }, { "epoch": 0.7331754214853002, "grad_norm": 0.023311380891619982, "learning_rate": 6.04565633769279e-05, "loss": 1.1467, "step": 2631 }, { "epoch": 0.7334540894524174, "grad_norm": 0.023810641371397913, "learning_rate": 6.033952227673801e-05, "loss": 1.0993, "step": 2632 }, { "epoch": 0.7337327574195346, "grad_norm": 0.02172628369417332, "learning_rate": 6.022256604859377e-05, "loss": 1.1536, "step": 2633 }, { "epoch": 0.7340114253866518, "grad_norm": 0.022542111784615647, "learning_rate": 6.010569480320506e-05, "loss": 1.1371, "step": 2634 }, { "epoch": 0.7342900933537689, "grad_norm": 0.021960363534420095, "learning_rate": 5.998890865120155e-05, "loss": 1.0837, "step": 2635 }, { "epoch": 0.7345687613208861, "grad_norm": 0.023433067451307684, "learning_rate": 5.9872207703132266e-05, "loss": 1.1332, "step": 2636 }, { "epoch": 0.7348474292880034, "grad_norm": 0.022468537036606454, "learning_rate": 5.9755592069465666e-05, "loss": 1.1505, "step": 2637 }, { "epoch": 0.7351260972551206, "grad_norm": 0.02254464644581653, "learning_rate": 5.963906186058925e-05, "loss": 1.1452, "step": 2638 }, { "epoch": 0.7354047652222377, "grad_norm": 0.022997463316068525, "learning_rate": 5.952261718680989e-05, "loss": 1.1226, "step": 2639 }, { "epoch": 0.7356834331893549, "grad_norm": 0.022770048143975957, "learning_rate": 5.9406258158353297e-05, "loss": 1.0957, "step": 2640 }, { "epoch": 0.7359621011564721, "grad_norm": 0.023221755737691436, "learning_rate": 5.928998488536423e-05, "loss": 1.1556, "step": 2641 }, { "epoch": 0.7362407691235893, "grad_norm": 0.02202778130275446, "learning_rate": 5.917379747790625e-05, "loss": 1.1486, "step": 2642 }, { "epoch": 0.7365194370907064, "grad_norm": 0.022325272511762206, "learning_rate": 5.9057696045961686e-05, "loss": 1.2083, "step": 2643 }, { "epoch": 0.7367981050578236, "grad_norm": 0.024294975467796295, "learning_rate": 5.8941680699431294e-05, "loss": 1.1813, "step": 2644 }, { "epoch": 0.7370767730249408, "grad_norm": 0.02329298475629247, "learning_rate": 5.8825751548134605e-05, "loss": 1.2311, "step": 2645 }, { "epoch": 0.737355440992058, "grad_norm": 0.024093228714020497, "learning_rate": 5.87099087018093e-05, "loss": 1.1129, "step": 2646 }, { "epoch": 0.7376341089591751, "grad_norm": 0.02410424299380222, "learning_rate": 5.859415227011157e-05, "loss": 1.1248, "step": 2647 }, { "epoch": 0.7379127769262923, "grad_norm": 0.022834803435329343, "learning_rate": 5.847848236261569e-05, "loss": 1.1495, "step": 2648 }, { "epoch": 0.7381914448934095, "grad_norm": 0.021478216150506567, "learning_rate": 5.8362899088814156e-05, "loss": 1.1498, "step": 2649 }, { "epoch": 0.7384701128605267, "grad_norm": 0.024058603779737238, "learning_rate": 5.8247402558117245e-05, "loss": 1.2475, "step": 2650 }, { "epoch": 0.7387487808276438, "grad_norm": 0.022773212468464216, "learning_rate": 5.813199287985331e-05, "loss": 1.1058, "step": 2651 }, { "epoch": 0.739027448794761, "grad_norm": 0.0225292837992, "learning_rate": 5.801667016326847e-05, "loss": 1.1585, "step": 2652 }, { "epoch": 0.7393061167618782, "grad_norm": 0.022725338103894203, "learning_rate": 5.79014345175264e-05, "loss": 1.0796, "step": 2653 }, { "epoch": 0.7395847847289954, "grad_norm": 0.02186186509519754, "learning_rate": 5.778628605170847e-05, "loss": 1.1694, "step": 2654 }, { "epoch": 0.7398634526961125, "grad_norm": 0.022772137620546928, "learning_rate": 5.76712248748135e-05, "loss": 1.1155, "step": 2655 }, { "epoch": 0.7401421206632297, "grad_norm": 0.0243521158825889, "learning_rate": 5.755625109575774e-05, "loss": 1.0677, "step": 2656 }, { "epoch": 0.740420788630347, "grad_norm": 0.025081747995364823, "learning_rate": 5.744136482337453e-05, "loss": 1.0581, "step": 2657 }, { "epoch": 0.7406994565974642, "grad_norm": 0.024777360274811568, "learning_rate": 5.732656616641462e-05, "loss": 1.1287, "step": 2658 }, { "epoch": 0.7409781245645813, "grad_norm": 0.022316975048639144, "learning_rate": 5.721185523354556e-05, "loss": 1.1366, "step": 2659 }, { "epoch": 0.7412567925316985, "grad_norm": 0.020908706588377242, "learning_rate": 5.709723213335209e-05, "loss": 1.1085, "step": 2660 }, { "epoch": 0.7415354604988157, "grad_norm": 0.022224298822557997, "learning_rate": 5.698269697433569e-05, "loss": 1.1275, "step": 2661 }, { "epoch": 0.7418141284659329, "grad_norm": 0.02289209173696962, "learning_rate": 5.6868249864914686e-05, "loss": 1.0948, "step": 2662 }, { "epoch": 0.74209279643305, "grad_norm": 0.022174512995600266, "learning_rate": 5.6753890913423896e-05, "loss": 1.0812, "step": 2663 }, { "epoch": 0.7423714644001672, "grad_norm": 0.023720438029535877, "learning_rate": 5.6639620228114863e-05, "loss": 1.1323, "step": 2664 }, { "epoch": 0.7426501323672844, "grad_norm": 0.022152960973658976, "learning_rate": 5.652543791715537e-05, "loss": 1.1375, "step": 2665 }, { "epoch": 0.7429288003344016, "grad_norm": 0.021408759085330627, "learning_rate": 5.6411344088629875e-05, "loss": 1.0887, "step": 2666 }, { "epoch": 0.7432074683015187, "grad_norm": 0.021832863759412544, "learning_rate": 5.629733885053873e-05, "loss": 1.0724, "step": 2667 }, { "epoch": 0.7434861362686359, "grad_norm": 0.02305378915505336, "learning_rate": 5.618342231079867e-05, "loss": 1.1585, "step": 2668 }, { "epoch": 0.7437648042357531, "grad_norm": 0.02368454328484694, "learning_rate": 5.606959457724228e-05, "loss": 1.133, "step": 2669 }, { "epoch": 0.7440434722028703, "grad_norm": 0.021609918826860565, "learning_rate": 5.5955855757618294e-05, "loss": 1.1323, "step": 2670 }, { "epoch": 0.7443221401699874, "grad_norm": 0.022373837321304522, "learning_rate": 5.5842205959591055e-05, "loss": 1.2713, "step": 2671 }, { "epoch": 0.7446008081371046, "grad_norm": 0.02326666742036033, "learning_rate": 5.572864529074078e-05, "loss": 1.1365, "step": 2672 }, { "epoch": 0.7448794761042218, "grad_norm": 0.02305449252496421, "learning_rate": 5.56151738585633e-05, "loss": 1.1968, "step": 2673 }, { "epoch": 0.745158144071339, "grad_norm": 0.02333477665635562, "learning_rate": 5.5501791770470025e-05, "loss": 1.1326, "step": 2674 }, { "epoch": 0.7454368120384561, "grad_norm": 0.02212370777045585, "learning_rate": 5.53884991337876e-05, "loss": 1.0967, "step": 2675 }, { "epoch": 0.7457154800055733, "grad_norm": 0.022209050769604707, "learning_rate": 5.5275296055758246e-05, "loss": 1.0922, "step": 2676 }, { "epoch": 0.7459941479726906, "grad_norm": 0.022646255044694696, "learning_rate": 5.516218264353917e-05, "loss": 1.2034, "step": 2677 }, { "epoch": 0.7462728159398078, "grad_norm": 0.025251376334594938, "learning_rate": 5.504915900420289e-05, "loss": 1.1969, "step": 2678 }, { "epoch": 0.7465514839069249, "grad_norm": 0.021911610700456115, "learning_rate": 5.493622524473686e-05, "loss": 1.1592, "step": 2679 }, { "epoch": 0.7468301518740421, "grad_norm": 0.024111475635322784, "learning_rate": 5.482338147204351e-05, "loss": 1.1449, "step": 2680 }, { "epoch": 0.7471088198411593, "grad_norm": 0.024571593812799327, "learning_rate": 5.471062779293995e-05, "loss": 1.171, "step": 2681 }, { "epoch": 0.7473874878082765, "grad_norm": 0.022271551359577795, "learning_rate": 5.4597964314158235e-05, "loss": 1.1123, "step": 2682 }, { "epoch": 0.7476661557753936, "grad_norm": 0.02150280352276408, "learning_rate": 5.4485391142344755e-05, "loss": 1.1644, "step": 2683 }, { "epoch": 0.7479448237425108, "grad_norm": 0.02438822895301949, "learning_rate": 5.437290838406065e-05, "loss": 1.1518, "step": 2684 }, { "epoch": 0.748223491709628, "grad_norm": 0.023413697738132403, "learning_rate": 5.4260516145781415e-05, "loss": 1.1306, "step": 2685 }, { "epoch": 0.7485021596767452, "grad_norm": 0.027072964756310887, "learning_rate": 5.414821453389684e-05, "loss": 1.1123, "step": 2686 }, { "epoch": 0.7487808276438623, "grad_norm": 0.023966525670252557, "learning_rate": 5.403600365471085e-05, "loss": 1.1342, "step": 2687 }, { "epoch": 0.7490594956109795, "grad_norm": 0.02429850516357251, "learning_rate": 5.392388361444165e-05, "loss": 1.1789, "step": 2688 }, { "epoch": 0.7493381635780967, "grad_norm": 0.02335152850860452, "learning_rate": 5.381185451922129e-05, "loss": 1.216, "step": 2689 }, { "epoch": 0.7496168315452139, "grad_norm": 0.023329774313187137, "learning_rate": 5.3699916475095835e-05, "loss": 1.157, "step": 2690 }, { "epoch": 0.749895499512331, "grad_norm": 0.025552025998699258, "learning_rate": 5.358806958802514e-05, "loss": 1.2017, "step": 2691 }, { "epoch": 0.7501741674794482, "grad_norm": 0.021878685710625238, "learning_rate": 5.347631396388281e-05, "loss": 1.0883, "step": 2692 }, { "epoch": 0.7504528354465654, "grad_norm": 0.02317087491901822, "learning_rate": 5.336464970845594e-05, "loss": 1.1533, "step": 2693 }, { "epoch": 0.7507315034136826, "grad_norm": 0.022853164340187415, "learning_rate": 5.325307692744524e-05, "loss": 1.1608, "step": 2694 }, { "epoch": 0.7510101713807997, "grad_norm": 0.02354511870805471, "learning_rate": 5.314159572646489e-05, "loss": 1.1482, "step": 2695 }, { "epoch": 0.7512888393479169, "grad_norm": 0.021735169802639148, "learning_rate": 5.3030206211042144e-05, "loss": 1.0875, "step": 2696 }, { "epoch": 0.7515675073150342, "grad_norm": 0.022697940752871938, "learning_rate": 5.2918908486617685e-05, "loss": 1.2416, "step": 2697 }, { "epoch": 0.7518461752821514, "grad_norm": 0.02365311730830597, "learning_rate": 5.280770265854532e-05, "loss": 1.176, "step": 2698 }, { "epoch": 0.7521248432492685, "grad_norm": 0.022852828723891053, "learning_rate": 5.2696588832091645e-05, "loss": 1.0947, "step": 2699 }, { "epoch": 0.7524035112163857, "grad_norm": 0.02133972214882993, "learning_rate": 5.25855671124364e-05, "loss": 1.1281, "step": 2700 }, { "epoch": 0.7526821791835029, "grad_norm": 0.02434654033826499, "learning_rate": 5.247463760467208e-05, "loss": 1.1867, "step": 2701 }, { "epoch": 0.7529608471506201, "grad_norm": 0.023079309760506345, "learning_rate": 5.2363800413803745e-05, "loss": 1.1403, "step": 2702 }, { "epoch": 0.7532395151177372, "grad_norm": 0.023451190900643324, "learning_rate": 5.225305564474928e-05, "loss": 1.1005, "step": 2703 }, { "epoch": 0.7535181830848544, "grad_norm": 0.022143037187814203, "learning_rate": 5.214240340233895e-05, "loss": 1.1101, "step": 2704 }, { "epoch": 0.7537968510519716, "grad_norm": 0.02148300959769039, "learning_rate": 5.2031843791315516e-05, "loss": 1.0763, "step": 2705 }, { "epoch": 0.7540755190190888, "grad_norm": 0.026036193063799496, "learning_rate": 5.192137691633394e-05, "loss": 1.0674, "step": 2706 }, { "epoch": 0.7543541869862059, "grad_norm": 0.02330278982129682, "learning_rate": 5.181100288196156e-05, "loss": 1.1531, "step": 2707 }, { "epoch": 0.7546328549533231, "grad_norm": 0.024892265715972944, "learning_rate": 5.170072179267759e-05, "loss": 1.1634, "step": 2708 }, { "epoch": 0.7549115229204403, "grad_norm": 0.024203810276920725, "learning_rate": 5.1590533752873624e-05, "loss": 1.15, "step": 2709 }, { "epoch": 0.7551901908875575, "grad_norm": 0.02436799505939933, "learning_rate": 5.148043886685283e-05, "loss": 1.1411, "step": 2710 }, { "epoch": 0.7554688588546746, "grad_norm": 0.021561498830451394, "learning_rate": 5.1370437238830426e-05, "loss": 1.1197, "step": 2711 }, { "epoch": 0.7557475268217918, "grad_norm": 0.023487408750314066, "learning_rate": 5.126052897293315e-05, "loss": 1.1355, "step": 2712 }, { "epoch": 0.756026194788909, "grad_norm": 0.02324679439348326, "learning_rate": 5.1150714173199586e-05, "loss": 1.1146, "step": 2713 }, { "epoch": 0.7563048627560262, "grad_norm": 0.024361634538657605, "learning_rate": 5.104099294357959e-05, "loss": 1.098, "step": 2714 }, { "epoch": 0.7565835307231433, "grad_norm": 0.024121361913787366, "learning_rate": 5.0931365387934784e-05, "loss": 1.162, "step": 2715 }, { "epoch": 0.7568621986902605, "grad_norm": 0.02287062025951163, "learning_rate": 5.0821831610037784e-05, "loss": 1.226, "step": 2716 }, { "epoch": 0.7571408666573778, "grad_norm": 0.023969350386989014, "learning_rate": 5.071239171357266e-05, "loss": 1.115, "step": 2717 }, { "epoch": 0.757419534624495, "grad_norm": 0.023186718328396593, "learning_rate": 5.060304580213444e-05, "loss": 1.1225, "step": 2718 }, { "epoch": 0.7576982025916121, "grad_norm": 0.023821169347387153, "learning_rate": 5.049379397922938e-05, "loss": 1.1406, "step": 2719 }, { "epoch": 0.7579768705587293, "grad_norm": 0.024798064097694136, "learning_rate": 5.0384636348274426e-05, "loss": 1.1371, "step": 2720 }, { "epoch": 0.7582555385258465, "grad_norm": 0.02242754346393854, "learning_rate": 5.0275573012597673e-05, "loss": 1.146, "step": 2721 }, { "epoch": 0.7585342064929637, "grad_norm": 0.022571786073510396, "learning_rate": 5.0166604075437696e-05, "loss": 1.1522, "step": 2722 }, { "epoch": 0.7588128744600808, "grad_norm": 0.023551615384084793, "learning_rate": 5.0057729639943854e-05, "loss": 1.112, "step": 2723 }, { "epoch": 0.759091542427198, "grad_norm": 0.02393490537043464, "learning_rate": 4.9948949809175934e-05, "loss": 1.0936, "step": 2724 }, { "epoch": 0.7593702103943152, "grad_norm": 0.02644857077128262, "learning_rate": 4.984026468610434e-05, "loss": 1.1734, "step": 2725 }, { "epoch": 0.7596488783614324, "grad_norm": 0.021751142739950623, "learning_rate": 4.9731674373609556e-05, "loss": 1.1355, "step": 2726 }, { "epoch": 0.7599275463285495, "grad_norm": 0.021890836591989166, "learning_rate": 4.962317897448271e-05, "loss": 1.1267, "step": 2727 }, { "epoch": 0.7602062142956667, "grad_norm": 0.022456743452262763, "learning_rate": 4.951477859142473e-05, "loss": 1.1209, "step": 2728 }, { "epoch": 0.7604848822627839, "grad_norm": 0.025512315979187992, "learning_rate": 4.9406473327046795e-05, "loss": 1.0941, "step": 2729 }, { "epoch": 0.760763550229901, "grad_norm": 0.024065634309526428, "learning_rate": 4.929826328386994e-05, "loss": 1.1328, "step": 2730 }, { "epoch": 0.7610422181970182, "grad_norm": 0.021021873042506283, "learning_rate": 4.9190148564325164e-05, "loss": 1.1443, "step": 2731 }, { "epoch": 0.7613208861641354, "grad_norm": 0.02036610634380314, "learning_rate": 4.908212927075312e-05, "loss": 1.1752, "step": 2732 }, { "epoch": 0.7615995541312526, "grad_norm": 0.023238875309761656, "learning_rate": 4.897420550540422e-05, "loss": 1.1532, "step": 2733 }, { "epoch": 0.7618782220983698, "grad_norm": 0.022527920869298154, "learning_rate": 4.886637737043843e-05, "loss": 1.1528, "step": 2734 }, { "epoch": 0.7621568900654869, "grad_norm": 0.022041025996048503, "learning_rate": 4.875864496792523e-05, "loss": 1.095, "step": 2735 }, { "epoch": 0.7624355580326041, "grad_norm": 0.021891781488525927, "learning_rate": 4.865100839984336e-05, "loss": 1.1353, "step": 2736 }, { "epoch": 0.7627142259997214, "grad_norm": 0.023343585603883655, "learning_rate": 4.854346776808098e-05, "loss": 1.1426, "step": 2737 }, { "epoch": 0.7629928939668386, "grad_norm": 0.023145497922677245, "learning_rate": 4.843602317443532e-05, "loss": 1.2184, "step": 2738 }, { "epoch": 0.7632715619339557, "grad_norm": 0.02344635895209142, "learning_rate": 4.8328674720612784e-05, "loss": 1.0643, "step": 2739 }, { "epoch": 0.7635502299010729, "grad_norm": 0.024607865163374678, "learning_rate": 4.822142250822876e-05, "loss": 1.1829, "step": 2740 }, { "epoch": 0.7638288978681901, "grad_norm": 0.02262864268000974, "learning_rate": 4.811426663880754e-05, "loss": 1.1668, "step": 2741 }, { "epoch": 0.7641075658353073, "grad_norm": 0.02395967601761953, "learning_rate": 4.800720721378212e-05, "loss": 1.1534, "step": 2742 }, { "epoch": 0.7643862338024244, "grad_norm": 0.021863874267590403, "learning_rate": 4.790024433449432e-05, "loss": 1.1206, "step": 2743 }, { "epoch": 0.7646649017695416, "grad_norm": 0.0232818778481095, "learning_rate": 4.77933781021946e-05, "loss": 1.1296, "step": 2744 }, { "epoch": 0.7649435697366588, "grad_norm": 0.023992676443495776, "learning_rate": 4.7686608618041744e-05, "loss": 1.1458, "step": 2745 }, { "epoch": 0.765222237703776, "grad_norm": 0.025031591980605568, "learning_rate": 4.7579935983103136e-05, "loss": 1.1668, "step": 2746 }, { "epoch": 0.7655009056708931, "grad_norm": 0.022038525442082672, "learning_rate": 4.7473360298354415e-05, "loss": 1.1148, "step": 2747 }, { "epoch": 0.7657795736380103, "grad_norm": 0.02311393385650958, "learning_rate": 4.736688166467951e-05, "loss": 1.1737, "step": 2748 }, { "epoch": 0.7660582416051275, "grad_norm": 0.02198301817465749, "learning_rate": 4.7260500182870326e-05, "loss": 1.1147, "step": 2749 }, { "epoch": 0.7663369095722447, "grad_norm": 0.021810441820330147, "learning_rate": 4.715421595362702e-05, "loss": 1.1872, "step": 2750 }, { "epoch": 0.7666155775393618, "grad_norm": 0.022279339631385446, "learning_rate": 4.704802907755744e-05, "loss": 1.1508, "step": 2751 }, { "epoch": 0.766894245506479, "grad_norm": 0.02301690433542349, "learning_rate": 4.694193965517751e-05, "loss": 1.1731, "step": 2752 }, { "epoch": 0.7671729134735962, "grad_norm": 0.023460426118878427, "learning_rate": 4.683594778691079e-05, "loss": 1.1519, "step": 2753 }, { "epoch": 0.7674515814407133, "grad_norm": 0.022156202001863644, "learning_rate": 4.673005357308858e-05, "loss": 1.1792, "step": 2754 }, { "epoch": 0.7677302494078305, "grad_norm": 0.023096162613270053, "learning_rate": 4.662425711394955e-05, "loss": 1.2105, "step": 2755 }, { "epoch": 0.7680089173749477, "grad_norm": 0.026034522300194547, "learning_rate": 4.651855850964008e-05, "loss": 1.1514, "step": 2756 }, { "epoch": 0.768287585342065, "grad_norm": 0.020535310099045908, "learning_rate": 4.6412957860213654e-05, "loss": 1.0978, "step": 2757 }, { "epoch": 0.7685662533091822, "grad_norm": 0.023229624465815217, "learning_rate": 4.6307455265631396e-05, "loss": 1.1379, "step": 2758 }, { "epoch": 0.7688449212762993, "grad_norm": 0.02186070992639135, "learning_rate": 4.620205082576125e-05, "loss": 1.1797, "step": 2759 }, { "epoch": 0.7691235892434165, "grad_norm": 0.022846440511701142, "learning_rate": 4.609674464037848e-05, "loss": 1.1054, "step": 2760 }, { "epoch": 0.7694022572105337, "grad_norm": 0.022503579061198816, "learning_rate": 4.599153680916517e-05, "loss": 1.152, "step": 2761 }, { "epoch": 0.7696809251776509, "grad_norm": 0.02181769099993219, "learning_rate": 4.58864274317105e-05, "loss": 1.0486, "step": 2762 }, { "epoch": 0.769959593144768, "grad_norm": 0.022723861100019917, "learning_rate": 4.578141660751018e-05, "loss": 1.1315, "step": 2763 }, { "epoch": 0.7702382611118852, "grad_norm": 0.02330376685752637, "learning_rate": 4.567650443596702e-05, "loss": 1.0944, "step": 2764 }, { "epoch": 0.7705169290790024, "grad_norm": 0.021350822403364767, "learning_rate": 4.557169101639004e-05, "loss": 1.1998, "step": 2765 }, { "epoch": 0.7707955970461196, "grad_norm": 0.02142451106563928, "learning_rate": 4.546697644799509e-05, "loss": 1.2004, "step": 2766 }, { "epoch": 0.7710742650132367, "grad_norm": 0.022264222084502214, "learning_rate": 4.5362360829904215e-05, "loss": 1.2019, "step": 2767 }, { "epoch": 0.7713529329803539, "grad_norm": 0.022859168589369944, "learning_rate": 4.525784426114602e-05, "loss": 1.0935, "step": 2768 }, { "epoch": 0.7716316009474711, "grad_norm": 0.02469792521015789, "learning_rate": 4.515342684065506e-05, "loss": 1.1916, "step": 2769 }, { "epoch": 0.7719102689145882, "grad_norm": 0.02329867806813808, "learning_rate": 4.504910866727242e-05, "loss": 1.1997, "step": 2770 }, { "epoch": 0.7721889368817054, "grad_norm": 0.022765850155074898, "learning_rate": 4.494488983974487e-05, "loss": 1.1189, "step": 2771 }, { "epoch": 0.7724676048488226, "grad_norm": 0.0253693557201632, "learning_rate": 4.484077045672542e-05, "loss": 1.162, "step": 2772 }, { "epoch": 0.7727462728159398, "grad_norm": 0.02338490595254168, "learning_rate": 4.473675061677271e-05, "loss": 1.1055, "step": 2773 }, { "epoch": 0.773024940783057, "grad_norm": 0.02218919067245113, "learning_rate": 4.4632830418351354e-05, "loss": 1.1441, "step": 2774 }, { "epoch": 0.7733036087501741, "grad_norm": 0.024120421852039886, "learning_rate": 4.452900995983142e-05, "loss": 1.2276, "step": 2775 }, { "epoch": 0.7735822767172913, "grad_norm": 0.02330652336765321, "learning_rate": 4.4425289339488914e-05, "loss": 1.1466, "step": 2776 }, { "epoch": 0.7738609446844086, "grad_norm": 0.023691830881437892, "learning_rate": 4.432166865550498e-05, "loss": 1.1414, "step": 2777 }, { "epoch": 0.7741396126515258, "grad_norm": 0.02603245478683936, "learning_rate": 4.421814800596637e-05, "loss": 1.0996, "step": 2778 }, { "epoch": 0.7744182806186429, "grad_norm": 0.022723559682386065, "learning_rate": 4.4114727488865025e-05, "loss": 1.1765, "step": 2779 }, { "epoch": 0.7746969485857601, "grad_norm": 0.023096134634906486, "learning_rate": 4.401140720209823e-05, "loss": 1.1312, "step": 2780 }, { "epoch": 0.7749756165528773, "grad_norm": 0.02071263420165633, "learning_rate": 4.3908187243468174e-05, "loss": 1.1273, "step": 2781 }, { "epoch": 0.7752542845199945, "grad_norm": 0.02314762727704433, "learning_rate": 4.380506771068243e-05, "loss": 1.0751, "step": 2782 }, { "epoch": 0.7755329524871116, "grad_norm": 0.023916093096344102, "learning_rate": 4.370204870135314e-05, "loss": 1.2325, "step": 2783 }, { "epoch": 0.7758116204542288, "grad_norm": 0.02402521025066705, "learning_rate": 4.359913031299756e-05, "loss": 1.1927, "step": 2784 }, { "epoch": 0.776090288421346, "grad_norm": 0.024144037077387503, "learning_rate": 4.34963126430375e-05, "loss": 1.0391, "step": 2785 }, { "epoch": 0.7763689563884631, "grad_norm": 0.02122397925389478, "learning_rate": 4.3393595788799544e-05, "loss": 1.1744, "step": 2786 }, { "epoch": 0.7766476243555803, "grad_norm": 0.023316877295590333, "learning_rate": 4.329097984751485e-05, "loss": 1.1524, "step": 2787 }, { "epoch": 0.7769262923226975, "grad_norm": 0.023021208738370057, "learning_rate": 4.3188464916319046e-05, "loss": 1.1892, "step": 2788 }, { "epoch": 0.7772049602898147, "grad_norm": 0.021917680666123193, "learning_rate": 4.308605109225205e-05, "loss": 1.1461, "step": 2789 }, { "epoch": 0.7774836282569318, "grad_norm": 0.02108401616302989, "learning_rate": 4.298373847225822e-05, "loss": 1.0909, "step": 2790 }, { "epoch": 0.777762296224049, "grad_norm": 0.02140631786651903, "learning_rate": 4.288152715318596e-05, "loss": 1.0934, "step": 2791 }, { "epoch": 0.7780409641911662, "grad_norm": 0.02291712256474964, "learning_rate": 4.277941723178791e-05, "loss": 1.1765, "step": 2792 }, { "epoch": 0.7783196321582834, "grad_norm": 0.0236916638826726, "learning_rate": 4.267740880472068e-05, "loss": 1.1181, "step": 2793 }, { "epoch": 0.7785983001254005, "grad_norm": 0.022134379642947767, "learning_rate": 4.2575501968544854e-05, "loss": 1.2009, "step": 2794 }, { "epoch": 0.7788769680925177, "grad_norm": 0.022270515835542536, "learning_rate": 4.247369681972471e-05, "loss": 1.1124, "step": 2795 }, { "epoch": 0.7791556360596349, "grad_norm": 0.02400793382774249, "learning_rate": 4.2371993454628395e-05, "loss": 1.1484, "step": 2796 }, { "epoch": 0.7794343040267522, "grad_norm": 0.022244357421611134, "learning_rate": 4.2270391969527725e-05, "loss": 1.136, "step": 2797 }, { "epoch": 0.7797129719938694, "grad_norm": 0.021259886869529152, "learning_rate": 4.216889246059796e-05, "loss": 1.0615, "step": 2798 }, { "epoch": 0.7799916399609865, "grad_norm": 0.02317042753358548, "learning_rate": 4.206749502391798e-05, "loss": 1.0769, "step": 2799 }, { "epoch": 0.7802703079281037, "grad_norm": 0.023051247345244493, "learning_rate": 4.196619975546981e-05, "loss": 1.1486, "step": 2800 }, { "epoch": 0.7805489758952209, "grad_norm": 0.02300752027745384, "learning_rate": 4.186500675113911e-05, "loss": 1.1022, "step": 2801 }, { "epoch": 0.780827643862338, "grad_norm": 0.021587251150629348, "learning_rate": 4.17639161067144e-05, "loss": 1.0933, "step": 2802 }, { "epoch": 0.7811063118294552, "grad_norm": 0.022756608586863705, "learning_rate": 4.166292791788756e-05, "loss": 1.1765, "step": 2803 }, { "epoch": 0.7813849797965724, "grad_norm": 0.0223376814428305, "learning_rate": 4.156204228025326e-05, "loss": 1.1764, "step": 2804 }, { "epoch": 0.7816636477636896, "grad_norm": 0.02246414835171958, "learning_rate": 4.1461259289309324e-05, "loss": 1.1598, "step": 2805 }, { "epoch": 0.7819423157308067, "grad_norm": 0.023163945261828935, "learning_rate": 4.136057904045614e-05, "loss": 1.0925, "step": 2806 }, { "epoch": 0.7822209836979239, "grad_norm": 0.021934080292699994, "learning_rate": 4.1260001628997184e-05, "loss": 1.1595, "step": 2807 }, { "epoch": 0.7824996516650411, "grad_norm": 0.025111086812221132, "learning_rate": 4.1159527150138296e-05, "loss": 1.1954, "step": 2808 }, { "epoch": 0.7827783196321583, "grad_norm": 0.024268825040644135, "learning_rate": 4.105915569898803e-05, "loss": 1.094, "step": 2809 }, { "epoch": 0.7830569875992754, "grad_norm": 0.023431953794008517, "learning_rate": 4.0958887370557334e-05, "loss": 1.1701, "step": 2810 }, { "epoch": 0.7833356555663926, "grad_norm": 0.02285746068079979, "learning_rate": 4.085872225975963e-05, "loss": 1.0778, "step": 2811 }, { "epoch": 0.7836143235335098, "grad_norm": 0.02172423508137538, "learning_rate": 4.075866046141047e-05, "loss": 1.1645, "step": 2812 }, { "epoch": 0.783892991500627, "grad_norm": 0.023107024316298113, "learning_rate": 4.065870207022789e-05, "loss": 1.1272, "step": 2813 }, { "epoch": 0.7841716594677441, "grad_norm": 0.02317021613396616, "learning_rate": 4.055884718083173e-05, "loss": 1.1193, "step": 2814 }, { "epoch": 0.7844503274348613, "grad_norm": 0.022264094638495737, "learning_rate": 4.045909588774412e-05, "loss": 1.1509, "step": 2815 }, { "epoch": 0.7847289954019785, "grad_norm": 0.023065946761737347, "learning_rate": 4.035944828538892e-05, "loss": 1.161, "step": 2816 }, { "epoch": 0.7850076633690958, "grad_norm": 0.023019768346013644, "learning_rate": 4.025990446809199e-05, "loss": 1.1449, "step": 2817 }, { "epoch": 0.785286331336213, "grad_norm": 0.02478556344254632, "learning_rate": 4.016046453008076e-05, "loss": 1.1237, "step": 2818 }, { "epoch": 0.7855649993033301, "grad_norm": 0.022318621522947876, "learning_rate": 4.006112856548463e-05, "loss": 1.1263, "step": 2819 }, { "epoch": 0.7858436672704473, "grad_norm": 0.023666562874175748, "learning_rate": 3.996189666833427e-05, "loss": 1.1183, "step": 2820 }, { "epoch": 0.7861223352375645, "grad_norm": 0.022279632701687528, "learning_rate": 3.986276893256204e-05, "loss": 1.1686, "step": 2821 }, { "epoch": 0.7864010032046816, "grad_norm": 0.022008903122187142, "learning_rate": 3.976374545200158e-05, "loss": 1.1549, "step": 2822 }, { "epoch": 0.7866796711717988, "grad_norm": 0.021777614363618816, "learning_rate": 3.966482632038795e-05, "loss": 1.0933, "step": 2823 }, { "epoch": 0.786958339138916, "grad_norm": 0.022214973437502582, "learning_rate": 3.956601163135727e-05, "loss": 1.131, "step": 2824 }, { "epoch": 0.7872370071060332, "grad_norm": 0.028088325632276544, "learning_rate": 3.9467301478447065e-05, "loss": 1.148, "step": 2825 }, { "epoch": 0.7875156750731503, "grad_norm": 0.025078484834238263, "learning_rate": 3.936869595509563e-05, "loss": 1.1276, "step": 2826 }, { "epoch": 0.7877943430402675, "grad_norm": 0.023207220624541076, "learning_rate": 3.9270195154642414e-05, "loss": 1.1255, "step": 2827 }, { "epoch": 0.7880730110073847, "grad_norm": 0.02426691355446033, "learning_rate": 3.9171799170327566e-05, "loss": 1.1614, "step": 2828 }, { "epoch": 0.7883516789745019, "grad_norm": 0.022940050911747143, "learning_rate": 3.90735080952922e-05, "loss": 1.1431, "step": 2829 }, { "epoch": 0.788630346941619, "grad_norm": 0.024253713140454398, "learning_rate": 3.8975322022577886e-05, "loss": 1.121, "step": 2830 }, { "epoch": 0.7889090149087362, "grad_norm": 0.022938463958193076, "learning_rate": 3.887724104512713e-05, "loss": 1.1378, "step": 2831 }, { "epoch": 0.7891876828758534, "grad_norm": 0.023332923981323053, "learning_rate": 3.877926525578265e-05, "loss": 1.1352, "step": 2832 }, { "epoch": 0.7894663508429706, "grad_norm": 0.02446088680981489, "learning_rate": 3.86813947472878e-05, "loss": 1.1224, "step": 2833 }, { "epoch": 0.7897450188100877, "grad_norm": 0.023049702454869796, "learning_rate": 3.8583629612286074e-05, "loss": 1.1435, "step": 2834 }, { "epoch": 0.7900236867772049, "grad_norm": 0.022685538622658805, "learning_rate": 3.84859699433214e-05, "loss": 1.0949, "step": 2835 }, { "epoch": 0.7903023547443221, "grad_norm": 0.022741330842291893, "learning_rate": 3.83884158328378e-05, "loss": 1.123, "step": 2836 }, { "epoch": 0.7905810227114394, "grad_norm": 0.022262778646896353, "learning_rate": 3.829096737317944e-05, "loss": 1.1156, "step": 2837 }, { "epoch": 0.7908596906785565, "grad_norm": 0.023121552190797267, "learning_rate": 3.8193624656590314e-05, "loss": 1.1232, "step": 2838 }, { "epoch": 0.7911383586456737, "grad_norm": 0.021560555321218415, "learning_rate": 3.8096387775214475e-05, "loss": 1.183, "step": 2839 }, { "epoch": 0.7914170266127909, "grad_norm": 0.024306460807869832, "learning_rate": 3.799925682109581e-05, "loss": 1.1529, "step": 2840 }, { "epoch": 0.7916956945799081, "grad_norm": 0.023236100648410758, "learning_rate": 3.790223188617776e-05, "loss": 1.1467, "step": 2841 }, { "epoch": 0.7919743625470252, "grad_norm": 0.021023741421045735, "learning_rate": 3.780531306230358e-05, "loss": 1.0829, "step": 2842 }, { "epoch": 0.7922530305141424, "grad_norm": 0.02425842766385467, "learning_rate": 3.7708500441216027e-05, "loss": 1.1827, "step": 2843 }, { "epoch": 0.7925316984812596, "grad_norm": 0.02217644785839989, "learning_rate": 3.7611794114557284e-05, "loss": 1.1256, "step": 2844 }, { "epoch": 0.7928103664483768, "grad_norm": 0.02257030451534082, "learning_rate": 3.751519417386896e-05, "loss": 1.1369, "step": 2845 }, { "epoch": 0.7930890344154939, "grad_norm": 0.022553677924221978, "learning_rate": 3.741870071059203e-05, "loss": 1.1442, "step": 2846 }, { "epoch": 0.7933677023826111, "grad_norm": 0.02402715119174623, "learning_rate": 3.732231381606648e-05, "loss": 1.0843, "step": 2847 }, { "epoch": 0.7936463703497283, "grad_norm": 0.02253429908553841, "learning_rate": 3.722603358153159e-05, "loss": 1.0904, "step": 2848 }, { "epoch": 0.7939250383168455, "grad_norm": 0.022825457215273254, "learning_rate": 3.712986009812564e-05, "loss": 1.1599, "step": 2849 }, { "epoch": 0.7942037062839626, "grad_norm": 0.023212116864689377, "learning_rate": 3.703379345688588e-05, "loss": 1.1026, "step": 2850 }, { "epoch": 0.7944823742510798, "grad_norm": 0.02346852716575104, "learning_rate": 3.69378337487483e-05, "loss": 1.1482, "step": 2851 }, { "epoch": 0.794761042218197, "grad_norm": 0.022392785330700604, "learning_rate": 3.684198106454786e-05, "loss": 1.1168, "step": 2852 }, { "epoch": 0.7950397101853142, "grad_norm": 0.023273853108983018, "learning_rate": 3.674623549501799e-05, "loss": 1.1257, "step": 2853 }, { "epoch": 0.7953183781524313, "grad_norm": 0.02245935728803472, "learning_rate": 3.6650597130790936e-05, "loss": 1.1173, "step": 2854 }, { "epoch": 0.7955970461195485, "grad_norm": 0.02230256678329429, "learning_rate": 3.655506606239732e-05, "loss": 1.1383, "step": 2855 }, { "epoch": 0.7958757140866657, "grad_norm": 0.023348114823190432, "learning_rate": 3.645964238026635e-05, "loss": 1.1003, "step": 2856 }, { "epoch": 0.796154382053783, "grad_norm": 0.022535465071809778, "learning_rate": 3.6364326174725344e-05, "loss": 1.1871, "step": 2857 }, { "epoch": 0.7964330500209001, "grad_norm": 0.022517385936060313, "learning_rate": 3.626911753600016e-05, "loss": 1.1858, "step": 2858 }, { "epoch": 0.7967117179880173, "grad_norm": 0.0217488735133211, "learning_rate": 3.6174016554214565e-05, "loss": 1.1686, "step": 2859 }, { "epoch": 0.7969903859551345, "grad_norm": 0.022514911014340496, "learning_rate": 3.6079023319390647e-05, "loss": 1.101, "step": 2860 }, { "epoch": 0.7972690539222517, "grad_norm": 0.022570401846292373, "learning_rate": 3.598413792144835e-05, "loss": 1.1529, "step": 2861 }, { "epoch": 0.7975477218893688, "grad_norm": 0.02307053312863705, "learning_rate": 3.588936045020568e-05, "loss": 1.127, "step": 2862 }, { "epoch": 0.797826389856486, "grad_norm": 0.02170327416272817, "learning_rate": 3.579469099537831e-05, "loss": 1.1354, "step": 2863 }, { "epoch": 0.7981050578236032, "grad_norm": 0.023091565384891214, "learning_rate": 3.57001296465798e-05, "loss": 1.1859, "step": 2864 }, { "epoch": 0.7983837257907204, "grad_norm": 0.023264426592257748, "learning_rate": 3.5605676493321283e-05, "loss": 1.1097, "step": 2865 }, { "epoch": 0.7986623937578375, "grad_norm": 0.023273193074385086, "learning_rate": 3.551133162501159e-05, "loss": 1.1572, "step": 2866 }, { "epoch": 0.7989410617249547, "grad_norm": 0.02299279198725001, "learning_rate": 3.541709513095685e-05, "loss": 1.2196, "step": 2867 }, { "epoch": 0.7992197296920719, "grad_norm": 0.02269120326199951, "learning_rate": 3.5322967100360926e-05, "loss": 1.0636, "step": 2868 }, { "epoch": 0.7994983976591891, "grad_norm": 0.02288746812904392, "learning_rate": 3.5228947622324674e-05, "loss": 1.0927, "step": 2869 }, { "epoch": 0.7997770656263062, "grad_norm": 0.023965998359409466, "learning_rate": 3.5135036785846425e-05, "loss": 1.0917, "step": 2870 }, { "epoch": 0.8000557335934234, "grad_norm": 0.023229284742332653, "learning_rate": 3.5041234679821534e-05, "loss": 1.1402, "step": 2871 }, { "epoch": 0.8003344015605406, "grad_norm": 0.02400574655249109, "learning_rate": 3.494754139304252e-05, "loss": 1.1424, "step": 2872 }, { "epoch": 0.8006130695276578, "grad_norm": 0.024093303325641986, "learning_rate": 3.485395701419878e-05, "loss": 1.2181, "step": 2873 }, { "epoch": 0.8008917374947749, "grad_norm": 0.023176468736738596, "learning_rate": 3.476048163187685e-05, "loss": 1.1958, "step": 2874 }, { "epoch": 0.8011704054618921, "grad_norm": 0.023088850823877696, "learning_rate": 3.466711533455979e-05, "loss": 1.1829, "step": 2875 }, { "epoch": 0.8014490734290093, "grad_norm": 0.02251681127310276, "learning_rate": 3.457385821062769e-05, "loss": 1.1588, "step": 2876 }, { "epoch": 0.8017277413961266, "grad_norm": 0.021835754493056424, "learning_rate": 3.4480710348357015e-05, "loss": 1.1029, "step": 2877 }, { "epoch": 0.8020064093632437, "grad_norm": 0.023320303814290805, "learning_rate": 3.438767183592103e-05, "loss": 1.1178, "step": 2878 }, { "epoch": 0.8022850773303609, "grad_norm": 0.02208944310119795, "learning_rate": 3.42947427613894e-05, "loss": 1.1379, "step": 2879 }, { "epoch": 0.8025637452974781, "grad_norm": 0.022124086791049265, "learning_rate": 3.420192321272823e-05, "loss": 1.0685, "step": 2880 }, { "epoch": 0.8028424132645953, "grad_norm": 0.02221841137339749, "learning_rate": 3.4109213277799855e-05, "loss": 1.1288, "step": 2881 }, { "epoch": 0.8031210812317124, "grad_norm": 0.021820961136491217, "learning_rate": 3.4016613044362996e-05, "loss": 1.1394, "step": 2882 }, { "epoch": 0.8033997491988296, "grad_norm": 0.021602876396969197, "learning_rate": 3.392412260007236e-05, "loss": 1.1175, "step": 2883 }, { "epoch": 0.8036784171659468, "grad_norm": 0.022580054903628143, "learning_rate": 3.3831742032478873e-05, "loss": 1.1335, "step": 2884 }, { "epoch": 0.803957085133064, "grad_norm": 0.021739169138903582, "learning_rate": 3.37394714290294e-05, "loss": 1.1748, "step": 2885 }, { "epoch": 0.8042357531001811, "grad_norm": 0.022579680843243452, "learning_rate": 3.3647310877066756e-05, "loss": 1.1574, "step": 2886 }, { "epoch": 0.8045144210672983, "grad_norm": 0.023864081368944686, "learning_rate": 3.355526046382945e-05, "loss": 1.1828, "step": 2887 }, { "epoch": 0.8047930890344155, "grad_norm": 0.021699057094710462, "learning_rate": 3.3463320276451886e-05, "loss": 1.1537, "step": 2888 }, { "epoch": 0.8050717570015327, "grad_norm": 0.02203576060856733, "learning_rate": 3.3371490401964096e-05, "loss": 1.1602, "step": 2889 }, { "epoch": 0.8053504249686498, "grad_norm": 0.022054147093443634, "learning_rate": 3.327977092729158e-05, "loss": 1.1406, "step": 2890 }, { "epoch": 0.805629092935767, "grad_norm": 0.021883638719587185, "learning_rate": 3.3188161939255464e-05, "loss": 1.1997, "step": 2891 }, { "epoch": 0.8059077609028842, "grad_norm": 0.021678972161150584, "learning_rate": 3.3096663524572224e-05, "loss": 1.1357, "step": 2892 }, { "epoch": 0.8061864288700014, "grad_norm": 0.02190719758273146, "learning_rate": 3.3005275769853764e-05, "loss": 1.1688, "step": 2893 }, { "epoch": 0.8064650968371185, "grad_norm": 0.02254271373573254, "learning_rate": 3.291399876160702e-05, "loss": 1.1348, "step": 2894 }, { "epoch": 0.8067437648042357, "grad_norm": 0.023224020932836542, "learning_rate": 3.2822832586234384e-05, "loss": 1.1123, "step": 2895 }, { "epoch": 0.8070224327713529, "grad_norm": 0.021565647035940834, "learning_rate": 3.273177733003305e-05, "loss": 1.1351, "step": 2896 }, { "epoch": 0.8073011007384702, "grad_norm": 0.02254346687569153, "learning_rate": 3.264083307919543e-05, "loss": 1.2094, "step": 2897 }, { "epoch": 0.8075797687055873, "grad_norm": 0.021859511900383405, "learning_rate": 3.254999991980875e-05, "loss": 1.1712, "step": 2898 }, { "epoch": 0.8078584366727045, "grad_norm": 0.022521929758052592, "learning_rate": 3.245927793785516e-05, "loss": 1.192, "step": 2899 }, { "epoch": 0.8081371046398217, "grad_norm": 0.021068458449379598, "learning_rate": 3.2368667219211466e-05, "loss": 1.0801, "step": 2900 }, { "epoch": 0.8084157726069389, "grad_norm": 0.02381890837719869, "learning_rate": 3.227816784964924e-05, "loss": 1.1242, "step": 2901 }, { "epoch": 0.808694440574056, "grad_norm": 0.02239601595528392, "learning_rate": 3.218777991483456e-05, "loss": 1.1998, "step": 2902 }, { "epoch": 0.8089731085411732, "grad_norm": 0.022405585381139816, "learning_rate": 3.209750350032812e-05, "loss": 1.1076, "step": 2903 }, { "epoch": 0.8092517765082904, "grad_norm": 0.023239045186192134, "learning_rate": 3.200733869158501e-05, "loss": 1.1043, "step": 2904 }, { "epoch": 0.8095304444754076, "grad_norm": 0.02143177100200752, "learning_rate": 3.191728557395471e-05, "loss": 1.1018, "step": 2905 }, { "epoch": 0.8098091124425247, "grad_norm": 0.02374813833770211, "learning_rate": 3.182734423268083e-05, "loss": 1.0538, "step": 2906 }, { "epoch": 0.8100877804096419, "grad_norm": 0.024291354251143604, "learning_rate": 3.17375147529014e-05, "loss": 1.1404, "step": 2907 }, { "epoch": 0.8103664483767591, "grad_norm": 0.02264959009395692, "learning_rate": 3.164779721964833e-05, "loss": 1.097, "step": 2908 }, { "epoch": 0.8106451163438763, "grad_norm": 0.021401616853353844, "learning_rate": 3.155819171784775e-05, "loss": 1.146, "step": 2909 }, { "epoch": 0.8109237843109934, "grad_norm": 0.022394230466663178, "learning_rate": 3.1468698332319635e-05, "loss": 1.1314, "step": 2910 }, { "epoch": 0.8112024522781106, "grad_norm": 0.022338681433425264, "learning_rate": 3.137931714777791e-05, "loss": 1.1591, "step": 2911 }, { "epoch": 0.8114811202452278, "grad_norm": 0.022974654918587183, "learning_rate": 3.129004824883014e-05, "loss": 1.1287, "step": 2912 }, { "epoch": 0.811759788212345, "grad_norm": 0.02211798995672517, "learning_rate": 3.120089171997783e-05, "loss": 1.1731, "step": 2913 }, { "epoch": 0.8120384561794621, "grad_norm": 0.022000351491642044, "learning_rate": 3.111184764561589e-05, "loss": 1.1826, "step": 2914 }, { "epoch": 0.8123171241465793, "grad_norm": 0.021546226438346215, "learning_rate": 3.102291611003292e-05, "loss": 1.0984, "step": 2915 }, { "epoch": 0.8125957921136965, "grad_norm": 0.024771062982443633, "learning_rate": 3.093409719741097e-05, "loss": 1.1275, "step": 2916 }, { "epoch": 0.8128744600808138, "grad_norm": 0.021530931650379746, "learning_rate": 3.0845390991825464e-05, "loss": 1.1242, "step": 2917 }, { "epoch": 0.8131531280479309, "grad_norm": 0.02082753312589157, "learning_rate": 3.0756797577245095e-05, "loss": 1.0633, "step": 2918 }, { "epoch": 0.8134317960150481, "grad_norm": 0.024688620407009882, "learning_rate": 3.06683170375319e-05, "loss": 1.1867, "step": 2919 }, { "epoch": 0.8137104639821653, "grad_norm": 0.020910349527027534, "learning_rate": 3.057994945644094e-05, "loss": 1.1194, "step": 2920 }, { "epoch": 0.8139891319492825, "grad_norm": 0.02188178987637176, "learning_rate": 3.0491694917620423e-05, "loss": 1.1712, "step": 2921 }, { "epoch": 0.8142677999163996, "grad_norm": 0.022199051849616146, "learning_rate": 3.040355350461156e-05, "loss": 1.1551, "step": 2922 }, { "epoch": 0.8145464678835168, "grad_norm": 0.020895788149488698, "learning_rate": 3.0315525300848508e-05, "loss": 1.1388, "step": 2923 }, { "epoch": 0.814825135850634, "grad_norm": 0.02153293143250121, "learning_rate": 3.0227610389658113e-05, "loss": 1.0879, "step": 2924 }, { "epoch": 0.8151038038177512, "grad_norm": 0.022186973324301532, "learning_rate": 3.0139808854260172e-05, "loss": 1.0683, "step": 2925 }, { "epoch": 0.8153824717848683, "grad_norm": 0.021657080637279526, "learning_rate": 3.0052120777766968e-05, "loss": 1.1719, "step": 2926 }, { "epoch": 0.8156611397519855, "grad_norm": 0.0220582030393345, "learning_rate": 2.9964546243183556e-05, "loss": 1.1576, "step": 2927 }, { "epoch": 0.8159398077191027, "grad_norm": 0.021481479221460396, "learning_rate": 2.9877085333407426e-05, "loss": 1.1273, "step": 2928 }, { "epoch": 0.8162184756862199, "grad_norm": 0.022814615727908074, "learning_rate": 2.9789738131228546e-05, "loss": 1.1333, "step": 2929 }, { "epoch": 0.816497143653337, "grad_norm": 0.021237134566524166, "learning_rate": 2.9702504719329185e-05, "loss": 1.103, "step": 2930 }, { "epoch": 0.8167758116204542, "grad_norm": 0.023642547410425634, "learning_rate": 2.9615385180283953e-05, "loss": 1.0771, "step": 2931 }, { "epoch": 0.8170544795875714, "grad_norm": 0.022307910404617486, "learning_rate": 2.9528379596559714e-05, "loss": 1.0847, "step": 2932 }, { "epoch": 0.8173331475546886, "grad_norm": 0.02313909028364493, "learning_rate": 2.9441488050515327e-05, "loss": 1.1091, "step": 2933 }, { "epoch": 0.8176118155218057, "grad_norm": 0.022565017428063494, "learning_rate": 2.9354710624401812e-05, "loss": 1.1782, "step": 2934 }, { "epoch": 0.8178904834889229, "grad_norm": 0.022684661543674912, "learning_rate": 2.9268047400362195e-05, "loss": 1.1434, "step": 2935 }, { "epoch": 0.8181691514560401, "grad_norm": 0.022299135905275116, "learning_rate": 2.9181498460431253e-05, "loss": 1.1433, "step": 2936 }, { "epoch": 0.8184478194231574, "grad_norm": 0.022520616952839884, "learning_rate": 2.9095063886535702e-05, "loss": 1.167, "step": 2937 }, { "epoch": 0.8187264873902745, "grad_norm": 0.022178113738446928, "learning_rate": 2.9008743760494013e-05, "loss": 1.1031, "step": 2938 }, { "epoch": 0.8190051553573917, "grad_norm": 0.022333368583391737, "learning_rate": 2.892253816401619e-05, "loss": 1.168, "step": 2939 }, { "epoch": 0.8192838233245089, "grad_norm": 0.023792399751462124, "learning_rate": 2.8836447178703975e-05, "loss": 1.1137, "step": 2940 }, { "epoch": 0.8195624912916261, "grad_norm": 0.023745047637988032, "learning_rate": 2.8750470886050514e-05, "loss": 1.1116, "step": 2941 }, { "epoch": 0.8198411592587432, "grad_norm": 0.023339968186005384, "learning_rate": 2.8664609367440505e-05, "loss": 1.1266, "step": 2942 }, { "epoch": 0.8201198272258604, "grad_norm": 0.02124358572435238, "learning_rate": 2.8578862704149803e-05, "loss": 1.1423, "step": 2943 }, { "epoch": 0.8203984951929776, "grad_norm": 0.021359408636750562, "learning_rate": 2.8493230977345777e-05, "loss": 1.1165, "step": 2944 }, { "epoch": 0.8206771631600948, "grad_norm": 0.022736852682087413, "learning_rate": 2.840771426808679e-05, "loss": 1.1215, "step": 2945 }, { "epoch": 0.8209558311272119, "grad_norm": 0.02218422845635192, "learning_rate": 2.8322312657322454e-05, "loss": 1.1115, "step": 2946 }, { "epoch": 0.8212344990943291, "grad_norm": 0.02197880333443349, "learning_rate": 2.8237026225893406e-05, "loss": 1.1147, "step": 2947 }, { "epoch": 0.8215131670614463, "grad_norm": 0.021615959333653102, "learning_rate": 2.815185505453128e-05, "loss": 1.1252, "step": 2948 }, { "epoch": 0.8217918350285635, "grad_norm": 0.02180705409528935, "learning_rate": 2.8066799223858503e-05, "loss": 1.103, "step": 2949 }, { "epoch": 0.8220705029956806, "grad_norm": 0.024818728433617227, "learning_rate": 2.798185881438846e-05, "loss": 1.1224, "step": 2950 }, { "epoch": 0.8223491709627978, "grad_norm": 0.022972423982809576, "learning_rate": 2.789703390652514e-05, "loss": 1.1605, "step": 2951 }, { "epoch": 0.822627838929915, "grad_norm": 0.024477779119873342, "learning_rate": 2.7812324580563304e-05, "loss": 1.177, "step": 2952 }, { "epoch": 0.8229065068970322, "grad_norm": 0.023185056490515457, "learning_rate": 2.7727730916688262e-05, "loss": 1.131, "step": 2953 }, { "epoch": 0.8231851748641493, "grad_norm": 0.02262724421051914, "learning_rate": 2.764325299497589e-05, "loss": 1.1237, "step": 2954 }, { "epoch": 0.8234638428312665, "grad_norm": 0.022267678142299067, "learning_rate": 2.7558890895392387e-05, "loss": 1.1857, "step": 2955 }, { "epoch": 0.8237425107983837, "grad_norm": 0.02326591120345571, "learning_rate": 2.7474644697794446e-05, "loss": 1.1878, "step": 2956 }, { "epoch": 0.8240211787655009, "grad_norm": 0.023819336175012015, "learning_rate": 2.739051448192894e-05, "loss": 1.1475, "step": 2957 }, { "epoch": 0.8242998467326181, "grad_norm": 0.022529323172583316, "learning_rate": 2.730650032743301e-05, "loss": 1.0967, "step": 2958 }, { "epoch": 0.8245785146997353, "grad_norm": 0.021636544919813018, "learning_rate": 2.722260231383395e-05, "loss": 1.1762, "step": 2959 }, { "epoch": 0.8248571826668525, "grad_norm": 0.02216041069387021, "learning_rate": 2.7138820520549132e-05, "loss": 1.1451, "step": 2960 }, { "epoch": 0.8251358506339697, "grad_norm": 0.021397557157750453, "learning_rate": 2.7055155026885776e-05, "loss": 1.1074, "step": 2961 }, { "epoch": 0.8254145186010868, "grad_norm": 0.021771289354358764, "learning_rate": 2.6971605912041223e-05, "loss": 1.1545, "step": 2962 }, { "epoch": 0.825693186568204, "grad_norm": 0.022089579877417467, "learning_rate": 2.6888173255102435e-05, "loss": 1.0594, "step": 2963 }, { "epoch": 0.8259718545353212, "grad_norm": 0.021648011258055253, "learning_rate": 2.6804857135046292e-05, "loss": 1.1234, "step": 2964 }, { "epoch": 0.8262505225024384, "grad_norm": 0.023096657883876943, "learning_rate": 2.6721657630739324e-05, "loss": 1.1018, "step": 2965 }, { "epoch": 0.8265291904695555, "grad_norm": 0.022804395653823775, "learning_rate": 2.663857482093768e-05, "loss": 1.0693, "step": 2966 }, { "epoch": 0.8268078584366727, "grad_norm": 0.02127129615428675, "learning_rate": 2.6555608784286938e-05, "loss": 1.1138, "step": 2967 }, { "epoch": 0.8270865264037899, "grad_norm": 0.021399627861457295, "learning_rate": 2.6472759599322336e-05, "loss": 1.1643, "step": 2968 }, { "epoch": 0.827365194370907, "grad_norm": 0.021703439558324485, "learning_rate": 2.6390027344468296e-05, "loss": 1.1202, "step": 2969 }, { "epoch": 0.8276438623380242, "grad_norm": 0.02142929379295523, "learning_rate": 2.6307412098038688e-05, "loss": 1.103, "step": 2970 }, { "epoch": 0.8279225303051414, "grad_norm": 0.022041464706077995, "learning_rate": 2.6224913938236592e-05, "loss": 1.0646, "step": 2971 }, { "epoch": 0.8282011982722586, "grad_norm": 0.021733573870312883, "learning_rate": 2.6142532943154287e-05, "loss": 1.0829, "step": 2972 }, { "epoch": 0.8284798662393758, "grad_norm": 0.022467935195154123, "learning_rate": 2.6060269190773047e-05, "loss": 1.0717, "step": 2973 }, { "epoch": 0.8287585342064929, "grad_norm": 0.023291800106049025, "learning_rate": 2.5978122758963267e-05, "loss": 1.1316, "step": 2974 }, { "epoch": 0.8290372021736101, "grad_norm": 0.02499497426753029, "learning_rate": 2.5896093725484175e-05, "loss": 1.188, "step": 2975 }, { "epoch": 0.8293158701407273, "grad_norm": 0.0224159755886672, "learning_rate": 2.5814182167983993e-05, "loss": 1.1661, "step": 2976 }, { "epoch": 0.8295945381078444, "grad_norm": 0.0227583242377824, "learning_rate": 2.5732388163999685e-05, "loss": 1.1356, "step": 2977 }, { "epoch": 0.8298732060749617, "grad_norm": 0.021879415734647953, "learning_rate": 2.5650711790956962e-05, "loss": 1.1648, "step": 2978 }, { "epoch": 0.8301518740420789, "grad_norm": 0.024464231909225316, "learning_rate": 2.5569153126170115e-05, "loss": 1.2152, "step": 2979 }, { "epoch": 0.8304305420091961, "grad_norm": 0.021563383593219368, "learning_rate": 2.5487712246842073e-05, "loss": 1.0989, "step": 2980 }, { "epoch": 0.8307092099763133, "grad_norm": 0.020989925568516046, "learning_rate": 2.5406389230064334e-05, "loss": 1.1242, "step": 2981 }, { "epoch": 0.8309878779434304, "grad_norm": 0.021609825627346816, "learning_rate": 2.5325184152816652e-05, "loss": 1.1013, "step": 2982 }, { "epoch": 0.8312665459105476, "grad_norm": 0.02086327882138373, "learning_rate": 2.5244097091967293e-05, "loss": 1.0831, "step": 2983 }, { "epoch": 0.8315452138776648, "grad_norm": 0.03703018575653702, "learning_rate": 2.5163128124272754e-05, "loss": 1.1234, "step": 2984 }, { "epoch": 0.831823881844782, "grad_norm": 0.022422414473855862, "learning_rate": 2.508227732637781e-05, "loss": 1.1479, "step": 2985 }, { "epoch": 0.8321025498118991, "grad_norm": 0.02160382478194286, "learning_rate": 2.5001544774815208e-05, "loss": 1.1463, "step": 2986 }, { "epoch": 0.8323812177790163, "grad_norm": 0.02246411925336019, "learning_rate": 2.492093054600597e-05, "loss": 1.1008, "step": 2987 }, { "epoch": 0.8326598857461335, "grad_norm": 0.02376526218415232, "learning_rate": 2.484043471625895e-05, "loss": 1.1348, "step": 2988 }, { "epoch": 0.8329385537132507, "grad_norm": 0.021196200691685485, "learning_rate": 2.4760057361771017e-05, "loss": 1.0965, "step": 2989 }, { "epoch": 0.8332172216803678, "grad_norm": 0.02349294634243294, "learning_rate": 2.4679798558626894e-05, "loss": 1.2517, "step": 2990 }, { "epoch": 0.833495889647485, "grad_norm": 0.021685796997788612, "learning_rate": 2.4599658382799074e-05, "loss": 1.0802, "step": 2991 }, { "epoch": 0.8337745576146022, "grad_norm": 0.021434825396683697, "learning_rate": 2.4519636910147676e-05, "loss": 1.1296, "step": 2992 }, { "epoch": 0.8340532255817193, "grad_norm": 0.022685509999780386, "learning_rate": 2.443973421642061e-05, "loss": 1.1786, "step": 2993 }, { "epoch": 0.8343318935488365, "grad_norm": 0.021693596278303487, "learning_rate": 2.4359950377253134e-05, "loss": 1.1416, "step": 2994 }, { "epoch": 0.8346105615159537, "grad_norm": 0.022287331446921573, "learning_rate": 2.428028546816829e-05, "loss": 1.148, "step": 2995 }, { "epoch": 0.8348892294830709, "grad_norm": 0.024038958834323644, "learning_rate": 2.4200739564576276e-05, "loss": 1.1057, "step": 2996 }, { "epoch": 0.835167897450188, "grad_norm": 0.021849504172335672, "learning_rate": 2.4121312741774807e-05, "loss": 1.1048, "step": 2997 }, { "epoch": 0.8354465654173053, "grad_norm": 0.022255963832310835, "learning_rate": 2.404200507494874e-05, "loss": 1.2216, "step": 2998 }, { "epoch": 0.8357252333844225, "grad_norm": 0.022828105221119666, "learning_rate": 2.396281663917029e-05, "loss": 1.2054, "step": 2999 }, { "epoch": 0.8360039013515397, "grad_norm": 0.02138812537267978, "learning_rate": 2.388374750939868e-05, "loss": 1.1244, "step": 3000 } ], "logging_steps": 1, "max_steps": 3588, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 4.2797962208697385e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }