{ "best_metric": 1.4006094932556152, "best_model_checkpoint": "miner_id_24/checkpoint-150", "epoch": 0.11316484345529988, "eval_steps": 50, "global_step": 150, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0007544322897019992, "grad_norm": 28.675323486328125, "learning_rate": 3.3333333333333333e-06, "loss": 8.4345, "step": 1 }, { "epoch": 0.0007544322897019992, "eval_loss": 2.208278179168701, "eval_runtime": 221.7168, "eval_samples_per_second": 10.071, "eval_steps_per_second": 5.038, "step": 1 }, { "epoch": 0.0015088645794039985, "grad_norm": 29.8911190032959, "learning_rate": 6.666666666666667e-06, "loss": 7.8978, "step": 2 }, { "epoch": 0.002263296869105998, "grad_norm": 25.677209854125977, "learning_rate": 1e-05, "loss": 8.1437, "step": 3 }, { "epoch": 0.003017729158807997, "grad_norm": 29.33332061767578, "learning_rate": 1.3333333333333333e-05, "loss": 8.1284, "step": 4 }, { "epoch": 0.003772161448509996, "grad_norm": 22.953279495239258, "learning_rate": 1.6666666666666667e-05, "loss": 7.5246, "step": 5 }, { "epoch": 0.004526593738211996, "grad_norm": 24.480426788330078, "learning_rate": 2e-05, "loss": 7.4452, "step": 6 }, { "epoch": 0.005281026027913994, "grad_norm": 18.336746215820312, "learning_rate": 2.3333333333333336e-05, "loss": 6.855, "step": 7 }, { "epoch": 0.006035458317615994, "grad_norm": 14.812328338623047, "learning_rate": 2.6666666666666667e-05, "loss": 6.3316, "step": 8 }, { "epoch": 0.0067898906073179935, "grad_norm": 14.74028205871582, "learning_rate": 3e-05, "loss": 6.8076, "step": 9 }, { "epoch": 0.007544322897019992, "grad_norm": 15.624900817871094, "learning_rate": 3.3333333333333335e-05, "loss": 6.6817, "step": 10 }, { "epoch": 0.008298755186721992, "grad_norm": 13.134078025817871, "learning_rate": 3.6666666666666666e-05, "loss": 6.5695, "step": 11 }, { "epoch": 0.009053187476423991, "grad_norm": 11.790728569030762, "learning_rate": 4e-05, "loss": 6.0311, "step": 12 }, { "epoch": 0.00980761976612599, "grad_norm": 13.062444686889648, "learning_rate": 4.3333333333333334e-05, "loss": 6.5594, "step": 13 }, { "epoch": 0.010562052055827989, "grad_norm": 11.395371437072754, "learning_rate": 4.666666666666667e-05, "loss": 6.4936, "step": 14 }, { "epoch": 0.011316484345529988, "grad_norm": 13.467660903930664, "learning_rate": 5e-05, "loss": 6.3229, "step": 15 }, { "epoch": 0.012070916635231988, "grad_norm": 12.771601676940918, "learning_rate": 5.333333333333333e-05, "loss": 6.7069, "step": 16 }, { "epoch": 0.012825348924933987, "grad_norm": 13.968463897705078, "learning_rate": 5.666666666666667e-05, "loss": 6.5837, "step": 17 }, { "epoch": 0.013579781214635987, "grad_norm": 12.743508338928223, "learning_rate": 6e-05, "loss": 6.6322, "step": 18 }, { "epoch": 0.014334213504337986, "grad_norm": 10.325510025024414, "learning_rate": 6.333333333333333e-05, "loss": 6.4495, "step": 19 }, { "epoch": 0.015088645794039984, "grad_norm": 10.890840530395508, "learning_rate": 6.666666666666667e-05, "loss": 6.0026, "step": 20 }, { "epoch": 0.015843078083741986, "grad_norm": 10.725335121154785, "learning_rate": 7e-05, "loss": 6.0757, "step": 21 }, { "epoch": 0.016597510373443983, "grad_norm": 12.592613220214844, "learning_rate": 7.333333333333333e-05, "loss": 5.9807, "step": 22 }, { "epoch": 0.01735194266314598, "grad_norm": 11.364941596984863, "learning_rate": 7.666666666666667e-05, "loss": 5.5821, "step": 23 }, { "epoch": 0.018106374952847983, "grad_norm": 12.720358848571777, "learning_rate": 8e-05, "loss": 5.9976, "step": 24 }, { "epoch": 0.01886080724254998, "grad_norm": 13.64416790008545, "learning_rate": 8.333333333333334e-05, "loss": 6.1231, "step": 25 }, { "epoch": 0.01961523953225198, "grad_norm": 11.732402801513672, "learning_rate": 8.666666666666667e-05, "loss": 5.9014, "step": 26 }, { "epoch": 0.02036967182195398, "grad_norm": 11.253823280334473, "learning_rate": 9e-05, "loss": 6.1194, "step": 27 }, { "epoch": 0.021124104111655977, "grad_norm": 12.609724998474121, "learning_rate": 9.333333333333334e-05, "loss": 6.3912, "step": 28 }, { "epoch": 0.02187853640135798, "grad_norm": 17.590932846069336, "learning_rate": 9.666666666666667e-05, "loss": 6.2529, "step": 29 }, { "epoch": 0.022632968691059976, "grad_norm": 11.86485767364502, "learning_rate": 0.0001, "loss": 6.0362, "step": 30 }, { "epoch": 0.023387400980761978, "grad_norm": 11.341562271118164, "learning_rate": 9.999146252290264e-05, "loss": 5.7276, "step": 31 }, { "epoch": 0.024141833270463976, "grad_norm": 12.26652717590332, "learning_rate": 9.996585300715116e-05, "loss": 5.598, "step": 32 }, { "epoch": 0.024896265560165973, "grad_norm": 11.066813468933105, "learning_rate": 9.99231801983717e-05, "loss": 6.3367, "step": 33 }, { "epoch": 0.025650697849867975, "grad_norm": 13.498788833618164, "learning_rate": 9.986345866928941e-05, "loss": 5.407, "step": 34 }, { "epoch": 0.026405130139569973, "grad_norm": 9.877449035644531, "learning_rate": 9.978670881475172e-05, "loss": 5.6733, "step": 35 }, { "epoch": 0.027159562429271974, "grad_norm": 10.806673049926758, "learning_rate": 9.96929568447637e-05, "loss": 5.4878, "step": 36 }, { "epoch": 0.02791399471897397, "grad_norm": 9.050161361694336, "learning_rate": 9.958223477553714e-05, "loss": 5.2645, "step": 37 }, { "epoch": 0.028668427008675973, "grad_norm": 11.107382774353027, "learning_rate": 9.94545804185573e-05, "loss": 5.0922, "step": 38 }, { "epoch": 0.02942285929837797, "grad_norm": 11.42280101776123, "learning_rate": 9.931003736767013e-05, "loss": 6.2962, "step": 39 }, { "epoch": 0.03017729158807997, "grad_norm": 11.97855281829834, "learning_rate": 9.91486549841951e-05, "loss": 5.7078, "step": 40 }, { "epoch": 0.03093172387778197, "grad_norm": 12.64185905456543, "learning_rate": 9.89704883800683e-05, "loss": 5.2872, "step": 41 }, { "epoch": 0.03168615616748397, "grad_norm": 10.183547973632812, "learning_rate": 9.877559839902184e-05, "loss": 5.02, "step": 42 }, { "epoch": 0.03244058845718597, "grad_norm": 9.924983978271484, "learning_rate": 9.85640515958057e-05, "loss": 4.922, "step": 43 }, { "epoch": 0.03319502074688797, "grad_norm": 10.258749961853027, "learning_rate": 9.833592021345937e-05, "loss": 5.0618, "step": 44 }, { "epoch": 0.033949453036589965, "grad_norm": 11.61485481262207, "learning_rate": 9.809128215864097e-05, "loss": 4.4817, "step": 45 }, { "epoch": 0.03470388532629196, "grad_norm": 11.918696403503418, "learning_rate": 9.783022097502204e-05, "loss": 4.2096, "step": 46 }, { "epoch": 0.03545831761599397, "grad_norm": 13.742955207824707, "learning_rate": 9.755282581475769e-05, "loss": 3.9376, "step": 47 }, { "epoch": 0.036212749905695965, "grad_norm": 12.087013244628906, "learning_rate": 9.725919140804099e-05, "loss": 3.4829, "step": 48 }, { "epoch": 0.03696718219539796, "grad_norm": 9.929948806762695, "learning_rate": 9.694941803075283e-05, "loss": 1.3657, "step": 49 }, { "epoch": 0.03772161448509996, "grad_norm": 14.584424018859863, "learning_rate": 9.662361147021779e-05, "loss": 2.7382, "step": 50 }, { "epoch": 0.03772161448509996, "eval_loss": 1.6733660697937012, "eval_runtime": 223.1506, "eval_samples_per_second": 10.007, "eval_steps_per_second": 5.006, "step": 50 }, { "epoch": 0.03847604677480196, "grad_norm": 18.024608612060547, "learning_rate": 9.628188298907782e-05, "loss": 7.0038, "step": 51 }, { "epoch": 0.03923047906450396, "grad_norm": 13.419705390930176, "learning_rate": 9.592434928729616e-05, "loss": 7.1901, "step": 52 }, { "epoch": 0.03998491135420596, "grad_norm": 9.089792251586914, "learning_rate": 9.555113246230442e-05, "loss": 6.516, "step": 53 }, { "epoch": 0.04073934364390796, "grad_norm": 8.969603538513184, "learning_rate": 9.516235996730645e-05, "loss": 5.9768, "step": 54 }, { "epoch": 0.04149377593360996, "grad_norm": 9.698931694030762, "learning_rate": 9.475816456775313e-05, "loss": 6.6656, "step": 55 }, { "epoch": 0.042248208223311955, "grad_norm": 9.046277046203613, "learning_rate": 9.43386842960031e-05, "loss": 6.2961, "step": 56 }, { "epoch": 0.04300264051301396, "grad_norm": 8.99155330657959, "learning_rate": 9.39040624041849e-05, "loss": 5.875, "step": 57 }, { "epoch": 0.04375707280271596, "grad_norm": 8.806083679199219, "learning_rate": 9.345444731527642e-05, "loss": 6.0438, "step": 58 }, { "epoch": 0.044511505092417955, "grad_norm": 8.72610092163086, "learning_rate": 9.298999257241863e-05, "loss": 6.2185, "step": 59 }, { "epoch": 0.04526593738211995, "grad_norm": 8.603937149047852, "learning_rate": 9.251085678648072e-05, "loss": 5.5133, "step": 60 }, { "epoch": 0.04602036967182195, "grad_norm": 8.591045379638672, "learning_rate": 9.201720358189464e-05, "loss": 5.7165, "step": 61 }, { "epoch": 0.046774801961523955, "grad_norm": 9.11679458618164, "learning_rate": 9.150920154077754e-05, "loss": 6.2389, "step": 62 }, { "epoch": 0.04752923425122595, "grad_norm": 10.736730575561523, "learning_rate": 9.098702414536107e-05, "loss": 6.3546, "step": 63 }, { "epoch": 0.04828366654092795, "grad_norm": 10.027013778686523, "learning_rate": 9.045084971874738e-05, "loss": 6.193, "step": 64 }, { "epoch": 0.04903809883062995, "grad_norm": 8.398100852966309, "learning_rate": 8.9900861364012e-05, "loss": 6.1567, "step": 65 }, { "epoch": 0.04979253112033195, "grad_norm": 8.403151512145996, "learning_rate": 8.933724690167417e-05, "loss": 5.4731, "step": 66 }, { "epoch": 0.05054696341003395, "grad_norm": 8.387701988220215, "learning_rate": 8.876019880555649e-05, "loss": 6.2674, "step": 67 }, { "epoch": 0.05130139569973595, "grad_norm": 8.1467924118042, "learning_rate": 8.816991413705516e-05, "loss": 5.895, "step": 68 }, { "epoch": 0.05205582798943795, "grad_norm": 9.244304656982422, "learning_rate": 8.756659447784368e-05, "loss": 6.3247, "step": 69 }, { "epoch": 0.052810260279139945, "grad_norm": 8.65837287902832, "learning_rate": 8.695044586103296e-05, "loss": 5.7135, "step": 70 }, { "epoch": 0.05356469256884195, "grad_norm": 8.656953811645508, "learning_rate": 8.632167870081121e-05, "loss": 5.9795, "step": 71 }, { "epoch": 0.05431912485854395, "grad_norm": 7.9136481285095215, "learning_rate": 8.568050772058762e-05, "loss": 6.0396, "step": 72 }, { "epoch": 0.055073557148245945, "grad_norm": 8.58019733428955, "learning_rate": 8.502715187966455e-05, "loss": 5.8431, "step": 73 }, { "epoch": 0.05582798943794794, "grad_norm": 7.603174209594727, "learning_rate": 8.436183429846313e-05, "loss": 5.9853, "step": 74 }, { "epoch": 0.05658242172764994, "grad_norm": 8.182300567626953, "learning_rate": 8.368478218232787e-05, "loss": 5.4377, "step": 75 }, { "epoch": 0.057336854017351946, "grad_norm": 8.220528602600098, "learning_rate": 8.299622674393614e-05, "loss": 6.0818, "step": 76 }, { "epoch": 0.058091286307053944, "grad_norm": 8.084641456604004, "learning_rate": 8.229640312433937e-05, "loss": 5.5474, "step": 77 }, { "epoch": 0.05884571859675594, "grad_norm": 8.768123626708984, "learning_rate": 8.158555031266254e-05, "loss": 5.957, "step": 78 }, { "epoch": 0.05960015088645794, "grad_norm": 7.863815784454346, "learning_rate": 8.086391106448965e-05, "loss": 5.8028, "step": 79 }, { "epoch": 0.06035458317615994, "grad_norm": 8.921903610229492, "learning_rate": 8.013173181896283e-05, "loss": 5.7401, "step": 80 }, { "epoch": 0.06110901546586194, "grad_norm": 8.030502319335938, "learning_rate": 7.938926261462366e-05, "loss": 5.4555, "step": 81 }, { "epoch": 0.06186344775556394, "grad_norm": 6.994987964630127, "learning_rate": 7.863675700402526e-05, "loss": 5.144, "step": 82 }, { "epoch": 0.06261788004526593, "grad_norm": 8.704511642456055, "learning_rate": 7.787447196714427e-05, "loss": 6.2048, "step": 83 }, { "epoch": 0.06337231233496794, "grad_norm": 8.054746627807617, "learning_rate": 7.710266782362247e-05, "loss": 4.8961, "step": 84 }, { "epoch": 0.06412674462466994, "grad_norm": 8.681465148925781, "learning_rate": 7.63216081438678e-05, "loss": 6.2316, "step": 85 }, { "epoch": 0.06488117691437194, "grad_norm": 8.60116195678711, "learning_rate": 7.553155965904535e-05, "loss": 5.2028, "step": 86 }, { "epoch": 0.06563560920407394, "grad_norm": 8.654870986938477, "learning_rate": 7.473279216998895e-05, "loss": 6.033, "step": 87 }, { "epoch": 0.06639004149377593, "grad_norm": 8.84859848022461, "learning_rate": 7.392557845506432e-05, "loss": 5.0623, "step": 88 }, { "epoch": 0.06714447378347793, "grad_norm": 8.718551635742188, "learning_rate": 7.311019417701566e-05, "loss": 5.4232, "step": 89 }, { "epoch": 0.06789890607317993, "grad_norm": 9.166291236877441, "learning_rate": 7.228691778882693e-05, "loss": 5.2321, "step": 90 }, { "epoch": 0.06865333836288193, "grad_norm": 8.29334545135498, "learning_rate": 7.145603043863045e-05, "loss": 5.4072, "step": 91 }, { "epoch": 0.06940777065258392, "grad_norm": 7.80470609664917, "learning_rate": 7.061781587369519e-05, "loss": 4.705, "step": 92 }, { "epoch": 0.07016220294228594, "grad_norm": 9.8253812789917, "learning_rate": 6.977256034352712e-05, "loss": 5.9066, "step": 93 }, { "epoch": 0.07091663523198793, "grad_norm": 9.516755104064941, "learning_rate": 6.892055250211552e-05, "loss": 5.2003, "step": 94 }, { "epoch": 0.07167106752168993, "grad_norm": 10.457696914672852, "learning_rate": 6.806208330935766e-05, "loss": 5.369, "step": 95 }, { "epoch": 0.07242549981139193, "grad_norm": 10.63792610168457, "learning_rate": 6.719744593169641e-05, "loss": 4.6555, "step": 96 }, { "epoch": 0.07317993210109393, "grad_norm": 10.199153900146484, "learning_rate": 6.632693564200416e-05, "loss": 4.3893, "step": 97 }, { "epoch": 0.07393436439079593, "grad_norm": 9.126897811889648, "learning_rate": 6.545084971874738e-05, "loss": 3.0374, "step": 98 }, { "epoch": 0.07468879668049792, "grad_norm": 8.347147941589355, "learning_rate": 6.456948734446624e-05, "loss": 2.4093, "step": 99 }, { "epoch": 0.07544322897019992, "grad_norm": 8.605951309204102, "learning_rate": 6.368314950360415e-05, "loss": 1.6323, "step": 100 }, { "epoch": 0.07544322897019992, "eval_loss": 1.581653356552124, "eval_runtime": 223.1345, "eval_samples_per_second": 10.007, "eval_steps_per_second": 5.006, "step": 100 }, { "epoch": 0.07619766125990192, "grad_norm": 14.999310493469238, "learning_rate": 6.279213887972179e-05, "loss": 6.7778, "step": 101 }, { "epoch": 0.07695209354960392, "grad_norm": 13.291474342346191, "learning_rate": 6.189675975213094e-05, "loss": 7.1744, "step": 102 }, { "epoch": 0.07770652583930593, "grad_norm": 9.604938507080078, "learning_rate": 6.099731789198344e-05, "loss": 6.6883, "step": 103 }, { "epoch": 0.07846095812900793, "grad_norm": 7.80354118347168, "learning_rate": 6.009412045785051e-05, "loss": 6.2805, "step": 104 }, { "epoch": 0.07921539041870992, "grad_norm": 8.760526657104492, "learning_rate": 5.918747589082853e-05, "loss": 6.5151, "step": 105 }, { "epoch": 0.07996982270841192, "grad_norm": 8.761163711547852, "learning_rate": 5.82776938092065e-05, "loss": 6.4826, "step": 106 }, { "epoch": 0.08072425499811392, "grad_norm": 8.215286254882812, "learning_rate": 5.736508490273188e-05, "loss": 5.9167, "step": 107 }, { "epoch": 0.08147868728781592, "grad_norm": 7.599548816680908, "learning_rate": 5.644996082651017e-05, "loss": 6.2646, "step": 108 }, { "epoch": 0.08223311957751792, "grad_norm": 7.709637641906738, "learning_rate": 5.553263409457504e-05, "loss": 5.8361, "step": 109 }, { "epoch": 0.08298755186721991, "grad_norm": 7.818946838378906, "learning_rate": 5.4613417973165106e-05, "loss": 6.3807, "step": 110 }, { "epoch": 0.08374198415692191, "grad_norm": 7.244435787200928, "learning_rate": 5.3692626373743706e-05, "loss": 6.0912, "step": 111 }, { "epoch": 0.08449641644662391, "grad_norm": 7.93871545791626, "learning_rate": 5.27705737457985e-05, "loss": 6.1092, "step": 112 }, { "epoch": 0.08525084873632592, "grad_norm": 7.6457319259643555, "learning_rate": 5.184757496945726e-05, "loss": 5.915, "step": 113 }, { "epoch": 0.08600528102602792, "grad_norm": 7.350983619689941, "learning_rate": 5.092394524795649e-05, "loss": 5.7853, "step": 114 }, { "epoch": 0.08675971331572992, "grad_norm": 7.477654933929443, "learning_rate": 5e-05, "loss": 5.9845, "step": 115 }, { "epoch": 0.08751414560543191, "grad_norm": 7.959140777587891, "learning_rate": 4.907605475204352e-05, "loss": 5.3337, "step": 116 }, { "epoch": 0.08826857789513391, "grad_norm": 8.025788307189941, "learning_rate": 4.8152425030542766e-05, "loss": 5.5754, "step": 117 }, { "epoch": 0.08902301018483591, "grad_norm": 7.158747673034668, "learning_rate": 4.72294262542015e-05, "loss": 5.5501, "step": 118 }, { "epoch": 0.08977744247453791, "grad_norm": 7.6621994972229, "learning_rate": 4.6307373626256306e-05, "loss": 5.9097, "step": 119 }, { "epoch": 0.0905318747642399, "grad_norm": 7.132166862487793, "learning_rate": 4.5386582026834906e-05, "loss": 5.744, "step": 120 }, { "epoch": 0.0912863070539419, "grad_norm": 7.8009819984436035, "learning_rate": 4.446736590542497e-05, "loss": 5.5201, "step": 121 }, { "epoch": 0.0920407393436439, "grad_norm": 7.407027721405029, "learning_rate": 4.3550039173489845e-05, "loss": 5.6957, "step": 122 }, { "epoch": 0.09279517163334591, "grad_norm": 7.293229579925537, "learning_rate": 4.2634915097268115e-05, "loss": 6.0079, "step": 123 }, { "epoch": 0.09354960392304791, "grad_norm": 7.645759105682373, "learning_rate": 4.1722306190793495e-05, "loss": 4.653, "step": 124 }, { "epoch": 0.09430403621274991, "grad_norm": 7.993988513946533, "learning_rate": 4.0812524109171476e-05, "loss": 5.4163, "step": 125 }, { "epoch": 0.0950584685024519, "grad_norm": 7.342037677764893, "learning_rate": 3.99058795421495e-05, "loss": 5.9263, "step": 126 }, { "epoch": 0.0958129007921539, "grad_norm": 8.084555625915527, "learning_rate": 3.9002682108016585e-05, "loss": 6.2218, "step": 127 }, { "epoch": 0.0965673330818559, "grad_norm": 8.398702621459961, "learning_rate": 3.8103240247869075e-05, "loss": 5.8363, "step": 128 }, { "epoch": 0.0973217653715579, "grad_norm": 8.095590591430664, "learning_rate": 3.720786112027822e-05, "loss": 6.0603, "step": 129 }, { "epoch": 0.0980761976612599, "grad_norm": 8.621750831604004, "learning_rate": 3.631685049639586e-05, "loss": 6.1481, "step": 130 }, { "epoch": 0.0988306299509619, "grad_norm": 7.470015048980713, "learning_rate": 3.543051265553377e-05, "loss": 5.6436, "step": 131 }, { "epoch": 0.0995850622406639, "grad_norm": 8.399615287780762, "learning_rate": 3.4549150281252636e-05, "loss": 5.7993, "step": 132 }, { "epoch": 0.1003394945303659, "grad_norm": 7.979325294494629, "learning_rate": 3.367306435799584e-05, "loss": 4.9809, "step": 133 }, { "epoch": 0.1010939268200679, "grad_norm": 8.482051849365234, "learning_rate": 3.2802554068303596e-05, "loss": 4.963, "step": 134 }, { "epoch": 0.1018483591097699, "grad_norm": 7.327165603637695, "learning_rate": 3.1937916690642356e-05, "loss": 4.7399, "step": 135 }, { "epoch": 0.1026027913994719, "grad_norm": 7.478617191314697, "learning_rate": 3.107944749788449e-05, "loss": 5.3976, "step": 136 }, { "epoch": 0.1033572236891739, "grad_norm": 7.518840312957764, "learning_rate": 3.0227439656472877e-05, "loss": 5.1067, "step": 137 }, { "epoch": 0.1041116559788759, "grad_norm": 7.427170276641846, "learning_rate": 2.9382184126304834e-05, "loss": 4.4334, "step": 138 }, { "epoch": 0.10486608826857789, "grad_norm": 8.182501792907715, "learning_rate": 2.8543969561369556e-05, "loss": 5.201, "step": 139 }, { "epoch": 0.10562052055827989, "grad_norm": 9.181497573852539, "learning_rate": 2.771308221117309e-05, "loss": 5.1471, "step": 140 }, { "epoch": 0.10637495284798189, "grad_norm": 8.158872604370117, "learning_rate": 2.688980582298435e-05, "loss": 4.324, "step": 141 }, { "epoch": 0.1071293851376839, "grad_norm": 8.7058744430542, "learning_rate": 2.607442154493568e-05, "loss": 5.2668, "step": 142 }, { "epoch": 0.1078838174273859, "grad_norm": 9.988224029541016, "learning_rate": 2.5267207830011068e-05, "loss": 4.9627, "step": 143 }, { "epoch": 0.1086382497170879, "grad_norm": 9.397239685058594, "learning_rate": 2.446844034095466e-05, "loss": 5.4195, "step": 144 }, { "epoch": 0.1093926820067899, "grad_norm": 8.99825668334961, "learning_rate": 2.3678391856132204e-05, "loss": 4.6881, "step": 145 }, { "epoch": 0.11014711429649189, "grad_norm": 8.604011535644531, "learning_rate": 2.2897332176377528e-05, "loss": 4.5378, "step": 146 }, { "epoch": 0.11090154658619389, "grad_norm": 9.167732238769531, "learning_rate": 2.2125528032855724e-05, "loss": 3.5005, "step": 147 }, { "epoch": 0.11165597887589589, "grad_norm": 8.983088493347168, "learning_rate": 2.136324299597474e-05, "loss": 3.3061, "step": 148 }, { "epoch": 0.11241041116559788, "grad_norm": 8.106727600097656, "learning_rate": 2.061073738537635e-05, "loss": 2.1657, "step": 149 }, { "epoch": 0.11316484345529988, "grad_norm": 10.579435348510742, "learning_rate": 1.9868268181037185e-05, "loss": 2.5445, "step": 150 }, { "epoch": 0.11316484345529988, "eval_loss": 1.4006094932556152, "eval_runtime": 223.0685, "eval_samples_per_second": 10.01, "eval_steps_per_second": 5.007, "step": 150 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.150074508503941e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }