{ "best_metric": 0.11835423856973648, "best_model_checkpoint": "miner_id_24/checkpoint-150", "epoch": 0.019498245157935787, "eval_steps": 50, "global_step": 150, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00012998830105290525, "grad_norm": 8.895720481872559, "learning_rate": 1e-05, "loss": 1.8459, "step": 1 }, { "epoch": 0.00012998830105290525, "eval_loss": 1.0915981531143188, "eval_runtime": 1412.1497, "eval_samples_per_second": 9.175, "eval_steps_per_second": 2.294, "step": 1 }, { "epoch": 0.0002599766021058105, "grad_norm": 13.155832290649414, "learning_rate": 2e-05, "loss": 2.443, "step": 2 }, { "epoch": 0.0003899649031587157, "grad_norm": 14.273755073547363, "learning_rate": 3e-05, "loss": 2.9329, "step": 3 }, { "epoch": 0.000519953204211621, "grad_norm": 16.93520736694336, "learning_rate": 4e-05, "loss": 3.3626, "step": 4 }, { "epoch": 0.0006499415052645262, "grad_norm": 11.356696128845215, "learning_rate": 5e-05, "loss": 1.8192, "step": 5 }, { "epoch": 0.0007799298063174314, "grad_norm": 11.887849807739258, "learning_rate": 6e-05, "loss": 1.3449, "step": 6 }, { "epoch": 0.0009099181073703367, "grad_norm": 9.773666381835938, "learning_rate": 7e-05, "loss": 0.9185, "step": 7 }, { "epoch": 0.001039906408423242, "grad_norm": 21.219711303710938, "learning_rate": 8e-05, "loss": 1.4177, "step": 8 }, { "epoch": 0.0011698947094761472, "grad_norm": 10.484102249145508, "learning_rate": 9e-05, "loss": 0.9221, "step": 9 }, { "epoch": 0.0012998830105290524, "grad_norm": 25.021635055541992, "learning_rate": 0.0001, "loss": 1.8455, "step": 10 }, { "epoch": 0.0014298713115819576, "grad_norm": 19.793079376220703, "learning_rate": 9.999316524962345e-05, "loss": 1.4859, "step": 11 }, { "epoch": 0.0015598596126348628, "grad_norm": 12.939811706542969, "learning_rate": 9.997266286704631e-05, "loss": 0.952, "step": 12 }, { "epoch": 0.0016898479136877682, "grad_norm": 16.663564682006836, "learning_rate": 9.993849845741524e-05, "loss": 1.0298, "step": 13 }, { "epoch": 0.0018198362147406734, "grad_norm": 12.337509155273438, "learning_rate": 9.989068136093873e-05, "loss": 1.119, "step": 14 }, { "epoch": 0.0019498245157935786, "grad_norm": 8.889068603515625, "learning_rate": 9.98292246503335e-05, "loss": 0.806, "step": 15 }, { "epoch": 0.002079812816846484, "grad_norm": 8.746395111083984, "learning_rate": 9.975414512725057e-05, "loss": 0.5811, "step": 16 }, { "epoch": 0.002209801117899389, "grad_norm": 7.167966842651367, "learning_rate": 9.966546331768191e-05, "loss": 0.5683, "step": 17 }, { "epoch": 0.0023397894189522944, "grad_norm": 7.786585807800293, "learning_rate": 9.956320346634876e-05, "loss": 0.6394, "step": 18 }, { "epoch": 0.0024697777200051994, "grad_norm": 7.365955352783203, "learning_rate": 9.944739353007344e-05, "loss": 0.7046, "step": 19 }, { "epoch": 0.002599766021058105, "grad_norm": 7.709902286529541, "learning_rate": 9.931806517013612e-05, "loss": 0.4938, "step": 20 }, { "epoch": 0.00272975432211101, "grad_norm": 9.915132522583008, "learning_rate": 9.917525374361912e-05, "loss": 0.942, "step": 21 }, { "epoch": 0.002859742623163915, "grad_norm": 9.246820449829102, "learning_rate": 9.901899829374047e-05, "loss": 0.6803, "step": 22 }, { "epoch": 0.0029897309242168206, "grad_norm": 13.664460182189941, "learning_rate": 9.884934153917997e-05, "loss": 0.5407, "step": 23 }, { "epoch": 0.0031197192252697256, "grad_norm": 11.21821403503418, "learning_rate": 9.86663298624003e-05, "loss": 0.7522, "step": 24 }, { "epoch": 0.003249707526322631, "grad_norm": 8.65762710571289, "learning_rate": 9.847001329696653e-05, "loss": 0.5756, "step": 25 }, { "epoch": 0.0033796958273755364, "grad_norm": 7.683070659637451, "learning_rate": 9.826044551386744e-05, "loss": 0.59, "step": 26 }, { "epoch": 0.0035096841284284414, "grad_norm": 6.744781017303467, "learning_rate": 9.803768380684242e-05, "loss": 0.4952, "step": 27 }, { "epoch": 0.003639672429481347, "grad_norm": 9.178666114807129, "learning_rate": 9.780178907671789e-05, "loss": 0.4358, "step": 28 }, { "epoch": 0.0037696607305342518, "grad_norm": 14.2472562789917, "learning_rate": 9.755282581475769e-05, "loss": 0.5543, "step": 29 }, { "epoch": 0.003899649031587157, "grad_norm": 7.047728061676025, "learning_rate": 9.729086208503174e-05, "loss": 0.6627, "step": 30 }, { "epoch": 0.004029637332640062, "grad_norm": 6.600000858306885, "learning_rate": 9.701596950580806e-05, "loss": 0.4999, "step": 31 }, { "epoch": 0.004159625633692968, "grad_norm": 6.267518520355225, "learning_rate": 9.672822322997305e-05, "loss": 0.4127, "step": 32 }, { "epoch": 0.004289613934745873, "grad_norm": 3.9115278720855713, "learning_rate": 9.642770192448536e-05, "loss": 0.2551, "step": 33 }, { "epoch": 0.004419602235798778, "grad_norm": 3.7888052463531494, "learning_rate": 9.611448774886924e-05, "loss": 0.2697, "step": 34 }, { "epoch": 0.004549590536851683, "grad_norm": 6.660991668701172, "learning_rate": 9.578866633275288e-05, "loss": 0.4979, "step": 35 }, { "epoch": 0.004679578837904589, "grad_norm": 5.288132667541504, "learning_rate": 9.545032675245813e-05, "loss": 0.3436, "step": 36 }, { "epoch": 0.004809567138957494, "grad_norm": 6.1838226318359375, "learning_rate": 9.509956150664796e-05, "loss": 0.4442, "step": 37 }, { "epoch": 0.004939555440010399, "grad_norm": 6.035123825073242, "learning_rate": 9.473646649103818e-05, "loss": 0.3234, "step": 38 }, { "epoch": 0.005069543741063305, "grad_norm": 5.086143493652344, "learning_rate": 9.43611409721806e-05, "loss": 0.3324, "step": 39 }, { "epoch": 0.00519953204211621, "grad_norm": 5.471978187561035, "learning_rate": 9.397368756032445e-05, "loss": 0.3007, "step": 40 }, { "epoch": 0.005329520343169115, "grad_norm": 5.744053840637207, "learning_rate": 9.357421218136386e-05, "loss": 0.273, "step": 41 }, { "epoch": 0.00545950864422202, "grad_norm": 11.946670532226562, "learning_rate": 9.316282404787871e-05, "loss": 0.1784, "step": 42 }, { "epoch": 0.005589496945274925, "grad_norm": 7.70154333114624, "learning_rate": 9.273963562927695e-05, "loss": 0.4239, "step": 43 }, { "epoch": 0.00571948524632783, "grad_norm": 5.410760879516602, "learning_rate": 9.230476262104677e-05, "loss": 0.4391, "step": 44 }, { "epoch": 0.005849473547380735, "grad_norm": 4.884847164154053, "learning_rate": 9.185832391312644e-05, "loss": 0.3154, "step": 45 }, { "epoch": 0.005979461848433641, "grad_norm": 5.657925128936768, "learning_rate": 9.140044155740101e-05, "loss": 0.3881, "step": 46 }, { "epoch": 0.006109450149486546, "grad_norm": 3.236168622970581, "learning_rate": 9.093124073433463e-05, "loss": 0.2024, "step": 47 }, { "epoch": 0.006239438450539451, "grad_norm": 4.553912162780762, "learning_rate": 9.045084971874738e-05, "loss": 0.3, "step": 48 }, { "epoch": 0.006369426751592357, "grad_norm": 3.5105910301208496, "learning_rate": 8.995939984474624e-05, "loss": 0.2236, "step": 49 }, { "epoch": 0.006499415052645262, "grad_norm": 5.97371244430542, "learning_rate": 8.945702546981969e-05, "loss": 0.2381, "step": 50 }, { "epoch": 0.006499415052645262, "eval_loss": 0.2108517289161682, "eval_runtime": 1417.8328, "eval_samples_per_second": 9.139, "eval_steps_per_second": 2.285, "step": 50 }, { "epoch": 0.006629403353698167, "grad_norm": 11.96389389038086, "learning_rate": 8.894386393810563e-05, "loss": 1.8051, "step": 51 }, { "epoch": 0.006759391654751073, "grad_norm": 7.560161113739014, "learning_rate": 8.842005554284296e-05, "loss": 1.4103, "step": 52 }, { "epoch": 0.006889379955803978, "grad_norm": 6.23681116104126, "learning_rate": 8.788574348801675e-05, "loss": 1.4256, "step": 53 }, { "epoch": 0.007019368256856883, "grad_norm": 9.883326530456543, "learning_rate": 8.73410738492077e-05, "loss": 2.1999, "step": 54 }, { "epoch": 0.007149356557909788, "grad_norm": 7.169539451599121, "learning_rate": 8.678619553365659e-05, "loss": 0.9196, "step": 55 }, { "epoch": 0.007279344858962694, "grad_norm": 6.351007461547852, "learning_rate": 8.622126023955446e-05, "loss": 1.1004, "step": 56 }, { "epoch": 0.007409333160015599, "grad_norm": 4.663862705230713, "learning_rate": 8.564642241456986e-05, "loss": 0.4545, "step": 57 }, { "epoch": 0.0075393214610685036, "grad_norm": 5.961542129516602, "learning_rate": 8.506183921362443e-05, "loss": 0.6011, "step": 58 }, { "epoch": 0.007669309762121409, "grad_norm": 7.969834327697754, "learning_rate": 8.44676704559283e-05, "loss": 0.6682, "step": 59 }, { "epoch": 0.007799298063174314, "grad_norm": 6.4743170738220215, "learning_rate": 8.386407858128706e-05, "loss": 0.6514, "step": 60 }, { "epoch": 0.00792928636422722, "grad_norm": 6.987910270690918, "learning_rate": 8.32512286056924e-05, "loss": 0.5891, "step": 61 }, { "epoch": 0.008059274665280124, "grad_norm": 6.665630340576172, "learning_rate": 8.262928807620843e-05, "loss": 0.4296, "step": 62 }, { "epoch": 0.00818926296633303, "grad_norm": 8.244565963745117, "learning_rate": 8.199842702516583e-05, "loss": 0.5505, "step": 63 }, { "epoch": 0.008319251267385936, "grad_norm": 5.804681777954102, "learning_rate": 8.135881792367686e-05, "loss": 0.4528, "step": 64 }, { "epoch": 0.00844923956843884, "grad_norm": 9.024893760681152, "learning_rate": 8.07106356344834e-05, "loss": 0.8494, "step": 65 }, { "epoch": 0.008579227869491746, "grad_norm": 9.78223991394043, "learning_rate": 8.005405736415126e-05, "loss": 0.7119, "step": 66 }, { "epoch": 0.008709216170544652, "grad_norm": 6.717483043670654, "learning_rate": 7.938926261462366e-05, "loss": 0.5105, "step": 67 }, { "epoch": 0.008839204471597556, "grad_norm": 5.113883018493652, "learning_rate": 7.871643313414718e-05, "loss": 0.5204, "step": 68 }, { "epoch": 0.008969192772650462, "grad_norm": 4.767594814300537, "learning_rate": 7.803575286758364e-05, "loss": 0.4807, "step": 69 }, { "epoch": 0.009099181073703366, "grad_norm": 5.869354724884033, "learning_rate": 7.734740790612136e-05, "loss": 0.3917, "step": 70 }, { "epoch": 0.009229169374756272, "grad_norm": 4.560206413269043, "learning_rate": 7.66515864363997e-05, "loss": 0.4034, "step": 71 }, { "epoch": 0.009359157675809178, "grad_norm": 6.740488052368164, "learning_rate": 7.594847868906076e-05, "loss": 0.4844, "step": 72 }, { "epoch": 0.009489145976862082, "grad_norm": 4.010514259338379, "learning_rate": 7.52382768867422e-05, "loss": 0.3006, "step": 73 }, { "epoch": 0.009619134277914988, "grad_norm": 10.616462707519531, "learning_rate": 7.452117519152542e-05, "loss": 0.3638, "step": 74 }, { "epoch": 0.009749122578967893, "grad_norm": 3.899102210998535, "learning_rate": 7.379736965185368e-05, "loss": 0.2581, "step": 75 }, { "epoch": 0.009879110880020798, "grad_norm": 5.232759475708008, "learning_rate": 7.30670581489344e-05, "loss": 0.4197, "step": 76 }, { "epoch": 0.010009099181073703, "grad_norm": 3.8902716636657715, "learning_rate": 7.233044034264034e-05, "loss": 0.2863, "step": 77 }, { "epoch": 0.01013908748212661, "grad_norm": 4.209787845611572, "learning_rate": 7.158771761692464e-05, "loss": 0.2249, "step": 78 }, { "epoch": 0.010269075783179513, "grad_norm": 4.976950645446777, "learning_rate": 7.083909302476453e-05, "loss": 0.3716, "step": 79 }, { "epoch": 0.01039906408423242, "grad_norm": 5.116728782653809, "learning_rate": 7.008477123264848e-05, "loss": 0.3137, "step": 80 }, { "epoch": 0.010529052385285325, "grad_norm": 3.287550449371338, "learning_rate": 6.932495846462261e-05, "loss": 0.1725, "step": 81 }, { "epoch": 0.01065904068633823, "grad_norm": 4.446608543395996, "learning_rate": 6.855986244591104e-05, "loss": 0.284, "step": 82 }, { "epoch": 0.010789028987391135, "grad_norm": 3.303908348083496, "learning_rate": 6.778969234612584e-05, "loss": 0.1945, "step": 83 }, { "epoch": 0.01091901728844404, "grad_norm": 7.167092800140381, "learning_rate": 6.701465872208216e-05, "loss": 0.4386, "step": 84 }, { "epoch": 0.011049005589496945, "grad_norm": 3.590325355529785, "learning_rate": 6.623497346023418e-05, "loss": 0.1954, "step": 85 }, { "epoch": 0.01117899389054985, "grad_norm": 4.736059188842773, "learning_rate": 6.545084971874738e-05, "loss": 0.5428, "step": 86 }, { "epoch": 0.011308982191602755, "grad_norm": 6.112609386444092, "learning_rate": 6.466250186922325e-05, "loss": 0.3665, "step": 87 }, { "epoch": 0.01143897049265566, "grad_norm": 3.9378249645233154, "learning_rate": 6.387014543809223e-05, "loss": 0.2136, "step": 88 }, { "epoch": 0.011568958793708567, "grad_norm": 4.579885959625244, "learning_rate": 6.307399704769099e-05, "loss": 0.2942, "step": 89 }, { "epoch": 0.01169894709476147, "grad_norm": 4.84573221206665, "learning_rate": 6.227427435703997e-05, "loss": 0.3229, "step": 90 }, { "epoch": 0.011828935395814377, "grad_norm": 3.348799705505371, "learning_rate": 6.147119600233758e-05, "loss": 0.2288, "step": 91 }, { "epoch": 0.011958923696867282, "grad_norm": 3.677680015563965, "learning_rate": 6.066498153718735e-05, "loss": 0.2188, "step": 92 }, { "epoch": 0.012088911997920187, "grad_norm": 3.9599173069000244, "learning_rate": 5.985585137257401e-05, "loss": 0.2934, "step": 93 }, { "epoch": 0.012218900298973092, "grad_norm": 1.8040279150009155, "learning_rate": 5.90440267166055e-05, "loss": 0.1396, "step": 94 }, { "epoch": 0.012348888600025998, "grad_norm": 2.4542367458343506, "learning_rate": 5.8229729514036705e-05, "loss": 0.158, "step": 95 }, { "epoch": 0.012478876901078902, "grad_norm": 2.886835813522339, "learning_rate": 5.74131823855921e-05, "loss": 0.1485, "step": 96 }, { "epoch": 0.012608865202131808, "grad_norm": 4.41395902633667, "learning_rate": 5.6594608567103456e-05, "loss": 0.1855, "step": 97 }, { "epoch": 0.012738853503184714, "grad_norm": 2.9605488777160645, "learning_rate": 5.577423184847932e-05, "loss": 0.1654, "step": 98 }, { "epoch": 0.012868841804237618, "grad_norm": 4.90177583694458, "learning_rate": 5.495227651252315e-05, "loss": 0.1931, "step": 99 }, { "epoch": 0.012998830105290524, "grad_norm": 6.01702880859375, "learning_rate": 5.4128967273616625e-05, "loss": 0.3606, "step": 100 }, { "epoch": 0.012998830105290524, "eval_loss": 0.14507097005844116, "eval_runtime": 1415.1288, "eval_samples_per_second": 9.156, "eval_steps_per_second": 2.29, "step": 100 }, { "epoch": 0.01312881840634343, "grad_norm": 7.9503583908081055, "learning_rate": 5.330452921628497e-05, "loss": 1.8375, "step": 101 }, { "epoch": 0.013258806707396334, "grad_norm": 6.233389854431152, "learning_rate": 5.247918773366112e-05, "loss": 1.3804, "step": 102 }, { "epoch": 0.01338879500844924, "grad_norm": 4.569299697875977, "learning_rate": 5.165316846586541e-05, "loss": 1.0248, "step": 103 }, { "epoch": 0.013518783309502146, "grad_norm": 5.200125694274902, "learning_rate": 5.0826697238317935e-05, "loss": 1.3332, "step": 104 }, { "epoch": 0.01364877161055505, "grad_norm": 5.104859352111816, "learning_rate": 5e-05, "loss": 0.907, "step": 105 }, { "epoch": 0.013778759911607956, "grad_norm": 3.8999855518341064, "learning_rate": 4.917330276168208e-05, "loss": 0.5831, "step": 106 }, { "epoch": 0.01390874821266086, "grad_norm": 4.339993000030518, "learning_rate": 4.834683153413459e-05, "loss": 0.3018, "step": 107 }, { "epoch": 0.014038736513713766, "grad_norm": 4.26090145111084, "learning_rate": 4.7520812266338885e-05, "loss": 0.4824, "step": 108 }, { "epoch": 0.014168724814766671, "grad_norm": 6.476984024047852, "learning_rate": 4.669547078371504e-05, "loss": 0.5009, "step": 109 }, { "epoch": 0.014298713115819576, "grad_norm": 6.847250461578369, "learning_rate": 4.5871032726383386e-05, "loss": 0.51, "step": 110 }, { "epoch": 0.014428701416872481, "grad_norm": 5.437959671020508, "learning_rate": 4.504772348747687e-05, "loss": 0.5212, "step": 111 }, { "epoch": 0.014558689717925387, "grad_norm": 4.896420478820801, "learning_rate": 4.4225768151520694e-05, "loss": 0.4352, "step": 112 }, { "epoch": 0.014688678018978291, "grad_norm": 5.313551425933838, "learning_rate": 4.3405391432896555e-05, "loss": 0.4218, "step": 113 }, { "epoch": 0.014818666320031197, "grad_norm": 4.771443843841553, "learning_rate": 4.2586817614407895e-05, "loss": 0.3939, "step": 114 }, { "epoch": 0.014948654621084103, "grad_norm": 3.681682825088501, "learning_rate": 4.17702704859633e-05, "loss": 0.5179, "step": 115 }, { "epoch": 0.015078642922137007, "grad_norm": 4.138582229614258, "learning_rate": 4.095597328339452e-05, "loss": 0.3389, "step": 116 }, { "epoch": 0.015208631223189913, "grad_norm": 4.465811252593994, "learning_rate": 4.0144148627425993e-05, "loss": 0.4372, "step": 117 }, { "epoch": 0.015338619524242819, "grad_norm": 4.707912445068359, "learning_rate": 3.933501846281267e-05, "loss": 0.4408, "step": 118 }, { "epoch": 0.015468607825295723, "grad_norm": 4.273300647735596, "learning_rate": 3.852880399766243e-05, "loss": 0.4049, "step": 119 }, { "epoch": 0.015598596126348629, "grad_norm": 3.297325849533081, "learning_rate": 3.772572564296005e-05, "loss": 0.2983, "step": 120 }, { "epoch": 0.015728584427401535, "grad_norm": 3.599745035171509, "learning_rate": 3.6926002952309016e-05, "loss": 0.332, "step": 121 }, { "epoch": 0.01585857272845444, "grad_norm": 3.2352163791656494, "learning_rate": 3.612985456190778e-05, "loss": 0.2846, "step": 122 }, { "epoch": 0.015988561029507343, "grad_norm": 5.459647178649902, "learning_rate": 3.533749813077677e-05, "loss": 0.3044, "step": 123 }, { "epoch": 0.01611854933056025, "grad_norm": 3.7283079624176025, "learning_rate": 3.4549150281252636e-05, "loss": 0.2579, "step": 124 }, { "epoch": 0.016248537631613155, "grad_norm": 2.8069612979888916, "learning_rate": 3.3765026539765834e-05, "loss": 0.2671, "step": 125 }, { "epoch": 0.01637852593266606, "grad_norm": 3.1048874855041504, "learning_rate": 3.298534127791785e-05, "loss": 0.1552, "step": 126 }, { "epoch": 0.016508514233718966, "grad_norm": 4.251136779785156, "learning_rate": 3.221030765387417e-05, "loss": 0.2977, "step": 127 }, { "epoch": 0.016638502534771872, "grad_norm": 5.176329135894775, "learning_rate": 3.144013755408895e-05, "loss": 0.2595, "step": 128 }, { "epoch": 0.016768490835824774, "grad_norm": 4.689629554748535, "learning_rate": 3.0675041535377405e-05, "loss": 0.3759, "step": 129 }, { "epoch": 0.01689847913687768, "grad_norm": 3.1052284240722656, "learning_rate": 2.991522876735154e-05, "loss": 0.1697, "step": 130 }, { "epoch": 0.017028467437930586, "grad_norm": 4.357040882110596, "learning_rate": 2.916090697523549e-05, "loss": 0.2108, "step": 131 }, { "epoch": 0.017158455738983492, "grad_norm": 5.174962520599365, "learning_rate": 2.8412282383075363e-05, "loss": 0.2725, "step": 132 }, { "epoch": 0.017288444040036398, "grad_norm": 4.341920375823975, "learning_rate": 2.766955965735968e-05, "loss": 0.1915, "step": 133 }, { "epoch": 0.017418432341089304, "grad_norm": 4.376591682434082, "learning_rate": 2.693294185106562e-05, "loss": 0.2614, "step": 134 }, { "epoch": 0.017548420642142206, "grad_norm": 3.130718231201172, "learning_rate": 2.6202630348146324e-05, "loss": 0.1921, "step": 135 }, { "epoch": 0.017678408943195112, "grad_norm": 2.5387256145477295, "learning_rate": 2.547882480847461e-05, "loss": 0.1437, "step": 136 }, { "epoch": 0.017808397244248018, "grad_norm": 4.208658695220947, "learning_rate": 2.476172311325783e-05, "loss": 0.2688, "step": 137 }, { "epoch": 0.017938385545300924, "grad_norm": 2.6489620208740234, "learning_rate": 2.405152131093926e-05, "loss": 0.2148, "step": 138 }, { "epoch": 0.01806837384635383, "grad_norm": 4.43926477432251, "learning_rate": 2.3348413563600325e-05, "loss": 0.2774, "step": 139 }, { "epoch": 0.018198362147406732, "grad_norm": 2.5864620208740234, "learning_rate": 2.2652592093878666e-05, "loss": 0.159, "step": 140 }, { "epoch": 0.018328350448459638, "grad_norm": 4.264463424682617, "learning_rate": 2.196424713241637e-05, "loss": 0.321, "step": 141 }, { "epoch": 0.018458338749512544, "grad_norm": 5.146592140197754, "learning_rate": 2.128356686585282e-05, "loss": 0.18, "step": 142 }, { "epoch": 0.01858832705056545, "grad_norm": 3.3703808784484863, "learning_rate": 2.061073738537635e-05, "loss": 0.1344, "step": 143 }, { "epoch": 0.018718315351618355, "grad_norm": 1.4023449420928955, "learning_rate": 1.9945942635848748e-05, "loss": 0.063, "step": 144 }, { "epoch": 0.01884830365267126, "grad_norm": 3.0542232990264893, "learning_rate": 1.928936436551661e-05, "loss": 0.16, "step": 145 }, { "epoch": 0.018978291953724163, "grad_norm": 2.733213424682617, "learning_rate": 1.8641182076323148e-05, "loss": 0.1332, "step": 146 }, { "epoch": 0.01910828025477707, "grad_norm": 2.248521327972412, "learning_rate": 1.800157297483417e-05, "loss": 0.1503, "step": 147 }, { "epoch": 0.019238268555829975, "grad_norm": 3.6398096084594727, "learning_rate": 1.7370711923791567e-05, "loss": 0.2957, "step": 148 }, { "epoch": 0.01936825685688288, "grad_norm": 2.906625747680664, "learning_rate": 1.6748771394307585e-05, "loss": 0.1492, "step": 149 }, { "epoch": 0.019498245157935787, "grad_norm": 2.7944815158843994, "learning_rate": 1.6135921418712956e-05, "loss": 0.1233, "step": 150 }, { "epoch": 0.019498245157935787, "eval_loss": 0.11835423856973648, "eval_runtime": 1419.5313, "eval_samples_per_second": 9.128, "eval_steps_per_second": 2.282, "step": 150 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.3468367347397427e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }