{ "best_metric": 1.0028241872787476, "best_model_checkpoint": "miner_id_24/checkpoint-200", "epoch": 0.1305909239307868, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.000652954619653934, "grad_norm": 2.9607129096984863, "learning_rate": 6e-06, "loss": 0.9838, "step": 1 }, { "epoch": 0.000652954619653934, "eval_loss": 1.7666047811508179, "eval_runtime": 120.808, "eval_samples_per_second": 21.348, "eval_steps_per_second": 5.339, "step": 1 }, { "epoch": 0.001305909239307868, "grad_norm": 2.886413097381592, "learning_rate": 1.2e-05, "loss": 1.07, "step": 2 }, { "epoch": 0.0019588638589618022, "grad_norm": 3.1968891620635986, "learning_rate": 1.8e-05, "loss": 1.07, "step": 3 }, { "epoch": 0.002611818478615736, "grad_norm": 2.6857988834381104, "learning_rate": 2.4e-05, "loss": 1.0531, "step": 4 }, { "epoch": 0.00326477309826967, "grad_norm": 3.397486925125122, "learning_rate": 3e-05, "loss": 1.0427, "step": 5 }, { "epoch": 0.0039177277179236044, "grad_norm": 2.537473678588867, "learning_rate": 3.6e-05, "loss": 1.0788, "step": 6 }, { "epoch": 0.004570682337577538, "grad_norm": 3.305051565170288, "learning_rate": 4.2e-05, "loss": 1.0845, "step": 7 }, { "epoch": 0.005223636957231472, "grad_norm": 4.0556159019470215, "learning_rate": 4.8e-05, "loss": 1.1394, "step": 8 }, { "epoch": 0.005876591576885406, "grad_norm": 3.8339462280273438, "learning_rate": 5.4000000000000005e-05, "loss": 1.1144, "step": 9 }, { "epoch": 0.00652954619653934, "grad_norm": 3.7050435543060303, "learning_rate": 6e-05, "loss": 1.1493, "step": 10 }, { "epoch": 0.007182500816193275, "grad_norm": 3.626809597015381, "learning_rate": 5.999589914977407e-05, "loss": 1.0889, "step": 11 }, { "epoch": 0.007835455435847209, "grad_norm": 2.6016409397125244, "learning_rate": 5.998359772022778e-05, "loss": 1.0795, "step": 12 }, { "epoch": 0.008488410055501142, "grad_norm": 3.216207981109619, "learning_rate": 5.996309907444915e-05, "loss": 1.1189, "step": 13 }, { "epoch": 0.009141364675155077, "grad_norm": 2.8661720752716064, "learning_rate": 5.9934408816563236e-05, "loss": 1.1257, "step": 14 }, { "epoch": 0.009794319294809012, "grad_norm": 3.571786403656006, "learning_rate": 5.98975347902001e-05, "loss": 1.1956, "step": 15 }, { "epoch": 0.010447273914462945, "grad_norm": 3.807826519012451, "learning_rate": 5.9852487076350345e-05, "loss": 1.1381, "step": 16 }, { "epoch": 0.01110022853411688, "grad_norm": 3.1027400493621826, "learning_rate": 5.979927799060915e-05, "loss": 1.2279, "step": 17 }, { "epoch": 0.011753183153770812, "grad_norm": 8.466021537780762, "learning_rate": 5.9737922079809257e-05, "loss": 1.0925, "step": 18 }, { "epoch": 0.012406137773424747, "grad_norm": 5.115357875823975, "learning_rate": 5.9668436118044054e-05, "loss": 1.1874, "step": 19 }, { "epoch": 0.01305909239307868, "grad_norm": 3.0616984367370605, "learning_rate": 5.959083910208167e-05, "loss": 1.1619, "step": 20 }, { "epoch": 0.013712047012732615, "grad_norm": 3.804137706756592, "learning_rate": 5.9505152246171474e-05, "loss": 1.2, "step": 21 }, { "epoch": 0.01436500163238655, "grad_norm": 4.98964786529541, "learning_rate": 5.941139897624428e-05, "loss": 1.2752, "step": 22 }, { "epoch": 0.015017956252040483, "grad_norm": 5.878356456756592, "learning_rate": 5.9309604923507984e-05, "loss": 1.1876, "step": 23 }, { "epoch": 0.015670910871694418, "grad_norm": 4.656700134277344, "learning_rate": 5.9199797917440176e-05, "loss": 1.1782, "step": 24 }, { "epoch": 0.01632386549134835, "grad_norm": 4.261521816253662, "learning_rate": 5.908200797817991e-05, "loss": 1.0939, "step": 25 }, { "epoch": 0.016976820111002284, "grad_norm": 4.2994561195373535, "learning_rate": 5.895626730832046e-05, "loss": 1.2696, "step": 26 }, { "epoch": 0.01762977473065622, "grad_norm": 4.130359172821045, "learning_rate": 5.882261028410545e-05, "loss": 1.0851, "step": 27 }, { "epoch": 0.018282729350310153, "grad_norm": 4.892334461212158, "learning_rate": 5.8681073446030734e-05, "loss": 1.238, "step": 28 }, { "epoch": 0.018935683969964087, "grad_norm": 5.284782886505127, "learning_rate": 5.853169548885461e-05, "loss": 1.1572, "step": 29 }, { "epoch": 0.019588638589618023, "grad_norm": 5.572746753692627, "learning_rate": 5.8374517251019035e-05, "loss": 1.2621, "step": 30 }, { "epoch": 0.020241593209271956, "grad_norm": 5.857466220855713, "learning_rate": 5.820958170348484e-05, "loss": 1.2328, "step": 31 }, { "epoch": 0.02089454782892589, "grad_norm": 5.471181869506836, "learning_rate": 5.8036933937983825e-05, "loss": 1.1946, "step": 32 }, { "epoch": 0.021547502448579822, "grad_norm": 6.936777591705322, "learning_rate": 5.7856621154691217e-05, "loss": 1.3557, "step": 33 }, { "epoch": 0.02220045706823376, "grad_norm": 7.256147384643555, "learning_rate": 5.766869264932154e-05, "loss": 1.3517, "step": 34 }, { "epoch": 0.022853411687887692, "grad_norm": 6.057352066040039, "learning_rate": 5.747319979965172e-05, "loss": 1.32, "step": 35 }, { "epoch": 0.023506366307541625, "grad_norm": 7.9997992515563965, "learning_rate": 5.727019605147488e-05, "loss": 1.3627, "step": 36 }, { "epoch": 0.02415932092719556, "grad_norm": 7.625100612640381, "learning_rate": 5.7059736903988775e-05, "loss": 1.4194, "step": 37 }, { "epoch": 0.024812275546849494, "grad_norm": 9.102681159973145, "learning_rate": 5.684187989462291e-05, "loss": 1.3099, "step": 38 }, { "epoch": 0.025465230166503428, "grad_norm": 28.361309051513672, "learning_rate": 5.661668458330836e-05, "loss": 1.3782, "step": 39 }, { "epoch": 0.02611818478615736, "grad_norm": 11.431262969970703, "learning_rate": 5.638421253619467e-05, "loss": 1.4085, "step": 40 }, { "epoch": 0.026771139405811297, "grad_norm": 10.391942024230957, "learning_rate": 5.614452730881832e-05, "loss": 1.1281, "step": 41 }, { "epoch": 0.02742409402546523, "grad_norm": 8.589218139648438, "learning_rate": 5.589769442872722e-05, "loss": 1.4111, "step": 42 }, { "epoch": 0.028077048645119163, "grad_norm": 10.96375846862793, "learning_rate": 5.5643781377566175e-05, "loss": 1.3594, "step": 43 }, { "epoch": 0.0287300032647731, "grad_norm": 9.945216178894043, "learning_rate": 5.538285757262806e-05, "loss": 1.3844, "step": 44 }, { "epoch": 0.029382957884427033, "grad_norm": 9.55181884765625, "learning_rate": 5.5114994347875856e-05, "loss": 1.5334, "step": 45 }, { "epoch": 0.030035912504080966, "grad_norm": 14.02538776397705, "learning_rate": 5.48402649344406e-05, "loss": 1.0552, "step": 46 }, { "epoch": 0.0306888671237349, "grad_norm": 10.34814739227295, "learning_rate": 5.455874444060078e-05, "loss": 1.3677, "step": 47 }, { "epoch": 0.031341821743388835, "grad_norm": 12.232339859008789, "learning_rate": 5.427050983124843e-05, "loss": 1.3311, "step": 48 }, { "epoch": 0.031994776363042765, "grad_norm": 16.909160614013672, "learning_rate": 5.397563990684774e-05, "loss": 1.354, "step": 49 }, { "epoch": 0.0326477309826967, "grad_norm": 11.846768379211426, "learning_rate": 5.367421528189181e-05, "loss": 2.1489, "step": 50 }, { "epoch": 0.0326477309826967, "eval_loss": 1.139583945274353, "eval_runtime": 122.1204, "eval_samples_per_second": 21.119, "eval_steps_per_second": 5.282, "step": 50 }, { "epoch": 0.03330068560235064, "grad_norm": 2.604781150817871, "learning_rate": 5.336631836286338e-05, "loss": 0.9021, "step": 51 }, { "epoch": 0.03395364022200457, "grad_norm": 3.153728485107422, "learning_rate": 5.3052033325705774e-05, "loss": 1.0348, "step": 52 }, { "epoch": 0.034606594841658504, "grad_norm": 3.466165781021118, "learning_rate": 5.2731446092810044e-05, "loss": 0.9443, "step": 53 }, { "epoch": 0.03525954946131244, "grad_norm": 3.162385940551758, "learning_rate": 5.240464430952462e-05, "loss": 0.9966, "step": 54 }, { "epoch": 0.03591250408096637, "grad_norm": 3.3062691688537598, "learning_rate": 5.207171732019395e-05, "loss": 1.0428, "step": 55 }, { "epoch": 0.03656545870062031, "grad_norm": 3.6018426418304443, "learning_rate": 5.1732756143732675e-05, "loss": 0.9448, "step": 56 }, { "epoch": 0.03721841332027424, "grad_norm": 3.296633243560791, "learning_rate": 5.1387853448741916e-05, "loss": 0.9189, "step": 57 }, { "epoch": 0.03787136793992817, "grad_norm": 3.173064708709717, "learning_rate": 5.103710352817465e-05, "loss": 1.0277, "step": 58 }, { "epoch": 0.03852432255958211, "grad_norm": 3.3730928897857666, "learning_rate": 5.068060227355698e-05, "loss": 0.8936, "step": 59 }, { "epoch": 0.039177277179236046, "grad_norm": 3.60870623588562, "learning_rate": 5.0318447148772234e-05, "loss": 1.0102, "step": 60 }, { "epoch": 0.039830231798889976, "grad_norm": 3.396580219268799, "learning_rate": 4.995073716341545e-05, "loss": 1.1103, "step": 61 }, { "epoch": 0.04048318641854391, "grad_norm": 4.471563339233398, "learning_rate": 4.957757284572506e-05, "loss": 0.9807, "step": 62 }, { "epoch": 0.04113614103819784, "grad_norm": 3.622544527053833, "learning_rate": 4.91990562150995e-05, "loss": 1.0711, "step": 63 }, { "epoch": 0.04178909565785178, "grad_norm": 3.4821181297302246, "learning_rate": 4.881529075420611e-05, "loss": 1.0609, "step": 64 }, { "epoch": 0.042442050277505715, "grad_norm": 4.031519412994385, "learning_rate": 4.8426381380690036e-05, "loss": 1.1398, "step": 65 }, { "epoch": 0.043095004897159644, "grad_norm": 3.9957878589630127, "learning_rate": 4.8032434418490753e-05, "loss": 1.1471, "step": 66 }, { "epoch": 0.04374795951681358, "grad_norm": 3.3050076961517334, "learning_rate": 4.7633557568774194e-05, "loss": 1.0089, "step": 67 }, { "epoch": 0.04440091413646752, "grad_norm": 5.516901016235352, "learning_rate": 4.722985988048831e-05, "loss": 1.0998, "step": 68 }, { "epoch": 0.04505386875612145, "grad_norm": 3.5296335220336914, "learning_rate": 4.6821451720550184e-05, "loss": 1.035, "step": 69 }, { "epoch": 0.045706823375775384, "grad_norm": 3.975694417953491, "learning_rate": 4.640844474367282e-05, "loss": 1.044, "step": 70 }, { "epoch": 0.04635977799542932, "grad_norm": 3.943890333175659, "learning_rate": 4.5990951861839815e-05, "loss": 1.1156, "step": 71 }, { "epoch": 0.04701273261508325, "grad_norm": 2.91097092628479, "learning_rate": 4.5569087213436455e-05, "loss": 1.1876, "step": 72 }, { "epoch": 0.047665687234737186, "grad_norm": 3.305521011352539, "learning_rate": 4.514296613204532e-05, "loss": 1.1768, "step": 73 }, { "epoch": 0.04831864185439112, "grad_norm": 3.6090002059936523, "learning_rate": 4.471270511491525e-05, "loss": 1.1285, "step": 74 }, { "epoch": 0.04897159647404505, "grad_norm": 3.2527575492858887, "learning_rate": 4.427842179111221e-05, "loss": 1.1102, "step": 75 }, { "epoch": 0.04962455109369899, "grad_norm": 3.3647749423980713, "learning_rate": 4.3840234889360634e-05, "loss": 1.2065, "step": 76 }, { "epoch": 0.05027750571335292, "grad_norm": 3.749720573425293, "learning_rate": 4.33982642055842e-05, "loss": 1.1856, "step": 77 }, { "epoch": 0.050930460333006855, "grad_norm": 3.3767154216766357, "learning_rate": 4.2952630570154785e-05, "loss": 1.1158, "step": 78 }, { "epoch": 0.05158341495266079, "grad_norm": 3.9333693981170654, "learning_rate": 4.250345581485871e-05, "loss": 1.0961, "step": 79 }, { "epoch": 0.05223636957231472, "grad_norm": 4.0832414627075195, "learning_rate": 4.205086273958909e-05, "loss": 1.0504, "step": 80 }, { "epoch": 0.05288932419196866, "grad_norm": 4.657516956329346, "learning_rate": 4.1594975078773565e-05, "loss": 1.1912, "step": 81 }, { "epoch": 0.053542278811622594, "grad_norm": 4.331697940826416, "learning_rate": 4.113591746754662e-05, "loss": 1.2054, "step": 82 }, { "epoch": 0.054195233431276524, "grad_norm": 4.666406631469727, "learning_rate": 4.06738154076755e-05, "loss": 1.0689, "step": 83 }, { "epoch": 0.05484818805093046, "grad_norm": 4.582425117492676, "learning_rate": 4.020879523324929e-05, "loss": 1.0477, "step": 84 }, { "epoch": 0.0555011426705844, "grad_norm": 5.275778293609619, "learning_rate": 3.974098407614051e-05, "loss": 1.1634, "step": 85 }, { "epoch": 0.056154097290238326, "grad_norm": 7.426105499267578, "learning_rate": 3.927050983124842e-05, "loss": 0.9985, "step": 86 }, { "epoch": 0.05680705190989226, "grad_norm": 7.216943264007568, "learning_rate": 3.8797501121533946e-05, "loss": 1.0157, "step": 87 }, { "epoch": 0.0574600065295462, "grad_norm": 6.098451137542725, "learning_rate": 3.832208726285534e-05, "loss": 1.1652, "step": 88 }, { "epoch": 0.05811296114920013, "grad_norm": 8.16521167755127, "learning_rate": 3.784439822861459e-05, "loss": 1.1323, "step": 89 }, { "epoch": 0.058765915768854066, "grad_norm": 7.356335163116455, "learning_rate": 3.7364564614223976e-05, "loss": 1.22, "step": 90 }, { "epoch": 0.059418870388507995, "grad_norm": 9.59832763671875, "learning_rate": 3.688271760140255e-05, "loss": 0.9902, "step": 91 }, { "epoch": 0.06007182500816193, "grad_norm": 8.946552276611328, "learning_rate": 3.6398988922312406e-05, "loss": 1.1894, "step": 92 }, { "epoch": 0.06072477962781587, "grad_norm": 8.753064155578613, "learning_rate": 3.591351082354441e-05, "loss": 0.9602, "step": 93 }, { "epoch": 0.0613777342474698, "grad_norm": 8.148518562316895, "learning_rate": 3.54264160299633e-05, "loss": 0.9199, "step": 94 }, { "epoch": 0.062030688867123734, "grad_norm": 12.24012565612793, "learning_rate": 3.493783770842202e-05, "loss": 1.4617, "step": 95 }, { "epoch": 0.06268364348677767, "grad_norm": 8.7044038772583, "learning_rate": 3.444790943135526e-05, "loss": 1.4332, "step": 96 }, { "epoch": 0.06333659810643161, "grad_norm": 10.754229545593262, "learning_rate": 3.3956765140262074e-05, "loss": 1.2384, "step": 97 }, { "epoch": 0.06398955272608553, "grad_norm": 7.212235927581787, "learning_rate": 3.346453910908759e-05, "loss": 1.0087, "step": 98 }, { "epoch": 0.06464250734573947, "grad_norm": 6.35690450668335, "learning_rate": 3.297136590751389e-05, "loss": 1.1121, "step": 99 }, { "epoch": 0.0652954619653934, "grad_norm": 9.522856712341309, "learning_rate": 3.247738036416998e-05, "loss": 1.295, "step": 100 }, { "epoch": 0.0652954619653934, "eval_loss": 1.0394381284713745, "eval_runtime": 122.5877, "eval_samples_per_second": 21.038, "eval_steps_per_second": 5.262, "step": 100 }, { "epoch": 0.06594841658504734, "grad_norm": 2.2772409915924072, "learning_rate": 3.1982717529770985e-05, "loss": 0.9383, "step": 101 }, { "epoch": 0.06660137120470128, "grad_norm": 2.253962278366089, "learning_rate": 3.148751264019667e-05, "loss": 0.9758, "step": 102 }, { "epoch": 0.06725432582435521, "grad_norm": 2.918980598449707, "learning_rate": 3.099190107951924e-05, "loss": 0.9812, "step": 103 }, { "epoch": 0.06790728044400914, "grad_norm": 2.450403928756714, "learning_rate": 3.049601834299076e-05, "loss": 0.9601, "step": 104 }, { "epoch": 0.06856023506366307, "grad_norm": 2.9561350345611572, "learning_rate": 3e-05, "loss": 0.9834, "step": 105 }, { "epoch": 0.06921318968331701, "grad_norm": 2.328831195831299, "learning_rate": 2.9503981657009246e-05, "loss": 0.9963, "step": 106 }, { "epoch": 0.06986614430297095, "grad_norm": 2.268932580947876, "learning_rate": 2.9008098920480752e-05, "loss": 1.0565, "step": 107 }, { "epoch": 0.07051909892262488, "grad_norm": 2.5179338455200195, "learning_rate": 2.851248735980333e-05, "loss": 1.0076, "step": 108 }, { "epoch": 0.07117205354227882, "grad_norm": 2.6005704402923584, "learning_rate": 2.801728247022902e-05, "loss": 1.0477, "step": 109 }, { "epoch": 0.07182500816193274, "grad_norm": 2.822859048843384, "learning_rate": 2.7522619635830034e-05, "loss": 1.0568, "step": 110 }, { "epoch": 0.07247796278158668, "grad_norm": 2.977989435195923, "learning_rate": 2.702863409248612e-05, "loss": 1.0618, "step": 111 }, { "epoch": 0.07313091740124061, "grad_norm": 2.6255922317504883, "learning_rate": 2.6535460890912416e-05, "loss": 0.9556, "step": 112 }, { "epoch": 0.07378387202089455, "grad_norm": 2.65173077583313, "learning_rate": 2.604323485973793e-05, "loss": 1.0104, "step": 113 }, { "epoch": 0.07443682664054849, "grad_norm": 2.734870672225952, "learning_rate": 2.555209056864474e-05, "loss": 1.0723, "step": 114 }, { "epoch": 0.07508978126020241, "grad_norm": 2.6934852600097656, "learning_rate": 2.5062162291577978e-05, "loss": 1.0174, "step": 115 }, { "epoch": 0.07574273587985635, "grad_norm": 2.8896446228027344, "learning_rate": 2.4573583970036712e-05, "loss": 1.0758, "step": 116 }, { "epoch": 0.07639569049951028, "grad_norm": 5.038157939910889, "learning_rate": 2.4086489176455595e-05, "loss": 0.9793, "step": 117 }, { "epoch": 0.07704864511916422, "grad_norm": 2.9270153045654297, "learning_rate": 2.36010110776876e-05, "loss": 1.0536, "step": 118 }, { "epoch": 0.07770159973881816, "grad_norm": 3.7477188110351562, "learning_rate": 2.3117282398597456e-05, "loss": 0.9753, "step": 119 }, { "epoch": 0.07835455435847209, "grad_norm": 2.9560868740081787, "learning_rate": 2.263543538577603e-05, "loss": 1.0558, "step": 120 }, { "epoch": 0.07900750897812601, "grad_norm": 2.799459218978882, "learning_rate": 2.215560177138541e-05, "loss": 0.9952, "step": 121 }, { "epoch": 0.07966046359777995, "grad_norm": 3.058806896209717, "learning_rate": 2.167791273714467e-05, "loss": 1.129, "step": 122 }, { "epoch": 0.08031341821743389, "grad_norm": 3.49177885055542, "learning_rate": 2.1202498878466062e-05, "loss": 1.1132, "step": 123 }, { "epoch": 0.08096637283708782, "grad_norm": 3.7546982765197754, "learning_rate": 2.072949016875158e-05, "loss": 1.1489, "step": 124 }, { "epoch": 0.08161932745674176, "grad_norm": 3.177138566970825, "learning_rate": 2.0259015923859498e-05, "loss": 1.0056, "step": 125 }, { "epoch": 0.08227228207639568, "grad_norm": 3.8052380084991455, "learning_rate": 1.979120476675071e-05, "loss": 1.0421, "step": 126 }, { "epoch": 0.08292523669604962, "grad_norm": 4.195981979370117, "learning_rate": 1.9326184592324503e-05, "loss": 1.006, "step": 127 }, { "epoch": 0.08357819131570356, "grad_norm": 5.595157146453857, "learning_rate": 1.8864082532453373e-05, "loss": 1.1052, "step": 128 }, { "epoch": 0.0842311459353575, "grad_norm": 4.070886611938477, "learning_rate": 1.840502492122644e-05, "loss": 1.0596, "step": 129 }, { "epoch": 0.08488410055501143, "grad_norm": 4.806919574737549, "learning_rate": 1.7949137260410924e-05, "loss": 1.2327, "step": 130 }, { "epoch": 0.08553705517466537, "grad_norm": 4.85729455947876, "learning_rate": 1.7496544185141295e-05, "loss": 1.1303, "step": 131 }, { "epoch": 0.08619000979431929, "grad_norm": 4.837055206298828, "learning_rate": 1.7047369429845216e-05, "loss": 1.0845, "step": 132 }, { "epoch": 0.08684296441397323, "grad_norm": 7.230628490447998, "learning_rate": 1.6601735794415806e-05, "loss": 0.9928, "step": 133 }, { "epoch": 0.08749591903362716, "grad_norm": 5.271618366241455, "learning_rate": 1.615976511063937e-05, "loss": 1.0808, "step": 134 }, { "epoch": 0.0881488736532811, "grad_norm": 7.045250415802002, "learning_rate": 1.5721578208887793e-05, "loss": 1.0926, "step": 135 }, { "epoch": 0.08880182827293504, "grad_norm": 6.365724086761475, "learning_rate": 1.5287294885084766e-05, "loss": 1.0479, "step": 136 }, { "epoch": 0.08945478289258897, "grad_norm": 7.801453590393066, "learning_rate": 1.4857033867954697e-05, "loss": 1.1132, "step": 137 }, { "epoch": 0.0901077375122429, "grad_norm": 7.563957691192627, "learning_rate": 1.4430912786563554e-05, "loss": 1.315, "step": 138 }, { "epoch": 0.09076069213189683, "grad_norm": 7.684714317321777, "learning_rate": 1.4009048138160195e-05, "loss": 0.9579, "step": 139 }, { "epoch": 0.09141364675155077, "grad_norm": 9.783828735351562, "learning_rate": 1.3591555256327199e-05, "loss": 1.1081, "step": 140 }, { "epoch": 0.0920666013712047, "grad_norm": 11.417814254760742, "learning_rate": 1.3178548279449822e-05, "loss": 0.8645, "step": 141 }, { "epoch": 0.09271955599085864, "grad_norm": 10.159150123596191, "learning_rate": 1.2770140119511693e-05, "loss": 1.1358, "step": 142 }, { "epoch": 0.09337251061051256, "grad_norm": 8.352884292602539, "learning_rate": 1.2366442431225809e-05, "loss": 1.3205, "step": 143 }, { "epoch": 0.0940254652301665, "grad_norm": 7.829765319824219, "learning_rate": 1.1967565581509248e-05, "loss": 1.0611, "step": 144 }, { "epoch": 0.09467841984982044, "grad_norm": 10.132458686828613, "learning_rate": 1.1573618619309965e-05, "loss": 1.0457, "step": 145 }, { "epoch": 0.09533137446947437, "grad_norm": 16.793420791625977, "learning_rate": 1.1184709245793889e-05, "loss": 0.9384, "step": 146 }, { "epoch": 0.09598432908912831, "grad_norm": 8.721817016601562, "learning_rate": 1.0800943784900502e-05, "loss": 0.9181, "step": 147 }, { "epoch": 0.09663728370878225, "grad_norm": 9.623841285705566, "learning_rate": 1.042242715427494e-05, "loss": 0.8591, "step": 148 }, { "epoch": 0.09729023832843617, "grad_norm": 11.314678192138672, "learning_rate": 1.004926283658455e-05, "loss": 1.2406, "step": 149 }, { "epoch": 0.0979431929480901, "grad_norm": 11.438794136047363, "learning_rate": 9.681552851227774e-06, "loss": 1.5576, "step": 150 }, { "epoch": 0.0979431929480901, "eval_loss": 1.0103774070739746, "eval_runtime": 122.1066, "eval_samples_per_second": 21.121, "eval_steps_per_second": 5.282, "step": 150 }, { "epoch": 0.09859614756774404, "grad_norm": 2.021636724472046, "learning_rate": 9.319397726443026e-06, "loss": 0.9084, "step": 151 }, { "epoch": 0.09924910218739798, "grad_norm": 2.2391059398651123, "learning_rate": 8.962896471825342e-06, "loss": 0.9427, "step": 152 }, { "epoch": 0.09990205680705191, "grad_norm": 3.0561001300811768, "learning_rate": 8.61214655125809e-06, "loss": 0.8915, "step": 153 }, { "epoch": 0.10055501142670584, "grad_norm": 2.604928493499756, "learning_rate": 8.267243856267331e-06, "loss": 0.9242, "step": 154 }, { "epoch": 0.10120796604635977, "grad_norm": 2.782693386077881, "learning_rate": 7.928282679806052e-06, "loss": 0.923, "step": 155 }, { "epoch": 0.10186092066601371, "grad_norm": 2.6287972927093506, "learning_rate": 7.595355690475393e-06, "loss": 0.9864, "step": 156 }, { "epoch": 0.10251387528566765, "grad_norm": 2.7638707160949707, "learning_rate": 7.268553907189964e-06, "loss": 0.9488, "step": 157 }, { "epoch": 0.10316682990532158, "grad_norm": 3.8539583683013916, "learning_rate": 6.947966674294236e-06, "loss": 0.9282, "step": 158 }, { "epoch": 0.10381978452497552, "grad_norm": 2.9983620643615723, "learning_rate": 6.6336816371366305e-06, "loss": 0.9185, "step": 159 }, { "epoch": 0.10447273914462944, "grad_norm": 2.6667168140411377, "learning_rate": 6.325784718108196e-06, "loss": 1.0198, "step": 160 }, { "epoch": 0.10512569376428338, "grad_norm": 2.199368476867676, "learning_rate": 6.0243600931522595e-06, "loss": 0.9514, "step": 161 }, { "epoch": 0.10577864838393732, "grad_norm": 2.339479684829712, "learning_rate": 5.72949016875158e-06, "loss": 0.8627, "step": 162 }, { "epoch": 0.10643160300359125, "grad_norm": 2.4051907062530518, "learning_rate": 5.44125555939923e-06, "loss": 0.9412, "step": 163 }, { "epoch": 0.10708455762324519, "grad_norm": 2.6235029697418213, "learning_rate": 5.159735065559399e-06, "loss": 0.9661, "step": 164 }, { "epoch": 0.10773751224289912, "grad_norm": 2.3630259037017822, "learning_rate": 4.885005652124144e-06, "loss": 1.161, "step": 165 }, { "epoch": 0.10839046686255305, "grad_norm": 2.62069034576416, "learning_rate": 4.617142427371934e-06, "loss": 1.0818, "step": 166 }, { "epoch": 0.10904342148220698, "grad_norm": 3.125945568084717, "learning_rate": 4.3562186224338265e-06, "loss": 1.0923, "step": 167 }, { "epoch": 0.10969637610186092, "grad_norm": 2.823786497116089, "learning_rate": 4.102305571272783e-06, "loss": 0.9918, "step": 168 }, { "epoch": 0.11034933072151486, "grad_norm": 3.086836338043213, "learning_rate": 3.855472691181678e-06, "loss": 1.0645, "step": 169 }, { "epoch": 0.1110022853411688, "grad_norm": 2.968472719192505, "learning_rate": 3.615787463805331e-06, "loss": 1.0943, "step": 170 }, { "epoch": 0.11165523996082272, "grad_norm": 3.3311610221862793, "learning_rate": 3.383315416691646e-06, "loss": 1.0482, "step": 171 }, { "epoch": 0.11230819458047665, "grad_norm": 2.8356263637542725, "learning_rate": 3.158120105377096e-06, "loss": 1.1126, "step": 172 }, { "epoch": 0.11296114920013059, "grad_norm": 3.495161294937134, "learning_rate": 2.940263096011233e-06, "loss": 1.1292, "step": 173 }, { "epoch": 0.11361410381978453, "grad_norm": 3.611652135848999, "learning_rate": 2.729803948525125e-06, "loss": 1.2087, "step": 174 }, { "epoch": 0.11426705843943846, "grad_norm": 3.4223086833953857, "learning_rate": 2.526800200348275e-06, "loss": 1.0086, "step": 175 }, { "epoch": 0.1149200130590924, "grad_norm": 3.590174913406372, "learning_rate": 2.3313073506784575e-06, "loss": 1.0571, "step": 176 }, { "epoch": 0.11557296767874632, "grad_norm": 5.3133649826049805, "learning_rate": 2.143378845308791e-06, "loss": 1.079, "step": 177 }, { "epoch": 0.11622592229840026, "grad_norm": 4.763728141784668, "learning_rate": 1.9630660620161777e-06, "loss": 1.1924, "step": 178 }, { "epoch": 0.1168788769180542, "grad_norm": 4.005756378173828, "learning_rate": 1.790418296515165e-06, "loss": 1.1013, "step": 179 }, { "epoch": 0.11753183153770813, "grad_norm": 4.59070348739624, "learning_rate": 1.625482748980961e-06, "loss": 1.0473, "step": 180 }, { "epoch": 0.11818478615736207, "grad_norm": 5.296746253967285, "learning_rate": 1.4683045111453942e-06, "loss": 1.2648, "step": 181 }, { "epoch": 0.11883774077701599, "grad_norm": 4.656338691711426, "learning_rate": 1.3189265539692707e-06, "loss": 1.0275, "step": 182 }, { "epoch": 0.11949069539666993, "grad_norm": 5.707005977630615, "learning_rate": 1.1773897158945557e-06, "loss": 1.1523, "step": 183 }, { "epoch": 0.12014365001632386, "grad_norm": 5.811980247497559, "learning_rate": 1.0437326916795432e-06, "loss": 1.1355, "step": 184 }, { "epoch": 0.1207966046359778, "grad_norm": 5.786341190338135, "learning_rate": 9.179920218200888e-07, "loss": 1.0562, "step": 185 }, { "epoch": 0.12144955925563174, "grad_norm": 9.454066276550293, "learning_rate": 8.002020825598277e-07, "loss": 1.1791, "step": 186 }, { "epoch": 0.12210251387528567, "grad_norm": 7.248098850250244, "learning_rate": 6.90395076492022e-07, "loss": 0.9874, "step": 187 }, { "epoch": 0.1227554684949396, "grad_norm": 6.36809778213501, "learning_rate": 5.886010237557194e-07, "loss": 0.9503, "step": 188 }, { "epoch": 0.12340842311459353, "grad_norm": 6.659921646118164, "learning_rate": 4.94847753828529e-07, "loss": 1.0327, "step": 189 }, { "epoch": 0.12406137773424747, "grad_norm": 6.8771562576293945, "learning_rate": 4.091608979183303e-07, "loss": 0.9623, "step": 190 }, { "epoch": 0.1247143323539014, "grad_norm": 6.988889694213867, "learning_rate": 3.315638819559452e-07, "loss": 1.0579, "step": 191 }, { "epoch": 0.12536728697355534, "grad_norm": 7.6399641036987305, "learning_rate": 2.6207792019074414e-07, "loss": 0.8439, "step": 192 }, { "epoch": 0.12602024159320926, "grad_norm": 10.259600639343262, "learning_rate": 2.0072200939085573e-07, "loss": 1.2599, "step": 193 }, { "epoch": 0.12667319621286321, "grad_norm": 11.773065567016602, "learning_rate": 1.475129236496575e-07, "loss": 1.0644, "step": 194 }, { "epoch": 0.12732615083251714, "grad_norm": 7.667246341705322, "learning_rate": 1.0246520979990459e-07, "loss": 1.133, "step": 195 }, { "epoch": 0.12797910545217106, "grad_norm": 9.011076927185059, "learning_rate": 6.559118343676396e-08, "loss": 0.992, "step": 196 }, { "epoch": 0.128632060071825, "grad_norm": 16.59012794494629, "learning_rate": 3.690092555085789e-08, "loss": 1.1446, "step": 197 }, { "epoch": 0.12928501469147893, "grad_norm": 13.964167594909668, "learning_rate": 1.640227977221853e-08, "loss": 1.1686, "step": 198 }, { "epoch": 0.12993796931113288, "grad_norm": 9.262879371643066, "learning_rate": 4.1008502259298755e-09, "loss": 0.8989, "step": 199 }, { "epoch": 0.1305909239307868, "grad_norm": 14.184370994567871, "learning_rate": 0.0, "loss": 1.3632, "step": 200 }, { "epoch": 0.1305909239307868, "eval_loss": 1.0028241872787476, "eval_runtime": 122.1359, "eval_samples_per_second": 21.116, "eval_steps_per_second": 5.281, "step": 200 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 4, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.0799333020532736e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }