{ "best_metric": 0.33944934606552124, "best_model_checkpoint": "miner_id_24/checkpoint-150", "epoch": 0.025076273665733274, "eval_steps": 50, "global_step": 150, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00016717515777155514, "grad_norm": 66.1699447631836, "learning_rate": 5e-06, "loss": 20.8217, "step": 1 }, { "epoch": 0.00016717515777155514, "eval_loss": 4.830644607543945, "eval_runtime": 1176.2555, "eval_samples_per_second": 8.565, "eval_steps_per_second": 4.283, "step": 1 }, { "epoch": 0.00033435031554311027, "grad_norm": 55.28761672973633, "learning_rate": 1e-05, "loss": 22.0512, "step": 2 }, { "epoch": 0.0005015254733146654, "grad_norm": 34.258522033691406, "learning_rate": 1.5e-05, "loss": 16.6346, "step": 3 }, { "epoch": 0.0006687006310862205, "grad_norm": 35.852420806884766, "learning_rate": 2e-05, "loss": 17.7986, "step": 4 }, { "epoch": 0.0008358757888577757, "grad_norm": 44.334144592285156, "learning_rate": 2.5e-05, "loss": 18.5218, "step": 5 }, { "epoch": 0.0010030509466293308, "grad_norm": 39.2244987487793, "learning_rate": 3e-05, "loss": 15.099, "step": 6 }, { "epoch": 0.001170226104400886, "grad_norm": 31.828834533691406, "learning_rate": 3.5e-05, "loss": 14.2471, "step": 7 }, { "epoch": 0.001337401262172441, "grad_norm": 31.544771194458008, "learning_rate": 4e-05, "loss": 15.0025, "step": 8 }, { "epoch": 0.0015045764199439963, "grad_norm": 38.78912353515625, "learning_rate": 4.5e-05, "loss": 12.5948, "step": 9 }, { "epoch": 0.0016717515777155514, "grad_norm": 31.890718460083008, "learning_rate": 5e-05, "loss": 9.837, "step": 10 }, { "epoch": 0.0018389267354871066, "grad_norm": 1420.156005859375, "learning_rate": 5.500000000000001e-05, "loss": 9.1239, "step": 11 }, { "epoch": 0.0020061018932586616, "grad_norm": 25.306236267089844, "learning_rate": 6e-05, "loss": 7.4396, "step": 12 }, { "epoch": 0.002173277051030217, "grad_norm": 26.349756240844727, "learning_rate": 6.500000000000001e-05, "loss": 8.3412, "step": 13 }, { "epoch": 0.002340452208801772, "grad_norm": 21.180355072021484, "learning_rate": 7e-05, "loss": 6.5257, "step": 14 }, { "epoch": 0.0025076273665733274, "grad_norm": 23.34146499633789, "learning_rate": 7.500000000000001e-05, "loss": 5.9087, "step": 15 }, { "epoch": 0.002674802524344882, "grad_norm": 22.092315673828125, "learning_rate": 8e-05, "loss": 5.759, "step": 16 }, { "epoch": 0.0028419776821164374, "grad_norm": 20.397918701171875, "learning_rate": 8.5e-05, "loss": 4.5725, "step": 17 }, { "epoch": 0.0030091528398879927, "grad_norm": 18.830806732177734, "learning_rate": 9e-05, "loss": 4.2111, "step": 18 }, { "epoch": 0.003176327997659548, "grad_norm": 15.320545196533203, "learning_rate": 9.5e-05, "loss": 3.3252, "step": 19 }, { "epoch": 0.0033435031554311027, "grad_norm": 21.95885467529297, "learning_rate": 0.0001, "loss": 4.4532, "step": 20 }, { "epoch": 0.003510678313202658, "grad_norm": 15.07732105255127, "learning_rate": 9.999238475781957e-05, "loss": 2.7677, "step": 21 }, { "epoch": 0.003677853470974213, "grad_norm": 14.574703216552734, "learning_rate": 9.99695413509548e-05, "loss": 3.1061, "step": 22 }, { "epoch": 0.0038450286287457685, "grad_norm": 16.069921493530273, "learning_rate": 9.99314767377287e-05, "loss": 2.7588, "step": 23 }, { "epoch": 0.004012203786517323, "grad_norm": 13.70803451538086, "learning_rate": 9.987820251299122e-05, "loss": 2.7997, "step": 24 }, { "epoch": 0.004179378944288879, "grad_norm": 10.898150444030762, "learning_rate": 9.980973490458728e-05, "loss": 2.7861, "step": 25 }, { "epoch": 0.004346554102060434, "grad_norm": 11.45754623413086, "learning_rate": 9.972609476841367e-05, "loss": 1.7007, "step": 26 }, { "epoch": 0.004513729259831989, "grad_norm": 11.79631519317627, "learning_rate": 9.962730758206611e-05, "loss": 2.3383, "step": 27 }, { "epoch": 0.004680904417603544, "grad_norm": 11.452874183654785, "learning_rate": 9.951340343707852e-05, "loss": 2.8686, "step": 28 }, { "epoch": 0.004848079575375099, "grad_norm": 11.521985054016113, "learning_rate": 9.938441702975689e-05, "loss": 2.8974, "step": 29 }, { "epoch": 0.005015254733146655, "grad_norm": 8.12588882446289, "learning_rate": 9.924038765061042e-05, "loss": 1.7916, "step": 30 }, { "epoch": 0.0051824298909182095, "grad_norm": 8.71818733215332, "learning_rate": 9.908135917238321e-05, "loss": 1.7343, "step": 31 }, { "epoch": 0.005349605048689764, "grad_norm": 7.344757080078125, "learning_rate": 9.890738003669029e-05, "loss": 2.0697, "step": 32 }, { "epoch": 0.00551678020646132, "grad_norm": 7.359161376953125, "learning_rate": 9.871850323926177e-05, "loss": 1.9041, "step": 33 }, { "epoch": 0.005683955364232875, "grad_norm": 6.432816982269287, "learning_rate": 9.851478631379982e-05, "loss": 1.5344, "step": 34 }, { "epoch": 0.0058511305220044305, "grad_norm": 7.933882713317871, "learning_rate": 9.829629131445342e-05, "loss": 1.6259, "step": 35 }, { "epoch": 0.006018305679775985, "grad_norm": 11.05882740020752, "learning_rate": 9.806308479691595e-05, "loss": 2.3703, "step": 36 }, { "epoch": 0.00618548083754754, "grad_norm": 6.093107223510742, "learning_rate": 9.781523779815179e-05, "loss": 1.5933, "step": 37 }, { "epoch": 0.006352655995319096, "grad_norm": 7.78998327255249, "learning_rate": 9.755282581475769e-05, "loss": 2.0594, "step": 38 }, { "epoch": 0.006519831153090651, "grad_norm": 12.237889289855957, "learning_rate": 9.727592877996585e-05, "loss": 2.5323, "step": 39 }, { "epoch": 0.0066870063108622054, "grad_norm": 6.168281078338623, "learning_rate": 9.698463103929542e-05, "loss": 2.0113, "step": 40 }, { "epoch": 0.006854181468633761, "grad_norm": 6.7144293785095215, "learning_rate": 9.667902132486009e-05, "loss": 1.621, "step": 41 }, { "epoch": 0.007021356626405316, "grad_norm": 6.263976573944092, "learning_rate": 9.635919272833938e-05, "loss": 1.9262, "step": 42 }, { "epoch": 0.007188531784176872, "grad_norm": 5.80394172668457, "learning_rate": 9.602524267262203e-05, "loss": 2.3022, "step": 43 }, { "epoch": 0.007355706941948426, "grad_norm": 5.633117198944092, "learning_rate": 9.567727288213005e-05, "loss": 1.4775, "step": 44 }, { "epoch": 0.007522882099719981, "grad_norm": 9.90259075164795, "learning_rate": 9.53153893518325e-05, "loss": 2.639, "step": 45 }, { "epoch": 0.007690057257491537, "grad_norm": 6.790462970733643, "learning_rate": 9.493970231495835e-05, "loss": 2.1576, "step": 46 }, { "epoch": 0.007857232415263092, "grad_norm": 7.405107498168945, "learning_rate": 9.45503262094184e-05, "loss": 2.4812, "step": 47 }, { "epoch": 0.008024407573034647, "grad_norm": 5.291300296783447, "learning_rate": 9.414737964294636e-05, "loss": 1.6901, "step": 48 }, { "epoch": 0.008191582730806201, "grad_norm": 7.042820453643799, "learning_rate": 9.373098535696979e-05, "loss": 2.4124, "step": 49 }, { "epoch": 0.008358757888577758, "grad_norm": 6.7682905197143555, "learning_rate": 9.330127018922194e-05, "loss": 2.4717, "step": 50 }, { "epoch": 0.008358757888577758, "eval_loss": 0.6559868454933167, "eval_runtime": 1183.0601, "eval_samples_per_second": 8.516, "eval_steps_per_second": 4.258, "step": 50 }, { "epoch": 0.008525933046349313, "grad_norm": 89.21569061279297, "learning_rate": 9.285836503510562e-05, "loss": 7.3617, "step": 51 }, { "epoch": 0.008693108204120868, "grad_norm": 18.17483901977539, "learning_rate": 9.24024048078213e-05, "loss": 2.9755, "step": 52 }, { "epoch": 0.008860283361892422, "grad_norm": 19.885055541992188, "learning_rate": 9.193352839727121e-05, "loss": 3.7002, "step": 53 }, { "epoch": 0.009027458519663977, "grad_norm": 9.078715324401855, "learning_rate": 9.145187862775209e-05, "loss": 3.1884, "step": 54 }, { "epoch": 0.009194633677435534, "grad_norm": 11.488103866577148, "learning_rate": 9.09576022144496e-05, "loss": 2.6635, "step": 55 }, { "epoch": 0.009361808835207088, "grad_norm": 15.700331687927246, "learning_rate": 9.045084971874738e-05, "loss": 2.4526, "step": 56 }, { "epoch": 0.009528983992978643, "grad_norm": 6.886660575866699, "learning_rate": 8.993177550236464e-05, "loss": 2.0983, "step": 57 }, { "epoch": 0.009696159150750198, "grad_norm": 7.425085067749023, "learning_rate": 8.940053768033609e-05, "loss": 2.3903, "step": 58 }, { "epoch": 0.009863334308521753, "grad_norm": 7.20428991317749, "learning_rate": 8.885729807284856e-05, "loss": 1.9328, "step": 59 }, { "epoch": 0.01003050946629331, "grad_norm": 7.445702075958252, "learning_rate": 8.83022221559489e-05, "loss": 2.2298, "step": 60 }, { "epoch": 0.010197684624064864, "grad_norm": 6.846135139465332, "learning_rate": 8.773547901113862e-05, "loss": 2.3124, "step": 61 }, { "epoch": 0.010364859781836419, "grad_norm": 10.974035263061523, "learning_rate": 8.715724127386972e-05, "loss": 2.0219, "step": 62 }, { "epoch": 0.010532034939607974, "grad_norm": 4.6794843673706055, "learning_rate": 8.656768508095853e-05, "loss": 1.4291, "step": 63 }, { "epoch": 0.010699210097379529, "grad_norm": 4.862498760223389, "learning_rate": 8.596699001693255e-05, "loss": 1.5775, "step": 64 }, { "epoch": 0.010866385255151085, "grad_norm": 6.960198402404785, "learning_rate": 8.535533905932738e-05, "loss": 1.8942, "step": 65 }, { "epoch": 0.01103356041292264, "grad_norm": 5.895086765289307, "learning_rate": 8.473291852294987e-05, "loss": 1.4487, "step": 66 }, { "epoch": 0.011200735570694195, "grad_norm": 5.19366979598999, "learning_rate": 8.409991800312493e-05, "loss": 1.3874, "step": 67 }, { "epoch": 0.01136791072846575, "grad_norm": 5.545513153076172, "learning_rate": 8.345653031794292e-05, "loss": 1.8828, "step": 68 }, { "epoch": 0.011535085886237305, "grad_norm": 4.446866035461426, "learning_rate": 8.280295144952536e-05, "loss": 1.2592, "step": 69 }, { "epoch": 0.011702261044008861, "grad_norm": 4.8530659675598145, "learning_rate": 8.213938048432697e-05, "loss": 1.5734, "step": 70 }, { "epoch": 0.011869436201780416, "grad_norm": 6.671334266662598, "learning_rate": 8.146601955249188e-05, "loss": 1.2566, "step": 71 }, { "epoch": 0.01203661135955197, "grad_norm": 5.107539176940918, "learning_rate": 8.07830737662829e-05, "loss": 1.1086, "step": 72 }, { "epoch": 0.012203786517323525, "grad_norm": 5.993882656097412, "learning_rate": 8.009075115760243e-05, "loss": 1.3573, "step": 73 }, { "epoch": 0.01237096167509508, "grad_norm": 4.740096569061279, "learning_rate": 7.938926261462366e-05, "loss": 1.3498, "step": 74 }, { "epoch": 0.012538136832866637, "grad_norm": 6.381992340087891, "learning_rate": 7.86788218175523e-05, "loss": 1.2942, "step": 75 }, { "epoch": 0.012705311990638192, "grad_norm": 4.995385646820068, "learning_rate": 7.795964517353735e-05, "loss": 1.205, "step": 76 }, { "epoch": 0.012872487148409746, "grad_norm": 4.214781284332275, "learning_rate": 7.723195175075136e-05, "loss": 1.1073, "step": 77 }, { "epoch": 0.013039662306181301, "grad_norm": 4.874910831451416, "learning_rate": 7.649596321166024e-05, "loss": 1.3798, "step": 78 }, { "epoch": 0.013206837463952856, "grad_norm": 5.055243492126465, "learning_rate": 7.575190374550272e-05, "loss": 1.2199, "step": 79 }, { "epoch": 0.013374012621724411, "grad_norm": 4.44370698928833, "learning_rate": 7.500000000000001e-05, "loss": 1.1394, "step": 80 }, { "epoch": 0.013541187779495967, "grad_norm": 5.529434680938721, "learning_rate": 7.424048101231686e-05, "loss": 1.2294, "step": 81 }, { "epoch": 0.013708362937267522, "grad_norm": 6.484023094177246, "learning_rate": 7.347357813929454e-05, "loss": 1.5444, "step": 82 }, { "epoch": 0.013875538095039077, "grad_norm": 6.249424934387207, "learning_rate": 7.269952498697734e-05, "loss": 1.5945, "step": 83 }, { "epoch": 0.014042713252810632, "grad_norm": 3.867103099822998, "learning_rate": 7.191855733945387e-05, "loss": 1.0598, "step": 84 }, { "epoch": 0.014209888410582187, "grad_norm": 7.214300632476807, "learning_rate": 7.113091308703498e-05, "loss": 1.549, "step": 85 }, { "epoch": 0.014377063568353743, "grad_norm": 7.799088478088379, "learning_rate": 7.033683215379002e-05, "loss": 1.3431, "step": 86 }, { "epoch": 0.014544238726125298, "grad_norm": 4.070684432983398, "learning_rate": 6.953655642446368e-05, "loss": 0.9098, "step": 87 }, { "epoch": 0.014711413883896853, "grad_norm": 7.896481990814209, "learning_rate": 6.873032967079561e-05, "loss": 2.171, "step": 88 }, { "epoch": 0.014878589041668408, "grad_norm": 6.196599960327148, "learning_rate": 6.7918397477265e-05, "loss": 2.0469, "step": 89 }, { "epoch": 0.015045764199439962, "grad_norm": 4.253558158874512, "learning_rate": 6.710100716628344e-05, "loss": 1.7969, "step": 90 }, { "epoch": 0.015212939357211519, "grad_norm": 4.0346784591674805, "learning_rate": 6.627840772285784e-05, "loss": 1.5023, "step": 91 }, { "epoch": 0.015380114514983074, "grad_norm": 4.52296257019043, "learning_rate": 6.545084971874738e-05, "loss": 1.6998, "step": 92 }, { "epoch": 0.015547289672754629, "grad_norm": 4.166619777679443, "learning_rate": 6.461858523613684e-05, "loss": 1.2437, "step": 93 }, { "epoch": 0.015714464830526183, "grad_norm": 3.6152470111846924, "learning_rate": 6.378186779084995e-05, "loss": 0.9291, "step": 94 }, { "epoch": 0.015881639988297738, "grad_norm": 5.102316379547119, "learning_rate": 6.294095225512603e-05, "loss": 1.4993, "step": 95 }, { "epoch": 0.016048815146069293, "grad_norm": 3.7798969745635986, "learning_rate": 6.209609477998338e-05, "loss": 1.4346, "step": 96 }, { "epoch": 0.016215990303840848, "grad_norm": 6.013340473175049, "learning_rate": 6.124755271719325e-05, "loss": 2.4886, "step": 97 }, { "epoch": 0.016383165461612403, "grad_norm": 4.5874924659729, "learning_rate": 6.0395584540887963e-05, "loss": 1.4179, "step": 98 }, { "epoch": 0.01655034061938396, "grad_norm": 3.6912167072296143, "learning_rate": 5.9540449768827246e-05, "loss": 1.276, "step": 99 }, { "epoch": 0.016717515777155516, "grad_norm": 5.08207368850708, "learning_rate": 5.868240888334653e-05, "loss": 2.1187, "step": 100 }, { "epoch": 0.016717515777155516, "eval_loss": 0.3840712308883667, "eval_runtime": 1180.8021, "eval_samples_per_second": 8.532, "eval_steps_per_second": 4.267, "step": 100 }, { "epoch": 0.01688469093492707, "grad_norm": 8.497836112976074, "learning_rate": 5.782172325201155e-05, "loss": 1.1939, "step": 101 }, { "epoch": 0.017051866092698625, "grad_norm": 7.477349758148193, "learning_rate": 5.695865504800327e-05, "loss": 2.0787, "step": 102 }, { "epoch": 0.01721904125047018, "grad_norm": 7.172429084777832, "learning_rate": 5.6093467170257374e-05, "loss": 1.6864, "step": 103 }, { "epoch": 0.017386216408241735, "grad_norm": 8.384211540222168, "learning_rate": 5.522642316338268e-05, "loss": 1.8249, "step": 104 }, { "epoch": 0.01755339156601329, "grad_norm": 5.726856231689453, "learning_rate": 5.435778713738292e-05, "loss": 1.5703, "step": 105 }, { "epoch": 0.017720566723784845, "grad_norm": 6.198292255401611, "learning_rate": 5.348782368720626e-05, "loss": 2.1273, "step": 106 }, { "epoch": 0.0178877418815564, "grad_norm": 4.357252597808838, "learning_rate": 5.26167978121472e-05, "loss": 1.1151, "step": 107 }, { "epoch": 0.018054917039327954, "grad_norm": 4.520287036895752, "learning_rate": 5.174497483512506e-05, "loss": 1.7011, "step": 108 }, { "epoch": 0.018222092197099513, "grad_norm": 3.671816349029541, "learning_rate": 5.0872620321864185e-05, "loss": 1.0517, "step": 109 }, { "epoch": 0.018389267354871067, "grad_norm": 5.582248687744141, "learning_rate": 5e-05, "loss": 1.6454, "step": 110 }, { "epoch": 0.018556442512642622, "grad_norm": 4.559932708740234, "learning_rate": 4.912737967813583e-05, "loss": 1.6997, "step": 111 }, { "epoch": 0.018723617670414177, "grad_norm": 4.87907600402832, "learning_rate": 4.825502516487497e-05, "loss": 1.6121, "step": 112 }, { "epoch": 0.018890792828185732, "grad_norm": 5.985311508178711, "learning_rate": 4.738320218785281e-05, "loss": 1.894, "step": 113 }, { "epoch": 0.019057967985957287, "grad_norm": 5.563694477081299, "learning_rate": 4.6512176312793736e-05, "loss": 1.4096, "step": 114 }, { "epoch": 0.01922514314372884, "grad_norm": 4.012616157531738, "learning_rate": 4.564221286261709e-05, "loss": 1.1671, "step": 115 }, { "epoch": 0.019392318301500396, "grad_norm": 5.445400238037109, "learning_rate": 4.477357683661734e-05, "loss": 1.1218, "step": 116 }, { "epoch": 0.01955949345927195, "grad_norm": 4.193072319030762, "learning_rate": 4.390653282974264e-05, "loss": 1.1577, "step": 117 }, { "epoch": 0.019726668617043506, "grad_norm": 4.184262752532959, "learning_rate": 4.3041344951996746e-05, "loss": 1.3176, "step": 118 }, { "epoch": 0.019893843774815064, "grad_norm": 3.5578956604003906, "learning_rate": 4.2178276747988446e-05, "loss": 1.0705, "step": 119 }, { "epoch": 0.02006101893258662, "grad_norm": 4.154637336730957, "learning_rate": 4.131759111665349e-05, "loss": 1.6145, "step": 120 }, { "epoch": 0.020228194090358174, "grad_norm": 8.214539527893066, "learning_rate": 4.045955023117276e-05, "loss": 1.3255, "step": 121 }, { "epoch": 0.02039536924812973, "grad_norm": 3.9702048301696777, "learning_rate": 3.960441545911204e-05, "loss": 1.3089, "step": 122 }, { "epoch": 0.020562544405901283, "grad_norm": 4.168085098266602, "learning_rate": 3.875244728280676e-05, "loss": 1.2921, "step": 123 }, { "epoch": 0.020729719563672838, "grad_norm": 4.158751487731934, "learning_rate": 3.790390522001662e-05, "loss": 1.1961, "step": 124 }, { "epoch": 0.020896894721444393, "grad_norm": 3.0978500843048096, "learning_rate": 3.705904774487396e-05, "loss": 1.027, "step": 125 }, { "epoch": 0.021064069879215948, "grad_norm": 4.032132625579834, "learning_rate": 3.6218132209150045e-05, "loss": 1.1975, "step": 126 }, { "epoch": 0.021231245036987503, "grad_norm": 3.2967450618743896, "learning_rate": 3.5381414763863166e-05, "loss": 1.1724, "step": 127 }, { "epoch": 0.021398420194759057, "grad_norm": 4.184244632720947, "learning_rate": 3.4549150281252636e-05, "loss": 1.0997, "step": 128 }, { "epoch": 0.021565595352530612, "grad_norm": 3.61553955078125, "learning_rate": 3.372159227714218e-05, "loss": 1.2168, "step": 129 }, { "epoch": 0.02173277051030217, "grad_norm": 2.7184441089630127, "learning_rate": 3.289899283371657e-05, "loss": 0.9683, "step": 130 }, { "epoch": 0.021899945668073725, "grad_norm": 2.7383294105529785, "learning_rate": 3.2081602522734986e-05, "loss": 0.7704, "step": 131 }, { "epoch": 0.02206712082584528, "grad_norm": 3.5409862995147705, "learning_rate": 3.12696703292044e-05, "loss": 0.9742, "step": 132 }, { "epoch": 0.022234295983616835, "grad_norm": 2.9273834228515625, "learning_rate": 3.046344357553632e-05, "loss": 1.0346, "step": 133 }, { "epoch": 0.02240147114138839, "grad_norm": 3.1921045780181885, "learning_rate": 2.9663167846209998e-05, "loss": 1.1151, "step": 134 }, { "epoch": 0.022568646299159945, "grad_norm": 3.4800987243652344, "learning_rate": 2.886908691296504e-05, "loss": 1.0467, "step": 135 }, { "epoch": 0.0227358214569315, "grad_norm": 3.3189215660095215, "learning_rate": 2.8081442660546125e-05, "loss": 1.1769, "step": 136 }, { "epoch": 0.022902996614703054, "grad_norm": 3.6555469036102295, "learning_rate": 2.7300475013022663e-05, "loss": 1.1048, "step": 137 }, { "epoch": 0.02307017177247461, "grad_norm": 3.6582908630371094, "learning_rate": 2.6526421860705473e-05, "loss": 1.1974, "step": 138 }, { "epoch": 0.023237346930246164, "grad_norm": 2.606125593185425, "learning_rate": 2.575951898768315e-05, "loss": 0.7513, "step": 139 }, { "epoch": 0.023404522088017722, "grad_norm": 3.5903327465057373, "learning_rate": 2.500000000000001e-05, "loss": 1.2009, "step": 140 }, { "epoch": 0.023571697245789277, "grad_norm": 4.257547855377197, "learning_rate": 2.4248096254497288e-05, "loss": 1.7713, "step": 141 }, { "epoch": 0.02373887240356083, "grad_norm": 3.8268728256225586, "learning_rate": 2.350403678833976e-05, "loss": 1.3315, "step": 142 }, { "epoch": 0.023906047561332387, "grad_norm": 3.9802815914154053, "learning_rate": 2.2768048249248648e-05, "loss": 1.531, "step": 143 }, { "epoch": 0.02407322271910394, "grad_norm": 3.5218703746795654, "learning_rate": 2.2040354826462668e-05, "loss": 0.9738, "step": 144 }, { "epoch": 0.024240397876875496, "grad_norm": 3.5325393676757812, "learning_rate": 2.132117818244771e-05, "loss": 1.1883, "step": 145 }, { "epoch": 0.02440757303464705, "grad_norm": 2.8327746391296387, "learning_rate": 2.061073738537635e-05, "loss": 1.1112, "step": 146 }, { "epoch": 0.024574748192418606, "grad_norm": 3.83736252784729, "learning_rate": 1.9909248842397584e-05, "loss": 1.6377, "step": 147 }, { "epoch": 0.02474192335019016, "grad_norm": 3.671001672744751, "learning_rate": 1.9216926233717085e-05, "loss": 1.3769, "step": 148 }, { "epoch": 0.024909098507961715, "grad_norm": 5.089761257171631, "learning_rate": 1.8533980447508137e-05, "loss": 2.1632, "step": 149 }, { "epoch": 0.025076273665733274, "grad_norm": 11.317509651184082, "learning_rate": 1.7860619515673033e-05, "loss": 1.8115, "step": 150 }, { "epoch": 0.025076273665733274, "eval_loss": 0.33944934606552124, "eval_runtime": 1179.9193, "eval_samples_per_second": 8.539, "eval_steps_per_second": 4.27, "step": 150 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.4929696483311616e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }