{ "best_metric": 0.04182600975036621, "best_model_checkpoint": "miner_id_24/checkpoint-150", "epoch": 3.011086474501109, "eval_steps": 50, "global_step": 169, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.017738359201773836, "grad_norm": 6.561187744140625, "learning_rate": 2.333333333333333e-06, "loss": 2.2261, "step": 1 }, { "epoch": 0.017738359201773836, "eval_loss": 3.175419807434082, "eval_runtime": 3.3164, "eval_samples_per_second": 28.645, "eval_steps_per_second": 7.237, "step": 1 }, { "epoch": 0.03547671840354767, "grad_norm": 7.893179416656494, "learning_rate": 4.666666666666666e-06, "loss": 2.7041, "step": 2 }, { "epoch": 0.05321507760532151, "grad_norm": 7.613998889923096, "learning_rate": 7e-06, "loss": 2.6962, "step": 3 }, { "epoch": 0.07095343680709534, "grad_norm": 7.935632228851318, "learning_rate": 9.333333333333333e-06, "loss": 2.818, "step": 4 }, { "epoch": 0.08869179600886919, "grad_norm": 8.119084358215332, "learning_rate": 1.1666666666666665e-05, "loss": 2.7815, "step": 5 }, { "epoch": 0.10643015521064302, "grad_norm": 8.433510780334473, "learning_rate": 1.4e-05, "loss": 2.7249, "step": 6 }, { "epoch": 0.12416851441241686, "grad_norm": 8.061437606811523, "learning_rate": 1.633333333333333e-05, "loss": 2.6852, "step": 7 }, { "epoch": 0.1419068736141907, "grad_norm": 8.439359664916992, "learning_rate": 1.8666666666666665e-05, "loss": 2.8134, "step": 8 }, { "epoch": 0.15964523281596452, "grad_norm": 7.367957592010498, "learning_rate": 2.1e-05, "loss": 2.6652, "step": 9 }, { "epoch": 0.17738359201773837, "grad_norm": 8.145227432250977, "learning_rate": 2.333333333333333e-05, "loss": 2.635, "step": 10 }, { "epoch": 0.1951219512195122, "grad_norm": 7.4146342277526855, "learning_rate": 2.5666666666666663e-05, "loss": 2.5253, "step": 11 }, { "epoch": 0.21286031042128603, "grad_norm": 6.739197731018066, "learning_rate": 2.8e-05, "loss": 2.3472, "step": 12 }, { "epoch": 0.23059866962305986, "grad_norm": 7.35270357131958, "learning_rate": 3.0333333333333333e-05, "loss": 2.2399, "step": 13 }, { "epoch": 0.24833702882483372, "grad_norm": 7.331146717071533, "learning_rate": 3.266666666666666e-05, "loss": 2.1825, "step": 14 }, { "epoch": 0.2660753880266075, "grad_norm": 3.474020481109619, "learning_rate": 3.5e-05, "loss": 1.1274, "step": 15 }, { "epoch": 0.2838137472283814, "grad_norm": 3.6958611011505127, "learning_rate": 3.733333333333333e-05, "loss": 1.1052, "step": 16 }, { "epoch": 0.30155210643015523, "grad_norm": 3.0846071243286133, "learning_rate": 3.9666666666666664e-05, "loss": 0.9945, "step": 17 }, { "epoch": 0.31929046563192903, "grad_norm": 3.12406063079834, "learning_rate": 4.2e-05, "loss": 0.852, "step": 18 }, { "epoch": 0.3370288248337029, "grad_norm": 3.294522523880005, "learning_rate": 4.4333333333333324e-05, "loss": 0.9031, "step": 19 }, { "epoch": 0.35476718403547675, "grad_norm": 3.176948308944702, "learning_rate": 4.666666666666666e-05, "loss": 0.7557, "step": 20 }, { "epoch": 0.37250554323725055, "grad_norm": 2.9709253311157227, "learning_rate": 4.899999999999999e-05, "loss": 0.5902, "step": 21 }, { "epoch": 0.3902439024390244, "grad_norm": 3.4704699516296387, "learning_rate": 5.1333333333333325e-05, "loss": 0.5916, "step": 22 }, { "epoch": 0.4079822616407982, "grad_norm": 2.7083094120025635, "learning_rate": 5.3666666666666666e-05, "loss": 0.417, "step": 23 }, { "epoch": 0.42572062084257206, "grad_norm": 2.854451894760132, "learning_rate": 5.6e-05, "loss": 0.4418, "step": 24 }, { "epoch": 0.4434589800443459, "grad_norm": 2.5099732875823975, "learning_rate": 5.833333333333333e-05, "loss": 0.3734, "step": 25 }, { "epoch": 0.4611973392461197, "grad_norm": 2.644742012023926, "learning_rate": 6.0666666666666666e-05, "loss": 0.4208, "step": 26 }, { "epoch": 0.4789356984478936, "grad_norm": 2.3744261264801025, "learning_rate": 6.3e-05, "loss": 0.3257, "step": 27 }, { "epoch": 0.49667405764966743, "grad_norm": 3.0435965061187744, "learning_rate": 6.533333333333333e-05, "loss": 0.3237, "step": 28 }, { "epoch": 0.5144124168514412, "grad_norm": 3.3591420650482178, "learning_rate": 6.766666666666667e-05, "loss": 0.3175, "step": 29 }, { "epoch": 0.532150776053215, "grad_norm": 1.8017473220825195, "learning_rate": 7e-05, "loss": 0.2331, "step": 30 }, { "epoch": 0.549889135254989, "grad_norm": 1.9260958433151245, "learning_rate": 6.99910609841734e-05, "loss": 0.1805, "step": 31 }, { "epoch": 0.5676274944567627, "grad_norm": 1.9353176355361938, "learning_rate": 6.996424850275102e-05, "loss": 0.2071, "step": 32 }, { "epoch": 0.5853658536585366, "grad_norm": 1.8177722692489624, "learning_rate": 6.991957625157259e-05, "loss": 0.145, "step": 33 }, { "epoch": 0.6031042128603105, "grad_norm": 1.668810486793518, "learning_rate": 6.985706704926442e-05, "loss": 0.149, "step": 34 }, { "epoch": 0.6208425720620843, "grad_norm": 1.7442617416381836, "learning_rate": 6.977675282558359e-05, "loss": 0.1518, "step": 35 }, { "epoch": 0.6385809312638581, "grad_norm": 1.7981549501419067, "learning_rate": 6.967867460510816e-05, "loss": 0.1507, "step": 36 }, { "epoch": 0.656319290465632, "grad_norm": 1.5039801597595215, "learning_rate": 6.956288248628188e-05, "loss": 0.1365, "step": 37 }, { "epoch": 0.6740576496674058, "grad_norm": 1.3298735618591309, "learning_rate": 6.942943561582376e-05, "loss": 0.1016, "step": 38 }, { "epoch": 0.6917960088691796, "grad_norm": 1.396034836769104, "learning_rate": 6.927840215851592e-05, "loss": 0.1319, "step": 39 }, { "epoch": 0.7095343680709535, "grad_norm": 1.561239242553711, "learning_rate": 6.910985926238491e-05, "loss": 0.1257, "step": 40 }, { "epoch": 0.7272727272727273, "grad_norm": 1.8848440647125244, "learning_rate": 6.892389301929454e-05, "loss": 0.1248, "step": 41 }, { "epoch": 0.7450110864745011, "grad_norm": 1.743732213973999, "learning_rate": 6.872059842096996e-05, "loss": 0.1659, "step": 42 }, { "epoch": 0.7627494456762749, "grad_norm": 1.154229760169983, "learning_rate": 6.850007931047583e-05, "loss": 0.103, "step": 43 }, { "epoch": 0.7804878048780488, "grad_norm": 1.1713950634002686, "learning_rate": 6.826244832917323e-05, "loss": 0.1001, "step": 44 }, { "epoch": 0.7982261640798226, "grad_norm": 1.082777738571167, "learning_rate": 6.800782685918231e-05, "loss": 0.0971, "step": 45 }, { "epoch": 0.8159645232815964, "grad_norm": 1.0735238790512085, "learning_rate": 6.773634496138024e-05, "loss": 0.0968, "step": 46 }, { "epoch": 0.8337028824833703, "grad_norm": 1.6097187995910645, "learning_rate": 6.744814130896591e-05, "loss": 0.1091, "step": 47 }, { "epoch": 0.8514412416851441, "grad_norm": 1.0870099067687988, "learning_rate": 6.714336311662564e-05, "loss": 0.0804, "step": 48 }, { "epoch": 0.8691796008869179, "grad_norm": 1.2098581790924072, "learning_rate": 6.68221660653357e-05, "loss": 0.1019, "step": 49 }, { "epoch": 0.8869179600886918, "grad_norm": 1.727752685546875, "learning_rate": 6.648471422284036e-05, "loss": 0.1312, "step": 50 }, { "epoch": 0.8869179600886918, "eval_loss": 0.10260897129774094, "eval_runtime": 3.3796, "eval_samples_per_second": 28.11, "eval_steps_per_second": 7.102, "step": 50 }, { "epoch": 0.9046563192904656, "grad_norm": 1.1542454957962036, "learning_rate": 6.613117995984598e-05, "loss": 0.0781, "step": 51 }, { "epoch": 0.9223946784922394, "grad_norm": 1.1427186727523804, "learning_rate": 6.57617438619738e-05, "loss": 0.0815, "step": 52 }, { "epoch": 0.9401330376940134, "grad_norm": 1.092106819152832, "learning_rate": 6.537659463751674e-05, "loss": 0.0713, "step": 53 }, { "epoch": 0.9578713968957872, "grad_norm": 1.0994936227798462, "learning_rate": 6.497592902104696e-05, "loss": 0.0679, "step": 54 }, { "epoch": 0.975609756097561, "grad_norm": 1.2781437635421753, "learning_rate": 6.455995167292371e-05, "loss": 0.1022, "step": 55 }, { "epoch": 0.9933481152993349, "grad_norm": 1.2600449323654175, "learning_rate": 6.41288750747526e-05, "loss": 0.0481, "step": 56 }, { "epoch": 1.0155210643015522, "grad_norm": 2.7138006687164307, "learning_rate": 6.368291942084986e-05, "loss": 0.1317, "step": 57 }, { "epoch": 1.033259423503326, "grad_norm": 1.1430145502090454, "learning_rate": 6.32223125057668e-05, "loss": 0.0807, "step": 58 }, { "epoch": 1.0509977827050998, "grad_norm": 0.5296788811683655, "learning_rate": 6.274728960793219e-05, "loss": 0.033, "step": 59 }, { "epoch": 1.0687361419068737, "grad_norm": 0.7590862512588501, "learning_rate": 6.225809336947186e-05, "loss": 0.0511, "step": 60 }, { "epoch": 1.0864745011086474, "grad_norm": 1.065050482749939, "learning_rate": 6.175497367226683e-05, "loss": 0.0569, "step": 61 }, { "epoch": 1.1042128603104213, "grad_norm": 0.9178889393806458, "learning_rate": 6.123818751031344e-05, "loss": 0.0565, "step": 62 }, { "epoch": 1.1219512195121952, "grad_norm": 0.8015963435173035, "learning_rate": 6.0707998858450596e-05, "loss": 0.0484, "step": 63 }, { "epoch": 1.139689578713969, "grad_norm": 1.0655574798583984, "learning_rate": 6.016467853752114e-05, "loss": 0.0658, "step": 64 }, { "epoch": 1.1574279379157428, "grad_norm": 0.7798250317573547, "learning_rate": 5.960850407603639e-05, "loss": 0.0446, "step": 65 }, { "epoch": 1.1751662971175167, "grad_norm": 0.8381087183952332, "learning_rate": 5.903975956841425e-05, "loss": 0.0449, "step": 66 }, { "epoch": 1.1929046563192904, "grad_norm": 0.9560292363166809, "learning_rate": 5.845873552986357e-05, "loss": 0.0374, "step": 67 }, { "epoch": 1.2106430155210643, "grad_norm": 0.6753312945365906, "learning_rate": 5.7865728747988714e-05, "loss": 0.0306, "step": 68 }, { "epoch": 1.2283813747228383, "grad_norm": 1.1996972560882568, "learning_rate": 5.7261042131190165e-05, "loss": 0.0735, "step": 69 }, { "epoch": 1.246119733924612, "grad_norm": 0.5936779975891113, "learning_rate": 5.664498455393865e-05, "loss": 0.0288, "step": 70 }, { "epoch": 1.2638580931263859, "grad_norm": 0.7936895489692688, "learning_rate": 5.6017870699001765e-05, "loss": 0.0486, "step": 71 }, { "epoch": 1.2815964523281598, "grad_norm": 1.242922067642212, "learning_rate": 5.538002089670377e-05, "loss": 0.0705, "step": 72 }, { "epoch": 1.2993348115299335, "grad_norm": 0.9512969851493835, "learning_rate": 5.473176096130052e-05, "loss": 0.0558, "step": 73 }, { "epoch": 1.3170731707317074, "grad_norm": 1.130399465560913, "learning_rate": 5.407342202455331e-05, "loss": 0.064, "step": 74 }, { "epoch": 1.3348115299334813, "grad_norm": 0.9151156544685364, "learning_rate": 5.3405340366586404e-05, "loss": 0.0618, "step": 75 }, { "epoch": 1.352549889135255, "grad_norm": 0.7164838910102844, "learning_rate": 5.27278572441149e-05, "loss": 0.0393, "step": 76 }, { "epoch": 1.370288248337029, "grad_norm": 0.8539807200431824, "learning_rate": 5.204131871613044e-05, "loss": 0.0272, "step": 77 }, { "epoch": 1.3880266075388026, "grad_norm": 0.7076650261878967, "learning_rate": 5.1346075467134026e-05, "loss": 0.0348, "step": 78 }, { "epoch": 1.4057649667405765, "grad_norm": 0.7428536415100098, "learning_rate": 5.064248262800598e-05, "loss": 0.0276, "step": 79 }, { "epoch": 1.4235033259423504, "grad_norm": 0.7132293581962585, "learning_rate": 4.993089959460487e-05, "loss": 0.0366, "step": 80 }, { "epoch": 1.441241685144124, "grad_norm": 0.8481555581092834, "learning_rate": 4.921168984418769e-05, "loss": 0.0383, "step": 81 }, { "epoch": 1.458980044345898, "grad_norm": 0.5887975692749023, "learning_rate": 4.8485220749745375e-05, "loss": 0.0187, "step": 82 }, { "epoch": 1.476718403547672, "grad_norm": 0.9297077059745789, "learning_rate": 4.775186339234836e-05, "loss": 0.0354, "step": 83 }, { "epoch": 1.4944567627494456, "grad_norm": 0.6232367753982544, "learning_rate": 4.7011992371598065e-05, "loss": 0.0209, "step": 84 }, { "epoch": 1.5121951219512195, "grad_norm": 1.0839964151382446, "learning_rate": 4.626598561428101e-05, "loss": 0.0557, "step": 85 }, { "epoch": 1.5299334811529932, "grad_norm": 1.3176695108413696, "learning_rate": 4.551422418132348e-05, "loss": 0.0794, "step": 86 }, { "epoch": 1.5476718403547673, "grad_norm": 0.6819478273391724, "learning_rate": 4.4757092073145303e-05, "loss": 0.0305, "step": 87 }, { "epoch": 1.565410199556541, "grad_norm": 0.6436930298805237, "learning_rate": 4.399497603351209e-05, "loss": 0.0271, "step": 88 }, { "epoch": 1.5831485587583147, "grad_norm": 0.8760166168212891, "learning_rate": 4.322826535198612e-05, "loss": 0.0426, "step": 89 }, { "epoch": 1.6008869179600886, "grad_norm": 0.8322381377220154, "learning_rate": 4.245735166507691e-05, "loss": 0.0414, "step": 90 }, { "epoch": 1.6186252771618626, "grad_norm": 1.2027863264083862, "learning_rate": 4.1682628756192915e-05, "loss": 0.0447, "step": 91 }, { "epoch": 1.6363636363636362, "grad_norm": 0.8682803511619568, "learning_rate": 4.090449235449665e-05, "loss": 0.0396, "step": 92 }, { "epoch": 1.6541019955654102, "grad_norm": 0.8628066182136536, "learning_rate": 4.012333993276583e-05, "loss": 0.0409, "step": 93 }, { "epoch": 1.671840354767184, "grad_norm": 0.6862711906433105, "learning_rate": 3.933957050436392e-05, "loss": 0.022, "step": 94 }, { "epoch": 1.6895787139689578, "grad_norm": 1.174164891242981, "learning_rate": 3.855358441942378e-05, "loss": 0.0481, "step": 95 }, { "epoch": 1.7073170731707317, "grad_norm": 0.7916390895843506, "learning_rate": 3.7765783160348416e-05, "loss": 0.0384, "step": 96 }, { "epoch": 1.7250554323725056, "grad_norm": 0.7421106696128845, "learning_rate": 3.697656913673344e-05, "loss": 0.028, "step": 97 }, { "epoch": 1.7427937915742793, "grad_norm": 1.0858561992645264, "learning_rate": 3.618634547981586e-05, "loss": 0.0238, "step": 98 }, { "epoch": 1.7605321507760532, "grad_norm": 0.7256781458854675, "learning_rate": 3.5395515836554294e-05, "loss": 0.0389, "step": 99 }, { "epoch": 1.778270509977827, "grad_norm": 0.559615969657898, "learning_rate": 3.4604484163445714e-05, "loss": 0.0271, "step": 100 }, { "epoch": 1.778270509977827, "eval_loss": 0.046199291944503784, "eval_runtime": 3.3918, "eval_samples_per_second": 28.009, "eval_steps_per_second": 7.076, "step": 100 }, { "epoch": 1.7960088691796008, "grad_norm": 0.8190221786499023, "learning_rate": 3.381365452018413e-05, "loss": 0.0369, "step": 101 }, { "epoch": 1.8137472283813747, "grad_norm": 0.9738173484802246, "learning_rate": 3.302343086326655e-05, "loss": 0.0358, "step": 102 }, { "epoch": 1.8314855875831486, "grad_norm": 0.39320364594459534, "learning_rate": 3.223421683965158e-05, "loss": 0.0207, "step": 103 }, { "epoch": 1.8492239467849223, "grad_norm": 0.848658561706543, "learning_rate": 3.1446415580576215e-05, "loss": 0.0383, "step": 104 }, { "epoch": 1.8669623059866962, "grad_norm": 0.9253813624382019, "learning_rate": 3.066042949563608e-05, "loss": 0.0296, "step": 105 }, { "epoch": 1.8847006651884701, "grad_norm": 0.615013837814331, "learning_rate": 2.9876660067234164e-05, "loss": 0.0267, "step": 106 }, { "epoch": 1.9024390243902438, "grad_norm": 0.9796949625015259, "learning_rate": 2.9095507645503347e-05, "loss": 0.0402, "step": 107 }, { "epoch": 1.9201773835920177, "grad_norm": 0.7294319868087769, "learning_rate": 2.8317371243807085e-05, "loss": 0.0325, "step": 108 }, { "epoch": 1.9379157427937916, "grad_norm": 0.7841516137123108, "learning_rate": 2.754264833492309e-05, "loss": 0.0313, "step": 109 }, { "epoch": 1.9556541019955653, "grad_norm": 1.2115143537521362, "learning_rate": 2.677173464801388e-05, "loss": 0.0477, "step": 110 }, { "epoch": 1.9733924611973392, "grad_norm": 0.7074465155601501, "learning_rate": 2.60050239664879e-05, "loss": 0.032, "step": 111 }, { "epoch": 1.9911308203991132, "grad_norm": 0.8888975381851196, "learning_rate": 2.524290792685469e-05, "loss": 0.0332, "step": 112 }, { "epoch": 2.0133037694013303, "grad_norm": 2.435009479522705, "learning_rate": 2.4485775818676527e-05, "loss": 0.0736, "step": 113 }, { "epoch": 2.0310421286031044, "grad_norm": 0.46157458424568176, "learning_rate": 2.373401438571899e-05, "loss": 0.0257, "step": 114 }, { "epoch": 2.048780487804878, "grad_norm": 0.36576923727989197, "learning_rate": 2.298800762840193e-05, "loss": 0.014, "step": 115 }, { "epoch": 2.066518847006652, "grad_norm": 0.6250067949295044, "learning_rate": 2.2248136607651628e-05, "loss": 0.0211, "step": 116 }, { "epoch": 2.084257206208426, "grad_norm": 0.415303111076355, "learning_rate": 2.151477925025463e-05, "loss": 0.0163, "step": 117 }, { "epoch": 2.1019955654101996, "grad_norm": 0.2299083173274994, "learning_rate": 2.0788310155812323e-05, "loss": 0.0097, "step": 118 }, { "epoch": 2.1197339246119733, "grad_norm": 0.3910021185874939, "learning_rate": 2.0069100405395126e-05, "loss": 0.0115, "step": 119 }, { "epoch": 2.1374722838137474, "grad_norm": 0.3724380135536194, "learning_rate": 1.9357517371994013e-05, "loss": 0.0116, "step": 120 }, { "epoch": 2.155210643015521, "grad_norm": 0.45361191034317017, "learning_rate": 1.8653924532865978e-05, "loss": 0.0159, "step": 121 }, { "epoch": 2.172949002217295, "grad_norm": 0.2910288870334625, "learning_rate": 1.7958681283869553e-05, "loss": 0.0082, "step": 122 }, { "epoch": 2.1906873614190685, "grad_norm": 0.3233855366706848, "learning_rate": 1.72721427558851e-05, "loss": 0.0125, "step": 123 }, { "epoch": 2.2084257206208426, "grad_norm": 0.3714958727359772, "learning_rate": 1.6594659633413593e-05, "loss": 0.0111, "step": 124 }, { "epoch": 2.2261640798226163, "grad_norm": 0.5783699154853821, "learning_rate": 1.5926577975446687e-05, "loss": 0.0199, "step": 125 }, { "epoch": 2.2439024390243905, "grad_norm": 0.32942745089530945, "learning_rate": 1.5268239038699476e-05, "loss": 0.0077, "step": 126 }, { "epoch": 2.261640798226164, "grad_norm": 0.3331102430820465, "learning_rate": 1.4619979103296232e-05, "loss": 0.0162, "step": 127 }, { "epoch": 2.279379157427938, "grad_norm": 0.49725037813186646, "learning_rate": 1.3982129300998235e-05, "loss": 0.0279, "step": 128 }, { "epoch": 2.2971175166297115, "grad_norm": 0.37144234776496887, "learning_rate": 1.335501544606135e-05, "loss": 0.0091, "step": 129 }, { "epoch": 2.3148558758314857, "grad_norm": 0.4682086706161499, "learning_rate": 1.2738957868809829e-05, "loss": 0.0131, "step": 130 }, { "epoch": 2.3325942350332594, "grad_norm": 0.36083656549453735, "learning_rate": 1.2134271252011281e-05, "loss": 0.0129, "step": 131 }, { "epoch": 2.3503325942350335, "grad_norm": 0.4948185980319977, "learning_rate": 1.1541264470136426e-05, "loss": 0.0183, "step": 132 }, { "epoch": 2.368070953436807, "grad_norm": 0.3490493893623352, "learning_rate": 1.0960240431585748e-05, "loss": 0.0095, "step": 133 }, { "epoch": 2.385809312638581, "grad_norm": 0.6870535612106323, "learning_rate": 1.0391495923963608e-05, "loss": 0.0124, "step": 134 }, { "epoch": 2.4035476718403546, "grad_norm": 0.26921790838241577, "learning_rate": 9.835321462478854e-06, "loss": 0.0049, "step": 135 }, { "epoch": 2.4212860310421287, "grad_norm": 0.3638523519039154, "learning_rate": 9.292001141549393e-06, "loss": 0.0076, "step": 136 }, { "epoch": 2.4390243902439024, "grad_norm": 0.49972501397132874, "learning_rate": 8.761812489686555e-06, "loss": 0.0143, "step": 137 }, { "epoch": 2.4567627494456765, "grad_norm": 0.6110863089561462, "learning_rate": 8.245026327733162e-06, "loss": 0.0164, "step": 138 }, { "epoch": 2.47450110864745, "grad_norm": 0.6249895095825195, "learning_rate": 7.741906630528142e-06, "loss": 0.0249, "step": 139 }, { "epoch": 2.492239467849224, "grad_norm": 0.2525762915611267, "learning_rate": 7.2527103920678165e-06, "loss": 0.0051, "step": 140 }, { "epoch": 2.5099778270509976, "grad_norm": 0.41319364309310913, "learning_rate": 6.777687494233199e-06, "loss": 0.0172, "step": 141 }, { "epoch": 2.5277161862527717, "grad_norm": 0.7748384475708008, "learning_rate": 6.317080579150139e-06, "loss": 0.0346, "step": 142 }, { "epoch": 2.5454545454545454, "grad_norm": 0.5955628156661987, "learning_rate": 5.871124925247383e-06, "loss": 0.0191, "step": 143 }, { "epoch": 2.5631929046563195, "grad_norm": 0.40232712030410767, "learning_rate": 5.440048327076285e-06, "loss": 0.0105, "step": 144 }, { "epoch": 2.5809312638580932, "grad_norm": 0.31075894832611084, "learning_rate": 5.024070978953045e-06, "loss": 0.0114, "step": 145 }, { "epoch": 2.598669623059867, "grad_norm": 0.40698862075805664, "learning_rate": 4.623405362483257e-06, "loss": 0.0166, "step": 146 }, { "epoch": 2.6164079822616406, "grad_norm": 0.33942076563835144, "learning_rate": 4.238256138026202e-06, "loss": 0.0114, "step": 147 }, { "epoch": 2.6341463414634148, "grad_norm": 0.22500275075435638, "learning_rate": 3.8688200401540185e-06, "loss": 0.0062, "step": 148 }, { "epoch": 2.6518847006651884, "grad_norm": 1.124889850616455, "learning_rate": 3.515285777159631e-06, "loss": 0.0108, "step": 149 }, { "epoch": 2.6696230598669626, "grad_norm": 0.30653268098831177, "learning_rate": 3.177833934664312e-06, "loss": 0.0055, "step": 150 }, { "epoch": 2.6696230598669626, "eval_loss": 0.04182600975036621, "eval_runtime": 3.3815, "eval_samples_per_second": 28.094, "eval_steps_per_second": 7.098, "step": 150 }, { "epoch": 2.6873614190687363, "grad_norm": 0.46853962540626526, "learning_rate": 2.85663688337436e-06, "loss": 0.0156, "step": 151 }, { "epoch": 2.70509977827051, "grad_norm": 0.45606735348701477, "learning_rate": 2.551858691034086e-06, "loss": 0.0086, "step": 152 }, { "epoch": 2.7228381374722836, "grad_norm": 0.39163070917129517, "learning_rate": 2.26365503861976e-06, "loss": 0.0061, "step": 153 }, { "epoch": 2.740576496674058, "grad_norm": 0.22401034832000732, "learning_rate": 1.992173140817682e-06, "loss": 0.0048, "step": 154 }, { "epoch": 2.7583148558758315, "grad_norm": 0.3343549370765686, "learning_rate": 1.737551670826774e-06, "loss": 0.0111, "step": 155 }, { "epoch": 2.776053215077605, "grad_norm": 0.33819764852523804, "learning_rate": 1.49992068952417e-06, "loss": 0.0101, "step": 156 }, { "epoch": 2.7937915742793793, "grad_norm": 0.3976474404335022, "learning_rate": 1.27940157903004e-06, "loss": 0.0145, "step": 157 }, { "epoch": 2.811529933481153, "grad_norm": 0.3741917014122009, "learning_rate": 1.0761069807054472e-06, "loss": 0.0116, "step": 158 }, { "epoch": 2.8292682926829267, "grad_norm": 0.6680158972740173, "learning_rate": 8.901407376150799e-07, "loss": 0.0174, "step": 159 }, { "epoch": 2.847006651884701, "grad_norm": 0.24637554585933685, "learning_rate": 7.215978414840828e-07, "loss": 0.0068, "step": 160 }, { "epoch": 2.8647450110864745, "grad_norm": 0.5326627492904663, "learning_rate": 5.705643841762314e-07, "loss": 0.0114, "step": 161 }, { "epoch": 2.882483370288248, "grad_norm": 0.42392855882644653, "learning_rate": 4.371175137181088e-07, "loss": 0.0097, "step": 162 }, { "epoch": 2.9002217294900223, "grad_norm": 0.5340198874473572, "learning_rate": 3.213253948918315e-07, "loss": 0.016, "step": 163 }, { "epoch": 2.917960088691796, "grad_norm": 1.3395206928253174, "learning_rate": 2.232471744164116e-07, "loss": 0.0197, "step": 164 }, { "epoch": 2.9356984478935697, "grad_norm": 0.18678541481494904, "learning_rate": 1.4293295073557144e-07, "loss": 0.0046, "step": 165 }, { "epoch": 2.953436807095344, "grad_norm": 0.3585246801376343, "learning_rate": 8.042374842740341e-08, "loss": 0.0088, "step": 166 }, { "epoch": 2.9711751662971175, "grad_norm": 0.2283589392900467, "learning_rate": 3.575149724897308e-08, "loss": 0.0052, "step": 167 }, { "epoch": 2.988913525498891, "grad_norm": 0.37096989154815674, "learning_rate": 8.939015826586738e-09, "loss": 0.007, "step": 168 }, { "epoch": 3.011086474501109, "grad_norm": 1.0814924240112305, "learning_rate": 0.0, "loss": 0.0184, "step": 169 } ], "logging_steps": 1, "max_steps": 169, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 4, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.715484471191142e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }