{ "best_metric": 0.00019854793208651245, "best_model_checkpoint": "miner_id_24/checkpoint-200", "epoch": 2.1390374331550803, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0106951871657754, "grad_norm": 0.4610372483730316, "learning_rate": 5e-06, "loss": 0.0758, "step": 1 }, { "epoch": 0.0106951871657754, "eval_loss": 0.11069836467504501, "eval_runtime": 11.5933, "eval_samples_per_second": 13.629, "eval_steps_per_second": 3.45, "step": 1 }, { "epoch": 0.0213903743315508, "grad_norm": 0.5667491555213928, "learning_rate": 1e-05, "loss": 0.0933, "step": 2 }, { "epoch": 0.03208556149732621, "grad_norm": 0.5521395802497864, "learning_rate": 1.5e-05, "loss": 0.0874, "step": 3 }, { "epoch": 0.0427807486631016, "grad_norm": 0.4779723584651947, "learning_rate": 2e-05, "loss": 0.0826, "step": 4 }, { "epoch": 0.053475935828877004, "grad_norm": 0.48251789808273315, "learning_rate": 2.5e-05, "loss": 0.0755, "step": 5 }, { "epoch": 0.06417112299465241, "grad_norm": 0.3320460319519043, "learning_rate": 3e-05, "loss": 0.059, "step": 6 }, { "epoch": 0.0748663101604278, "grad_norm": 0.25056010484695435, "learning_rate": 3.5e-05, "loss": 0.0409, "step": 7 }, { "epoch": 0.0855614973262032, "grad_norm": 0.21050989627838135, "learning_rate": 4e-05, "loss": 0.0306, "step": 8 }, { "epoch": 0.0962566844919786, "grad_norm": 0.1545867621898651, "learning_rate": 4.5e-05, "loss": 0.0235, "step": 9 }, { "epoch": 0.10695187165775401, "grad_norm": 0.16842831671237946, "learning_rate": 5e-05, "loss": 0.021, "step": 10 }, { "epoch": 0.11764705882352941, "grad_norm": 0.19742771983146667, "learning_rate": 4.9996582624811725e-05, "loss": 0.0182, "step": 11 }, { "epoch": 0.12834224598930483, "grad_norm": 0.1528479903936386, "learning_rate": 4.9986331433523156e-05, "loss": 0.0131, "step": 12 }, { "epoch": 0.13903743315508021, "grad_norm": 0.07175078988075256, "learning_rate": 4.996924922870762e-05, "loss": 0.01, "step": 13 }, { "epoch": 0.1497326203208556, "grad_norm": 0.11119536310434341, "learning_rate": 4.994534068046937e-05, "loss": 0.0108, "step": 14 }, { "epoch": 0.16042780748663102, "grad_norm": 0.07921694219112396, "learning_rate": 4.991461232516675e-05, "loss": 0.0072, "step": 15 }, { "epoch": 0.1711229946524064, "grad_norm": 0.06734706461429596, "learning_rate": 4.9877072563625285e-05, "loss": 0.003, "step": 16 }, { "epoch": 0.18181818181818182, "grad_norm": 0.06462294608354568, "learning_rate": 4.9832731658840956e-05, "loss": 0.0033, "step": 17 }, { "epoch": 0.1925133689839572, "grad_norm": 0.04150952398777008, "learning_rate": 4.978160173317438e-05, "loss": 0.003, "step": 18 }, { "epoch": 0.20320855614973263, "grad_norm": 0.07595030963420868, "learning_rate": 4.972369676503672e-05, "loss": 0.0047, "step": 19 }, { "epoch": 0.21390374331550802, "grad_norm": 0.06094742938876152, "learning_rate": 4.965903258506806e-05, "loss": 0.0036, "step": 20 }, { "epoch": 0.22459893048128343, "grad_norm": 0.0497884601354599, "learning_rate": 4.958762687180956e-05, "loss": 0.0021, "step": 21 }, { "epoch": 0.23529411764705882, "grad_norm": 0.04656745493412018, "learning_rate": 4.9509499146870236e-05, "loss": 0.0019, "step": 22 }, { "epoch": 0.24598930481283424, "grad_norm": 0.11492959409952164, "learning_rate": 4.9424670769589984e-05, "loss": 0.0042, "step": 23 }, { "epoch": 0.25668449197860965, "grad_norm": 0.02536396123468876, "learning_rate": 4.933316493120015e-05, "loss": 0.0015, "step": 24 }, { "epoch": 0.26737967914438504, "grad_norm": 0.025364691391587257, "learning_rate": 4.923500664848326e-05, "loss": 0.0017, "step": 25 }, { "epoch": 0.27807486631016043, "grad_norm": 0.049785222858190536, "learning_rate": 4.913022275693372e-05, "loss": 0.0022, "step": 26 }, { "epoch": 0.2887700534759358, "grad_norm": 0.011821591295301914, "learning_rate": 4.901884190342121e-05, "loss": 0.001, "step": 27 }, { "epoch": 0.2994652406417112, "grad_norm": 0.07001033425331116, "learning_rate": 4.8900894538358944e-05, "loss": 0.0016, "step": 28 }, { "epoch": 0.31016042780748665, "grad_norm": 0.030404146760702133, "learning_rate": 4.877641290737884e-05, "loss": 0.0035, "step": 29 }, { "epoch": 0.32085561497326204, "grad_norm": 0.015079368837177753, "learning_rate": 4.864543104251587e-05, "loss": 0.0011, "step": 30 }, { "epoch": 0.3315508021390374, "grad_norm": 0.05200629308819771, "learning_rate": 4.850798475290403e-05, "loss": 0.0017, "step": 31 }, { "epoch": 0.3422459893048128, "grad_norm": 0.013489915989339352, "learning_rate": 4.8364111614986527e-05, "loss": 0.0007, "step": 32 }, { "epoch": 0.35294117647058826, "grad_norm": 0.016825033351778984, "learning_rate": 4.821385096224268e-05, "loss": 0.0021, "step": 33 }, { "epoch": 0.36363636363636365, "grad_norm": 0.0052557820454239845, "learning_rate": 4.805724387443462e-05, "loss": 0.0006, "step": 34 }, { "epoch": 0.37433155080213903, "grad_norm": 0.02335667796432972, "learning_rate": 4.789433316637644e-05, "loss": 0.001, "step": 35 }, { "epoch": 0.3850267379679144, "grad_norm": 0.006106918212026358, "learning_rate": 4.7725163376229064e-05, "loss": 0.0007, "step": 36 }, { "epoch": 0.39572192513368987, "grad_norm": 0.01695733517408371, "learning_rate": 4.754978075332398e-05, "loss": 0.0007, "step": 37 }, { "epoch": 0.40641711229946526, "grad_norm": 0.008565555326640606, "learning_rate": 4.736823324551909e-05, "loss": 0.0006, "step": 38 }, { "epoch": 0.41711229946524064, "grad_norm": 0.010268477723002434, "learning_rate": 4.71805704860903e-05, "loss": 0.0005, "step": 39 }, { "epoch": 0.42780748663101603, "grad_norm": 0.004667960572987795, "learning_rate": 4.698684378016222e-05, "loss": 0.0005, "step": 40 }, { "epoch": 0.4385026737967914, "grad_norm": 0.026039620861411095, "learning_rate": 4.678710609068193e-05, "loss": 0.0012, "step": 41 }, { "epoch": 0.44919786096256686, "grad_norm": 0.05465618148446083, "learning_rate": 4.6581412023939354e-05, "loss": 0.0006, "step": 42 }, { "epoch": 0.45989304812834225, "grad_norm": 0.03302493691444397, "learning_rate": 4.6369817814638475e-05, "loss": 0.0008, "step": 43 }, { "epoch": 0.47058823529411764, "grad_norm": 0.005473017692565918, "learning_rate": 4.6152381310523387e-05, "loss": 0.0005, "step": 44 }, { "epoch": 0.48128342245989303, "grad_norm": 0.006090235896408558, "learning_rate": 4.592916195656322e-05, "loss": 0.0005, "step": 45 }, { "epoch": 0.4919786096256685, "grad_norm": 0.015685347840189934, "learning_rate": 4.5700220778700504e-05, "loss": 0.0006, "step": 46 }, { "epoch": 0.5026737967914439, "grad_norm": 0.008048945106565952, "learning_rate": 4.546562036716732e-05, "loss": 0.0006, "step": 47 }, { "epoch": 0.5133689839572193, "grad_norm": 0.033643852919340134, "learning_rate": 4.522542485937369e-05, "loss": 0.0015, "step": 48 }, { "epoch": 0.5240641711229946, "grad_norm": 0.007943148724734783, "learning_rate": 4.497969992237312e-05, "loss": 0.0005, "step": 49 }, { "epoch": 0.5347593582887701, "grad_norm": 0.006010276731103659, "learning_rate": 4.4728512734909844e-05, "loss": 0.0005, "step": 50 }, { "epoch": 0.5347593582887701, "eval_loss": 0.0005459652165882289, "eval_runtime": 11.8622, "eval_samples_per_second": 13.32, "eval_steps_per_second": 3.372, "step": 50 }, { "epoch": 0.5454545454545454, "grad_norm": 0.002803094917908311, "learning_rate": 4.4471931969052816e-05, "loss": 0.0004, "step": 51 }, { "epoch": 0.5561497326203209, "grad_norm": 0.00603801105171442, "learning_rate": 4.421002777142148e-05, "loss": 0.0005, "step": 52 }, { "epoch": 0.5668449197860963, "grad_norm": 0.016559094190597534, "learning_rate": 4.3942871744008374e-05, "loss": 0.0006, "step": 53 }, { "epoch": 0.5775401069518716, "grad_norm": 0.004527962300926447, "learning_rate": 4.367053692460385e-05, "loss": 0.0004, "step": 54 }, { "epoch": 0.5882352941176471, "grad_norm": 0.005123887676745653, "learning_rate": 4.3393097766828293e-05, "loss": 0.0005, "step": 55 }, { "epoch": 0.5989304812834224, "grad_norm": 0.002397982170805335, "learning_rate": 4.311063011977723e-05, "loss": 0.0004, "step": 56 }, { "epoch": 0.6096256684491979, "grad_norm": 0.007608695421367884, "learning_rate": 4.282321120728493e-05, "loss": 0.0005, "step": 57 }, { "epoch": 0.6203208556149733, "grad_norm": 0.003963192459195852, "learning_rate": 4.2530919606812216e-05, "loss": 0.0004, "step": 58 }, { "epoch": 0.6310160427807486, "grad_norm": 0.021777043119072914, "learning_rate": 4.223383522796415e-05, "loss": 0.0007, "step": 59 }, { "epoch": 0.6417112299465241, "grad_norm": 0.004203729331493378, "learning_rate": 4.193203929064353e-05, "loss": 0.0003, "step": 60 }, { "epoch": 0.6524064171122995, "grad_norm": 0.017256399616599083, "learning_rate": 4.16256143028462e-05, "loss": 0.0005, "step": 61 }, { "epoch": 0.6631016042780749, "grad_norm": 0.00783338863402605, "learning_rate": 4.131464403810422e-05, "loss": 0.0004, "step": 62 }, { "epoch": 0.6737967914438503, "grad_norm": 0.018286529928445816, "learning_rate": 4.099921351258292e-05, "loss": 0.0005, "step": 63 }, { "epoch": 0.6844919786096256, "grad_norm": 0.012721989303827286, "learning_rate": 4.067940896183843e-05, "loss": 0.0005, "step": 64 }, { "epoch": 0.6951871657754011, "grad_norm": 0.0033641839399933815, "learning_rate": 4.03553178172417e-05, "loss": 0.0003, "step": 65 }, { "epoch": 0.7058823529411765, "grad_norm": 0.0063002267852425575, "learning_rate": 4.002702868207563e-05, "loss": 0.0004, "step": 66 }, { "epoch": 0.7165775401069518, "grad_norm": 0.009623183868825436, "learning_rate": 3.969463130731183e-05, "loss": 0.0004, "step": 67 }, { "epoch": 0.7272727272727273, "grad_norm": 0.0021998609881848097, "learning_rate": 3.935821656707359e-05, "loss": 0.0003, "step": 68 }, { "epoch": 0.7379679144385026, "grad_norm": 0.017077744007110596, "learning_rate": 3.901787643379182e-05, "loss": 0.0004, "step": 69 }, { "epoch": 0.7486631016042781, "grad_norm": 0.005753326695412397, "learning_rate": 3.867370395306068e-05, "loss": 0.0004, "step": 70 }, { "epoch": 0.7593582887700535, "grad_norm": 0.007127939723432064, "learning_rate": 3.832579321819985e-05, "loss": 0.0005, "step": 71 }, { "epoch": 0.7700534759358288, "grad_norm": 0.014801083132624626, "learning_rate": 3.797423934453038e-05, "loss": 0.0005, "step": 72 }, { "epoch": 0.7807486631016043, "grad_norm": 0.0020635039545595646, "learning_rate": 3.76191384433711e-05, "loss": 0.0003, "step": 73 }, { "epoch": 0.7914438502673797, "grad_norm": 0.0026627369225025177, "learning_rate": 3.726058759576271e-05, "loss": 0.0003, "step": 74 }, { "epoch": 0.8021390374331551, "grad_norm": 0.006531054619699717, "learning_rate": 3.689868482592684e-05, "loss": 0.0004, "step": 75 }, { "epoch": 0.8128342245989305, "grad_norm": 0.00441792281344533, "learning_rate": 3.65335290744672e-05, "loss": 0.0004, "step": 76 }, { "epoch": 0.8235294117647058, "grad_norm": 0.0082427728921175, "learning_rate": 3.616522017132017e-05, "loss": 0.0004, "step": 77 }, { "epoch": 0.8342245989304813, "grad_norm": 0.0030491151846945286, "learning_rate": 3.579385880846232e-05, "loss": 0.0003, "step": 78 }, { "epoch": 0.8449197860962567, "grad_norm": 0.0036119227297604084, "learning_rate": 3.5419546512382266e-05, "loss": 0.0003, "step": 79 }, { "epoch": 0.8556149732620321, "grad_norm": 0.009720646776258945, "learning_rate": 3.504238561632424e-05, "loss": 0.0003, "step": 80 }, { "epoch": 0.8663101604278075, "grad_norm": 0.014544278383255005, "learning_rate": 3.4662479232311306e-05, "loss": 0.0005, "step": 81 }, { "epoch": 0.8770053475935828, "grad_norm": 0.0033084533642977476, "learning_rate": 3.427993122295552e-05, "loss": 0.0003, "step": 82 }, { "epoch": 0.8877005347593583, "grad_norm": 0.0023865916300565004, "learning_rate": 3.389484617306292e-05, "loss": 0.0003, "step": 83 }, { "epoch": 0.8983957219251337, "grad_norm": 0.005658437963575125, "learning_rate": 3.350732936104108e-05, "loss": 0.0003, "step": 84 }, { "epoch": 0.9090909090909091, "grad_norm": 0.03401561826467514, "learning_rate": 3.311748673011709e-05, "loss": 0.0004, "step": 85 }, { "epoch": 0.9197860962566845, "grad_norm": 0.029938824474811554, "learning_rate": 3.272542485937369e-05, "loss": 0.0011, "step": 86 }, { "epoch": 0.93048128342246, "grad_norm": 0.0016886080848053098, "learning_rate": 3.2331250934611624e-05, "loss": 0.0002, "step": 87 }, { "epoch": 0.9411764705882353, "grad_norm": 0.03508773818612099, "learning_rate": 3.1935072719046115e-05, "loss": 0.0004, "step": 88 }, { "epoch": 0.9518716577540107, "grad_norm": 0.0011655918788164854, "learning_rate": 3.1536998523845494e-05, "loss": 0.0002, "step": 89 }, { "epoch": 0.9625668449197861, "grad_norm": 0.002312800381332636, "learning_rate": 3.1137137178519985e-05, "loss": 0.0002, "step": 90 }, { "epoch": 0.9732620320855615, "grad_norm": 0.0012323955306783319, "learning_rate": 3.073559800116879e-05, "loss": 0.0002, "step": 91 }, { "epoch": 0.983957219251337, "grad_norm": 0.005305045284330845, "learning_rate": 3.0332490768593675e-05, "loss": 0.0002, "step": 92 }, { "epoch": 0.9946524064171123, "grad_norm": 0.01276116631925106, "learning_rate": 2.9927925686287006e-05, "loss": 0.0004, "step": 93 }, { "epoch": 1.0053475935828877, "grad_norm": 0.0070867943577468395, "learning_rate": 2.952201335830275e-05, "loss": 0.0005, "step": 94 }, { "epoch": 1.0160427807486632, "grad_norm": 0.004585223272442818, "learning_rate": 2.9114864757018352e-05, "loss": 0.0003, "step": 95 }, { "epoch": 1.0267379679144386, "grad_norm": 0.002973524620756507, "learning_rate": 2.870659119279605e-05, "loss": 0.0003, "step": 96 }, { "epoch": 1.0374331550802138, "grad_norm": 0.0011876296484842896, "learning_rate": 2.8297304283551728e-05, "loss": 0.0002, "step": 97 }, { "epoch": 1.0481283422459893, "grad_norm": 0.001789945992641151, "learning_rate": 2.788711592423966e-05, "loss": 0.0002, "step": 98 }, { "epoch": 1.0588235294117647, "grad_norm": 0.0016729391645640135, "learning_rate": 2.7476138256261575e-05, "loss": 0.0002, "step": 99 }, { "epoch": 1.0695187165775402, "grad_norm": 0.016477957367897034, "learning_rate": 2.7064483636808313e-05, "loss": 0.0013, "step": 100 }, { "epoch": 1.0695187165775402, "eval_loss": 0.0002533694205339998, "eval_runtime": 11.8766, "eval_samples_per_second": 13.303, "eval_steps_per_second": 3.368, "step": 100 }, { "epoch": 1.0802139037433156, "grad_norm": 0.0016368901124224067, "learning_rate": 2.6652264608142484e-05, "loss": 0.0003, "step": 101 }, { "epoch": 1.0909090909090908, "grad_norm": 0.0019716303795576096, "learning_rate": 2.623959386683056e-05, "loss": 0.0002, "step": 102 }, { "epoch": 1.1016042780748663, "grad_norm": 0.0013684361474588513, "learning_rate": 2.5826584232932706e-05, "loss": 0.0002, "step": 103 }, { "epoch": 1.1122994652406417, "grad_norm": 0.0016918659675866365, "learning_rate": 2.5413348619158967e-05, "loss": 0.0002, "step": 104 }, { "epoch": 1.1229946524064172, "grad_norm": 0.0012239411007612944, "learning_rate": 2.5e-05, "loss": 0.0002, "step": 105 }, { "epoch": 1.1336898395721926, "grad_norm": 0.0017103370046243072, "learning_rate": 2.458665138084104e-05, "loss": 0.0002, "step": 106 }, { "epoch": 1.1443850267379678, "grad_norm": 0.0011442365357652307, "learning_rate": 2.4173415767067297e-05, "loss": 0.0002, "step": 107 }, { "epoch": 1.1550802139037433, "grad_norm": 0.0015665042446926236, "learning_rate": 2.3760406133169443e-05, "loss": 0.0002, "step": 108 }, { "epoch": 1.1657754010695187, "grad_norm": 0.0026721807662397623, "learning_rate": 2.334773539185752e-05, "loss": 0.0003, "step": 109 }, { "epoch": 1.1764705882352942, "grad_norm": 0.0018524781335145235, "learning_rate": 2.2935516363191693e-05, "loss": 0.0002, "step": 110 }, { "epoch": 1.1871657754010696, "grad_norm": 0.003370043123140931, "learning_rate": 2.2523861743738434e-05, "loss": 0.0002, "step": 111 }, { "epoch": 1.1978609625668448, "grad_norm": 0.0017906591529026628, "learning_rate": 2.2112884075760347e-05, "loss": 0.0002, "step": 112 }, { "epoch": 1.2085561497326203, "grad_norm": 0.004777679685503244, "learning_rate": 2.1702695716448278e-05, "loss": 0.0003, "step": 113 }, { "epoch": 1.2192513368983957, "grad_norm": 0.002267079660668969, "learning_rate": 2.1293408807203947e-05, "loss": 0.0002, "step": 114 }, { "epoch": 1.2299465240641712, "grad_norm": 0.003986462950706482, "learning_rate": 2.088513524298165e-05, "loss": 0.0002, "step": 115 }, { "epoch": 1.2406417112299466, "grad_norm": 0.0009038595599122345, "learning_rate": 2.047798664169726e-05, "loss": 0.0002, "step": 116 }, { "epoch": 1.251336898395722, "grad_norm": 0.004659716505557299, "learning_rate": 2.0072074313712997e-05, "loss": 0.0003, "step": 117 }, { "epoch": 1.2620320855614973, "grad_norm": 0.001987988129258156, "learning_rate": 1.9667509231406334e-05, "loss": 0.0002, "step": 118 }, { "epoch": 1.2727272727272727, "grad_norm": 0.002171223284676671, "learning_rate": 1.9264401998831213e-05, "loss": 0.0003, "step": 119 }, { "epoch": 1.2834224598930482, "grad_norm": 0.003317313501611352, "learning_rate": 1.8862862821480025e-05, "loss": 0.0003, "step": 120 }, { "epoch": 1.2941176470588236, "grad_norm": 0.0016816252609714866, "learning_rate": 1.8463001476154508e-05, "loss": 0.0002, "step": 121 }, { "epoch": 1.3048128342245988, "grad_norm": 0.002052134368568659, "learning_rate": 1.806492728095389e-05, "loss": 0.0002, "step": 122 }, { "epoch": 1.3155080213903743, "grad_norm": 0.0012377082603052258, "learning_rate": 1.7668749065388385e-05, "loss": 0.0002, "step": 123 }, { "epoch": 1.3262032085561497, "grad_norm": 0.0017503959825262427, "learning_rate": 1.7274575140626318e-05, "loss": 0.0002, "step": 124 }, { "epoch": 1.3368983957219251, "grad_norm": 0.0020569455809891224, "learning_rate": 1.6882513269882917e-05, "loss": 0.0002, "step": 125 }, { "epoch": 1.3475935828877006, "grad_norm": 0.0011728674871847034, "learning_rate": 1.6492670638958924e-05, "loss": 0.0002, "step": 126 }, { "epoch": 1.358288770053476, "grad_norm": 0.00124787213280797, "learning_rate": 1.6105153826937085e-05, "loss": 0.0002, "step": 127 }, { "epoch": 1.3689839572192513, "grad_norm": 0.002361771184951067, "learning_rate": 1.5720068777044476e-05, "loss": 0.0002, "step": 128 }, { "epoch": 1.3796791443850267, "grad_norm": 0.025998368859291077, "learning_rate": 1.5337520767688703e-05, "loss": 0.0003, "step": 129 }, { "epoch": 1.3903743315508021, "grad_norm": 0.012260783463716507, "learning_rate": 1.495761438367577e-05, "loss": 0.0003, "step": 130 }, { "epoch": 1.4010695187165776, "grad_norm": 0.0020026592537760735, "learning_rate": 1.4580453487617745e-05, "loss": 0.0002, "step": 131 }, { "epoch": 1.4117647058823528, "grad_norm": 0.0011957072420045733, "learning_rate": 1.4206141191537682e-05, "loss": 0.0002, "step": 132 }, { "epoch": 1.4224598930481283, "grad_norm": 0.0011699367314577103, "learning_rate": 1.383477982867984e-05, "loss": 0.0002, "step": 133 }, { "epoch": 1.4331550802139037, "grad_norm": 0.0010813389671966434, "learning_rate": 1.346647092553281e-05, "loss": 0.0002, "step": 134 }, { "epoch": 1.4438502673796791, "grad_norm": 0.001684120506979525, "learning_rate": 1.3101315174073162e-05, "loss": 0.0002, "step": 135 }, { "epoch": 1.4545454545454546, "grad_norm": 0.001540790661238134, "learning_rate": 1.2739412404237306e-05, "loss": 0.0002, "step": 136 }, { "epoch": 1.46524064171123, "grad_norm": 0.0029055785853415728, "learning_rate": 1.2380861556628915e-05, "loss": 0.0002, "step": 137 }, { "epoch": 1.4759358288770055, "grad_norm": 0.0013636857038363814, "learning_rate": 1.202576065546963e-05, "loss": 0.0002, "step": 138 }, { "epoch": 1.4866310160427807, "grad_norm": 0.0013237000675871968, "learning_rate": 1.1674206781800162e-05, "loss": 0.0002, "step": 139 }, { "epoch": 1.4973262032085561, "grad_norm": 0.0031855355482548475, "learning_rate": 1.1326296046939333e-05, "loss": 0.0002, "step": 140 }, { "epoch": 1.5080213903743316, "grad_norm": 0.0164369884878397, "learning_rate": 1.0982123566208185e-05, "loss": 0.0003, "step": 141 }, { "epoch": 1.5187165775401068, "grad_norm": 0.0026839314959943295, "learning_rate": 1.064178343292641e-05, "loss": 0.0002, "step": 142 }, { "epoch": 1.5294117647058822, "grad_norm": 0.0014627689961344004, "learning_rate": 1.0305368692688174e-05, "loss": 0.0002, "step": 143 }, { "epoch": 1.5401069518716577, "grad_norm": 0.0018020343268290162, "learning_rate": 9.972971317924374e-06, "loss": 0.0002, "step": 144 }, { "epoch": 1.5508021390374331, "grad_norm": 0.0024791641626507044, "learning_rate": 9.644682182758306e-06, "loss": 0.0002, "step": 145 }, { "epoch": 1.5614973262032086, "grad_norm": 0.001727481372654438, "learning_rate": 9.320591038161574e-06, "loss": 0.0002, "step": 146 }, { "epoch": 1.572192513368984, "grad_norm": 0.003578837728127837, "learning_rate": 9.000786487417085e-06, "loss": 0.0002, "step": 147 }, { "epoch": 1.5828877005347595, "grad_norm": 0.03577272221446037, "learning_rate": 8.685355961895784e-06, "loss": 0.001, "step": 148 }, { "epoch": 1.593582887700535, "grad_norm": 0.0013863133499398828, "learning_rate": 8.374385697153792e-06, "loss": 0.0002, "step": 149 }, { "epoch": 1.6042780748663101, "grad_norm": 0.013459622859954834, "learning_rate": 8.067960709356478e-06, "loss": 0.001, "step": 150 }, { "epoch": 1.6042780748663101, "eval_loss": 0.00020489019516389817, "eval_runtime": 11.8775, "eval_samples_per_second": 13.302, "eval_steps_per_second": 3.368, "step": 150 }, { "epoch": 1.6149732620320856, "grad_norm": 0.0012236441252753139, "learning_rate": 7.766164772035856e-06, "loss": 0.0002, "step": 151 }, { "epoch": 1.6256684491978608, "grad_norm": 0.0012642454821616411, "learning_rate": 7.469080393187786e-06, "loss": 0.0002, "step": 152 }, { "epoch": 1.6363636363636362, "grad_norm": 0.002232413273304701, "learning_rate": 7.176788792715075e-06, "loss": 0.0002, "step": 153 }, { "epoch": 1.6470588235294117, "grad_norm": 0.0014532230561599135, "learning_rate": 6.889369880222776e-06, "loss": 0.0002, "step": 154 }, { "epoch": 1.6577540106951871, "grad_norm": 0.0011404341785237193, "learning_rate": 6.606902233171711e-06, "loss": 0.0002, "step": 155 }, { "epoch": 1.6684491978609626, "grad_norm": 0.0011395354522392154, "learning_rate": 6.329463075396161e-06, "loss": 0.0001, "step": 156 }, { "epoch": 1.679144385026738, "grad_norm": 0.0009083832264877856, "learning_rate": 6.057128255991637e-06, "loss": 0.0002, "step": 157 }, { "epoch": 1.6898395721925135, "grad_norm": 0.001107500633224845, "learning_rate": 5.78997222857853e-06, "loss": 0.0002, "step": 158 }, { "epoch": 1.700534759358289, "grad_norm": 0.0009984674397855997, "learning_rate": 5.528068030947192e-06, "loss": 0.0002, "step": 159 }, { "epoch": 1.7112299465240641, "grad_norm": 0.0010274855885654688, "learning_rate": 5.271487265090163e-06, "loss": 0.0002, "step": 160 }, { "epoch": 1.7219251336898396, "grad_norm": 0.0048374817706644535, "learning_rate": 5.0203000776268825e-06, "loss": 0.0002, "step": 161 }, { "epoch": 1.732620320855615, "grad_norm": 0.001355843967758119, "learning_rate": 4.7745751406263165e-06, "loss": 0.0002, "step": 162 }, { "epoch": 1.7433155080213902, "grad_norm": 0.002731623128056526, "learning_rate": 4.534379632832692e-06, "loss": 0.0002, "step": 163 }, { "epoch": 1.7540106951871657, "grad_norm": 0.0027442830614745617, "learning_rate": 4.299779221299499e-06, "loss": 0.0002, "step": 164 }, { "epoch": 1.7647058823529411, "grad_norm": 0.0019349497742950916, "learning_rate": 4.070838043436786e-06, "loss": 0.0002, "step": 165 }, { "epoch": 1.7754010695187166, "grad_norm": 0.006677249446511269, "learning_rate": 3.847618689476612e-06, "loss": 0.0005, "step": 166 }, { "epoch": 1.786096256684492, "grad_norm": 0.0013465003576129675, "learning_rate": 3.630182185361522e-06, "loss": 0.0002, "step": 167 }, { "epoch": 1.7967914438502675, "grad_norm": 0.0011671106331050396, "learning_rate": 3.418587976060653e-06, "loss": 0.0002, "step": 168 }, { "epoch": 1.807486631016043, "grad_norm": 0.0013127396814525127, "learning_rate": 3.2128939093180655e-06, "loss": 0.0002, "step": 169 }, { "epoch": 1.8181818181818183, "grad_norm": 0.0008585858740843832, "learning_rate": 3.013156219837776e-06, "loss": 0.0002, "step": 170 }, { "epoch": 1.8288770053475936, "grad_norm": 0.002401969162747264, "learning_rate": 2.8194295139097048e-06, "loss": 0.0002, "step": 171 }, { "epoch": 1.839572192513369, "grad_norm": 0.0011273876298218966, "learning_rate": 2.6317667544809134e-06, "loss": 0.0002, "step": 172 }, { "epoch": 1.8502673796791442, "grad_norm": 0.0010006314842030406, "learning_rate": 2.4502192466760276e-06, "loss": 0.0002, "step": 173 }, { "epoch": 1.8609625668449197, "grad_norm": 0.0028830089140683413, "learning_rate": 2.2748366237709374e-06, "loss": 0.0002, "step": 174 }, { "epoch": 1.8716577540106951, "grad_norm": 0.0019230460748076439, "learning_rate": 2.1056668336235622e-06, "loss": 0.0002, "step": 175 }, { "epoch": 1.8823529411764706, "grad_norm": 0.0018979047890752554, "learning_rate": 1.9427561255653816e-06, "loss": 0.0002, "step": 176 }, { "epoch": 1.893048128342246, "grad_norm": 0.0009563955245539546, "learning_rate": 1.7861490377573258e-06, "loss": 0.0002, "step": 177 }, { "epoch": 1.9037433155080214, "grad_norm": 0.0010765568586066365, "learning_rate": 1.6358883850134816e-06, "loss": 0.0002, "step": 178 }, { "epoch": 1.914438502673797, "grad_norm": 0.00405028834939003, "learning_rate": 1.4920152470959707e-06, "loss": 0.0002, "step": 179 }, { "epoch": 1.9251336898395723, "grad_norm": 0.0011017459910362959, "learning_rate": 1.3545689574841342e-06, "loss": 0.0002, "step": 180 }, { "epoch": 1.9358288770053476, "grad_norm": 0.0009190815035253763, "learning_rate": 1.2235870926211619e-06, "loss": 0.0001, "step": 181 }, { "epoch": 1.946524064171123, "grad_norm": 0.0008144082967191935, "learning_rate": 1.0991054616410589e-06, "loss": 0.0001, "step": 182 }, { "epoch": 1.9572192513368984, "grad_norm": 0.0012387970928102732, "learning_rate": 9.811580965787965e-07, "loss": 0.0002, "step": 183 }, { "epoch": 1.9679144385026737, "grad_norm": 0.0013082154328003526, "learning_rate": 8.697772430662859e-07, "loss": 0.0002, "step": 184 }, { "epoch": 1.9786096256684491, "grad_norm": 0.011731316335499287, "learning_rate": 7.649933515167407e-07, "loss": 0.0002, "step": 185 }, { "epoch": 1.9893048128342246, "grad_norm": 0.03708391636610031, "learning_rate": 6.668350687998565e-07, "loss": 0.0003, "step": 186 }, { "epoch": 2.0, "grad_norm": 0.0013201060937717557, "learning_rate": 5.753292304100183e-07, "loss": 0.0002, "step": 187 }, { "epoch": 2.0106951871657754, "grad_norm": 0.0018095995765179396, "learning_rate": 4.905008531297661e-07, "loss": 0.0002, "step": 188 }, { "epoch": 2.021390374331551, "grad_norm": 0.0036144822370260954, "learning_rate": 4.1237312819044085e-07, "loss": 0.0002, "step": 189 }, { "epoch": 2.0320855614973263, "grad_norm": 0.0013612692710012197, "learning_rate": 3.4096741493194197e-07, "loss": 0.0002, "step": 190 }, { "epoch": 2.0427807486631018, "grad_norm": 0.0009508001385256648, "learning_rate": 2.763032349632877e-07, "loss": 0.0002, "step": 191 }, { "epoch": 2.053475935828877, "grad_norm": 0.0010036603780463338, "learning_rate": 2.1839826682562015e-07, "loss": 0.0002, "step": 192 }, { "epoch": 2.064171122994652, "grad_norm": 0.0011032938491553068, "learning_rate": 1.6726834115904643e-07, "loss": 0.0002, "step": 193 }, { "epoch": 2.0748663101604277, "grad_norm": 0.000993466004729271, "learning_rate": 1.229274363747146e-07, "loss": 0.0002, "step": 194 }, { "epoch": 2.085561497326203, "grad_norm": 0.001971643418073654, "learning_rate": 8.538767483325383e-08, "loss": 0.0002, "step": 195 }, { "epoch": 2.0962566844919786, "grad_norm": 0.000998842646367848, "learning_rate": 5.4659319530636633e-08, "loss": 0.0002, "step": 196 }, { "epoch": 2.106951871657754, "grad_norm": 0.0022234790958464146, "learning_rate": 3.075077129238158e-08, "loss": 0.0002, "step": 197 }, { "epoch": 2.1176470588235294, "grad_norm": 0.0017989481566473842, "learning_rate": 1.3668566476848777e-08, "loss": 0.0002, "step": 198 }, { "epoch": 2.128342245989305, "grad_norm": 0.002519871573895216, "learning_rate": 3.417375188274896e-09, "loss": 0.0002, "step": 199 }, { "epoch": 2.1390374331550803, "grad_norm": 0.0016417063307017088, "learning_rate": 0.0, "loss": 0.0002, "step": 200 }, { "epoch": 2.1390374331550803, "eval_loss": 0.00019854793208651245, "eval_runtime": 11.872, "eval_samples_per_second": 13.309, "eval_steps_per_second": 3.369, "step": 200 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 4, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.69069410369536e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }