{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 64.48,
"eval_steps": 500,
"global_step": 4030,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.32,
"grad_norm": 2.246424436569214,
"learning_rate": 2.3573200992555833e-06,
"loss": 2.826,
"step": 20
},
{
"epoch": 0.64,
"grad_norm": 0.9050242900848389,
"learning_rate": 4.838709677419355e-06,
"loss": 2.72,
"step": 40
},
{
"epoch": 0.96,
"grad_norm": 2.6034655570983887,
"learning_rate": 7.320099255583126e-06,
"loss": 2.4912,
"step": 60
},
{
"epoch": 1.28,
"grad_norm": 1.3487274646759033,
"learning_rate": 9.801488833746898e-06,
"loss": 2.0561,
"step": 80
},
{
"epoch": 1.6,
"grad_norm": 1.6185756921768188,
"learning_rate": 1.2282878411910669e-05,
"loss": 1.7744,
"step": 100
},
{
"epoch": 1.92,
"grad_norm": 3.017139196395874,
"learning_rate": 1.4764267990074444e-05,
"loss": 1.8387,
"step": 120
},
{
"epoch": 2.24,
"grad_norm": 2.2100813388824463,
"learning_rate": 1.7245657568238215e-05,
"loss": 1.4478,
"step": 140
},
{
"epoch": 2.56,
"grad_norm": 1.574629545211792,
"learning_rate": 1.9727047146401986e-05,
"loss": 1.285,
"step": 160
},
{
"epoch": 2.88,
"grad_norm": 4.586638450622559,
"learning_rate": 2.2208436724565757e-05,
"loss": 1.2235,
"step": 180
},
{
"epoch": 3.2,
"grad_norm": 2.7081515789031982,
"learning_rate": 2.468982630272953e-05,
"loss": 0.9575,
"step": 200
},
{
"epoch": 3.52,
"grad_norm": 0.9670729041099548,
"learning_rate": 2.7171215880893302e-05,
"loss": 0.7086,
"step": 220
},
{
"epoch": 3.84,
"grad_norm": 3.229243040084839,
"learning_rate": 2.9652605459057077e-05,
"loss": 0.8587,
"step": 240
},
{
"epoch": 4.16,
"grad_norm": 1.1293463706970215,
"learning_rate": 3.2133995037220844e-05,
"loss": 0.5978,
"step": 260
},
{
"epoch": 4.48,
"grad_norm": 1.7043830156326294,
"learning_rate": 3.461538461538462e-05,
"loss": 0.4668,
"step": 280
},
{
"epoch": 4.8,
"grad_norm": 2.565268039703369,
"learning_rate": 3.7096774193548386e-05,
"loss": 0.5667,
"step": 300
},
{
"epoch": 5.12,
"grad_norm": 1.158849835395813,
"learning_rate": 3.957816377171216e-05,
"loss": 0.4373,
"step": 320
},
{
"epoch": 5.44,
"grad_norm": 2.714164972305298,
"learning_rate": 4.205955334987593e-05,
"loss": 0.3492,
"step": 340
},
{
"epoch": 5.76,
"grad_norm": 2.2089672088623047,
"learning_rate": 4.45409429280397e-05,
"loss": 0.4018,
"step": 360
},
{
"epoch": 6.08,
"grad_norm": 1.8179335594177246,
"learning_rate": 4.702233250620348e-05,
"loss": 0.279,
"step": 380
},
{
"epoch": 6.4,
"grad_norm": 1.4858269691467285,
"learning_rate": 4.950372208436725e-05,
"loss": 0.2362,
"step": 400
},
{
"epoch": 6.72,
"grad_norm": 1.7704375982284546,
"learning_rate": 4.99975992459978e-05,
"loss": 0.2665,
"step": 420
},
{
"epoch": 7.04,
"grad_norm": 1.2611212730407715,
"learning_rate": 4.9987846973104825e-05,
"loss": 0.2029,
"step": 440
},
{
"epoch": 7.36,
"grad_norm": 2.994542360305786,
"learning_rate": 4.9970596058519116e-05,
"loss": 0.1747,
"step": 460
},
{
"epoch": 7.68,
"grad_norm": 2.7456889152526855,
"learning_rate": 4.994585167909436e-05,
"loss": 0.1486,
"step": 480
},
{
"epoch": 8.0,
"grad_norm": 1.8236416578292847,
"learning_rate": 4.9913621260409695e-05,
"loss": 0.1866,
"step": 500
},
{
"epoch": 8.32,
"grad_norm": 2.636003017425537,
"learning_rate": 4.987391447454136e-05,
"loss": 0.1476,
"step": 520
},
{
"epoch": 8.64,
"grad_norm": 2.879154920578003,
"learning_rate": 4.982674323716023e-05,
"loss": 0.1403,
"step": 540
},
{
"epoch": 8.96,
"grad_norm": 0.9377075433731079,
"learning_rate": 4.977212170395598e-05,
"loss": 0.1018,
"step": 560
},
{
"epoch": 9.28,
"grad_norm": 0.311233788728714,
"learning_rate": 4.9710066266389074e-05,
"loss": 0.0992,
"step": 580
},
{
"epoch": 9.6,
"grad_norm": 0.8316205739974976,
"learning_rate": 4.964059554677187e-05,
"loss": 0.1134,
"step": 600
},
{
"epoch": 9.92,
"grad_norm": 2.567354679107666,
"learning_rate": 4.956373039268022e-05,
"loss": 0.0781,
"step": 620
},
{
"epoch": 10.24,
"grad_norm": 0.0829504132270813,
"learning_rate": 4.947949387069721e-05,
"loss": 0.0892,
"step": 640
},
{
"epoch": 10.56,
"grad_norm": 0.8588472008705139,
"learning_rate": 4.938791125949119e-05,
"loss": 0.0499,
"step": 660
},
{
"epoch": 10.88,
"grad_norm": 1.2792423963546753,
"learning_rate": 4.9289010042229765e-05,
"loss": 0.0831,
"step": 680
},
{
"epoch": 11.2,
"grad_norm": 0.4728279709815979,
"learning_rate": 4.918281989833238e-05,
"loss": 0.0715,
"step": 700
},
{
"epoch": 11.52,
"grad_norm": 2.5855355262756348,
"learning_rate": 4.9069372694563756e-05,
"loss": 0.0718,
"step": 720
},
{
"epoch": 11.84,
"grad_norm": 0.8059779405593872,
"learning_rate": 4.8948702475470933e-05,
"loss": 0.0849,
"step": 740
},
{
"epoch": 12.16,
"grad_norm": 1.2841193675994873,
"learning_rate": 4.882084545316684e-05,
"loss": 0.0683,
"step": 760
},
{
"epoch": 12.48,
"grad_norm": 1.3422589302062988,
"learning_rate": 4.868583999646329e-05,
"loss": 0.0808,
"step": 780
},
{
"epoch": 12.8,
"grad_norm": 1.3376965522766113,
"learning_rate": 4.8543726619356846e-05,
"loss": 0.0607,
"step": 800
},
{
"epoch": 13.12,
"grad_norm": 1.008899450302124,
"learning_rate": 4.83945479688709e-05,
"loss": 0.062,
"step": 820
},
{
"epoch": 13.44,
"grad_norm": 0.441413551568985,
"learning_rate": 4.8238348812257684e-05,
"loss": 0.0461,
"step": 840
},
{
"epoch": 13.76,
"grad_norm": 1.296985149383545,
"learning_rate": 4.808349953928184e-05,
"loss": 0.0482,
"step": 860
},
{
"epoch": 14.08,
"grad_norm": 0.035805635154247284,
"learning_rate": 4.791374712344622e-05,
"loss": 0.0388,
"step": 880
},
{
"epoch": 14.4,
"grad_norm": 0.10618308186531067,
"learning_rate": 4.7737118485753564e-05,
"loss": 0.0251,
"step": 900
},
{
"epoch": 14.72,
"grad_norm": 0.866423487663269,
"learning_rate": 4.75536666309653e-05,
"loss": 0.0515,
"step": 920
},
{
"epoch": 15.04,
"grad_norm": 0.5916399955749512,
"learning_rate": 4.73634466114326e-05,
"loss": 0.0536,
"step": 940
},
{
"epoch": 15.36,
"grad_norm": 0.1653570532798767,
"learning_rate": 4.7166515510575676e-05,
"loss": 0.0392,
"step": 960
},
{
"epoch": 15.68,
"grad_norm": 0.027391331270337105,
"learning_rate": 4.696293242575356e-05,
"loss": 0.0369,
"step": 980
},
{
"epoch": 16.0,
"grad_norm": 2.17256760597229,
"learning_rate": 4.675275845052942e-05,
"loss": 0.0651,
"step": 1000
},
{
"epoch": 16.32,
"grad_norm": 0.8612786531448364,
"learning_rate": 4.6536056656336947e-05,
"loss": 0.037,
"step": 1020
},
{
"epoch": 16.64,
"grad_norm": 4.489969253540039,
"learning_rate": 4.631289207355313e-05,
"loss": 0.0272,
"step": 1040
},
{
"epoch": 16.96,
"grad_norm": 0.4311043918132782,
"learning_rate": 4.6083331671983185e-05,
"loss": 0.0507,
"step": 1060
},
{
"epoch": 17.28,
"grad_norm": 0.4327545762062073,
"learning_rate": 4.584744434076352e-05,
"loss": 0.0274,
"step": 1080
},
{
"epoch": 17.6,
"grad_norm": 0.12099918723106384,
"learning_rate": 4.560530086768863e-05,
"loss": 0.0565,
"step": 1100
},
{
"epoch": 17.92,
"grad_norm": 0.103216253221035,
"learning_rate": 4.535697391796832e-05,
"loss": 0.0425,
"step": 1120
},
{
"epoch": 18.24,
"grad_norm": 0.419209748506546,
"learning_rate": 4.510253801242147e-05,
"loss": 0.0273,
"step": 1140
},
{
"epoch": 18.56,
"grad_norm": 1.3193784952163696,
"learning_rate": 4.4842069505112984e-05,
"loss": 0.0438,
"step": 1160
},
{
"epoch": 18.88,
"grad_norm": 1.5185387134552002,
"learning_rate": 4.457564656044056e-05,
"loss": 0.0544,
"step": 1180
},
{
"epoch": 19.2,
"grad_norm": 0.4024270474910736,
"learning_rate": 4.430334912967824e-05,
"loss": 0.0283,
"step": 1200
},
{
"epoch": 19.52,
"grad_norm": 0.16141988337039948,
"learning_rate": 4.402525892698367e-05,
"loss": 0.0393,
"step": 1220
},
{
"epoch": 19.84,
"grad_norm": 0.07228437811136246,
"learning_rate": 4.374145940487641e-05,
"loss": 0.0249,
"step": 1240
},
{
"epoch": 20.16,
"grad_norm": 0.7919737696647644,
"learning_rate": 4.345203572919454e-05,
"loss": 0.0293,
"step": 1260
},
{
"epoch": 20.48,
"grad_norm": 0.26585039496421814,
"learning_rate": 4.315707475353706e-05,
"loss": 0.0287,
"step": 1280
},
{
"epoch": 20.8,
"grad_norm": 0.5761149525642395,
"learning_rate": 4.285666499319992e-05,
"loss": 0.0521,
"step": 1300
},
{
"epoch": 21.12,
"grad_norm": 0.018601374700665474,
"learning_rate": 4.25508965986133e-05,
"loss": 0.0285,
"step": 1320
},
{
"epoch": 21.44,
"grad_norm": 0.00528874434530735,
"learning_rate": 4.2239861328288214e-05,
"loss": 0.0346,
"step": 1340
},
{
"epoch": 21.76,
"grad_norm": 0.3073647618293762,
"learning_rate": 4.1923652521280585e-05,
"loss": 0.022,
"step": 1360
},
{
"epoch": 22.08,
"grad_norm": 0.42911043763160706,
"learning_rate": 4.160236506918098e-05,
"loss": 0.0482,
"step": 1380
},
{
"epoch": 22.4,
"grad_norm": 0.6457176804542542,
"learning_rate": 4.127609538763842e-05,
"loss": 0.019,
"step": 1400
},
{
"epoch": 22.72,
"grad_norm": 2.3716557025909424,
"learning_rate": 4.094494138742685e-05,
"loss": 0.0312,
"step": 1420
},
{
"epoch": 23.04,
"grad_norm": 0.01667410507798195,
"learning_rate": 4.0609002445063036e-05,
"loss": 0.0377,
"step": 1440
},
{
"epoch": 23.36,
"grad_norm": 0.6381007432937622,
"learning_rate": 4.02683793729844e-05,
"loss": 0.0307,
"step": 1460
},
{
"epoch": 23.68,
"grad_norm": 0.42919328808784485,
"learning_rate": 3.9923174389296085e-05,
"loss": 0.0419,
"step": 1480
},
{
"epoch": 24.0,
"grad_norm": 0.01456019002944231,
"learning_rate": 3.957349108709623e-05,
"loss": 0.0223,
"step": 1500
},
{
"epoch": 24.32,
"grad_norm": 0.31073492765426636,
"learning_rate": 3.921943440338849e-05,
"loss": 0.0209,
"step": 1520
},
{
"epoch": 24.64,
"grad_norm": 0.38279736042022705,
"learning_rate": 3.886111058759132e-05,
"loss": 0.0491,
"step": 1540
},
{
"epoch": 24.96,
"grad_norm": 0.30651962757110596,
"learning_rate": 3.849862716965352e-05,
"loss": 0.0298,
"step": 1560
},
{
"epoch": 25.28,
"grad_norm": 0.4538489580154419,
"learning_rate": 3.813209292778527e-05,
"loss": 0.0319,
"step": 1580
},
{
"epoch": 25.6,
"grad_norm": 0.11643072962760925,
"learning_rate": 3.776161785581481e-05,
"loss": 0.0302,
"step": 1600
},
{
"epoch": 25.92,
"grad_norm": 0.008515519089996815,
"learning_rate": 3.738731313018019e-05,
"loss": 0.04,
"step": 1620
},
{
"epoch": 26.24,
"grad_norm": 0.002214708598330617,
"learning_rate": 3.700929107656614e-05,
"loss": 0.0354,
"step": 1640
},
{
"epoch": 26.56,
"grad_norm": 0.02200801856815815,
"learning_rate": 3.662766513619611e-05,
"loss": 0.0186,
"step": 1660
},
{
"epoch": 26.88,
"grad_norm": 0.1882447600364685,
"learning_rate": 3.62425498317895e-05,
"loss": 0.022,
"step": 1680
},
{
"epoch": 27.2,
"grad_norm": 0.004948125686496496,
"learning_rate": 3.585406073319439e-05,
"loss": 0.015,
"step": 1700
},
{
"epoch": 27.52,
"grad_norm": 0.3387264013290405,
"learning_rate": 3.546231442270596e-05,
"loss": 0.0381,
"step": 1720
},
{
"epoch": 27.84,
"grad_norm": 0.09048642963171005,
"learning_rate": 3.506742846008116e-05,
"loss": 0.0277,
"step": 1740
},
{
"epoch": 28.16,
"grad_norm": 0.6405784487724304,
"learning_rate": 3.4669521347259996e-05,
"loss": 0.0423,
"step": 1760
},
{
"epoch": 28.48,
"grad_norm": 0.16012047231197357,
"learning_rate": 3.426871249280414e-05,
"loss": 0.0115,
"step": 1780
},
{
"epoch": 28.8,
"grad_norm": 0.3279825448989868,
"learning_rate": 3.386512217606339e-05,
"loss": 0.0275,
"step": 1800
},
{
"epoch": 29.12,
"grad_norm": 0.005494344513863325,
"learning_rate": 3.345887151108087e-05,
"loss": 0.0309,
"step": 1820
},
{
"epoch": 29.44,
"grad_norm": 0.0037028896622359753,
"learning_rate": 3.305008241024774e-05,
"loss": 0.0294,
"step": 1840
},
{
"epoch": 29.76,
"grad_norm": 0.003084386931732297,
"learning_rate": 3.2638877547718264e-05,
"loss": 0.0213,
"step": 1860
},
{
"epoch": 30.08,
"grad_norm": 0.0017954249633476138,
"learning_rate": 3.222538032259643e-05,
"loss": 0.0326,
"step": 1880
},
{
"epoch": 30.4,
"grad_norm": 0.26840922236442566,
"learning_rate": 3.1809714821904834e-05,
"loss": 0.0249,
"step": 1900
},
{
"epoch": 30.72,
"grad_norm": 0.7214370965957642,
"learning_rate": 3.1392005783347244e-05,
"loss": 0.0115,
"step": 1920
},
{
"epoch": 31.04,
"grad_norm": 0.1613769233226776,
"learning_rate": 3.0972378557875884e-05,
"loss": 0.0322,
"step": 1940
},
{
"epoch": 31.36,
"grad_norm": 0.18066717684268951,
"learning_rate": 3.055095907207465e-05,
"loss": 0.0316,
"step": 1960
},
{
"epoch": 31.68,
"grad_norm": 0.24756371974945068,
"learning_rate": 3.0127873790369627e-05,
"loss": 0.0248,
"step": 1980
},
{
"epoch": 32.0,
"grad_norm": 0.08604203909635544,
"learning_rate": 2.9703249677078156e-05,
"loss": 0.0234,
"step": 2000
},
{
"epoch": 32.32,
"grad_norm": 0.0022385423071682453,
"learning_rate": 2.9277214158307937e-05,
"loss": 0.0277,
"step": 2020
},
{
"epoch": 32.64,
"grad_norm": 0.0020592950750142336,
"learning_rate": 2.8849895083717537e-05,
"loss": 0.0162,
"step": 2040
},
{
"epoch": 32.96,
"grad_norm": 0.20633552968502045,
"learning_rate": 2.842142068814977e-05,
"loss": 0.022,
"step": 2060
},
{
"epoch": 33.28,
"grad_norm": 0.0019172705942764878,
"learning_rate": 2.7991919553149497e-05,
"loss": 0.0278,
"step": 2080
},
{
"epoch": 33.6,
"grad_norm": 0.0013098755152896047,
"learning_rate": 2.756152056837743e-05,
"loss": 0.0189,
"step": 2100
},
{
"epoch": 33.92,
"grad_norm": 0.09349821507930756,
"learning_rate": 2.7130352892931388e-05,
"loss": 0.0228,
"step": 2120
},
{
"epoch": 34.24,
"grad_norm": 0.0017231553792953491,
"learning_rate": 2.669854591658679e-05,
"loss": 0.0319,
"step": 2140
},
{
"epoch": 34.56,
"grad_norm": 0.047173839062452316,
"learning_rate": 2.6266229220967818e-05,
"loss": 0.0153,
"step": 2160
},
{
"epoch": 34.88,
"grad_norm": 0.2877206802368164,
"learning_rate": 2.5833532540661127e-05,
"loss": 0.0267,
"step": 2180
},
{
"epoch": 35.2,
"grad_norm": 0.25823402404785156,
"learning_rate": 2.540058572428356e-05,
"loss": 0.0178,
"step": 2200
},
{
"epoch": 35.52,
"grad_norm": 0.23003694415092468,
"learning_rate": 2.496751869551567e-05,
"loss": 0.0217,
"step": 2220
},
{
"epoch": 35.84,
"grad_norm": 0.23193888366222382,
"learning_rate": 2.453446141411273e-05,
"loss": 0.017,
"step": 2240
},
{
"epoch": 36.16,
"grad_norm": 0.1941184252500534,
"learning_rate": 2.4101543836904938e-05,
"loss": 0.0257,
"step": 2260
},
{
"epoch": 36.48,
"grad_norm": 0.012731954455375671,
"learning_rate": 2.3668895878798424e-05,
"loss": 0.0237,
"step": 2280
},
{
"epoch": 36.8,
"grad_norm": 0.18219026923179626,
"learning_rate": 2.32366473737889e-05,
"loss": 0.024,
"step": 2300
},
{
"epoch": 37.12,
"grad_norm": 0.256547212600708,
"learning_rate": 2.2804928035999594e-05,
"loss": 0.0225,
"step": 2320
},
{
"epoch": 37.44,
"grad_norm": 0.45314905047416687,
"learning_rate": 2.23738674207551e-05,
"loss": 0.0239,
"step": 2340
},
{
"epoch": 37.76,
"grad_norm": 0.3919714689254761,
"learning_rate": 2.1943594885702984e-05,
"loss": 0.0235,
"step": 2360
},
{
"epoch": 38.08,
"grad_norm": 0.0769328773021698,
"learning_rate": 2.151423955199456e-05,
"loss": 0.0286,
"step": 2380
},
{
"epoch": 38.4,
"grad_norm": 0.3520802855491638,
"learning_rate": 2.108593026553681e-05,
"loss": 0.0323,
"step": 2400
},
{
"epoch": 38.72,
"grad_norm": 0.3691672384738922,
"learning_rate": 2.0658795558326743e-05,
"loss": 0.0241,
"step": 2420
},
{
"epoch": 39.04,
"grad_norm": 0.001480752951465547,
"learning_rate": 2.0232963609880093e-05,
"loss": 0.0158,
"step": 2440
},
{
"epoch": 39.36,
"grad_norm": 0.31921085715293884,
"learning_rate": 1.9808562208765667e-05,
"loss": 0.0241,
"step": 2460
},
{
"epoch": 39.68,
"grad_norm": 0.20936931669712067,
"learning_rate": 1.938571871425715e-05,
"loss": 0.0174,
"step": 2480
},
{
"epoch": 40.0,
"grad_norm": 0.0011563162552192807,
"learning_rate": 1.896456001811357e-05,
"loss": 0.0183,
"step": 2500
},
{
"epoch": 40.32,
"grad_norm": 0.19230084121227264,
"learning_rate": 1.854521250650026e-05,
"loss": 0.012,
"step": 2520
},
{
"epoch": 40.64,
"grad_norm": 0.32013317942619324,
"learning_rate": 1.8127802022061334e-05,
"loss": 0.0225,
"step": 2540
},
{
"epoch": 40.96,
"grad_norm": 0.11989307403564453,
"learning_rate": 1.7712453826155457e-05,
"loss": 0.0391,
"step": 2560
},
{
"epoch": 41.28,
"grad_norm": 0.0009496643324382603,
"learning_rate": 1.72992925612659e-05,
"loss": 0.0229,
"step": 2580
},
{
"epoch": 41.6,
"grad_norm": 0.0012078011641278863,
"learning_rate": 1.688844221359645e-05,
"loss": 0.015,
"step": 2600
},
{
"epoch": 41.92,
"grad_norm": 0.0012093032710254192,
"learning_rate": 1.6480026075864163e-05,
"loss": 0.0287,
"step": 2620
},
{
"epoch": 42.24,
"grad_norm": 0.2027181088924408,
"learning_rate": 1.6074166710300247e-05,
"loss": 0.0229,
"step": 2640
},
{
"epoch": 42.56,
"grad_norm": 0.2977555990219116,
"learning_rate": 1.567098591187021e-05,
"loss": 0.0352,
"step": 2660
},
{
"epoch": 42.88,
"grad_norm": 0.36129167675971985,
"learning_rate": 1.5270604671724188e-05,
"loss": 0.0242,
"step": 2680
},
{
"epoch": 43.2,
"grad_norm": 0.001115540275350213,
"learning_rate": 1.4873143140888538e-05,
"loss": 0.0165,
"step": 2700
},
{
"epoch": 43.52,
"grad_norm": 0.19148553907871246,
"learning_rate": 1.4478720594209532e-05,
"loss": 0.0274,
"step": 2720
},
{
"epoch": 43.84,
"grad_norm": 0.057757727801799774,
"learning_rate": 1.4087455394559984e-05,
"loss": 0.0185,
"step": 2740
},
{
"epoch": 44.16,
"grad_norm": 0.0009874219540506601,
"learning_rate": 1.369946495731954e-05,
"loss": 0.0509,
"step": 2760
},
{
"epoch": 44.48,
"grad_norm": 0.3896861672401428,
"learning_rate": 1.3314865715139346e-05,
"loss": 0.027,
"step": 2780
},
{
"epoch": 44.8,
"grad_norm": 0.19004037976264954,
"learning_rate": 1.2933773083001517e-05,
"loss": 0.0163,
"step": 2800
},
{
"epoch": 45.12,
"grad_norm": 0.0009183284710161388,
"learning_rate": 1.255630142358421e-05,
"loss": 0.0125,
"step": 2820
},
{
"epoch": 45.44,
"grad_norm": 0.1238480657339096,
"learning_rate": 1.2182564012942193e-05,
"loss": 0.0327,
"step": 2840
},
{
"epoch": 45.76,
"grad_norm": 0.0009572324343025684,
"learning_rate": 1.1812673006513789e-05,
"loss": 0.0302,
"step": 2860
},
{
"epoch": 46.08,
"grad_norm": 0.0011610776418820024,
"learning_rate": 1.14467394054639e-05,
"loss": 0.0209,
"step": 2880
},
{
"epoch": 46.4,
"grad_norm": 0.04993343725800514,
"learning_rate": 1.108487302337353e-05,
"loss": 0.025,
"step": 2900
},
{
"epoch": 46.72,
"grad_norm": 0.1806841343641281,
"learning_rate": 1.0727182453285647e-05,
"loss": 0.0284,
"step": 2920
},
{
"epoch": 47.04,
"grad_norm": 0.0011777572799474,
"learning_rate": 1.0373775035117305e-05,
"loss": 0.0174,
"step": 2940
},
{
"epoch": 47.36,
"grad_norm": 0.14497865736484528,
"learning_rate": 1.002475682344792e-05,
"loss": 0.0115,
"step": 2960
},
{
"epoch": 47.68,
"grad_norm": 0.0014984839363023639,
"learning_rate": 9.680232555693067e-06,
"loss": 0.0238,
"step": 2980
},
{
"epoch": 48.0,
"grad_norm": 0.07430601865053177,
"learning_rate": 9.340305620673778e-06,
"loss": 0.0294,
"step": 3000
},
{
"epoch": 48.32,
"grad_norm": 0.07801785320043564,
"learning_rate": 9.005078027590375e-06,
"loss": 0.0226,
"step": 3020
},
{
"epoch": 48.64,
"grad_norm": 0.0007196432561613619,
"learning_rate": 8.67465037541038e-06,
"loss": 0.0196,
"step": 3040
},
{
"epoch": 48.96,
"grad_norm": 0.0008374506141990423,
"learning_rate": 8.34912182267959e-06,
"loss": 0.0175,
"step": 3060
},
{
"epoch": 49.28,
"grad_norm": 0.0010465418454259634,
"learning_rate": 8.028590057765523e-06,
"loss": 0.015,
"step": 3080
},
{
"epoch": 49.6,
"grad_norm": 0.0007761380402371287,
"learning_rate": 7.713151269541844e-06,
"loss": 0.0221,
"step": 3100
},
{
"epoch": 49.92,
"grad_norm": 0.0216947291046381,
"learning_rate": 7.402900118522979e-06,
"loss": 0.0161,
"step": 3120
},
{
"epoch": 50.24,
"grad_norm": 0.26546710729599,
"learning_rate": 7.097929708457282e-06,
"loss": 0.0237,
"step": 3140
},
{
"epoch": 50.56,
"grad_norm": 0.0011781662469729781,
"learning_rate": 6.7983315583873695e-06,
"loss": 0.0172,
"step": 3160
},
{
"epoch": 50.88,
"grad_norm": 0.39518535137176514,
"learning_rate": 6.504195575186009e-06,
"loss": 0.0198,
"step": 3180
},
{
"epoch": 51.2,
"grad_norm": 0.3506232500076294,
"learning_rate": 6.215610026575916e-06,
"loss": 0.0227,
"step": 3200
},
{
"epoch": 51.52,
"grad_norm": 0.31244903802871704,
"learning_rate": 5.93266151464123e-06,
"loss": 0.0156,
"step": 3220
},
{
"epoch": 51.84,
"grad_norm": 0.17840787768363953,
"learning_rate": 5.655434949839061e-06,
"loss": 0.0268,
"step": 3240
},
{
"epoch": 52.16,
"grad_norm": 0.1670505702495575,
"learning_rate": 5.384013525518541e-06,
"loss": 0.0209,
"step": 3260
},
{
"epoch": 52.48,
"grad_norm": 0.0010594127234071493,
"learning_rate": 5.118478692955194e-06,
"loss": 0.0202,
"step": 3280
},
{
"epoch": 52.8,
"grad_norm": 0.0015649694250896573,
"learning_rate": 4.858910136908123e-06,
"loss": 0.0192,
"step": 3300
},
{
"epoch": 53.12,
"grad_norm": 0.19762022793293,
"learning_rate": 4.605385751707248e-06,
"loss": 0.0205,
"step": 3320
},
{
"epoch": 53.44,
"grad_norm": 0.2010522186756134,
"learning_rate": 4.357981617877932e-06,
"loss": 0.0129,
"step": 3340
},
{
"epoch": 53.76,
"grad_norm": 0.19793441891670227,
"learning_rate": 4.116771979309797e-06,
"loss": 0.0258,
"step": 3360
},
{
"epoch": 54.08,
"grad_norm": 0.2605569064617157,
"learning_rate": 3.881829220976807e-06,
"loss": 0.0306,
"step": 3380
},
{
"epoch": 54.4,
"grad_norm": 0.037421807646751404,
"learning_rate": 3.653223847215126e-06,
"loss": 0.0198,
"step": 3400
},
{
"epoch": 54.72,
"grad_norm": 0.0007586870342493057,
"learning_rate": 3.4310244605653797e-06,
"loss": 0.0257,
"step": 3420
},
{
"epoch": 55.04,
"grad_norm": 0.27584579586982727,
"learning_rate": 3.215297741185572e-06,
"loss": 0.0125,
"step": 3440
},
{
"epoch": 55.36,
"grad_norm": 0.0007228174363262951,
"learning_rate": 3.0061084268410006e-06,
"loss": 0.0124,
"step": 3460
},
{
"epoch": 55.68,
"grad_norm": 0.04090801998972893,
"learning_rate": 2.8035192934769362e-06,
"loss": 0.023,
"step": 3480
},
{
"epoch": 56.0,
"grad_norm": 0.3518761694431305,
"learning_rate": 2.607591136380122e-06,
"loss": 0.0194,
"step": 3500
},
{
"epoch": 56.32,
"grad_norm": 0.06331823766231537,
"learning_rate": 2.4183827519346308e-06,
"loss": 0.0162,
"step": 3520
},
{
"epoch": 56.64,
"grad_norm": 0.22303640842437744,
"learning_rate": 2.235950919977545e-06,
"loss": 0.0337,
"step": 3540
},
{
"epoch": 56.96,
"grad_norm": 0.08465743064880371,
"learning_rate": 2.0603503867598182e-06,
"loss": 0.0139,
"step": 3560
},
{
"epoch": 57.28,
"grad_norm": 0.20135080814361572,
"learning_rate": 1.8916338485173823e-06,
"loss": 0.0193,
"step": 3580
},
{
"epoch": 57.6,
"grad_norm": 0.0006721566896885633,
"learning_rate": 1.7298519356574727e-06,
"loss": 0.0203,
"step": 3600
},
{
"epoch": 57.92,
"grad_norm": 0.10799671709537506,
"learning_rate": 1.5750531975648324e-06,
"loss": 0.0212,
"step": 3620
},
{
"epoch": 58.24,
"grad_norm": 0.0010109569411724806,
"learning_rate": 1.4272840880324934e-06,
"loss": 0.0173,
"step": 3640
},
{
"epoch": 58.56,
"grad_norm": 0.0008448906592093408,
"learning_rate": 1.286588951321363e-06,
"loss": 0.0139,
"step": 3660
},
{
"epoch": 58.88,
"grad_norm": 0.0010856656590476632,
"learning_rate": 1.1530100088528867e-06,
"loss": 0.0268,
"step": 3680
},
{
"epoch": 59.2,
"grad_norm": 0.23958024382591248,
"learning_rate": 1.0265873465387516e-06,
"loss": 0.0191,
"step": 3700
},
{
"epoch": 59.52,
"grad_norm": 0.20584586262702942,
"learning_rate": 9.073589027514789e-07,
"loss": 0.0168,
"step": 3720
},
{
"epoch": 59.84,
"grad_norm": 0.031580936163663864,
"learning_rate": 7.953604569393841e-07,
"loss": 0.0246,
"step": 3740
},
{
"epoch": 60.16,
"grad_norm": 0.14215555787086487,
"learning_rate": 6.906256188895038e-07,
"loss": 0.019,
"step": 3760
},
{
"epoch": 60.48,
"grad_norm": 0.0012006442993879318,
"learning_rate": 5.931858186415756e-07,
"loss": 0.0168,
"step": 3780
},
{
"epoch": 60.8,
"grad_norm": 0.0063135698437690735,
"learning_rate": 5.03070297056149e-07,
"loss": 0.0197,
"step": 3800
},
{
"epoch": 61.12,
"grad_norm": 0.07496818155050278,
"learning_rate": 4.203060970396383e-07,
"loss": 0.0207,
"step": 3820
},
{
"epoch": 61.44,
"grad_norm": 0.16551247239112854,
"learning_rate": 3.4491805542899157e-07,
"loss": 0.0224,
"step": 3840
},
{
"epoch": 61.76,
"grad_norm": 0.0008456969517283142,
"learning_rate": 2.769287955383532e-07,
"loss": 0.0151,
"step": 3860
},
{
"epoch": 62.08,
"grad_norm": 0.0008134017698466778,
"learning_rate": 2.1635872037001626e-07,
"loss": 0.0284,
"step": 3880
},
{
"epoch": 62.4,
"grad_norm": 0.18878595530986786,
"learning_rate": 1.6322600649162356e-07,
"loss": 0.0217,
"step": 3900
},
{
"epoch": 62.72,
"grad_norm": 0.0008310906123369932,
"learning_rate": 1.1754659858156659e-07,
"loss": 0.0103,
"step": 3920
},
{
"epoch": 63.04,
"grad_norm": 0.38621172308921814,
"learning_rate": 7.933420464410201e-08,
"loss": 0.0333,
"step": 3940
},
{
"epoch": 63.36,
"grad_norm": 0.016794312745332718,
"learning_rate": 4.860029189569237e-08,
"loss": 0.0231,
"step": 3960
},
{
"epoch": 63.68,
"grad_norm": 0.16253815591335297,
"learning_rate": 2.535408332381417e-08,
"loss": 0.0226,
"step": 3980
},
{
"epoch": 64.0,
"grad_norm": 0.2387680560350418,
"learning_rate": 9.60255491919415e-09,
"loss": 0.0218,
"step": 4000
},
{
"epoch": 64.32,
"grad_norm": 0.16293394565582275,
"learning_rate": 1.3504335823810722e-09,
"loss": 0.0219,
"step": 4020
},
{
"epoch": 64.48,
"step": 4030,
"total_flos": 2.3325606118844006e+17,
"train_loss": 0.1495482857003993,
"train_runtime": 6882.5617,
"train_samples_per_second": 4.722,
"train_steps_per_second": 0.586
}
],
"logging_steps": 20,
"max_steps": 4030,
"num_input_tokens_seen": 0,
"num_train_epochs": 65,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 2.3325606118844006e+17,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}