ErrorAI committed
Commit 41e9e58 · verified · 1 Parent(s): 8f767fc

Training in progress, step 372, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:58100fe655c8ed92d4518fef45e29df4d60c7e291f66f716456f73e4ea77f392
+oid sha256:4db2aaaf0e71f4163eb1948613e8b578254a0de5a98794daab0fd666ff0e9335
 size 144805440
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ab152d90f6d47ebd8a356ceecca0993fb077dcff867c7c58ca00456e2cfcd04b
-size 74291604
+oid sha256:97e66fb0be2050837da15f447248f80a1c314ca1bc2a805d9029af38817476ae
+size 74292308
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f660891b19594633ced246d59eedd400fe2556d319f4e5ca333df7fb57888180
+oid sha256:956eaecea1513e07a98b3b792863d7c1c440c5da69923f2e87c397dfc0da01f4
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:81771ff96e80b84ed048126e169640f8617ceb476fe2f91b8561190057e53b0d
+oid sha256:d6484c651d3d8bf75888a89e2001dae4da70b271bc6cafa91a994f199bf79e17
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.5002521432173475,
+  "epoch": 0.7503782148260212,
   "eval_steps": 500,
-  "global_step": 248,
+  "global_step": 372,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1743,6 +1743,874 @@
       "learning_rate": 5.079976035714976e-05,
       "loss": 0.7198,
       "step": 248
+    },
+    {
+      "epoch": 0.5022692889561271,
+      "grad_norm": 0.6312721371650696,
+      "learning_rate": 5.047986931116205e-05,
+      "loss": 0.7787,
+      "step": 249
+    },
+    {
+      "epoch": 0.5042864346949067,
+      "grad_norm": 0.6478500962257385,
+      "learning_rate": 5.015995861989287e-05,
+      "loss": 0.6478,
+      "step": 250
+    },
+    {
+      "epoch": 0.5063035804336863,
+      "grad_norm": 0.370857834815979,
+      "learning_rate": 4.984004138010715e-05,
+      "loss": 0.4671,
+      "step": 251
+    },
+    {
+      "epoch": 0.5083207261724659,
+      "grad_norm": 0.432366281747818,
+      "learning_rate": 4.952013068883795e-05,
+      "loss": 0.6026,
+      "step": 252
+    },
+    {
+      "epoch": 0.5103378719112456,
+      "grad_norm": 0.4328523576259613,
+      "learning_rate": 4.920023964285025e-05,
+      "loss": 0.6082,
+      "step": 253
+    },
+    {
+      "epoch": 0.5123550176500252,
+      "grad_norm": 0.40599900484085083,
+      "learning_rate": 4.8880381338104777e-05,
+      "loss": 0.4708,
+      "step": 254
+    },
+    {
+      "epoch": 0.5143721633888049,
+      "grad_norm": 0.43550002574920654,
+      "learning_rate": 4.8560568869221805e-05,
+      "loss": 0.5816,
+      "step": 255
+    },
+    {
+      "epoch": 0.5163893091275845,
+      "grad_norm": 0.40016648173332214,
+      "learning_rate": 4.8240815328945226e-05,
+      "loss": 0.5691,
+      "step": 256
+    },
+    {
+      "epoch": 0.518406454866364,
+      "grad_norm": 0.437029629945755,
+      "learning_rate": 4.7921133807606424e-05,
+      "loss": 0.5555,
+      "step": 257
+    },
+    {
+      "epoch": 0.5204236006051437,
+      "grad_norm": 0.39981645345687866,
+      "learning_rate": 4.760153739258843e-05,
+      "loss": 0.5786,
+      "step": 258
+    },
+    {
+      "epoch": 0.5224407463439233,
+      "grad_norm": 0.4225994646549225,
+      "learning_rate": 4.728203916779009e-05,
+      "loss": 0.5744,
+      "step": 259
+    },
+    {
+      "epoch": 0.524457892082703,
+      "grad_norm": 0.4146437346935272,
+      "learning_rate": 4.69626522130905e-05,
+      "loss": 0.5433,
+      "step": 260
+    },
+    {
+      "epoch": 0.5264750378214826,
+      "grad_norm": 0.41293078660964966,
+      "learning_rate": 4.6643389603813486e-05,
+      "loss": 0.5461,
+      "step": 261
+    },
+    {
+      "epoch": 0.5284921835602622,
+      "grad_norm": 0.4155355989933014,
+      "learning_rate": 4.632426441019227e-05,
+      "loss": 0.5554,
+      "step": 262
+    },
+    {
+      "epoch": 0.5305093292990418,
+      "grad_norm": 0.4077325761318207,
+      "learning_rate": 4.600528969683448e-05,
+      "loss": 0.4872,
+      "step": 263
+    },
+    {
+      "epoch": 0.5325264750378215,
+      "grad_norm": 0.4461796283721924,
+      "learning_rate": 4.568647852218725e-05,
+      "loss": 0.5266,
+      "step": 264
+    },
+    {
+      "epoch": 0.5345436207766011,
+      "grad_norm": 0.40012630820274353,
+      "learning_rate": 4.5367843938002694e-05,
+      "loss": 0.5459,
+      "step": 265
+    },
+    {
+      "epoch": 0.5365607665153808,
+      "grad_norm": 0.45490312576293945,
+      "learning_rate": 4.504939898880339e-05,
+      "loss": 0.6255,
+      "step": 266
+    },
+    {
+      "epoch": 0.5385779122541604,
+      "grad_norm": 0.41126129031181335,
+      "learning_rate": 4.473115671134859e-05,
+      "loss": 0.5786,
+      "step": 267
+    },
+    {
+      "epoch": 0.5405950579929399,
+      "grad_norm": 0.3975431025028229,
+      "learning_rate": 4.441313013410039e-05,
+      "loss": 0.5707,
+      "step": 268
+    },
+    {
+      "epoch": 0.5426122037317196,
+      "grad_norm": 0.40477657318115234,
+      "learning_rate": 4.409533227669033e-05,
+      "loss": 0.5079,
+      "step": 269
+    },
+    {
+      "epoch": 0.5446293494704992,
+      "grad_norm": 0.4053346514701843,
+      "learning_rate": 4.377777614938647e-05,
+      "loss": 0.6502,
+      "step": 270
+    },
+    {
+      "epoch": 0.5466464952092789,
+      "grad_norm": 0.4417206346988678,
+      "learning_rate": 4.3460474752560724e-05,
+      "loss": 0.6215,
+      "step": 271
+    },
+    {
+      "epoch": 0.5486636409480585,
+      "grad_norm": 0.4087637960910797,
+      "learning_rate": 4.314344107615665e-05,
+      "loss": 0.5612,
+      "step": 272
+    },
+    {
+      "epoch": 0.5506807866868382,
+      "grad_norm": 0.3786230683326721,
+      "learning_rate": 4.282668809915758e-05,
+      "loss": 0.5299,
+      "step": 273
+    },
+    {
+      "epoch": 0.5526979324256177,
+      "grad_norm": 0.3989951014518738,
+      "learning_rate": 4.251022878905543e-05,
+      "loss": 0.5832,
+      "step": 274
+    },
+    {
+      "epoch": 0.5547150781643974,
+      "grad_norm": 0.45247653126716614,
+      "learning_rate": 4.219407610131971e-05,
+      "loss": 0.5932,
+      "step": 275
+    },
+    {
+      "epoch": 0.556732223903177,
+      "grad_norm": 0.40874481201171875,
+      "learning_rate": 4.187824297886715e-05,
+      "loss": 0.6128,
+      "step": 276
+    },
+    {
+      "epoch": 0.5587493696419567,
+      "grad_norm": 0.42016932368278503,
+      "learning_rate": 4.156274235153189e-05,
+      "loss": 0.536,
+      "step": 277
+    },
+    {
+      "epoch": 0.5607665153807363,
+      "grad_norm": 0.4106806814670563,
+      "learning_rate": 4.1247587135536126e-05,
+      "loss": 0.587,
+      "step": 278
+    },
+    {
+      "epoch": 0.5627836611195158,
+      "grad_norm": 0.4463154077529907,
+      "learning_rate": 4.0932790232961235e-05,
+      "loss": 0.5121,
+      "step": 279
+    },
+    {
+      "epoch": 0.5648008068582955,
+      "grad_norm": 0.45139235258102417,
+      "learning_rate": 4.0618364531219775e-05,
+      "loss": 0.5219,
+      "step": 280
+    },
+    {
+      "epoch": 0.5668179525970751,
+      "grad_norm": 0.45914503931999207,
+      "learning_rate": 4.030432290252771e-05,
+      "loss": 0.6114,
+      "step": 281
+    },
+    {
+      "epoch": 0.5688350983358548,
+      "grad_norm": 0.43565309047698975,
+      "learning_rate": 3.999067820337757e-05,
+      "loss": 0.577,
+      "step": 282
+    },
+    {
+      "epoch": 0.5708522440746344,
+      "grad_norm": 0.4499742090702057,
+      "learning_rate": 3.967744327401197e-05,
+      "loss": 0.537,
+      "step": 283
+    },
+    {
+      "epoch": 0.5728693898134141,
+      "grad_norm": 0.44511550664901733,
+      "learning_rate": 3.936463093789813e-05,
+      "loss": 0.6069,
+      "step": 284
+    },
+    {
+      "epoch": 0.5748865355521936,
+      "grad_norm": 0.4442926347255707,
+      "learning_rate": 3.9052254001202746e-05,
+      "loss": 0.601,
+      "step": 285
+    },
+    {
+      "epoch": 0.5769036812909732,
+      "grad_norm": 0.4780077636241913,
+      "learning_rate": 3.8740325252267785e-05,
+      "loss": 0.5617,
+      "step": 286
+    },
+    {
+      "epoch": 0.5789208270297529,
+      "grad_norm": 0.4674513638019562,
+      "learning_rate": 3.842885746108693e-05,
+      "loss": 0.6245,
+      "step": 287
+    },
+    {
+      "epoch": 0.5809379727685325,
+      "grad_norm": 0.49114686250686646,
+      "learning_rate": 3.811786337878284e-05,
+      "loss": 0.6625,
+      "step": 288
+    },
+    {
+      "epoch": 0.5829551185073122,
+      "grad_norm": 0.5057556629180908,
+      "learning_rate": 3.780735573708508e-05,
+      "loss": 0.6384,
+      "step": 289
+    },
+    {
+      "epoch": 0.5849722642460918,
+      "grad_norm": 0.4727246165275574,
+      "learning_rate": 3.7497347247808846e-05,
+      "loss": 0.7091,
+      "step": 290
+    },
+    {
+      "epoch": 0.5869894099848714,
+      "grad_norm": 0.5270060300827026,
+      "learning_rate": 3.718785060233471e-05,
+      "loss": 0.673,
+      "step": 291
+    },
+    {
+      "epoch": 0.589006555723651,
+      "grad_norm": 0.5089999437332153,
+      "learning_rate": 3.687887847108894e-05,
+      "loss": 0.6378,
+      "step": 292
+    },
+    {
+      "epoch": 0.5910237014624307,
+      "grad_norm": 0.5260120630264282,
+      "learning_rate": 3.657044350302479e-05,
+      "loss": 0.6278,
+      "step": 293
+    },
+    {
+      "epoch": 0.5930408472012103,
+      "grad_norm": 0.5153591632843018,
+      "learning_rate": 3.6262558325104695e-05,
+      "loss": 0.6463,
+      "step": 294
+    },
+    {
+      "epoch": 0.59505799293999,
+      "grad_norm": 0.5067986845970154,
+      "learning_rate": 3.595523554178336e-05,
+      "loss": 0.6946,
+      "step": 295
+    },
+    {
+      "epoch": 0.5970751386787695,
+      "grad_norm": 0.5289713740348816,
+      "learning_rate": 3.564848773449172e-05,
+      "loss": 0.6358,
+      "step": 296
+    },
+    {
+      "epoch": 0.5990922844175491,
+      "grad_norm": 0.5743799805641174,
+      "learning_rate": 3.5342327461121805e-05,
+      "loss": 0.6664,
+      "step": 297
+    },
+    {
+      "epoch": 0.6011094301563288,
+      "grad_norm": 0.6473139524459839,
+      "learning_rate": 3.503676725551278e-05,
+      "loss": 0.6851,
+      "step": 298
+    },
+    {
+      "epoch": 0.6031265758951084,
+      "grad_norm": 0.6861950755119324,
+      "learning_rate": 3.473181962693773e-05,
+      "loss": 0.6171,
+      "step": 299
+    },
+    {
+      "epoch": 0.6051437216338881,
+      "grad_norm": 0.6407880783081055,
+      "learning_rate": 3.442749705959152e-05,
+      "loss": 0.5958,
+      "step": 300
+    },
+    {
+      "epoch": 0.6071608673726677,
+      "grad_norm": 0.3541804552078247,
+      "learning_rate": 3.412381201207979e-05,
+      "loss": 0.5327,
+      "step": 301
+    },
+    {
+      "epoch": 0.6091780131114473,
+      "grad_norm": 0.3857956528663635,
+      "learning_rate": 3.3820776916908857e-05,
+      "loss": 0.5675,
+      "step": 302
+    },
+    {
+      "epoch": 0.6111951588502269,
+      "grad_norm": 0.41454771161079407,
+      "learning_rate": 3.351840417997679e-05,
+      "loss": 0.5419,
+      "step": 303
+    },
+    {
+      "epoch": 0.6132123045890066,
+      "grad_norm": 0.41526558995246887,
+      "learning_rate": 3.321670618006543e-05,
+      "loss": 0.5429,
+      "step": 304
+    },
+    {
+      "epoch": 0.6152294503277862,
+      "grad_norm": 0.4046734571456909,
+      "learning_rate": 3.291569526833372e-05,
+      "loss": 0.5813,
+      "step": 305
+    },
+    {
+      "epoch": 0.6172465960665658,
+      "grad_norm": 0.43739229440689087,
+      "learning_rate": 3.2615383767812056e-05,
+      "loss": 0.6181,
+      "step": 306
+    },
+    {
+      "epoch": 0.6192637418053455,
+      "grad_norm": 0.42888522148132324,
+      "learning_rate": 3.231578397289772e-05,
+      "loss": 0.492,
+      "step": 307
+    },
+    {
+      "epoch": 0.621280887544125,
+      "grad_norm": 0.4459590017795563,
+      "learning_rate": 3.2016908148851624e-05,
+      "loss": 0.5323,
+      "step": 308
+    },
+    {
+      "epoch": 0.6232980332829047,
+      "grad_norm": 0.440335750579834,
+      "learning_rate": 3.1718768531296196e-05,
+      "loss": 0.5439,
+      "step": 309
+    },
+    {
+      "epoch": 0.6253151790216843,
+      "grad_norm": 0.422626793384552,
+      "learning_rate": 3.142137732571437e-05,
+      "loss": 0.538,
+      "step": 310
+    },
+    {
+      "epoch": 0.627332324760464,
+      "grad_norm": 0.3488753139972687,
+      "learning_rate": 3.112474670695008e-05,
+      "loss": 0.4664,
+      "step": 311
+    },
+    {
+      "epoch": 0.6293494704992436,
+      "grad_norm": 0.4092547297477722,
+      "learning_rate": 3.0828888818709656e-05,
+      "loss": 0.4762,
+      "step": 312
+    },
+    {
+      "epoch": 0.6313666162380231,
+      "grad_norm": 0.3810945749282837,
+      "learning_rate": 3.053381577306481e-05,
+      "loss": 0.4807,
+      "step": 313
+    },
+    {
+      "epoch": 0.6333837619768028,
+      "grad_norm": 0.3901941180229187,
+      "learning_rate": 3.0239539649956665e-05,
+      "loss": 0.505,
+      "step": 314
+    },
+    {
+      "epoch": 0.6354009077155824,
+      "grad_norm": 0.3677188456058502,
+      "learning_rate": 2.9946072496701334e-05,
+      "loss": 0.5334,
+      "step": 315
+    },
+    {
+      "epoch": 0.6374180534543621,
+      "grad_norm": 0.3920693099498749,
+      "learning_rate": 2.9653426327496647e-05,
+      "loss": 0.5632,
+      "step": 316
+    },
+    {
+      "epoch": 0.6394351991931417,
+      "grad_norm": 0.3857486844062805,
+      "learning_rate": 2.9361613122930304e-05,
+      "loss": 0.5151,
+      "step": 317
+    },
+    {
+      "epoch": 0.6414523449319214,
+      "grad_norm": 0.4166855812072754,
+      "learning_rate": 2.9070644829489434e-05,
+      "loss": 0.534,
+      "step": 318
+    },
+    {
+      "epoch": 0.6434694906707009,
+      "grad_norm": 0.40017688274383545,
+      "learning_rate": 2.8780533359071504e-05,
+      "loss": 0.4967,
+      "step": 319
+    },
+    {
+      "epoch": 0.6454866364094806,
+      "grad_norm": 0.45257654786109924,
+      "learning_rate": 2.8491290588496668e-05,
+      "loss": 0.5979,
+      "step": 320
+    },
+    {
+      "epoch": 0.6475037821482602,
+      "grad_norm": 0.4371342360973358,
+      "learning_rate": 2.820292835902148e-05,
+      "loss": 0.5411,
+      "step": 321
+    },
+    {
+      "epoch": 0.6495209278870399,
+      "grad_norm": 0.4240793287754059,
+      "learning_rate": 2.7915458475854283e-05,
+      "loss": 0.5436,
+      "step": 322
+    },
+    {
+      "epoch": 0.6515380736258195,
+      "grad_norm": 0.41990602016448975,
+      "learning_rate": 2.762889270767175e-05,
+      "loss": 0.523,
+      "step": 323
+    },
+    {
+      "epoch": 0.653555219364599,
+      "grad_norm": 0.45127952098846436,
+      "learning_rate": 2.7343242786137168e-05,
+      "loss": 0.5283,
+      "step": 324
+    },
+    {
+      "epoch": 0.6555723651033787,
+      "grad_norm": 0.4676419496536255,
+      "learning_rate": 2.7058520405420123e-05,
+      "loss": 0.5815,
+      "step": 325
+    },
+    {
+      "epoch": 0.6575895108421583,
+      "grad_norm": 0.42602765560150146,
+      "learning_rate": 2.677473722171786e-05,
+      "loss": 0.5368,
+      "step": 326
+    },
+    {
+      "epoch": 0.659606656580938,
+      "grad_norm": 0.43028366565704346,
+      "learning_rate": 2.649190485277792e-05,
+      "loss": 0.5435,
+      "step": 327
+    },
+    {
+      "epoch": 0.6616238023197176,
+      "grad_norm": 0.4180251955986023,
+      "learning_rate": 2.621003487742264e-05,
+      "loss": 0.529,
+      "step": 328
+    },
+    {
+      "epoch": 0.6636409480584973,
+      "grad_norm": 0.4283643364906311,
+      "learning_rate": 2.5929138835075152e-05,
+      "loss": 0.6257,
+      "step": 329
+    },
+    {
+      "epoch": 0.6656580937972768,
+      "grad_norm": 0.4437110424041748,
+      "learning_rate": 2.564922822528686e-05,
+      "loss": 0.5602,
+      "step": 330
+    },
+    {
+      "epoch": 0.6676752395360565,
+      "grad_norm": 0.464622437953949,
+      "learning_rate": 2.5370314507266756e-05,
+      "loss": 0.5834,
+      "step": 331
+    },
+    {
+      "epoch": 0.6696923852748361,
+      "grad_norm": 0.4433290958404541,
+      "learning_rate": 2.5092409099412227e-05,
+      "loss": 0.5993,
+      "step": 332
+    },
+    {
+      "epoch": 0.6717095310136157,
+      "grad_norm": 0.4574563503265381,
+      "learning_rate": 2.4815523378841726e-05,
+      "loss": 0.6211,
+      "step": 333
+    },
+    {
+      "epoch": 0.6737266767523954,
+      "grad_norm": 0.43739160895347595,
+      "learning_rate": 2.4539668680928784e-05,
+      "loss": 0.4989,
+      "step": 334
+    },
+    {
+      "epoch": 0.675743822491175,
+      "grad_norm": 0.4508601129055023,
+      "learning_rate": 2.4264856298838213e-05,
+      "loss": 0.6101,
+      "step": 335
+    },
+    {
+      "epoch": 0.6777609682299546,
+      "grad_norm": 0.4525294005870819,
+      "learning_rate": 2.399109748306355e-05,
+      "loss": 0.5605,
+      "step": 336
+    },
+    {
+      "epoch": 0.6797781139687342,
+      "grad_norm": 0.444223552942276,
+      "learning_rate": 2.371840344096665e-05,
+      "loss": 0.5952,
+      "step": 337
+    },
+    {
+      "epoch": 0.6817952597075139,
+      "grad_norm": 0.4897475242614746,
+      "learning_rate": 2.3446785336318754e-05,
+      "loss": 0.5829,
+      "step": 338
+    },
+    {
+      "epoch": 0.6838124054462935,
+      "grad_norm": 0.48455387353897095,
+      "learning_rate": 2.317625428884348e-05,
+      "loss": 0.6363,
+      "step": 339
+    },
+    {
+      "epoch": 0.6858295511850732,
+      "grad_norm": 0.5362589955329895,
+      "learning_rate": 2.290682137376169e-05,
+      "loss": 0.6597,
+      "step": 340
+    },
+    {
+      "epoch": 0.6878466969238527,
+      "grad_norm": 0.518014669418335,
+      "learning_rate": 2.263849762133788e-05,
+      "loss": 0.5839,
+      "step": 341
+    },
+    {
+      "epoch": 0.6898638426626323,
+      "grad_norm": 0.504257082939148,
+      "learning_rate": 2.237129401642887e-05,
+      "loss": 0.6091,
+      "step": 342
+    },
+    {
+      "epoch": 0.691880988401412,
+      "grad_norm": 0.5075199007987976,
+      "learning_rate": 2.2105221498033862e-05,
+      "loss": 0.5976,
+      "step": 343
+    },
+    {
+      "epoch": 0.6938981341401916,
+      "grad_norm": 0.5348207354545593,
+      "learning_rate": 2.1840290958846816e-05,
+      "loss": 0.6331,
+      "step": 344
+    },
+    {
+      "epoch": 0.6959152798789713,
+      "grad_norm": 0.6212106347084045,
+      "learning_rate": 2.157651324481033e-05,
+      "loss": 0.7058,
+      "step": 345
+    },
+    {
+      "epoch": 0.6979324256177509,
+      "grad_norm": 0.5681262612342834,
+      "learning_rate": 2.131389915467173e-05,
+      "loss": 0.6257,
+      "step": 346
+    },
+    {
+      "epoch": 0.6999495713565305,
+      "grad_norm": 0.5732640624046326,
+      "learning_rate": 2.1052459439541005e-05,
+      "loss": 0.6812,
+      "step": 347
+    },
+    {
+      "epoch": 0.7019667170953101,
+      "grad_norm": 0.5906322002410889,
+      "learning_rate": 2.0792204802450515e-05,
+      "loss": 0.6659,
+      "step": 348
+    },
+    {
+      "epoch": 0.7039838628340898,
+      "grad_norm": 0.6746000051498413,
+      "learning_rate": 2.0533145897917057e-05,
+      "loss": 0.6781,
+      "step": 349
+    },
+    {
+      "epoch": 0.7060010085728694,
+      "grad_norm": 0.7686178684234619,
+      "learning_rate": 2.0275293331505436e-05,
+      "loss": 0.7789,
+      "step": 350
+    },
+    {
+      "epoch": 0.708018154311649,
+      "grad_norm": 0.3885047733783722,
+      "learning_rate": 2.0018657659394496e-05,
+      "loss": 0.5026,
+      "step": 351
+    },
+    {
+      "epoch": 0.7100353000504287,
+      "grad_norm": 0.4007488489151001,
+      "learning_rate": 1.976324938794482e-05,
+      "loss": 0.5077,
+      "step": 352
+    },
+    {
+      "epoch": 0.7120524457892082,
+      "grad_norm": 0.4343924820423126,
+      "learning_rate": 1.9509078973268645e-05,
+      "loss": 0.5361,
+      "step": 353
+    },
+    {
+      "epoch": 0.7140695915279879,
+      "grad_norm": 0.42959120869636536,
+      "learning_rate": 1.9256156820801895e-05,
+      "loss": 0.551,
+      "step": 354
+    },
+    {
+      "epoch": 0.7160867372667675,
+      "grad_norm": 0.4766654372215271,
+      "learning_rate": 1.9004493284877995e-05,
+      "loss": 0.5484,
+      "step": 355
+    },
+    {
+      "epoch": 0.7181038830055472,
+      "grad_norm": 0.41945040225982666,
+      "learning_rate": 1.875409866830422e-05,
+      "loss": 0.5211,
+      "step": 356
+    },
+    {
+      "epoch": 0.7201210287443268,
+      "grad_norm": 0.4797287881374359,
+      "learning_rate": 1.850498322193972e-05,
+      "loss": 0.531,
+      "step": 357
+    },
+    {
+      "epoch": 0.7221381744831064,
+      "grad_norm": 0.4228487014770508,
+      "learning_rate": 1.825715714427594e-05,
+      "loss": 0.5317,
+      "step": 358
+    },
+    {
+      "epoch": 0.724155320221886,
+      "grad_norm": 0.4410184919834137,
+      "learning_rate": 1.8010630581019095e-05,
+      "loss": 0.5857,
+      "step": 359
+    },
+    {
+      "epoch": 0.7261724659606656,
+      "grad_norm": 0.3858675956726074,
+      "learning_rate": 1.7765413624674866e-05,
+      "loss": 0.4873,
+      "step": 360
+    },
+    {
+      "epoch": 0.7281896116994453,
+      "grad_norm": 0.42531728744506836,
+      "learning_rate": 1.752151631413511e-05,
+      "loss": 0.4964,
+      "step": 361
+    },
+    {
+      "epoch": 0.7302067574382249,
+      "grad_norm": 0.4145807921886444,
+      "learning_rate": 1.7278948634266968e-05,
+      "loss": 0.5002,
+      "step": 362
+    },
+    {
+      "epoch": 0.7322239031770046,
+      "grad_norm": 0.41224363446235657,
+      "learning_rate": 1.703772051550412e-05,
+      "loss": 0.5269,
+      "step": 363
+    },
+    {
+      "epoch": 0.7342410489157841,
+      "grad_norm": 0.3786209225654602,
+      "learning_rate": 1.679784183344014e-05,
+      "loss": 0.4877,
+      "step": 364
+    },
+    {
+      "epoch": 0.7362581946545638,
+      "grad_norm": 0.4014703035354614,
+      "learning_rate": 1.6559322408424287e-05,
+      "loss": 0.4801,
+      "step": 365
+    },
+    {
+      "epoch": 0.7382753403933434,
+      "grad_norm": 0.4308634400367737,
+      "learning_rate": 1.6322172005159435e-05,
+      "loss": 0.5762,
+      "step": 366
+    },
+    {
+      "epoch": 0.7402924861321231,
+      "grad_norm": 0.39501944184303284,
+      "learning_rate": 1.608640033230236e-05,
+      "loss": 0.5248,
+      "step": 367
+    },
+    {
+      "epoch": 0.7423096318709027,
+      "grad_norm": 0.44487130641937256,
+      "learning_rate": 1.5852017042066214e-05,
+      "loss": 0.5026,
+      "step": 368
+    },
+    {
+      "epoch": 0.7443267776096822,
+      "grad_norm": 0.41640734672546387,
+      "learning_rate": 1.5619031729825402e-05,
+      "loss": 0.5583,
+      "step": 369
+    },
+    {
+      "epoch": 0.7463439233484619,
+      "grad_norm": 0.42025187611579895,
+      "learning_rate": 1.538745393372281e-05,
+      "loss": 0.5471,
+      "step": 370
+    },
+    {
+      "epoch": 0.7483610690872415,
+      "grad_norm": 0.42271459102630615,
+      "learning_rate": 1.5157293134279244e-05,
+      "loss": 0.5239,
+      "step": 371
+    },
+    {
+      "epoch": 0.7503782148260212,
+      "grad_norm": 0.47474098205566406,
+      "learning_rate": 1.492855875400534e-05,
+      "loss": 0.5699,
+      "step": 372
     }
   ],
   "logging_steps": 1,
@@ -1762,7 +2630,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4.194181341983539e+17,
+  "total_flos": 6.386815140070687e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null