Training in progress, step 300, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +2 -2
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +713 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:67310b278b346aedcf42b828dbaf252c4be0c671c62ea6402e97e1f206529528
 size 35237104

 version https://git-lfs.github.com/spec/v1
+oid sha256:8badaa52ca519105a5380878a96d36847cb55d906e0d2e705f3717f2655e7067
 size 35237104

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:89489c4a73e0c279f6e9957d5d7fad97283becf32320e68e655449cad2d8ee23
-size 18810036

 version https://git-lfs.github.com/spec/v1
+oid sha256:caec36497ef1581c061792ab4a36c00600adbe0322f14b4febe97fd488c40641
+size 18810356

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8d3f0d3c698a0d9d24c459f7ae6628b03169d5174249692ec48942dde2ca8f67
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:d23a005f828083a9add1710a8dc106dbb2334d4fcd07e84318a29c2a3ef22527
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c7379819866926b736a73507411de0a14a4ceb99f31c9ea0971e466e8e375a5a
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:ea211bfadd2ed8fd9c0d8cd735af3562d7b6f16823ff7f40e995c8863ea26ad0
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 3.2505762577056885,
-  "best_model_checkpoint": "miner_id_24/checkpoint-200",
-  "epoch": 0.12972271769093563,
   "eval_steps": 100,
-  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1431,6 +1431,714 @@
       "eval_samples_per_second": 58.69,
       "eval_steps_per_second": 14.673,
       "step": 200
     }
   ],
   "logging_steps": 1,
@@ -1459,7 +2167,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.4419086016512e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 3.1460793018341064,
+  "best_model_checkpoint": "miner_id_24/checkpoint-300",
+  "epoch": 0.19458407653640344,
   "eval_steps": 100,
+  "global_step": 300,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 58.69,
       "eval_steps_per_second": 14.673,
       "step": 200
+    },
+    {
+      "epoch": 0.1303713312793903,
+      "grad_norm": 1.5361113548278809,
+      "learning_rate": 0.00019810089346802346,
+      "loss": 2.9761,
+      "step": 201
+    },
+    {
+      "epoch": 0.131019944867845,
+      "grad_norm": 1.7124556303024292,
+      "learning_rate": 0.0001980810194908336,
+      "loss": 3.1084,
+      "step": 202
+    },
+    {
+      "epoch": 0.13166855845629966,
+      "grad_norm": 1.9076920747756958,
+      "learning_rate": 0.00019806104307190222,
+      "loss": 3.4621,
+      "step": 203
+    },
+    {
+      "epoch": 0.13231717204475435,
+      "grad_norm": 1.381435751914978,
+      "learning_rate": 0.00019804096423209398,
+      "loss": 2.7652,
+      "step": 204
+    },
+    {
+      "epoch": 0.13296578563320902,
+      "grad_norm": 1.352182149887085,
+      "learning_rate": 0.00019802078299238044,
+      "loss": 2.717,
+      "step": 205
+    },
+    {
+      "epoch": 0.13361439922166368,
+      "grad_norm": 1.5664377212524414,
+      "learning_rate": 0.00019800049937384004,
+      "loss": 2.9769,
+      "step": 206
+    },
+    {
+      "epoch": 0.13426301281011838,
+      "grad_norm": 1.786026120185852,
+      "learning_rate": 0.00019798011339765826,
+      "loss": 3.341,
+      "step": 207
+    },
+    {
+      "epoch": 0.13491162639857304,
+      "grad_norm": 1.4288690090179443,
+      "learning_rate": 0.00019795962508512742,
+      "loss": 2.676,
+      "step": 208
+    },
+    {
+      "epoch": 0.13556023998702774,
+      "grad_norm": 1.6291303634643555,
+      "learning_rate": 0.00019793903445764675,
+      "loss": 3.1092,
+      "step": 209
+    },
+    {
+      "epoch": 0.1362088535754824,
+      "grad_norm": 1.7784898281097412,
+      "learning_rate": 0.0001979183415367224,
+      "loss": 3.4292,
+      "step": 210
+    },
+    {
+      "epoch": 0.1368574671639371,
+      "grad_norm": 1.7724665403366089,
+      "learning_rate": 0.00019789754634396724,
+      "loss": 3.3831,
+      "step": 211
+    },
+    {
+      "epoch": 0.13750608075239176,
+      "grad_norm": 1.4297045469284058,
+      "learning_rate": 0.00019787664890110108,
+      "loss": 2.8106,
+      "step": 212
+    },
+    {
+      "epoch": 0.13815469434084643,
+      "grad_norm": 1.650215983390808,
+      "learning_rate": 0.0001978556492299504,
+      "loss": 3.1152,
+      "step": 213
+    },
+    {
+      "epoch": 0.13880330792930112,
+      "grad_norm": 1.4097318649291992,
+      "learning_rate": 0.0001978345473524486,
+      "loss": 3.1504,
+      "step": 214
+    },
+    {
+      "epoch": 0.1394519215177558,
+      "grad_norm": 1.5713872909545898,
+      "learning_rate": 0.00019781334329063572,
+      "loss": 3.5687,
+      "step": 215
+    },
+    {
+      "epoch": 0.14010053510621048,
+      "grad_norm": 1.2881985902786255,
+      "learning_rate": 0.00019779203706665857,
+      "loss": 3.0303,
+      "step": 216
+    },
+    {
+      "epoch": 0.14074914869466515,
+      "grad_norm": 1.3899292945861816,
+      "learning_rate": 0.00019777062870277064,
+      "loss": 2.7401,
+      "step": 217
+    },
+    {
+      "epoch": 0.14139776228311984,
+      "grad_norm": 1.5206934213638306,
+      "learning_rate": 0.00019774911822133216,
+      "loss": 2.8458,
+      "step": 218
+    },
+    {
+      "epoch": 0.1420463758715745,
+      "grad_norm": 1.777827262878418,
+      "learning_rate": 0.00019772750564480993,
+      "loss": 3.1399,
+      "step": 219
+    },
+    {
+      "epoch": 0.14269498946002918,
+      "grad_norm": 1.5830134153366089,
+      "learning_rate": 0.00019770579099577743,
+      "loss": 2.9685,
+      "step": 220
+    },
+    {
+      "epoch": 0.14334360304848387,
+      "grad_norm": 1.619513750076294,
+      "learning_rate": 0.0001976839742969148,
+      "loss": 3.1719,
+      "step": 221
+    },
+    {
+      "epoch": 0.14399221663693854,
+      "grad_norm": 1.6449589729309082,
+      "learning_rate": 0.00019766205557100868,
+      "loss": 3.1031,
+      "step": 222
+    },
+    {
+      "epoch": 0.14464083022539323,
+      "grad_norm": 1.5686246156692505,
+      "learning_rate": 0.0001976400348409523,
+      "loss": 3.1232,
+      "step": 223
+    },
+    {
+      "epoch": 0.1452894438138479,
+      "grad_norm": 1.781656265258789,
+      "learning_rate": 0.0001976179121297455,
+      "loss": 3.3703,
+      "step": 224
+    },
+    {
+      "epoch": 0.1459380574023026,
+      "grad_norm": 1.6864426136016846,
+      "learning_rate": 0.00019759568746049452,
+      "loss": 3.3688,
+      "step": 225
+    },
+    {
+      "epoch": 0.14658667099075726,
+      "grad_norm": 1.545371651649475,
+      "learning_rate": 0.00019757336085641218,
+      "loss": 3.3151,
+      "step": 226
+    },
+    {
+      "epoch": 0.14723528457921192,
+      "grad_norm": 1.3052035570144653,
+      "learning_rate": 0.0001975509323408177,
+      "loss": 2.8913,
+      "step": 227
+    },
+    {
+      "epoch": 0.14788389816766662,
+      "grad_norm": 1.2720938920974731,
+      "learning_rate": 0.0001975284019371368,
+      "loss": 3.0601,
+      "step": 228
+    },
+    {
+      "epoch": 0.14853251175612128,
+      "grad_norm": 1.3967403173446655,
+      "learning_rate": 0.00019750576966890158,
+      "loss": 3.1093,
+      "step": 229
+    },
+    {
+      "epoch": 0.14918112534457598,
+      "grad_norm": 1.4218616485595703,
+      "learning_rate": 0.00019748303555975057,
+      "loss": 2.8957,
+      "step": 230
+    },
+    {
+      "epoch": 0.14982973893303064,
+      "grad_norm": 1.502661943435669,
+      "learning_rate": 0.0001974601996334286,
+      "loss": 3.1115,
+      "step": 231
+    },
+    {
+      "epoch": 0.15047835252148534,
+      "grad_norm": 1.4609853029251099,
+      "learning_rate": 0.00019743726191378698,
+      "loss": 2.9707,
+      "step": 232
+    },
+    {
+      "epoch": 0.15112696610994,
+      "grad_norm": 1.4173173904418945,
+      "learning_rate": 0.00019741422242478316,
+      "loss": 2.7085,
+      "step": 233
+    },
+    {
+      "epoch": 0.15177557969839467,
+      "grad_norm": 1.6199264526367188,
+      "learning_rate": 0.000197391081190481,
+      "loss": 3.1486,
+      "step": 234
+    },
+    {
+      "epoch": 0.15242419328684936,
+      "grad_norm": 1.5446836948394775,
+      "learning_rate": 0.00019736783823505065,
+      "loss": 2.884,
+      "step": 235
+    },
+    {
+      "epoch": 0.15307280687530403,
+      "grad_norm": 1.59959876537323,
+      "learning_rate": 0.00019734449358276842,
+      "loss": 3.1943,
+      "step": 236
+    },
+    {
+      "epoch": 0.15372142046375872,
+      "grad_norm": 1.547925353050232,
+      "learning_rate": 0.00019732104725801684,
+      "loss": 3.1038,
+      "step": 237
+    },
+    {
+      "epoch": 0.1543700340522134,
+      "grad_norm": 1.5789741277694702,
+      "learning_rate": 0.0001972974992852847,
+      "loss": 3.0996,
+      "step": 238
+    },
+    {
+      "epoch": 0.15501864764066808,
+      "grad_norm": 1.675233244895935,
+      "learning_rate": 0.00019727384968916693,
+      "loss": 3.3453,
+      "step": 239
+    },
+    {
+      "epoch": 0.15566726122912275,
+      "grad_norm": 1.5575461387634277,
+      "learning_rate": 0.00019725009849436463,
+      "loss": 3.0918,
+      "step": 240
+    },
+    {
+      "epoch": 0.15631587481757742,
+      "grad_norm": 1.6142653226852417,
+      "learning_rate": 0.00019722624572568492,
+      "loss": 3.0376,
+      "step": 241
+    },
+    {
+      "epoch": 0.1569644884060321,
+      "grad_norm": 1.6989195346832275,
+      "learning_rate": 0.0001972022914080411,
+      "loss": 3.4667,
+      "step": 242
+    },
+    {
+      "epoch": 0.15761310199448678,
+      "grad_norm": 1.5104671716690063,
+      "learning_rate": 0.0001971782355664525,
+      "loss": 2.9191,
+      "step": 243
+    },
+    {
+      "epoch": 0.15826171558294147,
+      "grad_norm": 1.792704463005066,
+      "learning_rate": 0.00019715407822604451,
+      "loss": 3.2042,
+      "step": 244
+    },
+    {
+      "epoch": 0.15891032917139614,
+      "grad_norm": 1.798724889755249,
+      "learning_rate": 0.00019712981941204848,
+      "loss": 3.1007,
+      "step": 245
+    },
+    {
+      "epoch": 0.15955894275985083,
+      "grad_norm": 1.3790630102157593,
+      "learning_rate": 0.00019710545914980183,
+      "loss": 3.0556,
+      "step": 246
+    },
+    {
+      "epoch": 0.1602075563483055,
+      "grad_norm": 1.8706490993499756,
+      "learning_rate": 0.00019708099746474785,
+      "loss": 3.3396,
+      "step": 247
+    },
+    {
+      "epoch": 0.16085616993676016,
+      "grad_norm": 1.6114487648010254,
+      "learning_rate": 0.00019705643438243584,
+      "loss": 2.6075,
+      "step": 248
+    },
+    {
+      "epoch": 0.16150478352521486,
+      "grad_norm": 1.5196325778961182,
+      "learning_rate": 0.0001970317699285209,
+      "loss": 3.0033,
+      "step": 249
+    },
+    {
+      "epoch": 0.16215339711366952,
+      "grad_norm": 1.6828662157058716,
+      "learning_rate": 0.00019700700412876416,
+      "loss": 3.0495,
+      "step": 250
+    },
+    {
+      "epoch": 0.16280201070212422,
+      "grad_norm": 1.5269091129302979,
+      "learning_rate": 0.00019698213700903246,
+      "loss": 3.0902,
+      "step": 251
+    },
+    {
+      "epoch": 0.16345062429057888,
+      "grad_norm": 1.3918476104736328,
+      "learning_rate": 0.00019695716859529855,
+      "loss": 2.9452,
+      "step": 252
+    },
+    {
+      "epoch": 0.16409923787903358,
+      "grad_norm": 1.6424914598464966,
+      "learning_rate": 0.00019693209891364093,
+      "loss": 3.2129,
+      "step": 253
+    },
+    {
+      "epoch": 0.16474785146748824,
+      "grad_norm": 2.0256524085998535,
+      "learning_rate": 0.0001969069279902439,
+      "loss": 3.6181,
+      "step": 254
+    },
+    {
+      "epoch": 0.1653964650559429,
+      "grad_norm": 1.7617406845092773,
+      "learning_rate": 0.00019688165585139748,
+      "loss": 3.2462,
+      "step": 255
+    },
+    {
+      "epoch": 0.1660450786443976,
+      "grad_norm": 1.5007916688919067,
+      "learning_rate": 0.00019685628252349744,
+      "loss": 3.0225,
+      "step": 256
+    },
+    {
+      "epoch": 0.16669369223285227,
+      "grad_norm": 1.6564241647720337,
+      "learning_rate": 0.0001968308080330452,
+      "loss": 3.0276,
+      "step": 257
+    },
+    {
+      "epoch": 0.16734230582130696,
+      "grad_norm": 1.571022868156433,
+      "learning_rate": 0.00019680523240664786,
+      "loss": 2.853,
+      "step": 258
+    },
+    {
+      "epoch": 0.16799091940976163,
+      "grad_norm": 1.3106783628463745,
+      "learning_rate": 0.00019677955567101813,
+      "loss": 2.539,
+      "step": 259
+    },
+    {
+      "epoch": 0.16863953299821632,
+      "grad_norm": 1.4485841989517212,
+      "learning_rate": 0.0001967537778529744,
+      "loss": 2.9644,
+      "step": 260
+    },
+    {
+      "epoch": 0.169288146586671,
+      "grad_norm": 1.4725185632705688,
+      "learning_rate": 0.00019672789897944056,
+      "loss": 2.8638,
+      "step": 261
+    },
+    {
+      "epoch": 0.16993676017512566,
+      "grad_norm": 1.7089518308639526,
+      "learning_rate": 0.00019670191907744598,
+      "loss": 3.2726,
+      "step": 262
+    },
+    {
+      "epoch": 0.17058537376358035,
+      "grad_norm": 2.0805907249450684,
+      "learning_rate": 0.00019667583817412578,
+      "loss": 3.5152,
+      "step": 263
+    },
+    {
+      "epoch": 0.17123398735203502,
+      "grad_norm": 1.42578125,
+      "learning_rate": 0.00019664965629672033,
+      "loss": 2.7068,
+      "step": 264
+    },
+    {
+      "epoch": 0.1718826009404897,
+      "grad_norm": 1.70866858959198,
+      "learning_rate": 0.0001966233734725756,
+      "loss": 2.9573,
+      "step": 265
+    },
+    {
+      "epoch": 0.17253121452894438,
+      "grad_norm": 1.55576491355896,
+      "learning_rate": 0.000196596989729143,
+      "loss": 3.1079,
+      "step": 266
+    },
+    {
+      "epoch": 0.17317982811739907,
+      "grad_norm": 1.4486907720565796,
+      "learning_rate": 0.00019657050509397923,
+      "loss": 2.9568,
+      "step": 267
+    },
+    {
+      "epoch": 0.17382844170585374,
+      "grad_norm": 2.7671091556549072,
+      "learning_rate": 0.00019654391959474647,
+      "loss": 3.3971,
+      "step": 268
+    },
+    {
+      "epoch": 0.1744770552943084,
+      "grad_norm": 1.6282739639282227,
+      "learning_rate": 0.00019651723325921224,
+      "loss": 3.3782,
+      "step": 269
+    },
+    {
+      "epoch": 0.1751256688827631,
+      "grad_norm": 1.6156924962997437,
+      "learning_rate": 0.00019649044611524933,
+      "loss": 3.1748,
+      "step": 270
+    },
+    {
+      "epoch": 0.17577428247121776,
+      "grad_norm": 1.4323394298553467,
+      "learning_rate": 0.00019646355819083589,
+      "loss": 2.9028,
+      "step": 271
+    },
+    {
+      "epoch": 0.17642289605967246,
+      "grad_norm": 1.5577207803726196,
+      "learning_rate": 0.00019643656951405525,
+      "loss": 3.1325,
+      "step": 272
+    },
+    {
+      "epoch": 0.17707150964812712,
+      "grad_norm": 1.5941506624221802,
+      "learning_rate": 0.00019640948011309604,
+      "loss": 3.0872,
+      "step": 273
+    },
+    {
+      "epoch": 0.17772012323658182,
+      "grad_norm": 1.3285024166107178,
+      "learning_rate": 0.00019638229001625205,
+      "loss": 2.6622,
+      "step": 274
+    },
+    {
+      "epoch": 0.17836873682503648,
+      "grad_norm": 1.4673588275909424,
+      "learning_rate": 0.0001963549992519223,
+      "loss": 3.1656,
+      "step": 275
+    },
+    {
+      "epoch": 0.17901735041349115,
+      "grad_norm": 1.9262672662734985,
+      "learning_rate": 0.00019632760784861087,
+      "loss": 2.8955,
+      "step": 276
+    },
+    {
+      "epoch": 0.17966596400194584,
+      "grad_norm": 1.5531195402145386,
+      "learning_rate": 0.00019630011583492702,
+      "loss": 2.9895,
+      "step": 277
+    },
+    {
+      "epoch": 0.1803145775904005,
+      "grad_norm": 1.259700059890747,
+      "learning_rate": 0.00019627252323958504,
+      "loss": 2.4601,
+      "step": 278
+    },
+    {
+      "epoch": 0.1809631911788552,
+      "grad_norm": 1.4137221574783325,
+      "learning_rate": 0.00019624483009140435,
+      "loss": 2.7425,
+      "step": 279
+    },
+    {
+      "epoch": 0.18161180476730987,
+      "grad_norm": 1.2611486911773682,
+      "learning_rate": 0.0001962170364193093,
+      "loss": 2.7371,
+      "step": 280
+    },
+    {
+      "epoch": 0.18226041835576456,
+      "grad_norm": 1.7357769012451172,
+      "learning_rate": 0.00019618914225232934,
+      "loss": 3.0614,
+      "step": 281
+    },
+    {
+      "epoch": 0.18290903194421923,
+      "grad_norm": 1.706430435180664,
+      "learning_rate": 0.00019616114761959874,
+      "loss": 2.9595,
+      "step": 282
+    },
+    {
+      "epoch": 0.1835576455326739,
+      "grad_norm": 1.6613085269927979,
+      "learning_rate": 0.00019613305255035686,
+      "loss": 2.9299,
+      "step": 283
+    },
+    {
+      "epoch": 0.1842062591211286,
+      "grad_norm": 1.4859459400177002,
+      "learning_rate": 0.00019610485707394784,
+      "loss": 2.8886,
+      "step": 284
+    },
+    {
+      "epoch": 0.18485487270958326,
+      "grad_norm": 1.5165742635726929,
+      "learning_rate": 0.00019607656121982075,
+      "loss": 2.7321,
+      "step": 285
+    },
+    {
+      "epoch": 0.18550348629803795,
+      "grad_norm": 1.711745023727417,
+      "learning_rate": 0.00019604816501752947,
+      "loss": 2.8954,
+      "step": 286
+    },
+    {
+      "epoch": 0.18615209988649262,
+      "grad_norm": 1.5451345443725586,
+      "learning_rate": 0.00019601966849673276,
+      "loss": 2.9563,
+      "step": 287
+    },
+    {
+      "epoch": 0.1868007134749473,
+      "grad_norm": 1.3915531635284424,
+      "learning_rate": 0.00019599107168719412,
+      "loss": 3.0462,
+      "step": 288
+    },
+    {
+      "epoch": 0.18744932706340198,
+      "grad_norm": 1.2463390827178955,
+      "learning_rate": 0.0001959623746187817,
+      "loss": 2.7958,
+      "step": 289
+    },
+    {
+      "epoch": 0.18809794065185664,
+      "grad_norm": 1.683665156364441,
+      "learning_rate": 0.0001959335773214685,
+      "loss": 2.8798,
+      "step": 290
+    },
+    {
+      "epoch": 0.18874655424031134,
+      "grad_norm": 1.5754495859146118,
+      "learning_rate": 0.00019590467982533215,
+      "loss": 2.8401,
+      "step": 291
+    },
+    {
+      "epoch": 0.189395167828766,
+      "grad_norm": 1.6469128131866455,
+      "learning_rate": 0.000195875682160555,
+      "loss": 3.0458,
+      "step": 292
+    },
+    {
+      "epoch": 0.1900437814172207,
+      "grad_norm": 1.4030909538269043,
+      "learning_rate": 0.00019584658435742384,
+      "loss": 2.8845,
+      "step": 293
+    },
+    {
+      "epoch": 0.19069239500567536,
+      "grad_norm": 1.4269638061523438,
+      "learning_rate": 0.00019581738644633027,
+      "loss": 3.1745,
+      "step": 294
+    },
+    {
+      "epoch": 0.19134100859413006,
+      "grad_norm": 1.8712362051010132,
+      "learning_rate": 0.00019578808845777034,
+      "loss": 2.9603,
+      "step": 295
+    },
+    {
+      "epoch": 0.19198962218258472,
+      "grad_norm": 1.3823246955871582,
+      "learning_rate": 0.0001957586904223446,
+      "loss": 2.8134,
+      "step": 296
+    },
+    {
+      "epoch": 0.1926382357710394,
+      "grad_norm": 1.6640461683273315,
+      "learning_rate": 0.00019572919237075817,
+      "loss": 3.0434,
+      "step": 297
+    },
+    {
+      "epoch": 0.19328684935949408,
+      "grad_norm": 1.68658447265625,
+      "learning_rate": 0.0001956995943338206,
+      "loss": 3.2863,
+      "step": 298
+    },
+    {
+      "epoch": 0.19393546294794875,
+      "grad_norm": 1.4889813661575317,
+      "learning_rate": 0.00019566989634244584,
+      "loss": 2.7607,
+      "step": 299
+    },
+    {
+      "epoch": 0.19458407653640344,
+      "grad_norm": 1.526188850402832,
+      "learning_rate": 0.00019564009842765225,
+      "loss": 2.9834,
+      "step": 300
+    },
+    {
+      "epoch": 0.19458407653640344,
+      "eval_loss": 3.1460793018341064,
+      "eval_runtime": 35.0284,
+      "eval_samples_per_second": 58.695,
+      "eval_steps_per_second": 14.674,
+      "step": 300
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 2.1628629024768e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null