Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +713 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c2b891c33fc429c0cb28a6877c37fee7b9ad88339efdbefdc17f8648514cafb8
 size 289452128

 version https://git-lfs.github.com/spec/v1
+oid sha256:36670a5cb07570b526cc68b5b9d0ebaa892ac926af726fbdbe84eaef96121c65
 size 289452128

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:04f5aface321fef6ac3514607279d714a80837692202b1e5c7688000c93315d8
 size 147359892

 version https://git-lfs.github.com/spec/v1
+oid sha256:fe5c90e3905bbef6d07cf5a823555596fd9e072abb4be8bdc0ec4434b20e757e
 size 147359892

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a1213594a86bdd0e226c0613d392942ef0951aab7e498f1cfd891e9a0d4d395d
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:630e2711af6547215ca84e215db44507617e11099bbc440a92e4d380f49e9a73
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f7e9f0c2a27af03f3c1874438820d046de94b36aaec3b0cc778f96def4616314
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:530505d607699f384741067a5f9139d72f043713adb680898a3f1b5714170c97
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.7203736305236816,
-  "best_model_checkpoint": "miner_id_24/checkpoint-100",
-  "epoch": 0.019923295313044778,
   "eval_steps": 100,
-  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -723,6 +723,714 @@
       "eval_samples_per_second": 31.902,
       "eval_steps_per_second": 7.977,
       "step": 100
     }
   ],
   "logging_steps": 1,
@@ -751,7 +1459,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.325585305423053e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.7008334398269653,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.039846590626089556,
   "eval_steps": 100,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 31.902,
       "eval_steps_per_second": 7.977,
       "step": 100
+    },
+    {
+      "epoch": 0.020122528266175225,
+      "grad_norm": 0.21482613682746887,
+      "learning_rate": 9.31367192988896e-05,
+      "loss": 0.5356,
+      "step": 101
+    },
+    {
+      "epoch": 0.02032176121930567,
+      "grad_norm": 0.28820475935935974,
+      "learning_rate": 9.297032057507264e-05,
+      "loss": 0.6606,
+      "step": 102
+    },
+    {
+      "epoch": 0.02052099417243612,
+      "grad_norm": 0.22919978201389313,
+      "learning_rate": 9.280208114573859e-05,
+      "loss": 0.6681,
+      "step": 103
+    },
+    {
+      "epoch": 0.02072022712556657,
+      "grad_norm": 0.20591995120048523,
+      "learning_rate": 9.263200821770461e-05,
+      "loss": 0.6523,
+      "step": 104
+    },
+    {
+      "epoch": 0.020919460078697015,
+      "grad_norm": 0.20084571838378906,
+      "learning_rate": 9.246010907632895e-05,
+      "loss": 0.629,
+      "step": 105
+    },
+    {
+      "epoch": 0.021118693031827465,
+      "grad_norm": 0.25101473927497864,
+      "learning_rate": 9.228639108519868e-05,
+      "loss": 0.7384,
+      "step": 106
+    },
+    {
+      "epoch": 0.021317925984957912,
+      "grad_norm": 0.23734250664710999,
+      "learning_rate": 9.211086168581433e-05,
+      "loss": 0.6895,
+      "step": 107
+    },
+    {
+      "epoch": 0.02151715893808836,
+      "grad_norm": 0.22536040842533112,
+      "learning_rate": 9.193352839727121e-05,
+      "loss": 0.7137,
+      "step": 108
+    },
+    {
+      "epoch": 0.02171639189121881,
+      "grad_norm": 0.2355283945798874,
+      "learning_rate": 9.175439881593716e-05,
+      "loss": 0.7662,
+      "step": 109
+    },
+    {
+      "epoch": 0.021915624844349255,
+      "grad_norm": 0.2228918820619583,
+      "learning_rate": 9.157348061512727e-05,
+      "loss": 0.6534,
+      "step": 110
+    },
+    {
+      "epoch": 0.022114857797479702,
+      "grad_norm": 0.2184191644191742,
+      "learning_rate": 9.139078154477512e-05,
+      "loss": 0.7054,
+      "step": 111
+    },
+    {
+      "epoch": 0.022314090750610152,
+      "grad_norm": 0.21408338844776154,
+      "learning_rate": 9.120630943110077e-05,
+      "loss": 0.6103,
+      "step": 112
+    },
+    {
+      "epoch": 0.0225133237037406,
+      "grad_norm": 0.25427061319351196,
+      "learning_rate": 9.102007217627568e-05,
+      "loss": 0.8366,
+      "step": 113
+    },
+    {
+      "epoch": 0.022712556656871045,
+      "grad_norm": 0.21583066880702972,
+      "learning_rate": 9.083207775808396e-05,
+      "loss": 0.7165,
+      "step": 114
+    },
+    {
+      "epoch": 0.022911789610001496,
+      "grad_norm": 0.2396174967288971,
+      "learning_rate": 9.064233422958077e-05,
+      "loss": 0.6794,
+      "step": 115
+    },
+    {
+      "epoch": 0.023111022563131942,
+      "grad_norm": 0.2334282398223877,
+      "learning_rate": 9.045084971874738e-05,
+      "loss": 0.7945,
+      "step": 116
+    },
+    {
+      "epoch": 0.02331025551626239,
+      "grad_norm": 0.20607277750968933,
+      "learning_rate": 9.025763242814291e-05,
+      "loss": 0.5739,
+      "step": 117
+    },
+    {
+      "epoch": 0.02350948846939284,
+      "grad_norm": 0.2125677615404129,
+      "learning_rate": 9.006269063455304e-05,
+      "loss": 0.6976,
+      "step": 118
+    },
+    {
+      "epoch": 0.023708721422523286,
+      "grad_norm": 0.24233980476856232,
+      "learning_rate": 8.986603268863536e-05,
+      "loss": 0.7768,
+      "step": 119
+    },
+    {
+      "epoch": 0.023907954375653732,
+      "grad_norm": 0.23903363943099976,
+      "learning_rate": 8.966766701456177e-05,
+      "loss": 0.7925,
+      "step": 120
+    },
+    {
+      "epoch": 0.024107187328784183,
+      "grad_norm": 0.2270466685295105,
+      "learning_rate": 8.94676021096575e-05,
+      "loss": 0.679,
+      "step": 121
+    },
+    {
+      "epoch": 0.02430642028191463,
+      "grad_norm": 0.22134599089622498,
+      "learning_rate": 8.926584654403724e-05,
+      "loss": 0.7155,
+      "step": 122
+    },
+    {
+      "epoch": 0.024505653235045076,
+      "grad_norm": 0.2539938986301422,
+      "learning_rate": 8.906240896023794e-05,
+      "loss": 0.7156,
+      "step": 123
+    },
+    {
+      "epoch": 0.024704886188175523,
+      "grad_norm": 0.24332107603549957,
+      "learning_rate": 8.885729807284856e-05,
+      "loss": 0.6988,
+      "step": 124
+    },
+    {
+      "epoch": 0.024904119141305973,
+      "grad_norm": 0.23084275424480438,
+      "learning_rate": 8.865052266813685e-05,
+      "loss": 0.7142,
+      "step": 125
+    },
+    {
+      "epoch": 0.02510335209443642,
+      "grad_norm": 0.24665014445781708,
+      "learning_rate": 8.844209160367299e-05,
+      "loss": 0.7471,
+      "step": 126
+    },
+    {
+      "epoch": 0.025302585047566866,
+      "grad_norm": 0.23389537632465363,
+      "learning_rate": 8.823201380795001e-05,
+      "loss": 0.676,
+      "step": 127
+    },
+    {
+      "epoch": 0.025501818000697316,
+      "grad_norm": 0.2225302904844284,
+      "learning_rate": 8.802029828000156e-05,
+      "loss": 0.6702,
+      "step": 128
+    },
+    {
+      "epoch": 0.025701050953827763,
+      "grad_norm": 0.2194124162197113,
+      "learning_rate": 8.780695408901613e-05,
+      "loss": 0.7173,
+      "step": 129
+    },
+    {
+      "epoch": 0.02590028390695821,
+      "grad_norm": 0.22123487293720245,
+      "learning_rate": 8.759199037394887e-05,
+      "loss": 0.679,
+      "step": 130
+    },
+    {
+      "epoch": 0.02609951686008866,
+      "grad_norm": 0.2465553730726242,
+      "learning_rate": 8.737541634312985e-05,
+      "loss": 0.7151,
+      "step": 131
+    },
+    {
+      "epoch": 0.026298749813219106,
+      "grad_norm": 0.2150459736585617,
+      "learning_rate": 8.715724127386972e-05,
+      "loss": 0.7106,
+      "step": 132
+    },
+    {
+      "epoch": 0.026497982766349553,
+      "grad_norm": 0.1966014802455902,
+      "learning_rate": 8.693747451206232e-05,
+      "loss": 0.7174,
+      "step": 133
+    },
+    {
+      "epoch": 0.026697215719480003,
+      "grad_norm": 0.25692620873451233,
+      "learning_rate": 8.671612547178428e-05,
+      "loss": 0.6709,
+      "step": 134
+    },
+    {
+      "epoch": 0.02689644867261045,
+      "grad_norm": 0.19534507393836975,
+      "learning_rate": 8.649320363489179e-05,
+      "loss": 0.5871,
+      "step": 135
+    },
+    {
+      "epoch": 0.027095681625740897,
+      "grad_norm": 0.25263822078704834,
+      "learning_rate": 8.626871855061438e-05,
+      "loss": 0.7815,
+      "step": 136
+    },
+    {
+      "epoch": 0.027294914578871347,
+      "grad_norm": 0.24761663377285004,
+      "learning_rate": 8.604267983514594e-05,
+      "loss": 0.7253,
+      "step": 137
+    },
+    {
+      "epoch": 0.027494147532001793,
+      "grad_norm": 0.2336164116859436,
+      "learning_rate": 8.581509717123273e-05,
+      "loss": 0.6871,
+      "step": 138
+    },
+    {
+      "epoch": 0.02769338048513224,
+      "grad_norm": 0.2253238558769226,
+      "learning_rate": 8.558598030775857e-05,
+      "loss": 0.6806,
+      "step": 139
+    },
+    {
+      "epoch": 0.02789261343826269,
+      "grad_norm": 0.2473248988389969,
+      "learning_rate": 8.535533905932738e-05,
+      "loss": 0.8147,
+      "step": 140
+    },
+    {
+      "epoch": 0.028091846391393137,
+      "grad_norm": 0.2515900731086731,
+      "learning_rate": 8.51231833058426e-05,
+      "loss": 0.8055,
+      "step": 141
+    },
+    {
+      "epoch": 0.028291079344523583,
+      "grad_norm": 0.2372109442949295,
+      "learning_rate": 8.488952299208401e-05,
+      "loss": 0.6404,
+      "step": 142
+    },
+    {
+      "epoch": 0.028490312297654034,
+      "grad_norm": 0.24354617297649384,
+      "learning_rate": 8.46543681272818e-05,
+      "loss": 0.7355,
+      "step": 143
+    },
+    {
+      "epoch": 0.02868954525078448,
+      "grad_norm": 0.2426522821187973,
+      "learning_rate": 8.44177287846877e-05,
+      "loss": 0.7561,
+      "step": 144
+    },
+    {
+      "epoch": 0.028888778203914927,
+      "grad_norm": 0.24764999747276306,
+      "learning_rate": 8.417961510114356e-05,
+      "loss": 0.7203,
+      "step": 145
+    },
+    {
+      "epoch": 0.029088011157045374,
+      "grad_norm": 0.2602037191390991,
+      "learning_rate": 8.39400372766471e-05,
+      "loss": 0.7111,
+      "step": 146
+    },
+    {
+      "epoch": 0.029287244110175824,
+      "grad_norm": 0.25291872024536133,
+      "learning_rate": 8.36990055739149e-05,
+      "loss": 0.7011,
+      "step": 147
+    },
+    {
+      "epoch": 0.02948647706330627,
+      "grad_norm": 0.2726818323135376,
+      "learning_rate": 8.345653031794292e-05,
+      "loss": 0.8132,
+      "step": 148
+    },
+    {
+      "epoch": 0.029685710016436717,
+      "grad_norm": 0.2830042839050293,
+      "learning_rate": 8.321262189556409e-05,
+      "loss": 0.7577,
+      "step": 149
+    },
+    {
+      "epoch": 0.029884942969567167,
+      "grad_norm": 0.2831919193267822,
+      "learning_rate": 8.296729075500344e-05,
+      "loss": 0.7566,
+      "step": 150
+    },
+    {
+      "epoch": 0.030084175922697614,
+      "grad_norm": 0.33581486344337463,
+      "learning_rate": 8.272054740543052e-05,
+      "loss": 0.7664,
+      "step": 151
+    },
+    {
+      "epoch": 0.03028340887582806,
+      "grad_norm": 0.3191780149936676,
+      "learning_rate": 8.247240241650918e-05,
+      "loss": 0.6583,
+      "step": 152
+    },
+    {
+      "epoch": 0.03048264182895851,
+      "grad_norm": 0.22133736312389374,
+      "learning_rate": 8.222286641794488e-05,
+      "loss": 0.6328,
+      "step": 153
+    },
+    {
+      "epoch": 0.030681874782088957,
+      "grad_norm": 0.21831941604614258,
+      "learning_rate": 8.197195009902924e-05,
+      "loss": 0.698,
+      "step": 154
+    },
+    {
+      "epoch": 0.030881107735219404,
+      "grad_norm": 0.1761123090982437,
+      "learning_rate": 8.171966420818228e-05,
+      "loss": 0.5441,
+      "step": 155
+    },
+    {
+      "epoch": 0.031080340688349854,
+      "grad_norm": 0.20375514030456543,
+      "learning_rate": 8.146601955249188e-05,
+      "loss": 0.6309,
+      "step": 156
+    },
+    {
+      "epoch": 0.0312795736414803,
+      "grad_norm": 0.2410995066165924,
+      "learning_rate": 8.121102699725089e-05,
+      "loss": 0.6337,
+      "step": 157
+    },
+    {
+      "epoch": 0.03147880659461075,
+      "grad_norm": 0.2229624092578888,
+      "learning_rate": 8.095469746549172e-05,
+      "loss": 0.7014,
+      "step": 158
+    },
+    {
+      "epoch": 0.0316780395477412,
+      "grad_norm": 0.23791897296905518,
+      "learning_rate": 8.069704193751832e-05,
+      "loss": 0.6582,
+      "step": 159
+    },
+    {
+      "epoch": 0.03187727250087164,
+      "grad_norm": 0.22524884343147278,
+      "learning_rate": 8.043807145043604e-05,
+      "loss": 0.7178,
+      "step": 160
+    },
+    {
+      "epoch": 0.03207650545400209,
+      "grad_norm": 0.21897757053375244,
+      "learning_rate": 8.017779709767858e-05,
+      "loss": 0.7032,
+      "step": 161
+    },
+    {
+      "epoch": 0.03227573840713254,
+      "grad_norm": 0.19019927084445953,
+      "learning_rate": 7.991623002853296e-05,
+      "loss": 0.5842,
+      "step": 162
+    },
+    {
+      "epoch": 0.032474971360262984,
+      "grad_norm": 0.21591834723949432,
+      "learning_rate": 7.965338144766186e-05,
+      "loss": 0.7243,
+      "step": 163
+    },
+    {
+      "epoch": 0.032674204313393435,
+      "grad_norm": 0.20649899542331696,
+      "learning_rate": 7.938926261462366e-05,
+      "loss": 0.6814,
+      "step": 164
+    },
+    {
+      "epoch": 0.032873437266523885,
+      "grad_norm": 0.24540702998638153,
+      "learning_rate": 7.912388484339012e-05,
+      "loss": 0.7356,
+      "step": 165
+    },
+    {
+      "epoch": 0.03307267021965433,
+      "grad_norm": 0.22577622532844543,
+      "learning_rate": 7.88572595018617e-05,
+      "loss": 0.6468,
+      "step": 166
+    },
+    {
+      "epoch": 0.03327190317278478,
+      "grad_norm": 0.2168670892715454,
+      "learning_rate": 7.858939801138061e-05,
+      "loss": 0.6448,
+      "step": 167
+    },
+    {
+      "epoch": 0.03347113612591523,
+      "grad_norm": 0.22892935574054718,
+      "learning_rate": 7.832031184624164e-05,
+      "loss": 0.6375,
+      "step": 168
+    },
+    {
+      "epoch": 0.03367036907904567,
+      "grad_norm": 0.2086174190044403,
+      "learning_rate": 7.80500125332005e-05,
+      "loss": 0.6993,
+      "step": 169
+    },
+    {
+      "epoch": 0.03386960203217612,
+      "grad_norm": 0.19050797820091248,
+      "learning_rate": 7.777851165098012e-05,
+      "loss": 0.5909,
+      "step": 170
+    },
+    {
+      "epoch": 0.03406883498530657,
+      "grad_norm": 0.22635716199874878,
+      "learning_rate": 7.750582082977467e-05,
+      "loss": 0.6799,
+      "step": 171
+    },
+    {
+      "epoch": 0.034268067938437015,
+      "grad_norm": 0.2369690090417862,
+      "learning_rate": 7.723195175075136e-05,
+      "loss": 0.6823,
+      "step": 172
+    },
+    {
+      "epoch": 0.034467300891567465,
+      "grad_norm": 0.2258961796760559,
+      "learning_rate": 7.695691614555003e-05,
+      "loss": 0.6768,
+      "step": 173
+    },
+    {
+      "epoch": 0.034666533844697915,
+      "grad_norm": 0.2175053060054779,
+      "learning_rate": 7.668072579578058e-05,
+      "loss": 0.6501,
+      "step": 174
+    },
+    {
+      "epoch": 0.03486576679782836,
+      "grad_norm": 0.23859256505966187,
+      "learning_rate": 7.64033925325184e-05,
+      "loss": 0.7198,
+      "step": 175
+    },
+    {
+      "epoch": 0.03506499975095881,
+      "grad_norm": 0.2090621143579483,
+      "learning_rate": 7.612492823579745e-05,
+      "loss": 0.6075,
+      "step": 176
+    },
+    {
+      "epoch": 0.03526423270408926,
+      "grad_norm": 0.23009976744651794,
+      "learning_rate": 7.584534483410137e-05,
+      "loss": 0.6993,
+      "step": 177
+    },
+    {
+      "epoch": 0.0354634656572197,
+      "grad_norm": 0.19365736842155457,
+      "learning_rate": 7.55646543038526e-05,
+      "loss": 0.61,
+      "step": 178
+    },
+    {
+      "epoch": 0.03566269861035015,
+      "grad_norm": 0.24506577849388123,
+      "learning_rate": 7.528286866889924e-05,
+      "loss": 0.68,
+      "step": 179
+    },
+    {
+      "epoch": 0.0358619315634806,
+      "grad_norm": 0.2776792049407959,
+      "learning_rate": 7.500000000000001e-05,
+      "loss": 0.7224,
+      "step": 180
+    },
+    {
+      "epoch": 0.036061164516611045,
+      "grad_norm": 0.21556320786476135,
+      "learning_rate": 7.471606041430723e-05,
+      "loss": 0.6742,
+      "step": 181
+    },
+    {
+      "epoch": 0.036260397469741495,
+      "grad_norm": 0.2549598217010498,
+      "learning_rate": 7.443106207484776e-05,
+      "loss": 0.7866,
+      "step": 182
+    },
+    {
+      "epoch": 0.036459630422871946,
+      "grad_norm": 0.2427287995815277,
+      "learning_rate": 7.414501719000187e-05,
+      "loss": 0.7006,
+      "step": 183
+    },
+    {
+      "epoch": 0.03665886337600239,
+      "grad_norm": 0.2485671192407608,
+      "learning_rate": 7.385793801298042e-05,
+      "loss": 0.678,
+      "step": 184
+    },
+    {
+      "epoch": 0.03685809632913284,
+      "grad_norm": 0.23628251254558563,
+      "learning_rate": 7.35698368412999e-05,
+      "loss": 0.7176,
+      "step": 185
+    },
+    {
+      "epoch": 0.03705732928226329,
+      "grad_norm": 0.245437353849411,
+      "learning_rate": 7.328072601625557e-05,
+      "loss": 0.6784,
+      "step": 186
+    },
+    {
+      "epoch": 0.03725656223539373,
+      "grad_norm": 0.26534438133239746,
+      "learning_rate": 7.2990617922393e-05,
+      "loss": 0.7389,
+      "step": 187
+    },
+    {
+      "epoch": 0.03745579518852418,
+      "grad_norm": 0.26061758399009705,
+      "learning_rate": 7.269952498697734e-05,
+      "loss": 0.637,
+      "step": 188
+    },
+    {
+      "epoch": 0.03765502814165463,
+      "grad_norm": 0.247264102101326,
+      "learning_rate": 7.240745967946113e-05,
+      "loss": 0.7007,
+      "step": 189
+    },
+    {
+      "epoch": 0.037854261094785076,
+      "grad_norm": 0.2501027584075928,
+      "learning_rate": 7.211443451095007e-05,
+      "loss": 0.7552,
+      "step": 190
+    },
+    {
+      "epoch": 0.038053494047915526,
+      "grad_norm": 0.24374301731586456,
+      "learning_rate": 7.18204620336671e-05,
+      "loss": 0.7168,
+      "step": 191
+    },
+    {
+      "epoch": 0.038252727001045976,
+      "grad_norm": 0.2584417760372162,
+      "learning_rate": 7.152555484041476e-05,
+      "loss": 0.6993,
+      "step": 192
+    },
+    {
+      "epoch": 0.03845195995417642,
+      "grad_norm": 0.2678215205669403,
+      "learning_rate": 7.122972556403567e-05,
+      "loss": 0.7069,
+      "step": 193
+    },
+    {
+      "epoch": 0.03865119290730687,
+      "grad_norm": 0.27493688464164734,
+      "learning_rate": 7.09329868768714e-05,
+      "loss": 0.7271,
+      "step": 194
+    },
+    {
+      "epoch": 0.03885042586043732,
+      "grad_norm": 0.28842246532440186,
+      "learning_rate": 7.063535149021973e-05,
+      "loss": 0.7254,
+      "step": 195
+    },
+    {
+      "epoch": 0.03904965881356776,
+      "grad_norm": 0.26705753803253174,
+      "learning_rate": 7.033683215379002e-05,
+      "loss": 0.7115,
+      "step": 196
+    },
+    {
+      "epoch": 0.03924889176669821,
+      "grad_norm": 0.25876060128211975,
+      "learning_rate": 7.003744165515705e-05,
+      "loss": 0.7228,
+      "step": 197
+    },
+    {
+      "epoch": 0.03944812471982866,
+      "grad_norm": 0.30024516582489014,
+      "learning_rate": 6.973719281921335e-05,
+      "loss": 0.7615,
+      "step": 198
+    },
+    {
+      "epoch": 0.039647357672959106,
+      "grad_norm": 0.27740225195884705,
+      "learning_rate": 6.943609850761979e-05,
+      "loss": 0.7928,
+      "step": 199
+    },
+    {
+      "epoch": 0.039846590626089556,
+      "grad_norm": 0.3024348020553589,
+      "learning_rate": 6.91341716182545e-05,
+      "loss": 0.7479,
+      "step": 200
+    },
+    {
+      "epoch": 0.039846590626089556,
+      "eval_loss": 0.7008334398269653,
+      "eval_runtime": 265.1826,
+      "eval_samples_per_second": 31.88,
+      "eval_steps_per_second": 7.972,
+      "step": 200
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 6.651170610846106e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null