Training in progress, step 248, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +871 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c900d238d14e1037bad8471b2ad3ccccd90a5fec518db1c8ce72007d170398ad
 size 144805440

 version https://git-lfs.github.com/spec/v1
+oid sha256:58100fe655c8ed92d4518fef45e29df4d60c7e291f66f716456f73e4ea77f392
 size 144805440

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1c360ceccad5f60fb95639dbf4e0513e8c0f0b3ff27fb6d0739312326efc435e
 size 74291604

 version https://git-lfs.github.com/spec/v1
+oid sha256:ab152d90f6d47ebd8a356ceecca0993fb077dcff867c7c58ca00456e2cfcd04b
 size 74291604

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bac592a9ca2514bc2cfa4738b780303ed581353d6599570fd57ea5aeacd531f9
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:f660891b19594633ced246d59eedd400fe2556d319f4e5ca333df7fb57888180
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e5f72f9735f8242853fd5033caee72818d745afc25ff8221fe23de7a6ff33743
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:81771ff96e80b84ed048126e169640f8617ceb476fe2f91b8561190057e53b0d
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.25012607160867373,
   "eval_steps": 500,
-  "global_step": 124,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -875,6 +875,874 @@
       "learning_rate": 8.61933911810608e-05,
       "loss": 0.5258,
       "step": 124
     }
   ],
   "logging_steps": 1,
@@ -894,7 +1762,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.1893950480161178e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.5002521432173475,
   "eval_steps": 500,
+  "global_step": 248,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 8.61933911810608e-05,
       "loss": 0.5258,
       "step": 124
+    },
+    {
+      "epoch": 0.2521432173474534,
+      "grad_norm": 0.4010732173919678,
+      "learning_rate": 8.597192817707122e-05,
+      "loss": 0.6078,
+      "step": 125
+    },
+    {
+      "epoch": 0.25416036308623297,
+      "grad_norm": 0.3758286237716675,
+      "learning_rate": 8.574899252492833e-05,
+      "loss": 0.6296,
+      "step": 126
+    },
+    {
+      "epoch": 0.2561775088250126,
+      "grad_norm": 0.4148157238960266,
+      "learning_rate": 8.552459335135381e-05,
+      "loss": 0.5566,
+      "step": 127
+    },
+    {
+      "epoch": 0.25819465456379226,
+      "grad_norm": 0.45975151658058167,
+      "learning_rate": 8.529873984298418e-05,
+      "loss": 0.7839,
+      "step": 128
+    },
+    {
+      "epoch": 0.26021180030257185,
+      "grad_norm": 0.3960277736186981,
+      "learning_rate": 8.507144124599467e-05,
+      "loss": 0.5523,
+      "step": 129
+    },
+    {
+      "epoch": 0.2622289460413515,
+      "grad_norm": 0.42408132553100586,
+      "learning_rate": 8.484270686572078e-05,
+      "loss": 0.6672,
+      "step": 130
+    },
+    {
+      "epoch": 0.2642460917801311,
+      "grad_norm": 0.42156749963760376,
+      "learning_rate": 8.46125460662772e-05,
+      "loss": 0.6694,
+      "step": 131
+    },
+    {
+      "epoch": 0.26626323751891073,
+      "grad_norm": 0.4371163547039032,
+      "learning_rate": 8.43809682701746e-05,
+      "loss": 0.6765,
+      "step": 132
+    },
+    {
+      "epoch": 0.2682803832576904,
+      "grad_norm": 0.4190014600753784,
+      "learning_rate": 8.41479829579338e-05,
+      "loss": 0.5559,
+      "step": 133
+    },
+    {
+      "epoch": 0.27029752899646997,
+      "grad_norm": 0.44778770208358765,
+      "learning_rate": 8.391359966769765e-05,
+      "loss": 0.6315,
+      "step": 134
+    },
+    {
+      "epoch": 0.2723146747352496,
+      "grad_norm": 0.4291159212589264,
+      "learning_rate": 8.367782799484057e-05,
+      "loss": 0.6501,
+      "step": 135
+    },
+    {
+      "epoch": 0.27433182047402926,
+      "grad_norm": 0.4089667797088623,
+      "learning_rate": 8.344067759157572e-05,
+      "loss": 0.6162,
+      "step": 136
+    },
+    {
+      "epoch": 0.27634896621280886,
+      "grad_norm": 0.4689813256263733,
+      "learning_rate": 8.320215816655987e-05,
+      "loss": 0.7768,
+      "step": 137
+    },
+    {
+      "epoch": 0.2783661119515885,
+      "grad_norm": 0.41810306906700134,
+      "learning_rate": 8.296227948449589e-05,
+      "loss": 0.5548,
+      "step": 138
+    },
+    {
+      "epoch": 0.28038325769036815,
+      "grad_norm": 0.4520469009876251,
+      "learning_rate": 8.272105136573303e-05,
+      "loss": 0.6789,
+      "step": 139
+    },
+    {
+      "epoch": 0.28240040342914774,
+      "grad_norm": 0.4686160087585449,
+      "learning_rate": 8.24784836858649e-05,
+      "loss": 0.6922,
+      "step": 140
+    },
+    {
+      "epoch": 0.2844175491679274,
+      "grad_norm": 0.5166224837303162,
+      "learning_rate": 8.223458637532515e-05,
+      "loss": 0.7206,
+      "step": 141
+    },
+    {
+      "epoch": 0.28643469490670703,
+      "grad_norm": 0.486186683177948,
+      "learning_rate": 8.198936941898091e-05,
+      "loss": 0.599,
+      "step": 142
+    },
+    {
+      "epoch": 0.2884518406454866,
+      "grad_norm": 0.4760117530822754,
+      "learning_rate": 8.174284285572408e-05,
+      "loss": 0.6445,
+      "step": 143
+    },
+    {
+      "epoch": 0.29046898638426627,
+      "grad_norm": 0.49186456203460693,
+      "learning_rate": 8.14950167780603e-05,
+      "loss": 0.635,
+      "step": 144
+    },
+    {
+      "epoch": 0.2924861321230459,
+      "grad_norm": 0.5166196227073669,
+      "learning_rate": 8.12459013316958e-05,
+      "loss": 0.6888,
+      "step": 145
+    },
+    {
+      "epoch": 0.2945032778618255,
+      "grad_norm": 0.5494632720947266,
+      "learning_rate": 8.099550671512202e-05,
+      "loss": 0.6592,
+      "step": 146
+    },
+    {
+      "epoch": 0.29652042360060515,
+      "grad_norm": 0.5840097069740295,
+      "learning_rate": 8.074384317919812e-05,
+      "loss": 0.7137,
+      "step": 147
+    },
+    {
+      "epoch": 0.29853756933938475,
+      "grad_norm": 0.5433554649353027,
+      "learning_rate": 8.049092102673135e-05,
+      "loss": 0.6446,
+      "step": 148
+    },
+    {
+      "epoch": 0.3005547150781644,
+      "grad_norm": 0.5587112903594971,
+      "learning_rate": 8.023675061205519e-05,
+      "loss": 0.6222,
+      "step": 149
+    },
+    {
+      "epoch": 0.30257186081694404,
+      "grad_norm": 0.6611265540122986,
+      "learning_rate": 7.998134234060551e-05,
+      "loss": 0.7365,
+      "step": 150
+    },
+    {
+      "epoch": 0.30458900655572363,
+      "grad_norm": 0.49480316042900085,
+      "learning_rate": 7.972470666849457e-05,
+      "loss": 0.5276,
+      "step": 151
+    },
+    {
+      "epoch": 0.3066061522945033,
+      "grad_norm": 0.5728369951248169,
+      "learning_rate": 7.946685410208296e-05,
+      "loss": 0.6442,
+      "step": 152
+    },
+    {
+      "epoch": 0.3086232980332829,
+      "grad_norm": 0.4378463923931122,
+      "learning_rate": 7.920779519754948e-05,
+      "loss": 0.6289,
+      "step": 153
+    },
+    {
+      "epoch": 0.3106404437720625,
+      "grad_norm": 0.42006805539131165,
+      "learning_rate": 7.894754056045901e-05,
+      "loss": 0.545,
+      "step": 154
+    },
+    {
+      "epoch": 0.31265758951084216,
+      "grad_norm": 0.4681999683380127,
+      "learning_rate": 7.868610084532828e-05,
+      "loss": 0.5703,
+      "step": 155
+    },
+    {
+      "epoch": 0.3146747352496218,
+      "grad_norm": 0.537784218788147,
+      "learning_rate": 7.842348675518968e-05,
+      "loss": 0.6163,
+      "step": 156
+    },
+    {
+      "epoch": 0.3166918809884014,
+      "grad_norm": 0.4741944372653961,
+      "learning_rate": 7.815970904115319e-05,
+      "loss": 0.6115,
+      "step": 157
+    },
+    {
+      "epoch": 0.31870902672718104,
+      "grad_norm": 0.42608484625816345,
+      "learning_rate": 7.789477850196614e-05,
+      "loss": 0.5959,
+      "step": 158
+    },
+    {
+      "epoch": 0.3207261724659607,
+      "grad_norm": 0.44388729333877563,
+      "learning_rate": 7.762870598357115e-05,
+      "loss": 0.5518,
+      "step": 159
+    },
+    {
+      "epoch": 0.3227433182047403,
+      "grad_norm": 0.3990045189857483,
+      "learning_rate": 7.736150237866213e-05,
+      "loss": 0.5256,
+      "step": 160
+    },
+    {
+      "epoch": 0.32476046394351993,
+      "grad_norm": 1.0403790473937988,
+      "learning_rate": 7.709317862623833e-05,
+      "loss": 0.5994,
+      "step": 161
+    },
+    {
+      "epoch": 0.3267776096822995,
+      "grad_norm": 0.38422009348869324,
+      "learning_rate": 7.682374571115651e-05,
+      "loss": 0.5976,
+      "step": 162
+    },
+    {
+      "epoch": 0.32879475542107917,
+      "grad_norm": 0.3627714216709137,
+      "learning_rate": 7.655321466368126e-05,
+      "loss": 0.6134,
+      "step": 163
+    },
+    {
+      "epoch": 0.3308119011598588,
+      "grad_norm": 0.4387800097465515,
+      "learning_rate": 7.628159655903336e-05,
+      "loss": 0.6016,
+      "step": 164
+    },
+    {
+      "epoch": 0.3328290468986384,
+      "grad_norm": 0.38703906536102295,
+      "learning_rate": 7.600890251693645e-05,
+      "loss": 0.5804,
+      "step": 165
+    },
+    {
+      "epoch": 0.33484619263741805,
+      "grad_norm": 0.37045204639434814,
+      "learning_rate": 7.57351437011618e-05,
+      "loss": 0.5176,
+      "step": 166
+    },
+    {
+      "epoch": 0.3368633383761977,
+      "grad_norm": 0.37859445810317993,
+      "learning_rate": 7.546033131907122e-05,
+      "loss": 0.5251,
+      "step": 167
+    },
+    {
+      "epoch": 0.3388804841149773,
+      "grad_norm": 0.3897157907485962,
+      "learning_rate": 7.51844766211583e-05,
+      "loss": 0.5355,
+      "step": 168
+    },
+    {
+      "epoch": 0.34089762985375693,
+      "grad_norm": 0.384641170501709,
+      "learning_rate": 7.490759090058778e-05,
+      "loss": 0.5872,
+      "step": 169
+    },
+    {
+      "epoch": 0.3429147755925366,
+      "grad_norm": 0.3863964378833771,
+      "learning_rate": 7.462968549273326e-05,
+      "loss": 0.5901,
+      "step": 170
+    },
+    {
+      "epoch": 0.34493192133131617,
+      "grad_norm": 0.3983001708984375,
+      "learning_rate": 7.435077177471315e-05,
+      "loss": 0.5495,
+      "step": 171
+    },
+    {
+      "epoch": 0.3469490670700958,
+      "grad_norm": 0.419036865234375,
+      "learning_rate": 7.407086116492484e-05,
+      "loss": 0.5547,
+      "step": 172
+    },
+    {
+      "epoch": 0.34896621280887546,
+      "grad_norm": 0.39494702219963074,
+      "learning_rate": 7.378996512257735e-05,
+      "loss": 0.5727,
+      "step": 173
+    },
+    {
+      "epoch": 0.35098335854765506,
+      "grad_norm": 0.3954225182533264,
+      "learning_rate": 7.35080951472221e-05,
+      "loss": 0.5669,
+      "step": 174
+    },
+    {
+      "epoch": 0.3530005042864347,
+      "grad_norm": 0.43136945366859436,
+      "learning_rate": 7.322526277828216e-05,
+      "loss": 0.6174,
+      "step": 175
+    },
+    {
+      "epoch": 0.35501765002521435,
+      "grad_norm": 0.37220558524131775,
+      "learning_rate": 7.294147959457989e-05,
+      "loss": 0.5389,
+      "step": 176
+    },
+    {
+      "epoch": 0.35703479576399394,
+      "grad_norm": 0.40839049220085144,
+      "learning_rate": 7.265675721386285e-05,
+      "loss": 0.6032,
+      "step": 177
+    },
+    {
+      "epoch": 0.3590519415027736,
+      "grad_norm": 0.37361785769462585,
+      "learning_rate": 7.237110729232825e-05,
+      "loss": 0.5343,
+      "step": 178
+    },
+    {
+      "epoch": 0.3610690872415532,
+      "grad_norm": 0.4027640223503113,
+      "learning_rate": 7.208454152414571e-05,
+      "loss": 0.5687,
+      "step": 179
+    },
+    {
+      "epoch": 0.3630862329803328,
+      "grad_norm": 0.4593392014503479,
+      "learning_rate": 7.179707164097851e-05,
+      "loss": 0.633,
+      "step": 180
+    },
+    {
+      "epoch": 0.36510337871911247,
+      "grad_norm": 0.3781510889530182,
+      "learning_rate": 7.150870941150336e-05,
+      "loss": 0.5225,
+      "step": 181
+    },
+    {
+      "epoch": 0.36712052445789206,
+      "grad_norm": 0.40543264150619507,
+      "learning_rate": 7.12194666409285e-05,
+      "loss": 0.5756,
+      "step": 182
+    },
+    {
+      "epoch": 0.3691376701966717,
+      "grad_norm": 0.42058733105659485,
+      "learning_rate": 7.092935517051058e-05,
+      "loss": 0.6274,
+      "step": 183
+    },
+    {
+      "epoch": 0.37115481593545135,
+      "grad_norm": 0.4416365623474121,
+      "learning_rate": 7.063838687706971e-05,
+      "loss": 0.6606,
+      "step": 184
+    },
+    {
+      "epoch": 0.37317196167423095,
+      "grad_norm": 0.4479544460773468,
+      "learning_rate": 7.034657367250337e-05,
+      "loss": 0.5858,
+      "step": 185
+    },
+    {
+      "epoch": 0.3751891074130106,
+      "grad_norm": 0.45392027497291565,
+      "learning_rate": 7.005392750329868e-05,
+      "loss": 0.6059,
+      "step": 186
+    },
+    {
+      "epoch": 0.37720625315179024,
+      "grad_norm": 0.4504959285259247,
+      "learning_rate": 6.976046035004335e-05,
+      "loss": 0.5292,
+      "step": 187
+    },
+    {
+      "epoch": 0.37922339889056983,
+      "grad_norm": 0.4504992365837097,
+      "learning_rate": 6.946618422693521e-05,
+      "loss": 0.5815,
+      "step": 188
+    },
+    {
+      "epoch": 0.3812405446293495,
+      "grad_norm": 0.44799065589904785,
+      "learning_rate": 6.917111118129035e-05,
+      "loss": 0.5636,
+      "step": 189
+    },
+    {
+      "epoch": 0.3832576903681291,
+      "grad_norm": 0.49823272228240967,
+      "learning_rate": 6.887525329304994e-05,
+      "loss": 0.7133,
+      "step": 190
+    },
+    {
+      "epoch": 0.3852748361069087,
+      "grad_norm": 0.50130295753479,
+      "learning_rate": 6.857862267428563e-05,
+      "loss": 0.6892,
+      "step": 191
+    },
+    {
+      "epoch": 0.38729198184568836,
+      "grad_norm": 0.47600454092025757,
+      "learning_rate": 6.828123146870383e-05,
+      "loss": 0.5957,
+      "step": 192
+    },
+    {
+      "epoch": 0.38930912758446795,
+      "grad_norm": 0.44867298007011414,
+      "learning_rate": 6.79830918511484e-05,
+      "loss": 0.6613,
+      "step": 193
+    },
+    {
+      "epoch": 0.3913262733232476,
+      "grad_norm": 0.4927201569080353,
+      "learning_rate": 6.76842160271023e-05,
+      "loss": 0.6486,
+      "step": 194
+    },
+    {
+      "epoch": 0.39334341906202724,
+      "grad_norm": 0.43527671694755554,
+      "learning_rate": 6.738461623218795e-05,
+      "loss": 0.5978,
+      "step": 195
+    },
+    {
+      "epoch": 0.39536056480080684,
+      "grad_norm": 0.5360156297683716,
+      "learning_rate": 6.708430473166628e-05,
+      "loss": 0.6932,
+      "step": 196
+    },
+    {
+      "epoch": 0.3973777105395865,
+      "grad_norm": 0.5388275384902954,
+      "learning_rate": 6.678329381993458e-05,
+      "loss": 0.6456,
+      "step": 197
+    },
+    {
+      "epoch": 0.39939485627836613,
+      "grad_norm": 0.544686496257782,
+      "learning_rate": 6.648159582002322e-05,
+      "loss": 0.6551,
+      "step": 198
+    },
+    {
+      "epoch": 0.4014120020171457,
+      "grad_norm": 0.6613960266113281,
+      "learning_rate": 6.617922308309115e-05,
+      "loss": 0.6676,
+      "step": 199
+    },
+    {
+      "epoch": 0.40342914775592537,
+      "grad_norm": 0.7599596381187439,
+      "learning_rate": 6.587618798792022e-05,
+      "loss": 0.6209,
+      "step": 200
+    },
+    {
+      "epoch": 0.405446293494705,
+      "grad_norm": 0.3868520259857178,
+      "learning_rate": 6.557250294040849e-05,
+      "loss": 0.505,
+      "step": 201
+    },
+    {
+      "epoch": 0.4074634392334846,
+      "grad_norm": 0.4449853003025055,
+      "learning_rate": 6.526818037306228e-05,
+      "loss": 0.5904,
+      "step": 202
+    },
+    {
+      "epoch": 0.40948058497226425,
+      "grad_norm": 0.45996958017349243,
+      "learning_rate": 6.496323274448721e-05,
+      "loss": 0.543,
+      "step": 203
+    },
+    {
+      "epoch": 0.4114977307110439,
+      "grad_norm": 0.49261391162872314,
+      "learning_rate": 6.46576725388782e-05,
+      "loss": 0.6735,
+      "step": 204
+    },
+    {
+      "epoch": 0.4135148764498235,
+      "grad_norm": 0.43575477600097656,
+      "learning_rate": 6.435151226550829e-05,
+      "loss": 0.5711,
+      "step": 205
+    },
+    {
+      "epoch": 0.41553202218860313,
+      "grad_norm": 0.44983670115470886,
+      "learning_rate": 6.404476445821663e-05,
+      "loss": 0.5746,
+      "step": 206
+    },
+    {
+      "epoch": 0.4175491679273828,
+      "grad_norm": 0.3756658136844635,
+      "learning_rate": 6.373744167489531e-05,
+      "loss": 0.5298,
+      "step": 207
+    },
+    {
+      "epoch": 0.41956631366616237,
+      "grad_norm": 0.4081321954727173,
+      "learning_rate": 6.342955649697523e-05,
+      "loss": 0.6043,
+      "step": 208
+    },
+    {
+      "epoch": 0.421583459404942,
+      "grad_norm": 0.42804038524627686,
+      "learning_rate": 6.312112152891107e-05,
+      "loss": 0.5759,
+      "step": 209
+    },
+    {
+      "epoch": 0.4236006051437216,
+      "grad_norm": 0.3999483287334442,
+      "learning_rate": 6.28121493976653e-05,
+      "loss": 0.5183,
+      "step": 210
+    },
+    {
+      "epoch": 0.42561775088250126,
+      "grad_norm": 0.40822234749794006,
+      "learning_rate": 6.250265275219116e-05,
+      "loss": 0.6014,
+      "step": 211
+    },
+    {
+      "epoch": 0.4276348966212809,
+      "grad_norm": 0.388310045003891,
+      "learning_rate": 6.219264426291494e-05,
+      "loss": 0.5951,
+      "step": 212
+    },
+    {
+      "epoch": 0.4296520423600605,
+      "grad_norm": 0.39607641100883484,
+      "learning_rate": 6.188213662121716e-05,
+      "loss": 0.5743,
+      "step": 213
+    },
+    {
+      "epoch": 0.43166918809884014,
+      "grad_norm": 0.37904173135757446,
+      "learning_rate": 6.157114253891307e-05,
+      "loss": 0.5331,
+      "step": 214
+    },
+    {
+      "epoch": 0.4336863338376198,
+      "grad_norm": 0.36542603373527527,
+      "learning_rate": 6.125967474773223e-05,
+      "loss": 0.5145,
+      "step": 215
+    },
+    {
+      "epoch": 0.4357034795763994,
+      "grad_norm": 0.37045159935951233,
+      "learning_rate": 6.0947745998797266e-05,
+      "loss": 0.5091,
+      "step": 216
+    },
+    {
+      "epoch": 0.437720625315179,
+      "grad_norm": 0.41620463132858276,
+      "learning_rate": 6.0635369062101875e-05,
+      "loss": 0.6119,
+      "step": 217
+    },
+    {
+      "epoch": 0.43973777105395867,
+      "grad_norm": 0.4387664794921875,
+      "learning_rate": 6.032255672598803e-05,
+      "loss": 0.6682,
+      "step": 218
+    },
+    {
+      "epoch": 0.44175491679273826,
+      "grad_norm": 0.4237648844718933,
+      "learning_rate": 6.0009321796622444e-05,
+      "loss": 0.5406,
+      "step": 219
+    },
+    {
+      "epoch": 0.4437720625315179,
+      "grad_norm": 0.43322858214378357,
+      "learning_rate": 5.969567709747228e-05,
+      "loss": 0.5316,
+      "step": 220
+    },
+    {
+      "epoch": 0.44578920827029755,
+      "grad_norm": 0.4600161910057068,
+      "learning_rate": 5.938163546878024e-05,
+      "loss": 0.567,
+      "step": 221
+    },
+    {
+      "epoch": 0.44780635400907715,
+      "grad_norm": 0.3887118399143219,
+      "learning_rate": 5.906720976703877e-05,
+      "loss": 0.5762,
+      "step": 222
+    },
+    {
+      "epoch": 0.4498234997478568,
+      "grad_norm": 0.4044482409954071,
+      "learning_rate": 5.87524128644639e-05,
+      "loss": 0.6196,
+      "step": 223
+    },
+    {
+      "epoch": 0.4518406454866364,
+      "grad_norm": 0.4436338543891907,
+      "learning_rate": 5.843725764846812e-05,
+      "loss": 0.5333,
+      "step": 224
+    },
+    {
+      "epoch": 0.45385779122541603,
+      "grad_norm": 0.454662948846817,
+      "learning_rate": 5.812175702113286e-05,
+      "loss": 0.557,
+      "step": 225
+    },
+    {
+      "epoch": 0.4558749369641957,
+      "grad_norm": 0.445640504360199,
+      "learning_rate": 5.7805923898680305e-05,
+      "loss": 0.5332,
+      "step": 226
+    },
+    {
+      "epoch": 0.45789208270297527,
+      "grad_norm": 0.4665926992893219,
+      "learning_rate": 5.7489771210944564e-05,
+      "loss": 0.6367,
+      "step": 227
+    },
+    {
+      "epoch": 0.4599092284417549,
+      "grad_norm": 0.45142456889152527,
+      "learning_rate": 5.717331190084243e-05,
+      "loss": 0.5792,
+      "step": 228
+    },
+    {
+      "epoch": 0.46192637418053456,
+      "grad_norm": 0.4474796652793884,
+      "learning_rate": 5.6856558923843364e-05,
+      "loss": 0.575,
+      "step": 229
+    },
+    {
+      "epoch": 0.46394351991931415,
+      "grad_norm": 0.4471490681171417,
+      "learning_rate": 5.6539525247439274e-05,
+      "loss": 0.5461,
+      "step": 230
+    },
+    {
+      "epoch": 0.4659606656580938,
+      "grad_norm": 0.4579067528247833,
+      "learning_rate": 5.622222385061353e-05,
+      "loss": 0.6183,
+      "step": 231
+    },
+    {
+      "epoch": 0.46797781139687344,
+      "grad_norm": 0.4598318636417389,
+      "learning_rate": 5.590466772330968e-05,
+      "loss": 0.6318,
+      "step": 232
+    },
+    {
+      "epoch": 0.46999495713565304,
+      "grad_norm": 0.4309539496898651,
+      "learning_rate": 5.558686986589963e-05,
+      "loss": 0.6644,
+      "step": 233
+    },
+    {
+      "epoch": 0.4720121028744327,
+      "grad_norm": 0.46067124605178833,
+      "learning_rate": 5.526884328865142e-05,
+      "loss": 0.6401,
+      "step": 234
+    },
+    {
+      "epoch": 0.47402924861321233,
+      "grad_norm": 0.450652539730072,
+      "learning_rate": 5.495060101119662e-05,
+      "loss": 0.5475,
+      "step": 235
+    },
+    {
+      "epoch": 0.4760463943519919,
+      "grad_norm": 0.44922497868537903,
+      "learning_rate": 5.463215606199733e-05,
+      "loss": 0.6372,
+      "step": 236
+    },
+    {
+      "epoch": 0.47806354009077157,
+      "grad_norm": 0.4988616704940796,
+      "learning_rate": 5.431352147781275e-05,
+      "loss": 0.6847,
+      "step": 237
+    },
+    {
+      "epoch": 0.4800806858295512,
+      "grad_norm": 0.45364323258399963,
+      "learning_rate": 5.399471030316554e-05,
+      "loss": 0.6203,
+      "step": 238
+    },
+    {
+      "epoch": 0.4820978315683308,
+      "grad_norm": 0.4748307466506958,
+      "learning_rate": 5.367573558980775e-05,
+      "loss": 0.571,
+      "step": 239
+    },
+    {
+      "epoch": 0.48411497730711045,
+      "grad_norm": 0.46398502588272095,
+      "learning_rate": 5.335661039618653e-05,
+      "loss": 0.6339,
+      "step": 240
+    },
+    {
+      "epoch": 0.48613212304589004,
+      "grad_norm": 0.49548378586769104,
+      "learning_rate": 5.3037347786909495e-05,
+      "loss": 0.6616,
+      "step": 241
+    },
+    {
+      "epoch": 0.4881492687846697,
+      "grad_norm": 0.4696505665779114,
+      "learning_rate": 5.2717960832209914e-05,
+      "loss": 0.6289,
+      "step": 242
+    },
+    {
+      "epoch": 0.49016641452344933,
+      "grad_norm": 0.4907710552215576,
+      "learning_rate": 5.239846260741158e-05,
+      "loss": 0.5494,
+      "step": 243
+    },
+    {
+      "epoch": 0.4921835602622289,
+      "grad_norm": 0.522820234298706,
+      "learning_rate": 5.2078866192393574e-05,
+      "loss": 0.6741,
+      "step": 244
+    },
+    {
+      "epoch": 0.49420070600100857,
+      "grad_norm": 0.5473442673683167,
+      "learning_rate": 5.1759184671054785e-05,
+      "loss": 0.6574,
+      "step": 245
+    },
+    {
+      "epoch": 0.4962178517397882,
+      "grad_norm": 0.5453885197639465,
+      "learning_rate": 5.1439431130778206e-05,
+      "loss": 0.6463,
+      "step": 246
+    },
+    {
+      "epoch": 0.4982349974785678,
+      "grad_norm": 0.5924301743507385,
+      "learning_rate": 5.111961866189524e-05,
+      "loss": 0.675,
+      "step": 247
+    },
+    {
+      "epoch": 0.5002521432173475,
+      "grad_norm": 0.6364018321037292,
+      "learning_rate": 5.079976035714976e-05,
+      "loss": 0.7198,
+      "step": 248
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 4.194181341983539e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null