Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:94896ae97603ccc2cb66508569f08f59788cb1e5f442b35c03d94fafd4c24c8d
 size 2373352

 version https://git-lfs.github.com/spec/v1
+oid sha256:78a8782148c22fb3a740b574b5d8b47ae824d83995cde93fba1aa18d29a0bd98
 size 2373352

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:408894d778f842c88b7df68c5144a7b5cdd5c01b093f73b0e5c640a8fa763406
 size 4830714

 version https://git-lfs.github.com/spec/v1
+oid sha256:364904d28c9205fbc60077be793ffcf15e8951842789eac20273d3cf97012a21
 size 4830714

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:598b1fa9c5387e2f2e00f6917e940a1d346bf0019f0e8f98a40b2e8c993fef35
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:08f4a7fd97b67479128a873431907df489602aa70981f3cfdaf86ed24d2527a3
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:48a1533051e4bee653afc683a4359c329f95831c0354ae8442616cabf80d0caa
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:f5efc880080e0854e5765c7a5ea108f46d0ccbf30b054c9bbb5a2162a1c5babf
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 11.792704582214355,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.017157563625965114,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 36.548,
       "eval_steps_per_second": 9.145,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 41317171200000.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 11.789202690124512,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.02287675150128682,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 36.548,
       "eval_steps_per_second": 9.145,
       "step": 150
+    },
+    {
+      "epoch": 0.017271947383471545,
+      "grad_norm": 0.24707955121994019,
+      "learning_rate": 2.5815263157894736e-05,
+      "loss": 11.8637,
+      "step": 151
+    },
+    {
+      "epoch": 0.01738633114097798,
+      "grad_norm": 0.24718506634235382,
+      "learning_rate": 2.528842105263158e-05,
+      "loss": 11.8212,
+      "step": 152
+    },
+    {
+      "epoch": 0.017500714898484416,
+      "grad_norm": 0.33310824632644653,
+      "learning_rate": 2.4761578947368418e-05,
+      "loss": 11.8403,
+      "step": 153
+    },
+    {
+      "epoch": 0.017615098655990848,
+      "grad_norm": 0.2045665830373764,
+      "learning_rate": 2.423473684210526e-05,
+      "loss": 11.8194,
+      "step": 154
+    },
+    {
+      "epoch": 0.017729482413497283,
+      "grad_norm": 0.17509344220161438,
+      "learning_rate": 2.3707894736842103e-05,
+      "loss": 11.8122,
+      "step": 155
+    },
+    {
+      "epoch": 0.017843866171003718,
+      "grad_norm": 0.21512837707996368,
+      "learning_rate": 2.3181052631578946e-05,
+      "loss": 11.7928,
+      "step": 156
+    },
+    {
+      "epoch": 0.017958249928510153,
+      "grad_norm": 0.26602622866630554,
+      "learning_rate": 2.265421052631579e-05,
+      "loss": 11.8055,
+      "step": 157
+    },
+    {
+      "epoch": 0.018072633686016585,
+      "grad_norm": 0.1669163554906845,
+      "learning_rate": 2.212736842105263e-05,
+      "loss": 11.8191,
+      "step": 158
+    },
+    {
+      "epoch": 0.01818701744352302,
+      "grad_norm": 0.1745176613330841,
+      "learning_rate": 2.1600526315789474e-05,
+      "loss": 11.7985,
+      "step": 159
+    },
+    {
+      "epoch": 0.018301401201029455,
+      "grad_norm": 0.22357724606990814,
+      "learning_rate": 2.1073684210526313e-05,
+      "loss": 11.8164,
+      "step": 160
+    },
+    {
+      "epoch": 0.018415784958535887,
+      "grad_norm": 0.18855567276477814,
+      "learning_rate": 2.0546842105263155e-05,
+      "loss": 11.7984,
+      "step": 161
+    },
+    {
+      "epoch": 0.018530168716042322,
+      "grad_norm": 0.17857414484024048,
+      "learning_rate": 2.002e-05,
+      "loss": 11.826,
+      "step": 162
+    },
+    {
+      "epoch": 0.018644552473548757,
+      "grad_norm": 0.16557997465133667,
+      "learning_rate": 1.949315789473684e-05,
+      "loss": 11.8069,
+      "step": 163
+    },
+    {
+      "epoch": 0.01875893623105519,
+      "grad_norm": 0.14565524458885193,
+      "learning_rate": 1.8966315789473683e-05,
+      "loss": 11.7757,
+      "step": 164
+    },
+    {
+      "epoch": 0.018873319988561624,
+      "grad_norm": 0.224319726228714,
+      "learning_rate": 1.8439473684210522e-05,
+      "loss": 11.7687,
+      "step": 165
+    },
+    {
+      "epoch": 0.01898770374606806,
+      "grad_norm": 0.18438120186328888,
+      "learning_rate": 1.791263157894737e-05,
+      "loss": 11.7766,
+      "step": 166
+    },
+    {
+      "epoch": 0.01910208750357449,
+      "grad_norm": 0.2236049324274063,
+      "learning_rate": 1.738578947368421e-05,
+      "loss": 11.7945,
+      "step": 167
+    },
+    {
+      "epoch": 0.019216471261080927,
+      "grad_norm": 0.21375702321529388,
+      "learning_rate": 1.685894736842105e-05,
+      "loss": 11.7835,
+      "step": 168
+    },
+    {
+      "epoch": 0.019330855018587362,
+      "grad_norm": 0.164867103099823,
+      "learning_rate": 1.6332105263157893e-05,
+      "loss": 11.8334,
+      "step": 169
+    },
+    {
+      "epoch": 0.019445238776093794,
+      "grad_norm": 0.1830967664718628,
+      "learning_rate": 1.5805263157894735e-05,
+      "loss": 11.7627,
+      "step": 170
+    },
+    {
+      "epoch": 0.01955962253360023,
+      "grad_norm": 0.516990602016449,
+      "learning_rate": 1.5278421052631578e-05,
+      "loss": 11.7963,
+      "step": 171
+    },
+    {
+      "epoch": 0.019674006291106664,
+      "grad_norm": 0.2165108621120453,
+      "learning_rate": 1.4751578947368419e-05,
+      "loss": 11.7813,
+      "step": 172
+    },
+    {
+      "epoch": 0.019788390048613096,
+      "grad_norm": 0.20191384851932526,
+      "learning_rate": 1.4224736842105262e-05,
+      "loss": 11.7721,
+      "step": 173
+    },
+    {
+      "epoch": 0.01990277380611953,
+      "grad_norm": 0.16778965294361115,
+      "learning_rate": 1.3697894736842106e-05,
+      "loss": 11.7968,
+      "step": 174
+    },
+    {
+      "epoch": 0.020017157563625966,
+      "grad_norm": 0.16983447968959808,
+      "learning_rate": 1.3171052631578945e-05,
+      "loss": 11.7666,
+      "step": 175
+    },
+    {
+      "epoch": 0.020131541321132398,
+      "grad_norm": 0.18451401591300964,
+      "learning_rate": 1.264421052631579e-05,
+      "loss": 11.7896,
+      "step": 176
+    },
+    {
+      "epoch": 0.020245925078638833,
+      "grad_norm": 0.16675086319446564,
+      "learning_rate": 1.211736842105263e-05,
+      "loss": 11.8059,
+      "step": 177
+    },
+    {
+      "epoch": 0.020360308836145268,
+      "grad_norm": 0.15788999199867249,
+      "learning_rate": 1.1590526315789473e-05,
+      "loss": 11.7934,
+      "step": 178
+    },
+    {
+      "epoch": 0.0204746925936517,
+      "grad_norm": 0.2428436577320099,
+      "learning_rate": 1.1063684210526316e-05,
+      "loss": 11.7346,
+      "step": 179
+    },
+    {
+      "epoch": 0.020589076351158135,
+      "grad_norm": 0.2078651338815689,
+      "learning_rate": 1.0536842105263156e-05,
+      "loss": 11.7503,
+      "step": 180
+    },
+    {
+      "epoch": 0.02070346010866457,
+      "grad_norm": 0.1591472029685974,
+      "learning_rate": 1.001e-05,
+      "loss": 11.7827,
+      "step": 181
+    },
+    {
+      "epoch": 0.020817843866171002,
+      "grad_norm": 0.2180211991071701,
+      "learning_rate": 9.483157894736842e-06,
+      "loss": 11.7671,
+      "step": 182
+    },
+    {
+      "epoch": 0.020932227623677437,
+      "grad_norm": 0.1913239061832428,
+      "learning_rate": 8.956315789473684e-06,
+      "loss": 11.7801,
+      "step": 183
+    },
+    {
+      "epoch": 0.021046611381183872,
+      "grad_norm": 0.23337297141551971,
+      "learning_rate": 8.429473684210525e-06,
+      "loss": 11.7463,
+      "step": 184
+    },
+    {
+      "epoch": 0.021160995138690308,
+      "grad_norm": 0.2319062501192093,
+      "learning_rate": 7.902631578947368e-06,
+      "loss": 11.7445,
+      "step": 185
+    },
+    {
+      "epoch": 0.02127537889619674,
+      "grad_norm": 0.2010311484336853,
+      "learning_rate": 7.3757894736842095e-06,
+      "loss": 11.7733,
+      "step": 186
+    },
+    {
+      "epoch": 0.021389762653703175,
+      "grad_norm": 0.25788718461990356,
+      "learning_rate": 6.848947368421053e-06,
+      "loss": 11.752,
+      "step": 187
+    },
+    {
+      "epoch": 0.02150414641120961,
+      "grad_norm": 0.17638243734836578,
+      "learning_rate": 6.322105263157895e-06,
+      "loss": 11.7793,
+      "step": 188
+    },
+    {
+      "epoch": 0.02161853016871604,
+      "grad_norm": 0.2046917825937271,
+      "learning_rate": 5.7952631578947365e-06,
+      "loss": 11.7657,
+      "step": 189
+    },
+    {
+      "epoch": 0.021732913926222477,
+      "grad_norm": 0.2301589995622635,
+      "learning_rate": 5.268421052631578e-06,
+      "loss": 11.7546,
+      "step": 190
+    },
+    {
+      "epoch": 0.021847297683728912,
+      "grad_norm": 0.21123434603214264,
+      "learning_rate": 4.741578947368421e-06,
+      "loss": 11.832,
+      "step": 191
+    },
+    {
+      "epoch": 0.021961681441235344,
+      "grad_norm": 0.20363949239253998,
+      "learning_rate": 4.2147368421052626e-06,
+      "loss": 11.7615,
+      "step": 192
+    },
+    {
+      "epoch": 0.02207606519874178,
+      "grad_norm": 0.2523394525051117,
+      "learning_rate": 3.6878947368421047e-06,
+      "loss": 11.8368,
+      "step": 193
+    },
+    {
+      "epoch": 0.022190448956248214,
+      "grad_norm": 0.21425357460975647,
+      "learning_rate": 3.1610526315789474e-06,
+      "loss": 11.769,
+      "step": 194
+    },
+    {
+      "epoch": 0.022304832713754646,
+      "grad_norm": 0.3113345503807068,
+      "learning_rate": 2.634210526315789e-06,
+      "loss": 11.792,
+      "step": 195
+    },
+    {
+      "epoch": 0.02241921647126108,
+      "grad_norm": 0.3299500644207001,
+      "learning_rate": 2.1073684210526313e-06,
+      "loss": 11.8106,
+      "step": 196
+    },
+    {
+      "epoch": 0.022533600228767516,
+      "grad_norm": 0.3436274230480194,
+      "learning_rate": 1.5805263157894737e-06,
+      "loss": 11.8191,
+      "step": 197
+    },
+    {
+      "epoch": 0.022647983986273948,
+      "grad_norm": 0.3444826900959015,
+      "learning_rate": 1.0536842105263156e-06,
+      "loss": 11.7976,
+      "step": 198
+    },
+    {
+      "epoch": 0.022762367743780383,
+      "grad_norm": 0.3153350055217743,
+      "learning_rate": 5.268421052631578e-07,
+      "loss": 11.783,
+      "step": 199
+    },
+    {
+      "epoch": 0.02287675150128682,
+      "grad_norm": 0.34337830543518066,
+      "learning_rate": 0.0,
+      "loss": 11.8164,
+      "step": 200
+    },
+    {
+      "epoch": 0.02287675150128682,
+      "eval_loss": 11.789202690124512,
+      "eval_runtime": 100.6996,
+      "eval_samples_per_second": 36.554,
+      "eval_steps_per_second": 9.146,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 55089561600000.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null