Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6183fcd765362b46d7b2ee32af8283901109a474c5daae11cae8501a5f4dce42
 size 335604696

 version https://git-lfs.github.com/spec/v1
+oid sha256:e255a3bf76f9c80de3cfe450532822ca289f621c6d171dfc4e73ced437d756e7
 size 335604696

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7a9346416183890ea8f06eb809113390b750c8ce78028bbbd1112e7381e4b7f2
 size 170920084

 version https://git-lfs.github.com/spec/v1
+oid sha256:fa1b7c6fb34d453805bac2195d6cccbd5a438316d5666bbd4d9e21485c319b23
 size 170920084

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e1994d92a9b3c122ae8e15eecf0804ea2a2fc696b1ddee92eccf0d81c63eb854
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:f5ad8286ea66b701d63e50598db6aef980a8ece5be55ee5d74d7201d2703c34f
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3eb0f7a710adf9e599fc08ebbb5ad60e429eaaf1eaeec204e7c52cb45e30b9cf
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:b60d6f1383abda4776549360effee800fe6cfe2c0604503e9e3fbaa79347f790
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.46051260828971863,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.08565921907345278,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 4.948,
       "eval_steps_per_second": 1.238,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 3.629974314842849e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.4560202658176422,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.11421229209793704,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 4.948,
       "eval_steps_per_second": 1.238,
       "step": 150
+    },
+    {
+      "epoch": 0.08623028053394247,
+      "grad_norm": 3.791814088821411,
+      "learning_rate": 1.339526225485725e-05,
+      "loss": 3.4645,
+      "step": 151
+    },
+    {
+      "epoch": 0.08680134199443215,
+      "grad_norm": 3.796891212463379,
+      "learning_rate": 1.2890114781136224e-05,
+      "loss": 3.4475,
+      "step": 152
+    },
+    {
+      "epoch": 0.08737240345492184,
+      "grad_norm": 3.6403725147247314,
+      "learning_rate": 1.239251781296245e-05,
+      "loss": 3.1833,
+      "step": 153
+    },
+    {
+      "epoch": 0.08794346491541152,
+      "grad_norm": 3.6974196434020996,
+      "learning_rate": 1.1902641279244715e-05,
+      "loss": 3.3696,
+      "step": 154
+    },
+    {
+      "epoch": 0.08851452637590121,
+      "grad_norm": 3.578291893005371,
+      "learning_rate": 1.1420652472370497e-05,
+      "loss": 3.1136,
+      "step": 155
+    },
+    {
+      "epoch": 0.0890855878363909,
+      "grad_norm": 3.4341652393341064,
+      "learning_rate": 1.0946715991075805e-05,
+      "loss": 2.9641,
+      "step": 156
+    },
+    {
+      "epoch": 0.08965664929688058,
+      "grad_norm": 3.451934576034546,
+      "learning_rate": 1.0480993684234815e-05,
+      "loss": 3.1792,
+      "step": 157
+    },
+    {
+      "epoch": 0.09022771075737027,
+      "grad_norm": 3.3800251483917236,
+      "learning_rate": 1.0023644595588671e-05,
+      "loss": 3.3144,
+      "step": 158
+    },
+    {
+      "epoch": 0.09079877221785995,
+      "grad_norm": 3.479318618774414,
+      "learning_rate": 9.57482490943216e-06,
+      "loss": 3.1648,
+      "step": 159
+    },
+    {
+      "epoch": 0.09136983367834964,
+      "grad_norm": 3.491649866104126,
+      "learning_rate": 9.134687897276934e-06,
+      "loss": 3.3233,
+      "step": 160
+    },
+    {
+      "epoch": 0.09194089513883932,
+      "grad_norm": 3.484065055847168,
+      "learning_rate": 8.703383865509432e-06,
+      "loss": 3.5239,
+      "step": 161
+    },
+    {
+      "epoch": 0.09251195659932901,
+      "grad_norm": 3.6290812492370605,
+      "learning_rate": 8.281060104061394e-06,
+      "loss": 3.2998,
+      "step": 162
+    },
+    {
+      "epoch": 0.09308301805981868,
+      "grad_norm": 3.6152350902557373,
+      "learning_rate": 7.867860836110453e-06,
+      "loss": 3.7931,
+      "step": 163
+    },
+    {
+      "epoch": 0.09365407952030837,
+      "grad_norm": 3.5166842937469482,
+      "learning_rate": 7.463927168828087e-06,
+      "loss": 3.5036,
+      "step": 164
+    },
+    {
+      "epoch": 0.09422514098079805,
+      "grad_norm": 3.266004800796509,
+      "learning_rate": 7.069397045191617e-06,
+      "loss": 2.7839,
+      "step": 165
+    },
+    {
+      "epoch": 0.09479620244128774,
+      "grad_norm": 3.5984013080596924,
+      "learning_rate": 6.684405196876842e-06,
+      "loss": 3.7792,
+      "step": 166
+    },
+    {
+      "epoch": 0.09536726390177742,
+      "grad_norm": 3.4622771739959717,
+      "learning_rate": 6.309083098247264e-06,
+      "loss": 3.4876,
+      "step": 167
+    },
+    {
+      "epoch": 0.09593832536226711,
+      "grad_norm": 3.397523880004883,
+      "learning_rate": 5.943558921455733e-06,
+      "loss": 3.202,
+      "step": 168
+    },
+    {
+      "epoch": 0.0965093868227568,
+      "grad_norm": 3.561593770980835,
+      "learning_rate": 5.587957492673759e-06,
+      "loss": 3.5857,
+      "step": 169
+    },
+    {
+      "epoch": 0.09708044828324648,
+      "grad_norm": 3.7165884971618652,
+      "learning_rate": 5.2424002494635095e-06,
+      "loss": 3.6163,
+      "step": 170
+    },
+    {
+      "epoch": 0.09765150974373617,
+      "grad_norm": 3.710353374481201,
+      "learning_rate": 4.9070051993069636e-06,
+      "loss": 3.9078,
+      "step": 171
+    },
+    {
+      "epoch": 0.09822257120422585,
+      "grad_norm": 3.53379225730896,
+      "learning_rate": 4.581886879306507e-06,
+      "loss": 3.5316,
+      "step": 172
+    },
+    {
+      "epoch": 0.09879363266471554,
+      "grad_norm": 3.5342020988464355,
+      "learning_rate": 4.2671563170705725e-06,
+      "loss": 3.5556,
+      "step": 173
+    },
+    {
+      "epoch": 0.09936469412520522,
+      "grad_norm": 3.5446951389312744,
+      "learning_rate": 3.962920992797834e-06,
+      "loss": 3.4027,
+      "step": 174
+    },
+    {
+      "epoch": 0.09993575558569491,
+      "grad_norm": 3.7609660625457764,
+      "learning_rate": 3.6692848025728216e-06,
+      "loss": 4.0196,
+      "step": 175
+    },
+    {
+      "epoch": 0.1005068170461846,
+      "grad_norm": 3.5949559211730957,
+      "learning_rate": 3.38634802288549e-06,
+      "loss": 3.7018,
+      "step": 176
+    },
+    {
+      "epoch": 0.10107787850667428,
+      "grad_norm": 3.4899206161499023,
+      "learning_rate": 3.1142072763869042e-06,
+      "loss": 3.4402,
+      "step": 177
+    },
+    {
+      "epoch": 0.10164893996716397,
+      "grad_norm": 3.4958362579345703,
+      "learning_rate": 2.852955498892694e-06,
+      "loss": 3.6609,
+      "step": 178
+    },
+    {
+      "epoch": 0.10222000142765365,
+      "grad_norm": 3.7367665767669678,
+      "learning_rate": 2.6026819076455325e-06,
+      "loss": 3.9386,
+      "step": 179
+    },
+    {
+      "epoch": 0.10279106288814334,
+      "grad_norm": 3.6101346015930176,
+      "learning_rate": 2.36347197084755e-06,
+      "loss": 3.589,
+      "step": 180
+    },
+    {
+      "epoch": 0.10336212434863302,
+      "grad_norm": 3.714442491531372,
+      "learning_rate": 2.1354073784730253e-06,
+      "loss": 3.9742,
+      "step": 181
+    },
+    {
+      "epoch": 0.10393318580912271,
+      "grad_norm": 3.8775899410247803,
+      "learning_rate": 1.9185660143713184e-06,
+      "loss": 4.4065,
+      "step": 182
+    },
+    {
+      "epoch": 0.1045042472696124,
+      "grad_norm": 4.041444301605225,
+      "learning_rate": 1.7130219296696263e-06,
+      "loss": 4.5485,
+      "step": 183
+    },
+    {
+      "epoch": 0.10507530873010208,
+      "grad_norm": 3.8321800231933594,
+      "learning_rate": 1.5188453174845743e-06,
+      "loss": 4.2913,
+      "step": 184
+    },
+    {
+      "epoch": 0.10564637019059177,
+      "grad_norm": 3.849231481552124,
+      "learning_rate": 1.3361024889513333e-06,
+      "loss": 4.6328,
+      "step": 185
+    },
+    {
+      "epoch": 0.10621743165108145,
+      "grad_norm": 3.960550546646118,
+      "learning_rate": 1.16485585057844e-06,
+      "loss": 4.6204,
+      "step": 186
+    },
+    {
+      "epoch": 0.10678849311157114,
+      "grad_norm": 3.7694177627563477,
+      "learning_rate": 1.0051638829360127e-06,
+      "loss": 3.6095,
+      "step": 187
+    },
+    {
+      "epoch": 0.10735955457206081,
+      "grad_norm": 3.936204671859741,
+      "learning_rate": 8.570811206847189e-07,
+      "loss": 4.067,
+      "step": 188
+    },
+    {
+      "epoch": 0.1079306160325505,
+      "grad_norm": 4.248170852661133,
+      "learning_rate": 7.206581339521939e-07,
+      "loss": 4.3996,
+      "step": 189
+    },
+    {
+      "epoch": 0.10850167749304018,
+      "grad_norm": 4.024265766143799,
+      "learning_rate": 5.959415110634375e-07,
+      "loss": 3.9334,
+      "step": 190
+    },
+    {
+      "epoch": 0.10907273895352987,
+      "grad_norm": 4.193387508392334,
+      "learning_rate": 4.829738426309099e-07,
+      "loss": 3.9381,
+      "step": 191
+    },
+    {
+      "epoch": 0.10964380041401955,
+      "grad_norm": 4.102433204650879,
+      "learning_rate": 3.817937070098914e-07,
+      "loss": 3.7026,
+      "step": 192
+    },
+    {
+      "epoch": 0.11021486187450924,
+      "grad_norm": 4.2977142333984375,
+      "learning_rate": 2.9243565712400384e-07,
+      "loss": 3.4472,
+      "step": 193
+    },
+    {
+      "epoch": 0.11078592333499893,
+      "grad_norm": 4.350393295288086,
+      "learning_rate": 2.1493020866542365e-07,
+      "loss": 3.5126,
+      "step": 194
+    },
+    {
+      "epoch": 0.11135698479548861,
+      "grad_norm": 4.65367317199707,
+      "learning_rate": 1.4930382967379363e-07,
+      "loss": 3.6799,
+      "step": 195
+    },
+    {
+      "epoch": 0.1119280462559783,
+      "grad_norm": 4.893271446228027,
+      "learning_rate": 9.557893149741924e-08,
+      "loss": 3.6397,
+      "step": 196
+    },
+    {
+      "epoch": 0.11249910771646798,
+      "grad_norm": 4.931285381317139,
+      "learning_rate": 5.377386113981197e-08,
+      "loss": 3.6552,
+      "step": 197
+    },
+    {
+      "epoch": 0.11307016917695767,
+      "grad_norm": 5.327000141143799,
+      "learning_rate": 2.3902894994198286e-08,
+      "loss": 3.3647,
+      "step": 198
+    },
+    {
+      "epoch": 0.11364123063744735,
+      "grad_norm": 6.226569175720215,
+      "learning_rate": 5.976233968155164e-09,
+      "loss": 3.279,
+      "step": 199
+    },
+    {
+      "epoch": 0.11421229209793704,
+      "grad_norm": 8.364439010620117,
+      "learning_rate": 0.0,
+      "loss": 3.8711,
+      "step": 200
+    },
+    {
+      "epoch": 0.11421229209793704,
+      "eval_loss": 0.4560202658176422,
+      "eval_runtime": 596.8378,
+      "eval_samples_per_second": 4.943,
+      "eval_steps_per_second": 1.237,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 4.849985165910344e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null