Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c90ac422429a9f6b7e3acd0bda9dcc8b4a31441fee2975f6ab57dbe4bf11f84c
 size 645975704

 version https://git-lfs.github.com/spec/v1
+oid sha256:e0323b4a58308037e469680b9bfb1b8b01bfbb35a6c067db3b205ce6afd77e20
 size 645975704

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5059700704cd58f2b5f8109094d84c10491d945cb1296e963c5ec271a2aab61d
 size 328468404

 version https://git-lfs.github.com/spec/v1
+oid sha256:7be146ea174b49420c7a2cec1584b7f51f009e6556fe4b5d72194eaf567a0055
 size 328468404

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:952da6182b23026f9eff2a7e4a3318b8ccd9d0189255fad84fcf6db78f38466d
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:212ecee7ba5bbad66868b4f13a31101fb6a6055516f2090d01279bc6d5e12910
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ef91cf09f7b6a58a39bbfe1ef78ac2fa91c0c15ca1705097a187d272d0433d8d
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:c47edf82128c3f034f21204d4b9ce5c76cd3269748ce31061b148a0a389d049d
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.9893535375595093,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.0243842965130456,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 9.4,
       "eval_steps_per_second": 2.351,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 5.349856178601984e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.983881413936615,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.03251239535072747,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 9.4,
       "eval_steps_per_second": 2.351,
       "step": 150
+    },
+    {
+      "epoch": 0.024546858489799237,
+      "grad_norm": 0.32013484835624695,
+      "learning_rate": 2.5944210526315793e-05,
+      "loss": 0.9826,
+      "step": 151
+    },
+    {
+      "epoch": 0.024709420466552872,
+      "grad_norm": 0.34136995673179626,
+      "learning_rate": 2.5414736842105266e-05,
+      "loss": 0.9456,
+      "step": 152
+    },
+    {
+      "epoch": 0.02487198244330651,
+      "grad_norm": 0.35575228929519653,
+      "learning_rate": 2.4885263157894737e-05,
+      "loss": 0.9309,
+      "step": 153
+    },
+    {
+      "epoch": 0.02503454442006015,
+      "grad_norm": 0.39715224504470825,
+      "learning_rate": 2.4355789473684214e-05,
+      "loss": 1.0874,
+      "step": 154
+    },
+    {
+      "epoch": 0.025197106396813784,
+      "grad_norm": 0.39866402745246887,
+      "learning_rate": 2.3826315789473684e-05,
+      "loss": 0.9358,
+      "step": 155
+    },
+    {
+      "epoch": 0.025359668373567422,
+      "grad_norm": 0.39832454919815063,
+      "learning_rate": 2.3296842105263158e-05,
+      "loss": 1.0743,
+      "step": 156
+    },
+    {
+      "epoch": 0.02552223035032106,
+      "grad_norm": 0.40810534358024597,
+      "learning_rate": 2.2767368421052635e-05,
+      "loss": 0.9787,
+      "step": 157
+    },
+    {
+      "epoch": 0.0256847923270747,
+      "grad_norm": 0.38781747221946716,
+      "learning_rate": 2.2237894736842105e-05,
+      "loss": 0.8989,
+      "step": 158
+    },
+    {
+      "epoch": 0.025847354303828333,
+      "grad_norm": 0.4570434093475342,
+      "learning_rate": 2.170842105263158e-05,
+      "loss": 0.9888,
+      "step": 159
+    },
+    {
+      "epoch": 0.026009916280581972,
+      "grad_norm": 0.4297999143600464,
+      "learning_rate": 2.1178947368421053e-05,
+      "loss": 0.9206,
+      "step": 160
+    },
+    {
+      "epoch": 0.02617247825733561,
+      "grad_norm": 0.42393121123313904,
+      "learning_rate": 2.0649473684210527e-05,
+      "loss": 0.9191,
+      "step": 161
+    },
+    {
+      "epoch": 0.026335040234089245,
+      "grad_norm": 0.43705666065216064,
+      "learning_rate": 2.0120000000000004e-05,
+      "loss": 0.9721,
+      "step": 162
+    },
+    {
+      "epoch": 0.026497602210842883,
+      "grad_norm": 0.46401333808898926,
+      "learning_rate": 1.9590526315789474e-05,
+      "loss": 1.0054,
+      "step": 163
+    },
+    {
+      "epoch": 0.02666016418759652,
+      "grad_norm": 0.45111915469169617,
+      "learning_rate": 1.9061052631578948e-05,
+      "loss": 0.8965,
+      "step": 164
+    },
+    {
+      "epoch": 0.02682272616435016,
+      "grad_norm": 0.4912633001804352,
+      "learning_rate": 1.8531578947368422e-05,
+      "loss": 1.1235,
+      "step": 165
+    },
+    {
+      "epoch": 0.026985288141103795,
+      "grad_norm": 0.551999568939209,
+      "learning_rate": 1.8002105263157896e-05,
+      "loss": 1.0657,
+      "step": 166
+    },
+    {
+      "epoch": 0.027147850117857433,
+      "grad_norm": 0.5471257567405701,
+      "learning_rate": 1.747263157894737e-05,
+      "loss": 1.0765,
+      "step": 167
+    },
+    {
+      "epoch": 0.02731041209461107,
+      "grad_norm": 0.6052240133285522,
+      "learning_rate": 1.6943157894736843e-05,
+      "loss": 1.2019,
+      "step": 168
+    },
+    {
+      "epoch": 0.027472974071364707,
+      "grad_norm": 0.6268660426139832,
+      "learning_rate": 1.6413684210526317e-05,
+      "loss": 1.1699,
+      "step": 169
+    },
+    {
+      "epoch": 0.027635536048118345,
+      "grad_norm": 0.5555728673934937,
+      "learning_rate": 1.588421052631579e-05,
+      "loss": 0.9256,
+      "step": 170
+    },
+    {
+      "epoch": 0.027798098024871983,
+      "grad_norm": 0.6148643493652344,
+      "learning_rate": 1.5354736842105264e-05,
+      "loss": 0.9823,
+      "step": 171
+    },
+    {
+      "epoch": 0.027960660001625618,
+      "grad_norm": 0.6582595705986023,
+      "learning_rate": 1.4825263157894736e-05,
+      "loss": 1.1272,
+      "step": 172
+    },
+    {
+      "epoch": 0.028123221978379256,
+      "grad_norm": 0.657721996307373,
+      "learning_rate": 1.4295789473684212e-05,
+      "loss": 1.0821,
+      "step": 173
+    },
+    {
+      "epoch": 0.028285783955132895,
+      "grad_norm": 0.7575223445892334,
+      "learning_rate": 1.3766315789473686e-05,
+      "loss": 1.1628,
+      "step": 174
+    },
+    {
+      "epoch": 0.028448345931886533,
+      "grad_norm": 0.7547634243965149,
+      "learning_rate": 1.3236842105263158e-05,
+      "loss": 1.1852,
+      "step": 175
+    },
+    {
+      "epoch": 0.028610907908640168,
+      "grad_norm": 0.9611251950263977,
+      "learning_rate": 1.2707368421052633e-05,
+      "loss": 0.9881,
+      "step": 176
+    },
+    {
+      "epoch": 0.028773469885393806,
+      "grad_norm": 0.9127490520477295,
+      "learning_rate": 1.2177894736842107e-05,
+      "loss": 1.0568,
+      "step": 177
+    },
+    {
+      "epoch": 0.028936031862147445,
+      "grad_norm": 0.7519910931587219,
+      "learning_rate": 1.1648421052631579e-05,
+      "loss": 0.8341,
+      "step": 178
+    },
+    {
+      "epoch": 0.02909859383890108,
+      "grad_norm": 0.9325132369995117,
+      "learning_rate": 1.1118947368421053e-05,
+      "loss": 0.9248,
+      "step": 179
+    },
+    {
+      "epoch": 0.029261155815654718,
+      "grad_norm": 0.8238653540611267,
+      "learning_rate": 1.0589473684210526e-05,
+      "loss": 0.8453,
+      "step": 180
+    },
+    {
+      "epoch": 0.029423717792408356,
+      "grad_norm": 1.0096287727355957,
+      "learning_rate": 1.0060000000000002e-05,
+      "loss": 1.2289,
+      "step": 181
+    },
+    {
+      "epoch": 0.029586279769161995,
+      "grad_norm": 1.232429027557373,
+      "learning_rate": 9.530526315789474e-06,
+      "loss": 1.0992,
+      "step": 182
+    },
+    {
+      "epoch": 0.02974884174591563,
+      "grad_norm": 1.225100040435791,
+      "learning_rate": 9.001052631578948e-06,
+      "loss": 1.0505,
+      "step": 183
+    },
+    {
+      "epoch": 0.029911403722669268,
+      "grad_norm": 1.947990894317627,
+      "learning_rate": 8.471578947368422e-06,
+      "loss": 1.2918,
+      "step": 184
+    },
+    {
+      "epoch": 0.030073965699422906,
+      "grad_norm": 1.6593623161315918,
+      "learning_rate": 7.942105263157895e-06,
+      "loss": 1.2474,
+      "step": 185
+    },
+    {
+      "epoch": 0.03023652767617654,
+      "grad_norm": 1.3890700340270996,
+      "learning_rate": 7.412631578947368e-06,
+      "loss": 1.1882,
+      "step": 186
+    },
+    {
+      "epoch": 0.03039908965293018,
+      "grad_norm": 1.4280545711517334,
+      "learning_rate": 6.883157894736843e-06,
+      "loss": 1.2195,
+      "step": 187
+    },
+    {
+      "epoch": 0.030561651629683818,
+      "grad_norm": 1.4920859336853027,
+      "learning_rate": 6.3536842105263166e-06,
+      "loss": 0.8529,
+      "step": 188
+    },
+    {
+      "epoch": 0.030724213606437453,
+      "grad_norm": 1.9819258451461792,
+      "learning_rate": 5.8242105263157895e-06,
+      "loss": 0.9768,
+      "step": 189
+    },
+    {
+      "epoch": 0.03088677558319109,
+      "grad_norm": 1.6569184064865112,
+      "learning_rate": 5.294736842105263e-06,
+      "loss": 1.1028,
+      "step": 190
+    },
+    {
+      "epoch": 0.03104933755994473,
+      "grad_norm": 1.5863397121429443,
+      "learning_rate": 4.765263157894737e-06,
+      "loss": 0.9491,
+      "step": 191
+    },
+    {
+      "epoch": 0.031211899536698368,
+      "grad_norm": 1.8544268608093262,
+      "learning_rate": 4.235789473684211e-06,
+      "loss": 0.9581,
+      "step": 192
+    },
+    {
+      "epoch": 0.031374461513452,
+      "grad_norm": 1.6126660108566284,
+      "learning_rate": 3.706315789473684e-06,
+      "loss": 0.5916,
+      "step": 193
+    },
+    {
+      "epoch": 0.031537023490205644,
+      "grad_norm": 2.3393468856811523,
+      "learning_rate": 3.1768421052631583e-06,
+      "loss": 0.9394,
+      "step": 194
+    },
+    {
+      "epoch": 0.03169958546695928,
+      "grad_norm": 1.8670778274536133,
+      "learning_rate": 2.6473684210526316e-06,
+      "loss": 0.905,
+      "step": 195
+    },
+    {
+      "epoch": 0.031862147443712914,
+      "grad_norm": 2.77335786819458,
+      "learning_rate": 2.1178947368421054e-06,
+      "loss": 0.9273,
+      "step": 196
+    },
+    {
+      "epoch": 0.032024709420466556,
+      "grad_norm": 3.024428367614746,
+      "learning_rate": 1.5884210526315791e-06,
+      "loss": 1.3273,
+      "step": 197
+    },
+    {
+      "epoch": 0.03218727139722019,
+      "grad_norm": 2.7640976905822754,
+      "learning_rate": 1.0589473684210527e-06,
+      "loss": 0.9206,
+      "step": 198
+    },
+    {
+      "epoch": 0.032349833373973826,
+      "grad_norm": 2.9897639751434326,
+      "learning_rate": 5.294736842105263e-07,
+      "loss": 1.0956,
+      "step": 199
+    },
+    {
+      "epoch": 0.03251239535072747,
+      "grad_norm": 3.9954426288604736,
+      "learning_rate": 0.0,
+      "loss": 1.2385,
+      "step": 200
+    },
+    {
+      "epoch": 0.03251239535072747,
+      "eval_loss": 0.983881413936615,
+      "eval_runtime": 275.5247,
+      "eval_samples_per_second": 9.404,
+      "eval_steps_per_second": 2.352,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 7.127217035280384e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null