Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4645db15cb8051f985e2a50c613e0570afd8d4079e8d83ec368fac88dfe2d209
 size 671149168

 version https://git-lfs.github.com/spec/v1
+oid sha256:32bb62d6a6df1e9090bf1a2ddcff32b962674a33a2f10919c46fa940cffde98a
 size 671149168

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c982507742ded6fb6c09c99f01d0b8ba2ab28404219dceb9bf512b67c6d2189a
 size 1342555602

 version https://git-lfs.github.com/spec/v1
+oid sha256:fb4b57a1b567db0e063f6a8ed77f7bca3d4ba53fbd8b03be0cf8f77ced921d7a
 size 1342555602

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:73b1a8839cfb05ca6b01f7cb76ab6a2b0ecb176121723a6354b25d30c2303bbb
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:3d4d7d51d7f3d82dc84cfce4cdb53e28aa641b389bec6f16d6cbb7c270f5c763
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dfd59dca009004df561617f8f6994512d029a952a68609cac24b36df5a0757ce
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d2d754412c61116546142914503e7369d0cc35d3c380a07e5218f595d76b6d96
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.4006094932556152,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.11316484345529988,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 10.01,
       "eval_steps_per_second": 5.007,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.150074508503941e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 1.317726731300354,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.15088645794039984,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 10.01,
       "eval_steps_per_second": 5.007,
       "step": 150
+    },
+    {
+      "epoch": 0.11391927574500188,
+      "grad_norm": 7.5596137046813965,
+      "learning_rate": 1.9136088935510362e-05,
+      "loss": 5.8243,
+      "step": 151
+    },
+    {
+      "epoch": 0.11467370803470389,
+      "grad_norm": 9.126245498657227,
+      "learning_rate": 1.8414449687337464e-05,
+      "loss": 6.3349,
+      "step": 152
+    },
+    {
+      "epoch": 0.11542814032440589,
+      "grad_norm": 8.61217975616455,
+      "learning_rate": 1.7703596875660645e-05,
+      "loss": 6.3952,
+      "step": 153
+    },
+    {
+      "epoch": 0.11618257261410789,
+      "grad_norm": 8.0001802444458,
+      "learning_rate": 1.700377325606388e-05,
+      "loss": 6.1142,
+      "step": 154
+    },
+    {
+      "epoch": 0.11693700490380989,
+      "grad_norm": 8.282342910766602,
+      "learning_rate": 1.631521781767214e-05,
+      "loss": 6.4089,
+      "step": 155
+    },
+    {
+      "epoch": 0.11769143719351188,
+      "grad_norm": 7.451427936553955,
+      "learning_rate": 1.5638165701536868e-05,
+      "loss": 6.5969,
+      "step": 156
+    },
+    {
+      "epoch": 0.11844586948321388,
+      "grad_norm": 6.895029544830322,
+      "learning_rate": 1.4972848120335453e-05,
+      "loss": 5.5629,
+      "step": 157
+    },
+    {
+      "epoch": 0.11920030177291588,
+      "grad_norm": 6.875367164611816,
+      "learning_rate": 1.4319492279412388e-05,
+      "loss": 5.9498,
+      "step": 158
+    },
+    {
+      "epoch": 0.11995473406261788,
+      "grad_norm": 7.0699334144592285,
+      "learning_rate": 1.3678321299188801e-05,
+      "loss": 5.9006,
+      "step": 159
+    },
+    {
+      "epoch": 0.12070916635231987,
+      "grad_norm": 6.803040027618408,
+      "learning_rate": 1.3049554138967051e-05,
+      "loss": 6.0293,
+      "step": 160
+    },
+    {
+      "epoch": 0.12146359864202187,
+      "grad_norm": 7.202753067016602,
+      "learning_rate": 1.2433405522156332e-05,
+      "loss": 6.1596,
+      "step": 161
+    },
+    {
+      "epoch": 0.12221803093172388,
+      "grad_norm": 6.716833114624023,
+      "learning_rate": 1.183008586294485e-05,
+      "loss": 5.9252,
+      "step": 162
+    },
+    {
+      "epoch": 0.12297246322142588,
+      "grad_norm": 7.449049472808838,
+      "learning_rate": 1.1239801194443506e-05,
+      "loss": 6.2734,
+      "step": 163
+    },
+    {
+      "epoch": 0.12372689551112788,
+      "grad_norm": 7.610846042633057,
+      "learning_rate": 1.066275309832584e-05,
+      "loss": 6.1015,
+      "step": 164
+    },
+    {
+      "epoch": 0.12448132780082988,
+      "grad_norm": 7.21907377243042,
+      "learning_rate": 1.0099138635988026e-05,
+      "loss": 5.906,
+      "step": 165
+    },
+    {
+      "epoch": 0.12523576009053186,
+      "grad_norm": 7.018588542938232,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 6.0964,
+      "step": 166
+    },
+    {
+      "epoch": 0.12599019238023387,
+      "grad_norm": 7.820737361907959,
+      "learning_rate": 9.012975854638949e-06,
+      "loss": 5.8306,
+      "step": 167
+    },
+    {
+      "epoch": 0.12674462466993588,
+      "grad_norm": 8.341608047485352,
+      "learning_rate": 8.490798459222476e-06,
+      "loss": 6.4438,
+      "step": 168
+    },
+    {
+      "epoch": 0.12749905695963787,
+      "grad_norm": 7.734309673309326,
+      "learning_rate": 7.982796418105371e-06,
+      "loss": 5.7469,
+      "step": 169
+    },
+    {
+      "epoch": 0.12825348924933988,
+      "grad_norm": 7.480657577514648,
+      "learning_rate": 7.489143213519301e-06,
+      "loss": 5.4772,
+      "step": 170
+    },
+    {
+      "epoch": 0.12900792153904186,
+      "grad_norm": 8.113786697387695,
+      "learning_rate": 7.010007427581378e-06,
+      "loss": 5.5337,
+      "step": 171
+    },
+    {
+      "epoch": 0.12976235382874388,
+      "grad_norm": 7.584042072296143,
+      "learning_rate": 6.5455526847235825e-06,
+      "loss": 6.0905,
+      "step": 172
+    },
+    {
+      "epoch": 0.13051678611844586,
+      "grad_norm": 7.5577921867370605,
+      "learning_rate": 6.0959375958151045e-06,
+      "loss": 6.3663,
+      "step": 173
+    },
+    {
+      "epoch": 0.13127121840814787,
+      "grad_norm": 8.262290000915527,
+      "learning_rate": 5.6613157039969055e-06,
+      "loss": 5.8939,
+      "step": 174
+    },
+    {
+      "epoch": 0.13202565069784986,
+      "grad_norm": 8.029220581054688,
+      "learning_rate": 5.241835432246889e-06,
+      "loss": 6.0865,
+      "step": 175
+    },
+    {
+      "epoch": 0.13278008298755187,
+      "grad_norm": 7.945626735687256,
+      "learning_rate": 4.837640032693558e-06,
+      "loss": 5.5796,
+      "step": 176
+    },
+    {
+      "epoch": 0.13353451527725388,
+      "grad_norm": 8.58486270904541,
+      "learning_rate": 4.448867537695578e-06,
+      "loss": 6.0029,
+      "step": 177
+    },
+    {
+      "epoch": 0.13428894756695586,
+      "grad_norm": 7.3270745277404785,
+      "learning_rate": 4.075650712703849e-06,
+      "loss": 5.3606,
+      "step": 178
+    },
+    {
+      "epoch": 0.13504337985665787,
+      "grad_norm": 7.994801044464111,
+      "learning_rate": 3.71811701092219e-06,
+      "loss": 5.9999,
+      "step": 179
+    },
+    {
+      "epoch": 0.13579781214635986,
+      "grad_norm": 7.928518295288086,
+      "learning_rate": 3.376388529782215e-06,
+      "loss": 6.375,
+      "step": 180
+    },
+    {
+      "epoch": 0.13655224443606187,
+      "grad_norm": 8.030516624450684,
+      "learning_rate": 3.0505819692471792e-06,
+      "loss": 5.4132,
+      "step": 181
+    },
+    {
+      "epoch": 0.13730667672576385,
+      "grad_norm": 7.663419723510742,
+      "learning_rate": 2.7408085919590264e-06,
+      "loss": 4.9839,
+      "step": 182
+    },
+    {
+      "epoch": 0.13806110901546587,
+      "grad_norm": 7.753046035766602,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 5.4283,
+      "step": 183
+    },
+    {
+      "epoch": 0.13881554130516785,
+      "grad_norm": 7.275458335876465,
+      "learning_rate": 2.1697790249779636e-06,
+      "loss": 5.2769,
+      "step": 184
+    },
+    {
+      "epoch": 0.13956997359486986,
+      "grad_norm": 8.196554183959961,
+      "learning_rate": 1.908717841359048e-06,
+      "loss": 5.5939,
+      "step": 185
+    },
+    {
+      "epoch": 0.14032440588457187,
+      "grad_norm": 8.262168884277344,
+      "learning_rate": 1.6640797865406288e-06,
+      "loss": 5.6388,
+      "step": 186
+    },
+    {
+      "epoch": 0.14107883817427386,
+      "grad_norm": 8.033586502075195,
+      "learning_rate": 1.4359484041943038e-06,
+      "loss": 5.6564,
+      "step": 187
+    },
+    {
+      "epoch": 0.14183327046397587,
+      "grad_norm": 8.317608833312988,
+      "learning_rate": 1.2244016009781701e-06,
+      "loss": 5.4141,
+      "step": 188
+    },
+    {
+      "epoch": 0.14258770275367785,
+      "grad_norm": 7.451907634735107,
+      "learning_rate": 1.0295116199317057e-06,
+      "loss": 4.7749,
+      "step": 189
+    },
+    {
+      "epoch": 0.14334213504337986,
+      "grad_norm": 7.307699203491211,
+      "learning_rate": 8.513450158049108e-07,
+      "loss": 4.6471,
+      "step": 190
+    },
+    {
+      "epoch": 0.14409656733308185,
+      "grad_norm": 8.553318977355957,
+      "learning_rate": 6.899626323298713e-07,
+      "loss": 5.5232,
+      "step": 191
+    },
+    {
+      "epoch": 0.14485099962278386,
+      "grad_norm": 8.741515159606934,
+      "learning_rate": 5.454195814427021e-07,
+      "loss": 5.1838,
+      "step": 192
+    },
+    {
+      "epoch": 0.14560543191248584,
+      "grad_norm": 8.356700897216797,
+      "learning_rate": 4.177652244628627e-07,
+      "loss": 4.7049,
+      "step": 193
+    },
+    {
+      "epoch": 0.14635986420218786,
+      "grad_norm": 8.87369441986084,
+      "learning_rate": 3.0704315523631953e-07,
+      "loss": 4.9752,
+      "step": 194
+    },
+    {
+      "epoch": 0.14711429649188984,
+      "grad_norm": 9.124860763549805,
+      "learning_rate": 2.1329118524827662e-07,
+      "loss": 4.7535,
+      "step": 195
+    },
+    {
+      "epoch": 0.14786872878159185,
+      "grad_norm": 9.08664608001709,
+      "learning_rate": 1.3654133071059893e-07,
+      "loss": 4.8214,
+      "step": 196
+    },
+    {
+      "epoch": 0.14862316107129386,
+      "grad_norm": 9.12248706817627,
+      "learning_rate": 7.681980162830282e-08,
+      "loss": 4.8401,
+      "step": 197
+    },
+    {
+      "epoch": 0.14937759336099585,
+      "grad_norm": 10.60326099395752,
+      "learning_rate": 3.4146992848854695e-08,
+      "loss": 3.3106,
+      "step": 198
+    },
+    {
+      "epoch": 0.15013202565069786,
+      "grad_norm": 7.462176322937012,
+      "learning_rate": 8.537477097364522e-09,
+      "loss": 2.0658,
+      "step": 199
+    },
+    {
+      "epoch": 0.15088645794039984,
+      "grad_norm": 7.780041694641113,
+      "learning_rate": 0.0,
+      "loss": 1.7847,
+      "step": 200
+    },
+    {
+      "epoch": 0.15088645794039984,
+      "eval_loss": 1.317726731300354,
+      "eval_runtime": 223.0408,
+      "eval_samples_per_second": 10.012,
+      "eval_steps_per_second": 5.008,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 2.865573512997765e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null