Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3ba41519dbde25d164865dc5e9497d8d36c30bb018f110d9411127e54e162951
 size 140815952

 version https://git-lfs.github.com/spec/v1
+oid sha256:5478cd3f45115734c5e2b80b21f9097b8723d23bfa0da24d2ea89f75a873ad3e
 size 140815952

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:75742939dfcba11593731f81af812aa425082535e6d1eba914d56402eff7256d
 size 281824770

 version https://git-lfs.github.com/spec/v1
+oid sha256:3f99ccf3d20e49f675cafbea9aaa11a298aab737b8a4e8d041a194487edf1169
 size 281824770

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:291c3b3e9fab4bfdf2e16adafa2beaea748a242782596ed00204a4801baff9a7
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:bfa49591298f0342c28e57b20816a275e1ca9f075b8f1801c2aae347db868451
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dfd59dca009004df561617f8f6994512d029a952a68609cac24b36df5a0757ce
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d2d754412c61116546142914503e7369d0cc35d3c380a07e5218f595d76b6d96
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 2.0016629695892334,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.008864986259271299,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 41.62,
       "eval_steps_per_second": 20.81,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.15927236476928e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 1.9855155944824219,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.011819981679028398,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 41.62,
       "eval_steps_per_second": 20.81,
       "step": 150
+    },
+    {
+      "epoch": 0.008924086167666441,
+      "grad_norm": 3.083522081375122,
+      "learning_rate": 1.9136088935510362e-05,
+      "loss": 1.8471,
+      "step": 151
+    },
+    {
+      "epoch": 0.008983186076061581,
+      "grad_norm": 3.249023914337158,
+      "learning_rate": 1.8414449687337464e-05,
+      "loss": 1.9572,
+      "step": 152
+    },
+    {
+      "epoch": 0.009042285984456723,
+      "grad_norm": 2.9980008602142334,
+      "learning_rate": 1.7703596875660645e-05,
+      "loss": 2.1264,
+      "step": 153
+    },
+    {
+      "epoch": 0.009101385892851865,
+      "grad_norm": 2.857516050338745,
+      "learning_rate": 1.700377325606388e-05,
+      "loss": 1.901,
+      "step": 154
+    },
+    {
+      "epoch": 0.009160485801247007,
+      "grad_norm": 3.0055689811706543,
+      "learning_rate": 1.631521781767214e-05,
+      "loss": 2.192,
+      "step": 155
+    },
+    {
+      "epoch": 0.00921958570964215,
+      "grad_norm": 2.5224833488464355,
+      "learning_rate": 1.5638165701536868e-05,
+      "loss": 1.9115,
+      "step": 156
+    },
+    {
+      "epoch": 0.009278685618037292,
+      "grad_norm": 2.467513084411621,
+      "learning_rate": 1.4972848120335453e-05,
+      "loss": 1.7274,
+      "step": 157
+    },
+    {
+      "epoch": 0.009337785526432434,
+      "grad_norm": 3.3824963569641113,
+      "learning_rate": 1.4319492279412388e-05,
+      "loss": 2.2169,
+      "step": 158
+    },
+    {
+      "epoch": 0.009396885434827576,
+      "grad_norm": 2.666517496109009,
+      "learning_rate": 1.3678321299188801e-05,
+      "loss": 2.1059,
+      "step": 159
+    },
+    {
+      "epoch": 0.009455985343222718,
+      "grad_norm": 3.0350444316864014,
+      "learning_rate": 1.3049554138967051e-05,
+      "loss": 2.3778,
+      "step": 160
+    },
+    {
+      "epoch": 0.00951508525161786,
+      "grad_norm": 2.6140732765197754,
+      "learning_rate": 1.2433405522156332e-05,
+      "loss": 1.8671,
+      "step": 161
+    },
+    {
+      "epoch": 0.009574185160013002,
+      "grad_norm": 2.8309125900268555,
+      "learning_rate": 1.183008586294485e-05,
+      "loss": 1.977,
+      "step": 162
+    },
+    {
+      "epoch": 0.009633285068408144,
+      "grad_norm": 3.1472816467285156,
+      "learning_rate": 1.1239801194443506e-05,
+      "loss": 2.4342,
+      "step": 163
+    },
+    {
+      "epoch": 0.009692384976803286,
+      "grad_norm": 2.7688024044036865,
+      "learning_rate": 1.066275309832584e-05,
+      "loss": 1.9787,
+      "step": 164
+    },
+    {
+      "epoch": 0.009751484885198428,
+      "grad_norm": 3.0642006397247314,
+      "learning_rate": 1.0099138635988026e-05,
+      "loss": 2.1242,
+      "step": 165
+    },
+    {
+      "epoch": 0.00981058479359357,
+      "grad_norm": 2.5731711387634277,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 2.0205,
+      "step": 166
+    },
+    {
+      "epoch": 0.009869684701988712,
+      "grad_norm": 2.6096303462982178,
+      "learning_rate": 9.012975854638949e-06,
+      "loss": 1.9473,
+      "step": 167
+    },
+    {
+      "epoch": 0.009928784610383854,
+      "grad_norm": 2.6382505893707275,
+      "learning_rate": 8.490798459222476e-06,
+      "loss": 1.9061,
+      "step": 168
+    },
+    {
+      "epoch": 0.009987884518778996,
+      "grad_norm": 3.020261526107788,
+      "learning_rate": 7.982796418105371e-06,
+      "loss": 2.3045,
+      "step": 169
+    },
+    {
+      "epoch": 0.010046984427174138,
+      "grad_norm": 2.3413009643554688,
+      "learning_rate": 7.489143213519301e-06,
+      "loss": 1.7087,
+      "step": 170
+    },
+    {
+      "epoch": 0.01010608433556928,
+      "grad_norm": 2.669962167739868,
+      "learning_rate": 7.010007427581378e-06,
+      "loss": 2.0177,
+      "step": 171
+    },
+    {
+      "epoch": 0.010165184243964422,
+      "grad_norm": 2.9112627506256104,
+      "learning_rate": 6.5455526847235825e-06,
+      "loss": 2.2063,
+      "step": 172
+    },
+    {
+      "epoch": 0.010224284152359564,
+      "grad_norm": 2.7621371746063232,
+      "learning_rate": 6.0959375958151045e-06,
+      "loss": 2.1102,
+      "step": 173
+    },
+    {
+      "epoch": 0.010283384060754706,
+      "grad_norm": 2.7470543384552,
+      "learning_rate": 5.6613157039969055e-06,
+      "loss": 1.9059,
+      "step": 174
+    },
+    {
+      "epoch": 0.010342483969149848,
+      "grad_norm": 2.68990159034729,
+      "learning_rate": 5.241835432246889e-06,
+      "loss": 2.0888,
+      "step": 175
+    },
+    {
+      "epoch": 0.01040158387754499,
+      "grad_norm": 2.371671676635742,
+      "learning_rate": 4.837640032693558e-06,
+      "loss": 1.6942,
+      "step": 176
+    },
+    {
+      "epoch": 0.010460683785940133,
+      "grad_norm": 2.7487049102783203,
+      "learning_rate": 4.448867537695578e-06,
+      "loss": 1.9798,
+      "step": 177
+    },
+    {
+      "epoch": 0.010519783694335275,
+      "grad_norm": 2.5755209922790527,
+      "learning_rate": 4.075650712703849e-06,
+      "loss": 1.8144,
+      "step": 178
+    },
+    {
+      "epoch": 0.010578883602730415,
+      "grad_norm": 2.740054130554199,
+      "learning_rate": 3.71811701092219e-06,
+      "loss": 1.9632,
+      "step": 179
+    },
+    {
+      "epoch": 0.010637983511125557,
+      "grad_norm": 2.762751340866089,
+      "learning_rate": 3.376388529782215e-06,
+      "loss": 2.0491,
+      "step": 180
+    },
+    {
+      "epoch": 0.010697083419520699,
+      "grad_norm": 2.6476433277130127,
+      "learning_rate": 3.0505819692471792e-06,
+      "loss": 2.0671,
+      "step": 181
+    },
+    {
+      "epoch": 0.010756183327915841,
+      "grad_norm": 3.5005996227264404,
+      "learning_rate": 2.7408085919590264e-06,
+      "loss": 2.2838,
+      "step": 182
+    },
+    {
+      "epoch": 0.010815283236310983,
+      "grad_norm": 2.557985305786133,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 1.8317,
+      "step": 183
+    },
+    {
+      "epoch": 0.010874383144706125,
+      "grad_norm": 2.7409958839416504,
+      "learning_rate": 2.1697790249779636e-06,
+      "loss": 1.9403,
+      "step": 184
+    },
+    {
+      "epoch": 0.010933483053101267,
+      "grad_norm": 2.9567854404449463,
+      "learning_rate": 1.908717841359048e-06,
+      "loss": 2.0511,
+      "step": 185
+    },
+    {
+      "epoch": 0.01099258296149641,
+      "grad_norm": 3.5258986949920654,
+      "learning_rate": 1.6640797865406288e-06,
+      "loss": 2.3455,
+      "step": 186
+    },
+    {
+      "epoch": 0.011051682869891551,
+      "grad_norm": 2.719268560409546,
+      "learning_rate": 1.4359484041943038e-06,
+      "loss": 1.9524,
+      "step": 187
+    },
+    {
+      "epoch": 0.011110782778286693,
+      "grad_norm": 3.0413248538970947,
+      "learning_rate": 1.2244016009781701e-06,
+      "loss": 2.0141,
+      "step": 188
+    },
+    {
+      "epoch": 0.011169882686681835,
+      "grad_norm": 3.6312408447265625,
+      "learning_rate": 1.0295116199317057e-06,
+      "loss": 1.986,
+      "step": 189
+    },
+    {
+      "epoch": 0.011228982595076977,
+      "grad_norm": 2.7848613262176514,
+      "learning_rate": 8.513450158049108e-07,
+      "loss": 1.7297,
+      "step": 190
+    },
+    {
+      "epoch": 0.01128808250347212,
+      "grad_norm": 2.962815523147583,
+      "learning_rate": 6.899626323298713e-07,
+      "loss": 2.0131,
+      "step": 191
+    },
+    {
+      "epoch": 0.011347182411867262,
+      "grad_norm": 3.577907085418701,
+      "learning_rate": 5.454195814427021e-07,
+      "loss": 2.4659,
+      "step": 192
+    },
+    {
+      "epoch": 0.011406282320262404,
+      "grad_norm": 3.7039952278137207,
+      "learning_rate": 4.177652244628627e-07,
+      "loss": 2.0184,
+      "step": 193
+    },
+    {
+      "epoch": 0.011465382228657546,
+      "grad_norm": 3.0766096115112305,
+      "learning_rate": 3.0704315523631953e-07,
+      "loss": 1.9093,
+      "step": 194
+    },
+    {
+      "epoch": 0.011524482137052688,
+      "grad_norm": 2.9538846015930176,
+      "learning_rate": 2.1329118524827662e-07,
+      "loss": 1.9372,
+      "step": 195
+    },
+    {
+      "epoch": 0.01158358204544783,
+      "grad_norm": 3.276865005493164,
+      "learning_rate": 1.3654133071059893e-07,
+      "loss": 2.1478,
+      "step": 196
+    },
+    {
+      "epoch": 0.011642681953842972,
+      "grad_norm": 2.88531231880188,
+      "learning_rate": 7.681980162830282e-08,
+      "loss": 1.9631,
+      "step": 197
+    },
+    {
+      "epoch": 0.011701781862238114,
+      "grad_norm": 2.8093371391296387,
+      "learning_rate": 3.4146992848854695e-08,
+      "loss": 1.7216,
+      "step": 198
+    },
+    {
+      "epoch": 0.011760881770633256,
+      "grad_norm": 2.859862804412842,
+      "learning_rate": 8.537477097364522e-09,
+      "loss": 1.8161,
+      "step": 199
+    },
+    {
+      "epoch": 0.011819981679028398,
+      "grad_norm": 3.6518397331237793,
+      "learning_rate": 0.0,
+      "loss": 2.0347,
+      "step": 200
+    },
+    {
+      "epoch": 0.011819981679028398,
+      "eval_loss": 1.9855155944824219,
+      "eval_runtime": 684.4118,
+      "eval_samples_per_second": 41.639,
+      "eval_steps_per_second": 20.819,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.54569648635904e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null