Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +372 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cf6744e519341d3b420656bd59b7889f18236f1f3cc1ad501c18077fe80aef06
 size 222865880

 version https://git-lfs.github.com/spec/v1
+oid sha256:534576653d7383b7fb5b03cbe05f08c60f7cfe6e9da373e6315b079b0d4e6da1
 size 222865880

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3999ede9030a20dd519b818bbab73cdaaf0bb1174ff55c1d93594f09588ebc40
 size 445914554

 version https://git-lfs.github.com/spec/v1
+oid sha256:89fe12ccf023ff96a5bef94512186aaac1c5fc58cc31b89dea5648ddf08ce3ff
 size 445914554

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5e16dd2bdf100c88e5df5a000258d46b25206ce676bdf853a6f888b275afb829
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:2569b8fe195e9e5ad144335deb2c70ce02cb5d910847d1ff219b75369e4f673b
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e70710c409284f74d525f8db5cfaccc22a8afd29416f19c595da9242ec92d936
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:fe157715eb8e05b3bab2a7f2fafac33705dc4a1a9dd7f6d860c3a7f9597d78bb
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.2887627184391022,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.038476337052712584,
   "eval_steps": 25,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1113,6 +1113,372 @@
       "eval_samples_per_second": 1.57,
       "eval_steps_per_second": 0.22,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1136,12 +1502,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 8.37691583496192e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.2834896147251129,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.05130178273695011,
   "eval_steps": 25,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 1.57,
       "eval_steps_per_second": 0.22,
       "step": 150
+    },
+    {
+      "epoch": 0.03873284596639733,
+      "grad_norm": 0.31122446060180664,
+      "learning_rate": 4.659698863221513e-05,
+      "loss": 1.1079,
+      "step": 151
+    },
+    {
+      "epoch": 0.03898935488008208,
+      "grad_norm": 0.24545907974243164,
+      "learning_rate": 4.481448235912671e-05,
+      "loss": 1.0046,
+      "step": 152
+    },
+    {
+      "epoch": 0.039245863793766836,
+      "grad_norm": 0.20316024124622345,
+      "learning_rate": 4.306073275629044e-05,
+      "loss": 1.0567,
+      "step": 153
+    },
+    {
+      "epoch": 0.039502372707451584,
+      "grad_norm": 0.20005953311920166,
+      "learning_rate": 4.133621928133665e-05,
+      "loss": 1.1053,
+      "step": 154
+    },
+    {
+      "epoch": 0.03975888162113633,
+      "grad_norm": 0.1976904422044754,
+      "learning_rate": 3.964141339903026e-05,
+      "loss": 1.1163,
+      "step": 155
+    },
+    {
+      "epoch": 0.04001539053482109,
+      "grad_norm": 0.2005474865436554,
+      "learning_rate": 3.797677845237696e-05,
+      "loss": 1.068,
+      "step": 156
+    },
+    {
+      "epoch": 0.040271899448505837,
+      "grad_norm": 0.18155024945735931,
+      "learning_rate": 3.634276953594982e-05,
+      "loss": 1.1684,
+      "step": 157
+    },
+    {
+      "epoch": 0.040528408362190585,
+      "grad_norm": 0.17311017215251923,
+      "learning_rate": 3.473983337147118e-05,
+      "loss": 1.0116,
+      "step": 158
+    },
+    {
+      "epoch": 0.04078491727587533,
+      "grad_norm": 0.17773979902267456,
+      "learning_rate": 3.316840818568315e-05,
+      "loss": 1.0785,
+      "step": 159
+    },
+    {
+      "epoch": 0.04104142618956009,
+      "grad_norm": 0.2995525002479553,
+      "learning_rate": 3.162892359054098e-05,
+      "loss": 1.0934,
+      "step": 160
+    },
+    {
+      "epoch": 0.04129793510324484,
+      "grad_norm": 0.17463243007659912,
+      "learning_rate": 3.0121800465761293e-05,
+      "loss": 0.9919,
+      "step": 161
+    },
+    {
+      "epoch": 0.041554444016929586,
+      "grad_norm": 0.17306958138942719,
+      "learning_rate": 2.8647450843757897e-05,
+      "loss": 1.1307,
+      "step": 162
+    },
+    {
+      "epoch": 0.04181095293061434,
+      "grad_norm": 0.15881255269050598,
+      "learning_rate": 2.7206277796996144e-05,
+      "loss": 1.1525,
+      "step": 163
+    },
+    {
+      "epoch": 0.04206746184429909,
+      "grad_norm": 0.16048413515090942,
+      "learning_rate": 2.5798675327796993e-05,
+      "loss": 1.1592,
+      "step": 164
+    },
+    {
+      "epoch": 0.04232397075798384,
+      "grad_norm": 0.15379053354263306,
+      "learning_rate": 2.4425028260620715e-05,
+      "loss": 0.9624,
+      "step": 165
+    },
+    {
+      "epoch": 0.04258047967166859,
+      "grad_norm": 0.2737276554107666,
+      "learning_rate": 2.3085712136859668e-05,
+      "loss": 1.0579,
+      "step": 166
+    },
+    {
+      "epoch": 0.04283698858535334,
+      "grad_norm": 0.15662381052970886,
+      "learning_rate": 2.178109311216913e-05,
+      "loss": 1.0594,
+      "step": 167
+    },
+    {
+      "epoch": 0.04309349749903809,
+      "grad_norm": 0.16340816020965576,
+      "learning_rate": 2.0511527856363912e-05,
+      "loss": 1.0777,
+      "step": 168
+    },
+    {
+      "epoch": 0.043350006412722845,
+      "grad_norm": 0.16664321720600128,
+      "learning_rate": 1.927736345590839e-05,
+      "loss": 1.0744,
+      "step": 169
+    },
+    {
+      "epoch": 0.043606515326407594,
+      "grad_norm": 0.17165815830230713,
+      "learning_rate": 1.8078937319026654e-05,
+      "loss": 1.0481,
+      "step": 170
+    },
+    {
+      "epoch": 0.04386302424009234,
+      "grad_norm": 0.17665603756904602,
+      "learning_rate": 1.6916577083458228e-05,
+      "loss": 1.17,
+      "step": 171
+    },
+    {
+      "epoch": 0.04411953315377709,
+      "grad_norm": 0.15224668383598328,
+      "learning_rate": 1.579060052688548e-05,
+      "loss": 0.9989,
+      "step": 172
+    },
+    {
+      "epoch": 0.044376042067461846,
+      "grad_norm": 0.1608746498823166,
+      "learning_rate": 1.4701315480056164e-05,
+      "loss": 1.1762,
+      "step": 173
+    },
+    {
+      "epoch": 0.044632550981146595,
+      "grad_norm": 0.1899283528327942,
+      "learning_rate": 1.3649019742625623e-05,
+      "loss": 1.1426,
+      "step": 174
+    },
+    {
+      "epoch": 0.04488905989483134,
+      "grad_norm": 0.18082468211650848,
+      "learning_rate": 1.2634001001741373e-05,
+      "loss": 1.1745,
+      "step": 175
+    },
+    {
+      "epoch": 0.04488905989483134,
+      "eval_loss": 0.28639593720436096,
+      "eval_runtime": 31.8318,
+      "eval_samples_per_second": 1.571,
+      "eval_steps_per_second": 0.22,
+      "step": 175
+    },
+    {
+      "epoch": 0.0451455688085161,
+      "grad_norm": 0.19314950704574585,
+      "learning_rate": 1.1656536753392287e-05,
+      "loss": 1.2775,
+      "step": 176
+    },
+    {
+      "epoch": 0.04540207772220085,
+      "grad_norm": 0.18126244843006134,
+      "learning_rate": 1.0716894226543953e-05,
+      "loss": 1.0684,
+      "step": 177
+    },
+    {
+      "epoch": 0.045658586635885595,
+      "grad_norm": 0.18341222405433655,
+      "learning_rate": 9.815330310080887e-06,
+      "loss": 1.1358,
+      "step": 178
+    },
+    {
+      "epoch": 0.04591509554957035,
+      "grad_norm": 0.18251806497573853,
+      "learning_rate": 8.952091482575824e-06,
+      "loss": 1.2176,
+      "step": 179
+    },
+    {
+      "epoch": 0.0461716044632551,
+      "grad_norm": 0.1876961886882782,
+      "learning_rate": 8.127413744904804e-06,
+      "loss": 1.1623,
+      "step": 180
+    },
+    {
+      "epoch": 0.04642811337693985,
+      "grad_norm": 0.24478206038475037,
+      "learning_rate": 7.34152255572697e-06,
+      "loss": 1.3566,
+      "step": 181
+    },
+    {
+      "epoch": 0.046684622290624596,
+      "grad_norm": 0.22969405353069305,
+      "learning_rate": 6.594632769846353e-06,
+      "loss": 1.335,
+      "step": 182
+    },
+    {
+      "epoch": 0.04694113120430935,
+      "grad_norm": 1.3417153358459473,
+      "learning_rate": 5.886948579472778e-06,
+      "loss": 1.3391,
+      "step": 183
+    },
+    {
+      "epoch": 0.0471976401179941,
+      "grad_norm": 0.21031659841537476,
+      "learning_rate": 5.218663458397715e-06,
+      "loss": 1.214,
+      "step": 184
+    },
+    {
+      "epoch": 0.04745414903167885,
+      "grad_norm": 0.20800001919269562,
+      "learning_rate": 4.589960109100444e-06,
+      "loss": 1.2491,
+      "step": 185
+    },
+    {
+      "epoch": 0.0477106579453636,
+      "grad_norm": 0.2116893082857132,
+      "learning_rate": 4.001010412799138e-06,
+      "loss": 1.3338,
+      "step": 186
+    },
+    {
+      "epoch": 0.04796716685904835,
+      "grad_norm": 0.20268724858760834,
+      "learning_rate": 3.451975382460109e-06,
+      "loss": 1.1552,
+      "step": 187
+    },
+    {
+      "epoch": 0.0482236757727331,
+      "grad_norm": 0.21017323434352875,
+      "learning_rate": 2.9430051187785962e-06,
+      "loss": 1.1528,
+      "step": 188
+    },
+    {
+      "epoch": 0.048480184686417856,
+      "grad_norm": 0.23236685991287231,
+      "learning_rate": 2.4742387691426445e-06,
+      "loss": 1.2059,
+      "step": 189
+    },
+    {
+      "epoch": 0.048736693600102604,
+      "grad_norm": 0.26075395941734314,
+      "learning_rate": 2.0458044895916513e-06,
+      "loss": 1.2194,
+      "step": 190
+    },
+    {
+      "epoch": 0.04899320251378735,
+      "grad_norm": 0.263323575258255,
+      "learning_rate": 1.6578194097797258e-06,
+      "loss": 1.2614,
+      "step": 191
+    },
+    {
+      "epoch": 0.04924971142747211,
+      "grad_norm": 0.2640993297100067,
+      "learning_rate": 1.3103896009537207e-06,
+      "loss": 1.1803,
+      "step": 192
+    },
+    {
+      "epoch": 0.049506220341156856,
+      "grad_norm": 0.2626555263996124,
+      "learning_rate": 1.0036100469542786e-06,
+      "loss": 1.1416,
+      "step": 193
+    },
+    {
+      "epoch": 0.049762729254841605,
+      "grad_norm": 0.380075603723526,
+      "learning_rate": 7.375646182482875e-07,
+      "loss": 1.1872,
+      "step": 194
+    },
+    {
+      "epoch": 0.05001923816852635,
+      "grad_norm": 0.40151721239089966,
+      "learning_rate": 5.123260489995229e-07,
+      "loss": 1.2733,
+      "step": 195
+    },
+    {
+      "epoch": 0.05027574708221111,
+      "grad_norm": 0.546929657459259,
+      "learning_rate": 3.2795591718381975e-07,
+      "loss": 1.1326,
+      "step": 196
+    },
+    {
+      "epoch": 0.05053225599589586,
+      "grad_norm": 0.834646463394165,
+      "learning_rate": 1.8450462775428942e-07,
+      "loss": 0.945,
+      "step": 197
+    },
+    {
+      "epoch": 0.050788764909580605,
+      "grad_norm": 0.5064449906349182,
+      "learning_rate": 8.201139886109264e-08,
+      "loss": 1.1507,
+      "step": 198
+    },
+    {
+      "epoch": 0.05104527382326536,
+      "grad_norm": 0.6023350358009338,
+      "learning_rate": 2.0504251129649374e-08,
+      "loss": 1.3557,
+      "step": 199
+    },
+    {
+      "epoch": 0.05130178273695011,
+      "grad_norm": 0.7777907252311707,
+      "learning_rate": 0.0,
+      "loss": 1.4745,
+      "step": 200
+    },
+    {
+      "epoch": 0.05130178273695011,
+      "eval_loss": 0.2834896147251129,
+      "eval_runtime": 31.838,
+      "eval_samples_per_second": 1.57,
+      "eval_steps_per_second": 0.22,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.114751925878784e+18,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null