Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +83 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:77a8c1d55ad0560d1fea317d62755fa3ac455956550bdc8beebf8fdb5cbad3e7
 size 90207248

 version https://git-lfs.github.com/spec/v1
+oid sha256:1d787544b08ab0d4cade0305dad7f50350a2c24c6c0ee0b603d14c35c509da9f
 size 90207248

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0c7035dcff0de3120190b5471e24169132d7aba9ca8b90a8256fe59a3a6c52f0
 size 46057082

 version https://git-lfs.github.com/spec/v1
+oid sha256:b4ee7fc5f75ec00f16358de8c2685a30e8a2532aa952083a168fe80d6f9bcaeb
 size 46057082

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e31a2c3d0797ac580a92fccc8f6f77fb2572c5039b438dfe92ee0b2018c8aaea
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:99096d4e8555c6ef597909d68cc884f8aec314568300e99269d6d682793a4a68
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4598db665e6346ba448110213cdb9bf4e0fd12577c6b8ba0e9bdef835e311c10
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:0a55eea102467b61b8b63052cdc819cee63bc2a6becf7451f13fc332baf0514f
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 0.7732155919075012,
   "best_model_checkpoint": "miner_id_24/checkpoint-170",
-  "epoch": 1.1326378539493294,
   "eval_steps": 10,
-  "global_step": 190,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1497,6 +1497,84 @@
       "eval_samples_per_second": 7.273,
       "eval_steps_per_second": 1.479,
       "step": 190
     }
   ],
   "logging_steps": 1,
@@ -1511,7 +1589,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 2
       }
     },
     "TrainerControl": {
@@ -1520,12 +1598,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 9.288846023432602e+16,
   "train_batch_size": 5,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 0.7732155919075012,
   "best_model_checkpoint": "miner_id_24/checkpoint-170",
+  "epoch": 1.1922503725782414,
   "eval_steps": 10,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 7.273,
       "eval_steps_per_second": 1.479,
       "step": 190
+    },
+    {
+      "epoch": 1.1385991058122205,
+      "grad_norm": 0.2825652062892914,
+      "learning_rate": 0.0001773311242813613,
+      "loss": 0.7241,
+      "step": 191
+    },
+    {
+      "epoch": 1.1445603576751118,
+      "grad_norm": 0.22261716425418854,
+      "learning_rate": 0.00017709000624184162,
+      "loss": 0.6043,
+      "step": 192
+    },
+    {
+      "epoch": 1.150521609538003,
+      "grad_norm": 0.2786444425582886,
+      "learning_rate": 0.00017684777842299205,
+      "loss": 0.8017,
+      "step": 193
+    },
+    {
+      "epoch": 1.1564828614008942,
+      "grad_norm": 0.28150370717048645,
+      "learning_rate": 0.0001766044443118978,
+      "loss": 0.6926,
+      "step": 194
+    },
+    {
+      "epoch": 1.1624441132637853,
+      "grad_norm": 0.24119311571121216,
+      "learning_rate": 0.0001763600074115703,
+      "loss": 0.5431,
+      "step": 195
+    },
+    {
+      "epoch": 1.1684053651266766,
+      "grad_norm": 0.25659507513046265,
+      "learning_rate": 0.00017611447124089649,
+      "loss": 0.7129,
+      "step": 196
+    },
+    {
+      "epoch": 1.174366616989568,
+      "grad_norm": 0.2532925605773926,
+      "learning_rate": 0.00017586783933458834,
+      "loss": 0.7385,
+      "step": 197
+    },
+    {
+      "epoch": 1.180327868852459,
+      "grad_norm": 0.2979128360748291,
+      "learning_rate": 0.00017562011524313185,
+      "loss": 0.8338,
+      "step": 198
+    },
+    {
+      "epoch": 1.1862891207153503,
+      "grad_norm": 0.27080920338630676,
+      "learning_rate": 0.00017537130253273613,
+      "loss": 0.7389,
+      "step": 199
+    },
+    {
+      "epoch": 1.1922503725782414,
+      "grad_norm": 0.29549166560173035,
+      "learning_rate": 0.0001751214047852818,
+      "loss": 0.6476,
+      "step": 200
+    },
+    {
+      "epoch": 1.1922503725782414,
+      "eval_loss": 0.7744897603988647,
+      "eval_runtime": 24.342,
+      "eval_samples_per_second": 7.271,
+      "eval_steps_per_second": 1.479,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 3
       }
     },
     "TrainerControl": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 9.778247816026522e+16,
   "train_batch_size": 5,
   "trial_name": null,
   "trial_params": null