kokovova committed
Commit e181544 · verified · 1 Parent(s): e48eff2

Training in progress, step 200, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d9c3c0e1fd517b5abc94df3ca8c0a09e23f16ec05c206eab73e819ba8d7894de
+oid sha256:ad0c5305d977f5686093eb71fc2bba946488ee7cf7178547dc03de653246bac1
 size 639691872
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:763707e860dfb2853aa5098824a9ac4e97fc1d4ee6ab1f90b6a691178fe986b4
+oid sha256:56b4fda42154c448594c42d7e9337c9a05f02d2c1979a69e2d6ab4f0252ad2df
 size 325339796
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:25dfb2a0841ee402d4a2a03f3d5c26d4cb6314da40dd61b87b3528fcc2c8bccf
+oid sha256:5650c81035b69a51d4dc894dda45abfe03b5dba1f240c8185d4d47d7114a6953
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e6da36b358afefa2fe6fe8e3889efc77dfb89ac577ed7bb55c631123a9ebe149
+oid sha256:56e06160672e234a504b2a9f8fb3d80ed8c221e80fde36a5548d37e259bd5bc6
 size 1064
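
Each binary above (adapter_model.safetensors, optimizer.pt, rng_state.pth, scheduler.pt) is tracked with Git LFS, so the diff only shows the updated pointer file: the LFS spec version, the SHA-256 oid of the new object, and its size in bytes. As a minimal sketch (assuming the objects have already been pulled locally; the local path below is hypothetical), a downloaded file can be checked against its pointer like this:

import hashlib
import os

def verify_lfs_pointer(local_path, expected_oid, expected_size):
    # Compare a downloaded LFS object against the oid/size from its pointer file.
    if os.path.getsize(local_path) != expected_size:
        return False
    sha = hashlib.sha256()
    with open(local_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            sha.update(chunk)
    return sha.hexdigest() == expected_oid

# Hypothetical local path; oid and size taken from the new adapter_model pointer above.
print(verify_lfs_pointer(
    "last-checkpoint/adapter_model.safetensors",
    "ad0c5305d977f5686093eb71fc2bba946488ee7cf7178547dc03de653246bac1",
    639691872,
))

The same check applies to any of the pointers in this commit; only the oid and size differ per file.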
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 2.6532483100891113,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.019674711437565582,
+  "best_metric": 2.6275486946105957,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.026232948583420776,
   "eval_steps": 50,
-  "global_step": 150,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -144,6 +144,49 @@
       "eval_samples_per_second": 19.357,
       "eval_steps_per_second": 4.842,
       "step": 150
+    },
+    {
+      "epoch": 0.02098635886673662,
+      "grad_norm": 3.4204041957855225,
+      "learning_rate": 2.2444444444444445e-06,
+      "loss": 2.567,
+      "step": 160
+    },
+    {
+      "epoch": 0.02229800629590766,
+      "grad_norm": 3.5250372886657715,
+      "learning_rate": 1.6833333333333332e-06,
+      "loss": 2.6192,
+      "step": 170
+    },
+    {
+      "epoch": 0.0236096537250787,
+      "grad_norm": 4.614537239074707,
+      "learning_rate": 1.1222222222222222e-06,
+      "loss": 2.7407,
+      "step": 180
+    },
+    {
+      "epoch": 0.024921301154249738,
+      "grad_norm": 4.56196403503418,
+      "learning_rate": 5.611111111111111e-07,
+      "loss": 2.6979,
+      "step": 190
+    },
+    {
+      "epoch": 0.026232948583420776,
+      "grad_norm": 7.957613468170166,
+      "learning_rate": 0.0,
+      "loss": 2.9207,
+      "step": 200
+    },
+    {
+      "epoch": 0.026232948583420776,
+      "eval_loss": 2.6275486946105957,
+      "eval_runtime": 165.8027,
+      "eval_samples_per_second": 19.36,
+      "eval_steps_per_second": 4.843,
+      "step": 200
     }
   ],
   "logging_steps": 10,
@@ -167,12 +210,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 5.03265242382336e+16,
+  "total_flos": 6.71020323176448e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null