Training in progress, step 1050, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +117 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4c3b97767ca7c9178bec65b2993d9d3cccf9c8cde6eeb4a72ac6a0b6a88d78e6
 size 83945296

 version https://git-lfs.github.com/spec/v1
+oid sha256:33b2440d2694321ee888d51ec5666ec9e866384a438fe8a89aab4937822170b1
 size 83945296

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d1afbb720527c8a5867883fc72092a9d32aee54c6814a16dc9eec65ce3811750
 size 168149074

 version https://git-lfs.github.com/spec/v1
+oid sha256:d50e471a33e6d700094dc5bd8ce2bca4a2efdc1b1d33b74f03e583750f796133
 size 168149074

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:aab1ea4bef8988b197925fbee11876428674fba7799ca0e8ae2027afbecdc488
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:5ba78f70e8450cf0ec82402ec5f695006e3cd4c321e54ba57e606421f97e4957
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6b5cfc34d80ad44f8253d7cd926a081fa016d75ac6bb00b185ad09b5e1b4725b
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:eec984887369339384df18a86847622608ab8c12ab961dc4703f600d8c64c21c
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 0.636073112487793,
   "best_model_checkpoint": "miner_id_24/checkpoint-900",
-  "epoch": 1.9533369506239826,
   "eval_steps": 150,
-  "global_step": 900,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -693,6 +693,119 @@
       "eval_samples_per_second": 14.415,
       "eval_steps_per_second": 1.818,
       "step": 900
     }
   ],
   "logging_steps": 10,
@@ -707,7 +820,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 0
       }
     },
     "TrainerControl": {
@@ -721,7 +834,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.2619222458826752e+18,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 0.636073112487793,
   "best_model_checkpoint": "miner_id_24/checkpoint-900",
+  "epoch": 2.278893109061313,
   "eval_steps": 150,
+  "global_step": 1050,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 14.415,
       "eval_steps_per_second": 1.818,
       "step": 900
+    },
+    {
+      "epoch": 1.9750406945198047,
+      "grad_norm": 33.91230773925781,
+      "learning_rate": 1.067489489247974e-05,
+      "loss": 1.9067,
+      "step": 910
+    },
+    {
+      "epoch": 1.9967444384156265,
+      "grad_norm": 28.854825973510742,
+      "learning_rate": 1.036474508437579e-05,
+      "loss": 2.3281,
+      "step": 920
+    },
+    {
+      "epoch": 2.018448182311449,
+      "grad_norm": 18.260103225708008,
+      "learning_rate": 1.0056771083298894e-05,
+      "loss": 2.1337,
+      "step": 930
+    },
+    {
+      "epoch": 2.0401519262072707,
+      "grad_norm": 19.979646682739258,
+      "learning_rate": 9.751117453465674e-06,
+      "loss": 1.6345,
+      "step": 940
+    },
+    {
+      "epoch": 2.0618556701030926,
+      "grad_norm": 22.703859329223633,
+      "learning_rate": 9.447927669901284e-06,
+      "loss": 1.634,
+      "step": 950
+    },
+    {
+      "epoch": 2.083559413998915,
+      "grad_norm": 21.72873878479004,
+      "learning_rate": 9.147344051091682e-06,
+      "loss": 1.5881,
+      "step": 960
+    },
+    {
+      "epoch": 2.1052631578947367,
+      "grad_norm": 31.957630157470703,
+      "learning_rate": 8.849507692178758e-06,
+      "loss": 1.3856,
+      "step": 970
+    },
+    {
+      "epoch": 2.126966901790559,
+      "grad_norm": 21.75389862060547,
+      "learning_rate": 8.554558398729726e-06,
+      "loss": 1.9382,
+      "step": 980
+    },
+    {
+      "epoch": 2.148670645686381,
+      "grad_norm": 23.939027786254883,
+      "learning_rate": 8.262634621111819e-06,
+      "loss": 1.8201,
+      "step": 990
+    },
+    {
+      "epoch": 2.1703743895822027,
+      "grad_norm": 21.948673248291016,
+      "learning_rate": 7.97387338950315e-06,
+      "loss": 1.5186,
+      "step": 1000
+    },
+    {
+      "epoch": 2.192078133478025,
+      "grad_norm": 26.39198112487793,
+      "learning_rate": 7.688410249570214e-06,
+      "loss": 1.4693,
+      "step": 1010
+    },
+    {
+      "epoch": 2.213781877373847,
+      "grad_norm": 43.11570358276367,
+      "learning_rate": 7.4063791988421905e-06,
+      "loss": 1.3836,
+      "step": 1020
+    },
+    {
+      "epoch": 2.235485621269669,
+      "grad_norm": 21.80547523498535,
+      "learning_rate": 7.127912623811993e-06,
+      "loss": 1.9962,
+      "step": 1030
+    },
+    {
+      "epoch": 2.257189365165491,
+      "grad_norm": 20.71767234802246,
+      "learning_rate": 6.853141237793507e-06,
+      "loss": 1.6606,
+      "step": 1040
+    },
+    {
+      "epoch": 2.278893109061313,
+      "grad_norm": 21.81035614013672,
+      "learning_rate": 6.582194019564266e-06,
+      "loss": 1.4825,
+      "step": 1050
+    },
+    {
+      "epoch": 2.278893109061313,
+      "eval_loss": 0.6611286997795105,
+      "eval_runtime": 53.8731,
+      "eval_samples_per_second": 14.423,
+      "eval_steps_per_second": 1.819,
+      "step": 1050
     }
   ],
   "logging_steps": 10,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 1
       }
     },
     "TrainerControl": {
       "attributes": {}
     }
   },
+  "total_flos": 1.4722426201964544e+18,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null