Training in progress, step 450, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +48 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d7afd8871dc04bc39c70d94d1657d1ecbf9e3e243bf477b981398bd8274d9869
 size 180385008

 version https://git-lfs.github.com/spec/v1
+oid sha256:c7a9f5d4ef6c1efe403f15ed1b52b883df56dc1af1dc6be5d39c8b9c8f96bdcc
 size 180385008

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6c1848ef6856e16ff24e480a4ee8f480c311c7fbf68383a5597504133ef72765
 size 91850618

 version https://git-lfs.github.com/spec/v1
+oid sha256:70a38b80fd201c6c86d9f8e9fe5a7116cf5323011542a1da110e2c726c0ffc64
 size 91850618

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8ec9d4f5618be2513fbbd8b00f97f539a8d70d68863b0d6fb9b7969987aad3ab
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:7de2459be49a033486bff628884dd714018d945161a56b8975ced9238c878fb2
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4210d0f4c79be19a1b0c8a85facc89fa1bc09497d44c3c3383a1a0c00e4709d7
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:8c579f5f648f371ddcf6d84448bace40e1d46168d2f7b5fae9b6105a9c79d201
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.8026256561279297,
-  "best_model_checkpoint": "miner_id_24/checkpoint-400",
-  "epoch": 0.09679370840895342,
   "eval_steps": 50,
-  "global_step": 400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -359,6 +359,49 @@
       "eval_samples_per_second": 32.675,
       "eval_steps_per_second": 8.169,
       "step": 400
     }
   ],
   "logging_steps": 10,
@@ -387,7 +430,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.88932260102144e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.7915147542953491,
+  "best_model_checkpoint": "miner_id_24/checkpoint-450",
+  "epoch": 0.1088929219600726,
   "eval_steps": 50,
+  "global_step": 450,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 32.675,
       "eval_steps_per_second": 8.169,
       "step": 400
+    },
+    {
+      "epoch": 0.09921355111917725,
+      "grad_norm": 0.8329099416732788,
+      "learning_rate": 2.033969009906811e-05,
+      "loss": 0.708,
+      "step": 410
+    },
+    {
+      "epoch": 0.10163339382940109,
+      "grad_norm": 1.3031426668167114,
+      "learning_rate": 1.6182877759340637e-05,
+      "loss": 0.7208,
+      "step": 420
+    },
+    {
+      "epoch": 0.10405323653962492,
+      "grad_norm": 1.197944164276123,
+      "learning_rate": 1.2466081360524275e-05,
+      "loss": 0.7973,
+      "step": 430
+    },
+    {
+      "epoch": 0.10647307924984876,
+      "grad_norm": 2.2623517513275146,
+      "learning_rate": 9.207408761062996e-06,
+      "loss": 0.8831,
+      "step": 440
+    },
+    {
+      "epoch": 0.1088929219600726,
+      "grad_norm": 1.5466116666793823,
+      "learning_rate": 6.422735886300764e-06,
+      "loss": 0.669,
+      "step": 450
+    },
+    {
+      "epoch": 0.1088929219600726,
+      "eval_loss": 0.7915147542953491,
+      "eval_runtime": 53.2722,
+      "eval_samples_per_second": 32.662,
+      "eval_steps_per_second": 8.166,
+      "step": 450
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 2.1270519349248e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null