Training in progress, step 50, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +189 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a4e2ee3f5087580faae42ffe0d0fd7681086a7f90e289b1dac777b5431cc52d8
 size 319876032

 version https://git-lfs.github.com/spec/v1
+oid sha256:d697290ca4b6534e798255818213b091fd3f0595740e684cc903fbb1fe6d3d01
 size 319876032

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:04bc10e7fb8d91a50a68c6a3b44a18044704662d21db04978f71336307883285
 size 640009682

 version https://git-lfs.github.com/spec/v1
+oid sha256:582d30f4e481db674c6f1f8ad06950fb2d69c2944e81701424a8ffb19a7a65e1
 size 640009682

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6eefaefb2c28f82edfd1f632326679461b022a1653c691a8c94c603ef696d971
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:b453af8c7c7f7d9abda6defcdb0d9196c21c0c0960bedca4529dd3cf5dd971e0
 size 15024

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:324a0d60208588826a592d2853ff408ba6a7100fed002be5eec48f3c18eeff60
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:d47c5277bb13c62fa35a878d9932a883b5da63462224a7dac36da90562246f49
 size 15024

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8f03923668c7fd716893da05bd2abb9f55b8e98e438419bf1f97702e962d5d19
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:47848c403ed77fe07e3075ea610785f8d81e570f59fbf06446223232c4fa707c
 size 15024

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7e6d026313a985285d2d1079b4cbea35763f2868b014fa491f3384f882d6edee
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:fe8e336813f8ba6e64a959feea25131279ae80de63b4cdbd6e85b94282e5d508
 size 15024

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f37b2aa490ccb1598b01e14cda36e9081f7ce646deab4d3c2d03de0d2169a755
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:b1df0528620c07325b8faa7567e59b0c1e86a1f1ee6af1245a69c6c0463fe4e2
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.2665033340454102,
-  "best_model_checkpoint": "miner_id_24/checkpoint-25",
-  "epoch": 0.03388681802778719,
   "eval_steps": 25,
-  "global_step": 25,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -198,6 +198,189 @@
       "eval_samples_per_second": 23.257,
       "eval_steps_per_second": 2.911,
       "step": 25
     }
   ],
   "logging_steps": 1,
@@ -221,12 +404,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.629578157719552e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 1.0168567895889282,
+  "best_model_checkpoint": "miner_id_24/checkpoint-50",
+  "epoch": 0.06777363605557438,
   "eval_steps": 25,
+  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 23.257,
       "eval_steps_per_second": 2.911,
       "step": 25
+    },
+    {
+      "epoch": 0.03524229074889868,
+      "grad_norm": 18.225135803222656,
+      "learning_rate": 5e-05,
+      "loss": 20.766,
+      "step": 26
+    },
+    {
+      "epoch": 0.03659776347001017,
+      "grad_norm": 16.9871768951416,
+      "learning_rate": 4.6729843538492847e-05,
+      "loss": 21.6636,
+      "step": 27
+    },
+    {
+      "epoch": 0.037953236191121655,
+      "grad_norm": 17.358606338500977,
+      "learning_rate": 4.347369038899744e-05,
+      "loss": 19.1701,
+      "step": 28
+    },
+    {
+      "epoch": 0.03930870891223314,
+      "grad_norm": 17.15289306640625,
+      "learning_rate": 4.0245483899193595e-05,
+      "loss": 18.1974,
+      "step": 29
+    },
+    {
+      "epoch": 0.04066418163334463,
+      "grad_norm": 13.593099594116211,
+      "learning_rate": 3.705904774487396e-05,
+      "loss": 18.8118,
+      "step": 30
+    },
+    {
+      "epoch": 0.042019654354456114,
+      "grad_norm": 15.375582695007324,
+      "learning_rate": 3.392802673484193e-05,
+      "loss": 18.5611,
+      "step": 31
+    },
+    {
+      "epoch": 0.04337512707556761,
+      "grad_norm": 15.086400985717773,
+      "learning_rate": 3.086582838174551e-05,
+      "loss": 18.7409,
+      "step": 32
+    },
+    {
+      "epoch": 0.044730599796679094,
+      "grad_norm": 20.995872497558594,
+      "learning_rate": 2.7885565489049946e-05,
+      "loss": 18.5098,
+      "step": 33
+    },
+    {
+      "epoch": 0.04608607251779058,
+      "grad_norm": 33.625465393066406,
+      "learning_rate": 2.500000000000001e-05,
+      "loss": 17.3266,
+      "step": 34
+    },
+    {
+      "epoch": 0.047441545238902066,
+      "grad_norm": 16.21245574951172,
+      "learning_rate": 2.2221488349019903e-05,
+      "loss": 17.6345,
+      "step": 35
+    },
+    {
+      "epoch": 0.04879701796001355,
+      "grad_norm": 16.550430297851562,
+      "learning_rate": 1.9561928549563968e-05,
+      "loss": 16.328,
+      "step": 36
+    },
+    {
+      "epoch": 0.05015249068112504,
+      "grad_norm": 19.983448028564453,
+      "learning_rate": 1.703270924499656e-05,
+      "loss": 18.239,
+      "step": 37
+    },
+    {
+      "epoch": 0.05150796340223653,
+      "grad_norm": 20.869291305541992,
+      "learning_rate": 1.4644660940672627e-05,
+      "loss": 19.6876,
+      "step": 38
+    },
+    {
+      "epoch": 0.05286343612334802,
+      "grad_norm": 18.964210510253906,
+      "learning_rate": 1.2408009626051137e-05,
+      "loss": 16.477,
+      "step": 39
+    },
+    {
+      "epoch": 0.054218908844459505,
+      "grad_norm": 15.887338638305664,
+      "learning_rate": 1.0332332985438248e-05,
+      "loss": 16.8681,
+      "step": 40
+    },
+    {
+      "epoch": 0.05557438156557099,
+      "grad_norm": 16.398021697998047,
+      "learning_rate": 8.426519384872733e-06,
+      "loss": 15.5168,
+      "step": 41
+    },
+    {
+      "epoch": 0.05692985428668248,
+      "grad_norm": 23.358903884887695,
+      "learning_rate": 6.698729810778065e-06,
+      "loss": 17.0795,
+      "step": 42
+    },
+    {
+      "epoch": 0.058285327007793965,
+      "grad_norm": 27.82067108154297,
+      "learning_rate": 5.156362923365588e-06,
+      "loss": 17.604,
+      "step": 43
+    },
+    {
+      "epoch": 0.05964079972890546,
+      "grad_norm": 21.790048599243164,
+      "learning_rate": 3.8060233744356633e-06,
+      "loss": 17.5052,
+      "step": 44
+    },
+    {
+      "epoch": 0.060996272450016945,
+      "grad_norm": 21.848608016967773,
+      "learning_rate": 2.653493525244721e-06,
+      "loss": 17.0257,
+      "step": 45
+    },
+    {
+      "epoch": 0.06235174517112843,
+      "grad_norm": 15.1517333984375,
+      "learning_rate": 1.70370868554659e-06,
+      "loss": 16.9079,
+      "step": 46
+    },
+    {
+      "epoch": 0.06370721789223992,
+      "grad_norm": 14.538222312927246,
+      "learning_rate": 9.607359798384785e-07,
+      "loss": 15.9922,
+      "step": 47
+    },
+    {
+      "epoch": 0.0650626906133514,
+      "grad_norm": 16.66765785217285,
+      "learning_rate": 4.277569313094809e-07,
+      "loss": 17.3769,
+      "step": 48
+    },
+    {
+      "epoch": 0.06641816333446289,
+      "grad_norm": 18.048357009887695,
+      "learning_rate": 1.0705383806982606e-07,
+      "loss": 16.0147,
+      "step": 49
+    },
+    {
+      "epoch": 0.06777363605557438,
+      "grad_norm": 19.516748428344727,
+      "learning_rate": 0.0,
+      "loss": 18.572,
+      "step": 50
+    },
+    {
+      "epoch": 0.06777363605557438,
+      "eval_loss": 1.0168567895889282,
+      "eval_runtime": 213.7505,
+      "eval_samples_per_second": 23.251,
+      "eval_steps_per_second": 2.91,
+      "step": 50
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 5.259156315439104e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null