Training in progress, step 141, checkpoint

Browse files

Files changed (12) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/rng_state_4.pth +1 -1
last-checkpoint/rng_state_5.pth +1 -1
last-checkpoint/rng_state_6.pth +1 -1
last-checkpoint/rng_state_7.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +291 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d3a07bb6a35ea14e1ed69c3c2d77fbf0664d1804e0dc1874fe82dd47e43cda05
 size 389074464

 version https://git-lfs.github.com/spec/v1
+oid sha256:aeb20191a6c2b119231d7678dd617bc707711a23a1bb9b64cf760c7de0b71218
 size 389074464

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d656b3e2fae6d74b6b99b062276209667100e0e05d05a48c76898b9843f72c57
 size 198011252

 version https://git-lfs.github.com/spec/v1
+oid sha256:c06718b376d024d256e2c7914d486bd8d83ea7b66f2817ccdc19466a5d4484cc
 size 198011252

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:63add0586146b851f47d881f8a2c86d2e7bbd1031b34991ea727eda767e3ba6f
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:d9faa80e895f2ee441f65233c3b9d99d52a69600429d38bbddd6eead4f9c541b
 size 15984

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8187a6c88d7933f4442806aa816104ffa2abe40157d1b5941b707067b0f91484
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:1ef808943930759c29df7d6cb43c7ce42f18630028d7057fa5027d9913fbda00
 size 15984

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:428d17df5bbcb9c5bba8928ed07c3ac1e2473387421762fc2dd2a18dad7163cb
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:400862bf97811c06d931cbea4873b2c4f76e79cd2ca67b0903bcbae23651c690
 size 15984

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:10e76a88b495596517a8c5e8dd0c9cf73e2fa8d302cb53089a3a0a19398d0705
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:2c9ee43196afab265d60dcd4c1c900179ab22f775dc75282af2f09c2068eaf05
 size 15984

last-checkpoint/rng_state_4.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7e07c2149652b1186dc91873797838ca755d72668f5e0d5315a9746f87efcc58
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:a2bc9793c1138e05078b265d5eaf89968ed1a8a6e30d8d2e6c8cd400956edb28
 size 15984

last-checkpoint/rng_state_5.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ea6c246d355698e0da22b783baa4cd562f3bf8472a265e83f06d63d516cc95c7
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:6d6e96260fd16c14c9f9f12f3b8f9c8c35af2268f7c6f573e7123e9610bf0097
 size 15984

last-checkpoint/rng_state_6.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:55aa132a1f6a464d42b97932e211287aa777c051c2c25ed9e36ac7ddda94bf95
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:ad3f1ce05ff8747c562da4063aca1d62d228d34ae20324fd91268b964a6f1fd5
 size 15984

last-checkpoint/rng_state_7.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:15a7d871d8fc1dcb9a693c26cb0b989af2ab985ab337aecb1daa8032f9df0a10
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:aec81ebc5fec1f650ab3e9c5c8e2325a8fbc2b8c2c20649882b77b60f6d6de60
 size 15984

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b323f2cb8c959ffbaf90b97d17df7a3447ef35ce5770b54457987ada42ec67f4
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:e7b8e2b43f384d74d38718a5b54e296fe62eb266007d5519e2bb4f49d875fe13
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 0.9264618754386902,
   "best_model_checkpoint": "miner_id_24/checkpoint-100",
-  "epoch": 2.127659574468085,
   "eval_steps": 50,
-  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -731,6 +731,293 @@
       "eval_samples_per_second": 168.787,
       "eval_steps_per_second": 5.333,
       "step": 100
     }
   ],
   "logging_steps": 1,
@@ -754,12 +1041,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 4.586475853185024e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 0.9264618754386902,
   "best_model_checkpoint": "miner_id_24/checkpoint-100",
+  "epoch": 3.0,
   "eval_steps": 50,
+  "global_step": 141,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 168.787,
       "eval_steps_per_second": 5.333,
       "step": 100
+    },
+    {
+      "epoch": 2.148936170212766,
+      "grad_norm": 0.26321274042129517,
+      "learning_rate": 2.384916843344419e-05,
+      "loss": 0.2123,
+      "step": 101
+    },
+    {
+      "epoch": 2.1702127659574466,
+      "grad_norm": 0.36982113122940063,
+      "learning_rate": 2.275893467018154e-05,
+      "loss": 0.7511,
+      "step": 102
+    },
+    {
+      "epoch": 2.1914893617021276,
+      "grad_norm": 0.4143376648426056,
+      "learning_rate": 2.1687817534238292e-05,
+      "loss": 1.2891,
+      "step": 103
+    },
+    {
+      "epoch": 2.2127659574468086,
+      "grad_norm": 0.3768226206302643,
+      "learning_rate": 2.0636433015111154e-05,
+      "loss": 1.127,
+      "step": 104
+    },
+    {
+      "epoch": 2.2340425531914896,
+      "grad_norm": 0.3875614106655121,
+      "learning_rate": 1.9605385754252593e-05,
+      "loss": 0.9696,
+      "step": 105
+    },
+    {
+      "epoch": 2.25531914893617,
+      "grad_norm": 0.4567450284957886,
+      "learning_rate": 1.8595268697347047e-05,
+      "loss": 0.8626,
+      "step": 106
+    },
+    {
+      "epoch": 2.276595744680851,
+      "grad_norm": 0.43263593316078186,
+      "learning_rate": 1.76066627533135e-05,
+      "loss": 0.417,
+      "step": 107
+    },
+    {
+      "epoch": 2.297872340425532,
+      "grad_norm": 0.2629643380641937,
+      "learning_rate": 1.664013646023009e-05,
+      "loss": 0.3379,
+      "step": 108
+    },
+    {
+      "epoch": 2.3191489361702127,
+      "grad_norm": 0.47793322801589966,
+      "learning_rate": 1.5696245658373157e-05,
+      "loss": 1.1889,
+      "step": 109
+    },
+    {
+      "epoch": 2.3404255319148937,
+      "grad_norm": 0.4138708710670471,
+      "learning_rate": 1.4775533170558723e-05,
+      "loss": 1.1401,
+      "step": 110
+    },
+    {
+      "epoch": 2.3617021276595747,
+      "grad_norm": 0.4172796905040741,
+      "learning_rate": 1.3878528489970085e-05,
+      "loss": 1.0396,
+      "step": 111
+    },
+    {
+      "epoch": 2.382978723404255,
+      "grad_norm": 0.4409525692462921,
+      "learning_rate": 1.3005747475651238e-05,
+      "loss": 0.9267,
+      "step": 112
+    },
+    {
+      "epoch": 2.404255319148936,
+      "grad_norm": 0.49562060832977295,
+      "learning_rate": 1.2157692055841128e-05,
+      "loss": 0.6103,
+      "step": 113
+    },
+    {
+      "epoch": 2.425531914893617,
+      "grad_norm": 0.16902895271778107,
+      "learning_rate": 1.1334849939319436e-05,
+      "loss": 0.1601,
+      "step": 114
+    },
+    {
+      "epoch": 2.4468085106382977,
+      "grad_norm": 0.5262559652328491,
+      "learning_rate": 1.0537694334929756e-05,
+      "loss": 1.2164,
+      "step": 115
+    },
+    {
+      "epoch": 2.4680851063829787,
+      "grad_norm": 0.45780274271965027,
+      "learning_rate": 9.766683679441566e-06,
+      "loss": 1.2017,
+      "step": 116
+    },
+    {
+      "epoch": 2.4893617021276597,
+      "grad_norm": 0.4124818444252014,
+      "learning_rate": 9.022261373907599e-06,
+      "loss": 1.0688,
+      "step": 117
+    },
+    {
+      "epoch": 2.5106382978723403,
+      "grad_norm": 0.41770249605178833,
+      "learning_rate": 8.304855528667915e-06,
+      "loss": 0.893,
+      "step": 118
+    },
+    {
+      "epoch": 2.5319148936170213,
+      "grad_norm": 0.5359745025634766,
+      "learning_rate": 7.614878717147731e-06,
+      "loss": 0.734,
+      "step": 119
+    },
+    {
+      "epoch": 2.5531914893617023,
+      "grad_norm": 0.12150562554597855,
+      "learning_rate": 6.952727738590198e-06,
+      "loss": 0.0908,
+      "step": 120
+    },
+    {
+      "epoch": 2.574468085106383,
+      "grad_norm": 0.45060330629348755,
+      "learning_rate": 6.318783389860888e-06,
+      "loss": 0.9969,
+      "step": 121
+    },
+    {
+      "epoch": 2.595744680851064,
+      "grad_norm": 0.4552464485168457,
+      "learning_rate": 5.7134102464550925e-06,
+      "loss": 1.2133,
+      "step": 122
+    },
+    {
+      "epoch": 2.617021276595745,
+      "grad_norm": 0.41424861550331116,
+      "learning_rate": 5.136956452833776e-06,
+      "loss": 1.0531,
+      "step": 123
+    },
+    {
+      "epoch": 2.6382978723404253,
+      "grad_norm": 0.4196318984031677,
+      "learning_rate": 4.589753522209003e-06,
+      "loss": 0.9811,
+      "step": 124
+    },
+    {
+      "epoch": 2.6595744680851063,
+      "grad_norm": 0.4966506063938141,
+      "learning_rate": 4.072116145893723e-06,
+      "loss": 0.8532,
+      "step": 125
+    },
+    {
+      "epoch": 2.6808510638297873,
+      "grad_norm": 0.2934734523296356,
+      "learning_rate": 3.584342012325771e-06,
+      "loss": 0.1873,
+      "step": 126
+    },
+    {
+      "epoch": 2.702127659574468,
+      "grad_norm": 0.3809848725795746,
+      "learning_rate": 3.126711635869966e-06,
+      "loss": 0.6348,
+      "step": 127
+    },
+    {
+      "epoch": 2.723404255319149,
+      "grad_norm": 0.463334858417511,
+      "learning_rate": 2.699488195496971e-06,
+      "loss": 1.2586,
+      "step": 128
+    },
+    {
+      "epoch": 2.74468085106383,
+      "grad_norm": 0.43426749110221863,
+      "learning_rate": 2.3029173834314634e-06,
+      "loss": 1.1442,
+      "step": 129
+    },
+    {
+      "epoch": 2.7659574468085104,
+      "grad_norm": 0.4165303111076355,
+      "learning_rate": 1.9372272638568494e-06,
+      "loss": 1.0423,
+      "step": 130
+    },
+    {
+      "epoch": 2.7872340425531914,
+      "grad_norm": 0.48008161783218384,
+      "learning_rate": 1.6026281417576689e-06,
+      "loss": 0.8429,
+      "step": 131
+    },
+    {
+      "epoch": 2.8085106382978724,
+      "grad_norm": 0.45738592743873596,
+      "learning_rate": 1.299312441975153e-06,
+      "loss": 0.3767,
+      "step": 132
+    },
+    {
+      "epoch": 2.829787234042553,
+      "grad_norm": 0.350779265165329,
+      "learning_rate": 1.0274545985455078e-06,
+      "loss": 0.5226,
+      "step": 133
+    },
+    {
+      "epoch": 2.851063829787234,
+      "grad_norm": 0.49393415451049805,
+      "learning_rate": 7.872109543844799e-07,
+      "loss": 1.2395,
+      "step": 134
+    },
+    {
+      "epoch": 2.872340425531915,
+      "grad_norm": 0.4229400157928467,
+      "learning_rate": 5.787196713760618e-07,
+      "loss": 1.1422,
+      "step": 135
+    },
+    {
+      "epoch": 2.8936170212765955,
+      "grad_norm": 0.42691662907600403,
+      "learning_rate": 4.021006509168048e-07,
+      "loss": 1.0496,
+      "step": 136
+    },
+    {
+      "epoch": 2.9148936170212765,
+      "grad_norm": 0.4528738260269165,
+      "learning_rate": 2.574554649617209e-07,
+      "loss": 0.905,
+      "step": 137
+    },
+    {
+      "epoch": 2.9361702127659575,
+      "grad_norm": 0.4881468713283539,
+      "learning_rate": 1.4486729761113447e-07,
+      "loss": 0.5696,
+      "step": 138
+    },
+    {
+      "epoch": 2.9574468085106385,
+      "grad_norm": 0.412494957447052,
+      "learning_rate": 6.440089727230269e-08,
+      "loss": 0.7729,
+      "step": 139
+    },
+    {
+      "epoch": 2.978723404255319,
+      "grad_norm": 0.4168694317340851,
+      "learning_rate": 1.6102539423217266e-08,
+      "loss": 1.1272,
+      "step": 140
+    },
+    {
+      "epoch": 3.0,
+      "grad_norm": 0.48817628622055054,
+      "learning_rate": 0.0,
+      "loss": 0.8255,
+      "step": 141
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 6.466930952990884e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null