Training in progress, step 150, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +188 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3e81e7554f6a4e8dc1d6637458ad111412135558d4fa3da36246a6ce3a7f3582
 size 166182480

 version https://git-lfs.github.com/spec/v1
+oid sha256:133647ff75619b2ebf4b08b1907148f54f64d50a6540006b8b0cfd3d268e67a7
 size 166182480

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2a24c4ba4fe6d4ef54df173ae57cdaa451bf814e7c0ccd1eee98ff9c9b08604d
 size 332574358

 version https://git-lfs.github.com/spec/v1
+oid sha256:4b780e0bdd236c892f8c7d0c7d4afe550217aba06b3560dacba4ce990ecece4b
 size 332574358

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bb69463d7c0255de870de267315257b4ab055b07250b9bf3d5f50f6e91ab28b2
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:f4d6427d6aa68c158ad80f76946f6fc44629c698f67492b8fb6d643dbc5eef98
 size 15024

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3187bc61197df87f0c3ee611adcb22372b70450b00d5a0c3b9ca0c3c6aa112eb
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:b2caf50116db0368fab40b4e34ae3b0c9f1a86e9da0bc7a8de5ff785b985711a
 size 15024

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4b833491f3e58bff523508fa17e20e242e52c888acfebbf28b5bd968b816788b
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:a955dabcc60ffe276aa50f92c60f21389f3a28e5589c5c081037f1d66876c9a3
 size 15024

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e4c19b9c696d677fbac8fe64d1e779bd08ec3d7393548934388271f17014c89f
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:d1380d7d4f110826396b67714006bc18fde61d5c81581b3cc018f32e55772014
 size 15024

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b898900b04322cf6c4f019c4f4ba26a4fda854c76bcf2313072c064f0fd1f3cb
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:148dc60fce7a98d209219ab65863631c40408a69d4537618751b3440fe762b40
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 3.0905191579222446e-06,
-  "best_model_checkpoint": "miner_id_24/checkpoint-125",
-  "epoch": 0.2761096155173604,
   "eval_steps": 25,
-  "global_step": 125,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -930,6 +930,189 @@
       "eval_samples_per_second": 23.81,
       "eval_steps_per_second": 6.191,
       "step": 125
     }
   ],
   "logging_steps": 1,
@@ -958,7 +1141,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4.0620223627264e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 2.842964704541373e-06,
+  "best_model_checkpoint": "miner_id_24/checkpoint-150",
+  "epoch": 0.3313315386208325,
   "eval_steps": 25,
+  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 23.81,
       "eval_steps_per_second": 6.191,
       "step": 125
+    },
+    {
+      "epoch": 0.27831849244149925,
+      "grad_norm": 0.0002063347928924486,
+      "learning_rate": 3.9148747843544495e-05,
+      "loss": 0.0,
+      "step": 126
+    },
+    {
+      "epoch": 0.28052736936563816,
+      "grad_norm": 0.00025974729214794934,
+      "learning_rate": 3.846178285323835e-05,
+      "loss": 0.0,
+      "step": 127
+    },
+    {
+      "epoch": 0.28273624628977706,
+      "grad_norm": 0.00024177682644221932,
+      "learning_rate": 3.777924554357096e-05,
+      "loss": 0.0,
+      "step": 128
+    },
+    {
+      "epoch": 0.2849451232139159,
+      "grad_norm": 0.0002588094212114811,
+      "learning_rate": 3.710131864628451e-05,
+      "loss": 0.0,
+      "step": 129
+    },
+    {
+      "epoch": 0.2871540001380548,
+      "grad_norm": 0.0002662424521986395,
+      "learning_rate": 3.642818365880224e-05,
+      "loss": 0.0,
+      "step": 130
+    },
+    {
+      "epoch": 0.2893628770621937,
+      "grad_norm": 0.0004572535108309239,
+      "learning_rate": 3.576002079563732e-05,
+      "loss": 0.0,
+      "step": 131
+    },
+    {
+      "epoch": 0.29157175398633256,
+      "grad_norm": 0.00027820674586109817,
+      "learning_rate": 3.509700894014496e-05,
+      "loss": 0.0,
+      "step": 132
+    },
+    {
+      "epoch": 0.29378063091047146,
+      "grad_norm": 0.00010199982352787629,
+      "learning_rate": 3.443932559663107e-05,
+      "loss": 0.0,
+      "step": 133
+    },
+    {
+      "epoch": 0.29598950783461037,
+      "grad_norm": 0.00010178149386774749,
+      "learning_rate": 3.378714684283011e-05,
+      "loss": 0.0,
+      "step": 134
+    },
+    {
+      "epoch": 0.2981983847587492,
+      "grad_norm": 0.00010338863648939878,
+      "learning_rate": 3.31406472827647e-05,
+      "loss": 0.0,
+      "step": 135
+    },
+    {
+      "epoch": 0.3004072616828881,
+      "grad_norm": 0.0001027277103275992,
+      "learning_rate": 3.250000000000001e-05,
+      "loss": 0.0,
+      "step": 136
+    },
+    {
+      "epoch": 0.30261613860702696,
+      "grad_norm": 0.00010317438864149153,
+      "learning_rate": 3.186537651130503e-05,
+      "loss": 0.0,
+      "step": 137
+    },
+    {
+      "epoch": 0.30482501553116587,
+      "grad_norm": 0.00016677897656336427,
+      "learning_rate": 3.123694672073344e-05,
+      "loss": 0.0,
+      "step": 138
+    },
+    {
+      "epoch": 0.30703389245530477,
+      "grad_norm": 0.00022237653320189565,
+      "learning_rate": 3.061487887413619e-05,
+      "loss": 0.0,
+      "step": 139
+    },
+    {
+      "epoch": 0.3092427693794436,
+      "grad_norm": 0.00023152329958975315,
+      "learning_rate": 2.9999339514117912e-05,
+      "loss": 0.0,
+      "step": 140
+    },
+    {
+      "epoch": 0.3114516463035825,
+      "grad_norm": 0.00022528883710037917,
+      "learning_rate": 2.9390493435449572e-05,
+      "loss": 0.0,
+      "step": 141
+    },
+    {
+      "epoch": 0.3136605232277214,
+      "grad_norm": 0.00024947745259851217,
+      "learning_rate": 2.8788503640948912e-05,
+      "loss": 0.0,
+      "step": 142
+    },
+    {
+      "epoch": 0.31586940015186027,
+      "grad_norm": 0.00043007542262785137,
+      "learning_rate": 2.8193531297840503e-05,
+      "loss": 0.0,
+      "step": 143
+    },
+    {
+      "epoch": 0.31807827707599917,
+      "grad_norm": 0.00035726267378777266,
+      "learning_rate": 2.760573569460757e-05,
+      "loss": 0.0,
+      "step": 144
+    },
+    {
+      "epoch": 0.3202871540001381,
+      "grad_norm": 9.570374095346779e-05,
+      "learning_rate": 2.702527419834653e-05,
+      "loss": 0.0,
+      "step": 145
+    },
+    {
+      "epoch": 0.3224960309242769,
+      "grad_norm": 9.726483403937891e-05,
+      "learning_rate": 2.645230221263596e-05,
+      "loss": 0.0,
+      "step": 146
+    },
+    {
+      "epoch": 0.3247049078484158,
+      "grad_norm": 9.980611503124237e-05,
+      "learning_rate": 2.5886973135931425e-05,
+      "loss": 0.0,
+      "step": 147
+    },
+    {
+      "epoch": 0.3269137847725547,
+      "grad_norm": 9.672047599451616e-05,
+      "learning_rate": 2.53294383204969e-05,
+      "loss": 0.0,
+      "step": 148
+    },
+    {
+      "epoch": 0.3291226616966936,
+      "grad_norm": 9.55902723944746e-05,
+      "learning_rate": 2.4779847031884175e-05,
+      "loss": 0.0,
+      "step": 149
+    },
+    {
+      "epoch": 0.3313315386208325,
+      "grad_norm": 9.5816605607979e-05,
+      "learning_rate": 2.423834640897079e-05,
+      "loss": 0.0,
+      "step": 150
+    },
+    {
+      "epoch": 0.3313315386208325,
+      "eval_loss": 2.842964704541373e-06,
+      "eval_runtime": 2.1034,
+      "eval_samples_per_second": 23.771,
+      "eval_steps_per_second": 6.18,
+      "step": 150
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 4.87442683527168e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null