Training in progress, step 81, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +221 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c2710a59fecf03a8a8d2164189f7b5a61a2aaa858533a590caaca196180c2dbf
 size 838906392

 version https://git-lfs.github.com/spec/v1
+oid sha256:ad7089998671b2828991c7ebc54ddec960dd2311ec675a49fb8dbba18ee0c8b1
 size 838906392

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5c7a26fdc4f8ef2a4f46c666afe97085b7025cfb84583f859890f6df4e2a4475
 size 1677991354

 version https://git-lfs.github.com/spec/v1
+oid sha256:1320dcafbe36ee032fc41a1f4c1e8baa2eccb06ec41471b2a75409fe051013c4
 size 1677991354

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cb81f89b68366da380b1091c30c5c0ac19c1bdb0cd3f72af8a1d8afc75486321
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:96d3c925b28227b95a8ac44434affb41453bdddd6810333936ee7941a29ba2b8
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bbfd42597cb17dd5953648a2f02c05c3e9ff1d1ccc350bee78679ee3cb0d031a
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:add675dc265850f22b56e2052f74b9f71b110b1c0f63daf3a4fb0e1b958e5b2a
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 0.9139928817749023,
   "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 1.8785046728971961,
   "eval_steps": 50,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -373,6 +373,223 @@
       "eval_samples_per_second": 9.134,
       "eval_steps_per_second": 4.669,
       "step": 50
     }
   ],
   "logging_steps": 1,
@@ -396,12 +613,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.27296891518976e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 0.9139928817749023,
   "best_model_checkpoint": "miner_id_24/checkpoint-50",
+  "epoch": 3.05607476635514,
   "eval_steps": 50,
+  "global_step": 81,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 9.134,
       "eval_steps_per_second": 4.669,
       "step": 50
+    },
+    {
+      "epoch": 1.9158878504672896,
+      "grad_norm": 3.224144220352173,
+      "learning_rate": 6.368314950360415e-05,
+      "loss": 3.6958,
+      "step": 51
+    },
+    {
+      "epoch": 1.953271028037383,
+      "grad_norm": 3.0531082153320312,
+      "learning_rate": 6.069665416032487e-05,
+      "loss": 2.5114,
+      "step": 52
+    },
+    {
+      "epoch": 1.9906542056074765,
+      "grad_norm": 3.5681068897247314,
+      "learning_rate": 5.7669582743934284e-05,
+      "loss": 3.0081,
+      "step": 53
+    },
+    {
+      "epoch": 2.0373831775700935,
+      "grad_norm": 2.371832847595215,
+      "learning_rate": 5.4613417973165106e-05,
+      "loss": 3.1453,
+      "step": 54
+    },
+    {
+      "epoch": 2.074766355140187,
+      "grad_norm": 2.573709011077881,
+      "learning_rate": 5.153975292780853e-05,
+      "loss": 2.6346,
+      "step": 55
+    },
+    {
+      "epoch": 2.1121495327102804,
+      "grad_norm": 2.663743019104004,
+      "learning_rate": 4.8460247072191496e-05,
+      "loss": 1.8031,
+      "step": 56
+    },
+    {
+      "epoch": 2.149532710280374,
+      "grad_norm": 2.9047420024871826,
+      "learning_rate": 4.5386582026834906e-05,
+      "loss": 1.7328,
+      "step": 57
+    },
+    {
+      "epoch": 2.1869158878504673,
+      "grad_norm": 3.729478359222412,
+      "learning_rate": 4.233041725606572e-05,
+      "loss": 2.3163,
+      "step": 58
+    },
+    {
+      "epoch": 2.2242990654205608,
+      "grad_norm": 4.952083587646484,
+      "learning_rate": 3.930334583967514e-05,
+      "loss": 2.5764,
+      "step": 59
+    },
+    {
+      "epoch": 2.2616822429906542,
+      "grad_norm": 4.32457971572876,
+      "learning_rate": 3.631685049639586e-05,
+      "loss": 3.2292,
+      "step": 60
+    },
+    {
+      "epoch": 2.2990654205607477,
+      "grad_norm": 4.138461112976074,
+      "learning_rate": 3.338226002601703e-05,
+      "loss": 2.4756,
+      "step": 61
+    },
+    {
+      "epoch": 2.336448598130841,
+      "grad_norm": 4.143182277679443,
+      "learning_rate": 3.0510706335366035e-05,
+      "loss": 1.8892,
+      "step": 62
+    },
+    {
+      "epoch": 2.3738317757009346,
+      "grad_norm": 4.396746635437012,
+      "learning_rate": 2.771308221117309e-05,
+      "loss": 1.9185,
+      "step": 63
+    },
+    {
+      "epoch": 2.411214953271028,
+      "grad_norm": 4.355361461639404,
+      "learning_rate": 2.500000000000001e-05,
+      "loss": 2.1326,
+      "step": 64
+    },
+    {
+      "epoch": 2.4485981308411215,
+      "grad_norm": 4.803137302398682,
+      "learning_rate": 2.238175135197471e-05,
+      "loss": 2.3713,
+      "step": 65
+    },
+    {
+      "epoch": 2.485981308411215,
+      "grad_norm": 3.864950656890869,
+      "learning_rate": 1.9868268181037185e-05,
+      "loss": 2.9682,
+      "step": 66
+    },
+    {
+      "epoch": 2.5233644859813085,
+      "grad_norm": 3.767958641052246,
+      "learning_rate": 1.746908498978791e-05,
+      "loss": 2.2154,
+      "step": 67
+    },
+    {
+      "epoch": 2.560747663551402,
+      "grad_norm": 3.542163133621216,
+      "learning_rate": 1.5193302701853673e-05,
+      "loss": 1.3826,
+      "step": 68
+    },
+    {
+      "epoch": 2.5981308411214954,
+      "grad_norm": 4.084665775299072,
+      "learning_rate": 1.3049554138967051e-05,
+      "loss": 1.9291,
+      "step": 69
+    },
+    {
+      "epoch": 2.635514018691589,
+      "grad_norm": 3.9961540699005127,
+      "learning_rate": 1.1045971273716477e-05,
+      "loss": 1.8701,
+      "step": 70
+    },
+    {
+      "epoch": 2.6728971962616823,
+      "grad_norm": 4.667150020599365,
+      "learning_rate": 9.190154382188921e-06,
+      "loss": 2.4681,
+      "step": 71
+    },
+    {
+      "epoch": 2.710280373831776,
+      "grad_norm": 3.48500919342041,
+      "learning_rate": 7.489143213519301e-06,
+      "loss": 2.9565,
+      "step": 72
+    },
+    {
+      "epoch": 2.7476635514018692,
+      "grad_norm": 3.8385698795318604,
+      "learning_rate": 5.949390285710776e-06,
+      "loss": 2.269,
+      "step": 73
+    },
+    {
+      "epoch": 2.7850467289719627,
+      "grad_norm": 3.418334484100342,
+      "learning_rate": 4.576736409023813e-06,
+      "loss": 1.5572,
+      "step": 74
+    },
+    {
+      "epoch": 2.822429906542056,
+      "grad_norm": 3.6133151054382324,
+      "learning_rate": 3.376388529782215e-06,
+      "loss": 1.511,
+      "step": 75
+    },
+    {
+      "epoch": 2.8598130841121496,
+      "grad_norm": 4.219501972198486,
+      "learning_rate": 2.3528999786421756e-06,
+      "loss": 1.7603,
+      "step": 76
+    },
+    {
+      "epoch": 2.897196261682243,
+      "grad_norm": 4.413926601409912,
+      "learning_rate": 1.5101531982495308e-06,
+      "loss": 2.251,
+      "step": 77
+    },
+    {
+      "epoch": 2.9345794392523366,
+      "grad_norm": 3.3239150047302246,
+      "learning_rate": 8.513450158049108e-07,
+      "loss": 2.2245,
+      "step": 78
+    },
+    {
+      "epoch": 2.97196261682243,
+      "grad_norm": 3.768557548522949,
+      "learning_rate": 3.7897451640321323e-07,
+      "loss": 1.7201,
+      "step": 79
+    },
+    {
+      "epoch": 3.0186915887850465,
+      "grad_norm": 3.955902099609375,
+      "learning_rate": 9.483356314779479e-08,
+      "loss": 2.6022,
+      "step": 80
+    },
+    {
+      "epoch": 3.05607476635514,
+      "grad_norm": 3.3533103466033936,
+      "learning_rate": 0.0,
+      "loss": 2.3736,
+      "step": 81
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 2.0622096426074112e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null