Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:37e7b647d859d3849ac373620500fcf55eebba75c407dab7260f3028b4cc5cc5
 size 201353800

 version https://git-lfs.github.com/spec/v1
+oid sha256:27bc9137bbd05894c2db07ba0927c68d75ed7f1d3ff0a61e8c1717a42f3f6c62
 size 201353800

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:867b971847038790d9f1accdba44716e88b3f8cbfe32780b5b9b141eda7c5fd4
 size 102462970

 version https://git-lfs.github.com/spec/v1
+oid sha256:f50b0581675226f3f924169da995fc914aa12ab00149c55b36cb8d38e7f8d1d8
 size 102462970

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e623de6c59ba19bca937a15f2dd1f7ccb1e661ad663bee9e609b15eb57d4c566
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:7d5af75c8ab623984d5ec565d782de8ee7c3f693ee1d42dcb83b7b5cc60b59a5
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:191b4f74b8892fe464b31b446bc6f50032359ce22cb38236d5fdccf47f27920e
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:9abccd3ade815397c3a4e9cae178fd4a326a690915052661d8621974d592484a
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.7111726403236389,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.6342494714587738,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 31.941,
       "eval_steps_per_second": 7.985,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 9281870600601600.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.6873600482940674,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.8456659619450317,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 31.941,
       "eval_steps_per_second": 7.985,
       "step": 150
+    },
+    {
+      "epoch": 0.638477801268499,
+      "grad_norm": 3.5591983795166016,
+      "learning_rate": 2.583589473684211e-05,
+      "loss": 1.6012,
+      "step": 151
+    },
+    {
+      "epoch": 0.642706131078224,
+      "grad_norm": 4.615307331085205,
+      "learning_rate": 2.530863157894737e-05,
+      "loss": 1.7284,
+      "step": 152
+    },
+    {
+      "epoch": 0.6469344608879493,
+      "grad_norm": 3.838639736175537,
+      "learning_rate": 2.4781368421052633e-05,
+      "loss": 1.5207,
+      "step": 153
+    },
+    {
+      "epoch": 0.6511627906976745,
+      "grad_norm": 3.7380716800689697,
+      "learning_rate": 2.4254105263157896e-05,
+      "loss": 1.2993,
+      "step": 154
+    },
+    {
+      "epoch": 0.6553911205073996,
+      "grad_norm": 3.57745099067688,
+      "learning_rate": 2.372684210526316e-05,
+      "loss": 1.591,
+      "step": 155
+    },
+    {
+      "epoch": 0.6596194503171248,
+      "grad_norm": 3.3314781188964844,
+      "learning_rate": 2.3199578947368422e-05,
+      "loss": 1.3037,
+      "step": 156
+    },
+    {
+      "epoch": 0.6638477801268499,
+      "grad_norm": 4.123661994934082,
+      "learning_rate": 2.2672315789473688e-05,
+      "loss": 1.8206,
+      "step": 157
+    },
+    {
+      "epoch": 0.6680761099365751,
+      "grad_norm": 3.4061641693115234,
+      "learning_rate": 2.2145052631578948e-05,
+      "loss": 1.4921,
+      "step": 158
+    },
+    {
+      "epoch": 0.6723044397463002,
+      "grad_norm": 3.731978178024292,
+      "learning_rate": 2.1617789473684214e-05,
+      "loss": 1.369,
+      "step": 159
+    },
+    {
+      "epoch": 0.6765327695560254,
+      "grad_norm": 3.8133912086486816,
+      "learning_rate": 2.1090526315789473e-05,
+      "loss": 1.4535,
+      "step": 160
+    },
+    {
+      "epoch": 0.6807610993657506,
+      "grad_norm": 3.694464921951294,
+      "learning_rate": 2.056326315789474e-05,
+      "loss": 1.4335,
+      "step": 161
+    },
+    {
+      "epoch": 0.6849894291754757,
+      "grad_norm": 3.9114949703216553,
+      "learning_rate": 2.0036000000000003e-05,
+      "loss": 1.4116,
+      "step": 162
+    },
+    {
+      "epoch": 0.6892177589852009,
+      "grad_norm": 3.615243673324585,
+      "learning_rate": 1.9508736842105266e-05,
+      "loss": 1.2621,
+      "step": 163
+    },
+    {
+      "epoch": 0.693446088794926,
+      "grad_norm": 3.213036298751831,
+      "learning_rate": 1.898147368421053e-05,
+      "loss": 1.3186,
+      "step": 164
+    },
+    {
+      "epoch": 0.6976744186046512,
+      "grad_norm": 3.6769354343414307,
+      "learning_rate": 1.8454210526315788e-05,
+      "loss": 1.4549,
+      "step": 165
+    },
+    {
+      "epoch": 0.7019027484143763,
+      "grad_norm": 3.4479939937591553,
+      "learning_rate": 1.7926947368421054e-05,
+      "loss": 1.4865,
+      "step": 166
+    },
+    {
+      "epoch": 0.7061310782241015,
+      "grad_norm": 3.636308431625366,
+      "learning_rate": 1.7399684210526317e-05,
+      "loss": 1.5404,
+      "step": 167
+    },
+    {
+      "epoch": 0.7103594080338267,
+      "grad_norm": 3.361964464187622,
+      "learning_rate": 1.687242105263158e-05,
+      "loss": 1.3839,
+      "step": 168
+    },
+    {
+      "epoch": 0.7145877378435518,
+      "grad_norm": 4.165847301483154,
+      "learning_rate": 1.6345157894736843e-05,
+      "loss": 1.3019,
+      "step": 169
+    },
+    {
+      "epoch": 0.718816067653277,
+      "grad_norm": 3.3849713802337646,
+      "learning_rate": 1.5817894736842106e-05,
+      "loss": 1.1796,
+      "step": 170
+    },
+    {
+      "epoch": 0.7230443974630021,
+      "grad_norm": 3.7786877155303955,
+      "learning_rate": 1.529063157894737e-05,
+      "loss": 1.5935,
+      "step": 171
+    },
+    {
+      "epoch": 0.7272727272727273,
+      "grad_norm": 3.401390790939331,
+      "learning_rate": 1.4763368421052632e-05,
+      "loss": 1.3369,
+      "step": 172
+    },
+    {
+      "epoch": 0.7315010570824524,
+      "grad_norm": 3.4565701484680176,
+      "learning_rate": 1.4236105263157895e-05,
+      "loss": 1.5234,
+      "step": 173
+    },
+    {
+      "epoch": 0.7357293868921776,
+      "grad_norm": 4.138101577758789,
+      "learning_rate": 1.370884210526316e-05,
+      "loss": 1.073,
+      "step": 174
+    },
+    {
+      "epoch": 0.7399577167019028,
+      "grad_norm": 4.212649345397949,
+      "learning_rate": 1.318157894736842e-05,
+      "loss": 1.5394,
+      "step": 175
+    },
+    {
+      "epoch": 0.7441860465116279,
+      "grad_norm": 3.4195027351379395,
+      "learning_rate": 1.2654315789473685e-05,
+      "loss": 0.9755,
+      "step": 176
+    },
+    {
+      "epoch": 0.7484143763213531,
+      "grad_norm": 3.423142194747925,
+      "learning_rate": 1.2127052631578948e-05,
+      "loss": 1.2978,
+      "step": 177
+    },
+    {
+      "epoch": 0.7526427061310782,
+      "grad_norm": 3.583743095397949,
+      "learning_rate": 1.1599789473684211e-05,
+      "loss": 1.5688,
+      "step": 178
+    },
+    {
+      "epoch": 0.7568710359408034,
+      "grad_norm": 4.3834733963012695,
+      "learning_rate": 1.1072526315789474e-05,
+      "loss": 1.8038,
+      "step": 179
+    },
+    {
+      "epoch": 0.7610993657505285,
+      "grad_norm": 2.9961256980895996,
+      "learning_rate": 1.0545263157894737e-05,
+      "loss": 1.1278,
+      "step": 180
+    },
+    {
+      "epoch": 0.7653276955602537,
+      "grad_norm": 3.474705219268799,
+      "learning_rate": 1.0018000000000001e-05,
+      "loss": 1.236,
+      "step": 181
+    },
+    {
+      "epoch": 0.7695560253699789,
+      "grad_norm": 3.186950445175171,
+      "learning_rate": 9.490736842105264e-06,
+      "loss": 0.9363,
+      "step": 182
+    },
+    {
+      "epoch": 0.773784355179704,
+      "grad_norm": 3.7591898441314697,
+      "learning_rate": 8.963473684210527e-06,
+      "loss": 1.3784,
+      "step": 183
+    },
+    {
+      "epoch": 0.7780126849894292,
+      "grad_norm": 4.514182090759277,
+      "learning_rate": 8.43621052631579e-06,
+      "loss": 1.4594,
+      "step": 184
+    },
+    {
+      "epoch": 0.7822410147991543,
+      "grad_norm": 3.459716558456421,
+      "learning_rate": 7.908947368421053e-06,
+      "loss": 0.9057,
+      "step": 185
+    },
+    {
+      "epoch": 0.7864693446088795,
+      "grad_norm": 3.6311416625976562,
+      "learning_rate": 7.381684210526316e-06,
+      "loss": 1.2705,
+      "step": 186
+    },
+    {
+      "epoch": 0.7906976744186046,
+      "grad_norm": 3.554105281829834,
+      "learning_rate": 6.85442105263158e-06,
+      "loss": 1.2201,
+      "step": 187
+    },
+    {
+      "epoch": 0.7949260042283298,
+      "grad_norm": 4.5764641761779785,
+      "learning_rate": 6.3271578947368425e-06,
+      "loss": 0.9699,
+      "step": 188
+    },
+    {
+      "epoch": 0.7991543340380549,
+      "grad_norm": 4.100885391235352,
+      "learning_rate": 5.7998947368421054e-06,
+      "loss": 1.4052,
+      "step": 189
+    },
+    {
+      "epoch": 0.8033826638477801,
+      "grad_norm": 3.4294564723968506,
+      "learning_rate": 5.272631578947368e-06,
+      "loss": 1.063,
+      "step": 190
+    },
+    {
+      "epoch": 0.8076109936575053,
+      "grad_norm": 3.067899703979492,
+      "learning_rate": 4.745368421052632e-06,
+      "loss": 0.9873,
+      "step": 191
+    },
+    {
+      "epoch": 0.8118393234672304,
+      "grad_norm": 3.446835517883301,
+      "learning_rate": 4.218105263157895e-06,
+      "loss": 1.095,
+      "step": 192
+    },
+    {
+      "epoch": 0.8160676532769556,
+      "grad_norm": 3.158550262451172,
+      "learning_rate": 3.690842105263158e-06,
+      "loss": 1.0896,
+      "step": 193
+    },
+    {
+      "epoch": 0.8202959830866807,
+      "grad_norm": 3.8455758094787598,
+      "learning_rate": 3.1635789473684213e-06,
+      "loss": 1.5648,
+      "step": 194
+    },
+    {
+      "epoch": 0.8245243128964059,
+      "grad_norm": 3.813826084136963,
+      "learning_rate": 2.636315789473684e-06,
+      "loss": 1.0978,
+      "step": 195
+    },
+    {
+      "epoch": 0.828752642706131,
+      "grad_norm": 3.965871810913086,
+      "learning_rate": 2.1090526315789475e-06,
+      "loss": 1.5023,
+      "step": 196
+    },
+    {
+      "epoch": 0.8329809725158562,
+      "grad_norm": 3.1118109226226807,
+      "learning_rate": 1.5817894736842106e-06,
+      "loss": 0.8994,
+      "step": 197
+    },
+    {
+      "epoch": 0.8372093023255814,
+      "grad_norm": 3.4134104251861572,
+      "learning_rate": 1.0545263157894738e-06,
+      "loss": 1.2807,
+      "step": 198
+    },
+    {
+      "epoch": 0.8414376321353065,
+      "grad_norm": 4.661491870880127,
+      "learning_rate": 5.272631578947369e-07,
+      "loss": 1.5665,
+      "step": 199
+    },
+    {
+      "epoch": 0.8456659619450317,
+      "grad_norm": 3.9415533542633057,
+      "learning_rate": 0.0,
+      "loss": 1.316,
+      "step": 200
+    },
+    {
+      "epoch": 0.8456659619450317,
+      "eval_loss": 0.6873600482940674,
+      "eval_runtime": 3.1311,
+      "eval_samples_per_second": 31.938,
+      "eval_steps_per_second": 7.984,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.23758274674688e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null