Training in progress, step 100, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +363 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b20350b6df1fc085ff6e6095466515857695825f5894f7fc63c4724681c288d2
 size 645975704

 version https://git-lfs.github.com/spec/v1
+oid sha256:1d943ab670d2ec02491e4ebe4f1f2ed01bfa629f3da4219180d49a36fa32e8b2
 size 645975704

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f90a07ac8c3e45fbf8562cf4d3e39253616968979deb41aa0b7191252956add8
 size 328468404

 version https://git-lfs.github.com/spec/v1
+oid sha256:90df2dd65c3d8592c96c2fd7951235ae235e6e1012c064ef1d64e770c929963d
 size 328468404

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:206b1a7321c484e02aa59caf49e2a72a659e69232403bf3a5d7a506abcc55a7b
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:9a271bbcc82508aea764d98d0927295201f535afc9927e075a565c461e2f8b86
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:420c9c3b2300136316283bf07027d494a21b6b2c5dcebe052deac7037529e10e
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:4c41bb450efe89fd65fecb2593939b817f9b2794a5d87a632ad90e5ebc9b592b
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.7083282470703125,
-  "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 0.005125051250512505,
   "eval_steps": 50,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -373,6 +373,364 @@
       "eval_samples_per_second": 19.206,
       "eval_steps_per_second": 4.801,
       "step": 50
     }
   ],
   "logging_steps": 1,
@@ -401,7 +759,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.795134465245184e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 1.6530330181121826,
+  "best_model_checkpoint": "miner_id_24/checkpoint-100",
+  "epoch": 0.01025010250102501,
   "eval_steps": 50,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 19.206,
       "eval_steps_per_second": 4.801,
       "step": 50
+    },
+    {
+      "epoch": 0.0052275522755227555,
+      "grad_norm": 0.7324660420417786,
+      "learning_rate": 7.849947368421052e-05,
+      "loss": 1.7833,
+      "step": 51
+    },
+    {
+      "epoch": 0.005330053300533005,
+      "grad_norm": 1.0079008340835571,
+      "learning_rate": 7.797263157894736e-05,
+      "loss": 2.0611,
+      "step": 52
+    },
+    {
+      "epoch": 0.005432554325543256,
+      "grad_norm": 1.0478756427764893,
+      "learning_rate": 7.744578947368421e-05,
+      "loss": 1.5655,
+      "step": 53
+    },
+    {
+      "epoch": 0.005535055350553505,
+      "grad_norm": 1.1489282846450806,
+      "learning_rate": 7.691894736842104e-05,
+      "loss": 2.0112,
+      "step": 54
+    },
+    {
+      "epoch": 0.005637556375563756,
+      "grad_norm": 1.0685741901397705,
+      "learning_rate": 7.63921052631579e-05,
+      "loss": 1.837,
+      "step": 55
+    },
+    {
+      "epoch": 0.005740057400574006,
+      "grad_norm": 0.9752334952354431,
+      "learning_rate": 7.586526315789473e-05,
+      "loss": 1.8408,
+      "step": 56
+    },
+    {
+      "epoch": 0.005842558425584256,
+      "grad_norm": 1.0988349914550781,
+      "learning_rate": 7.533842105263158e-05,
+      "loss": 1.6708,
+      "step": 57
+    },
+    {
+      "epoch": 0.005945059450594506,
+      "grad_norm": 1.059479832649231,
+      "learning_rate": 7.481157894736841e-05,
+      "loss": 1.4579,
+      "step": 58
+    },
+    {
+      "epoch": 0.006047560475604756,
+      "grad_norm": 1.0964906215667725,
+      "learning_rate": 7.428473684210526e-05,
+      "loss": 1.944,
+      "step": 59
+    },
+    {
+      "epoch": 0.006150061500615006,
+      "grad_norm": 1.0522820949554443,
+      "learning_rate": 7.375789473684209e-05,
+      "loss": 1.762,
+      "step": 60
+    },
+    {
+      "epoch": 0.0062525625256252566,
+      "grad_norm": 1.0768394470214844,
+      "learning_rate": 7.323105263157895e-05,
+      "loss": 1.7185,
+      "step": 61
+    },
+    {
+      "epoch": 0.006355063550635506,
+      "grad_norm": 1.1058900356292725,
+      "learning_rate": 7.270421052631578e-05,
+      "loss": 1.6855,
+      "step": 62
+    },
+    {
+      "epoch": 0.006457564575645757,
+      "grad_norm": 1.0793980360031128,
+      "learning_rate": 7.217736842105263e-05,
+      "loss": 1.6553,
+      "step": 63
+    },
+    {
+      "epoch": 0.006560065600656006,
+      "grad_norm": 1.257716417312622,
+      "learning_rate": 7.165052631578947e-05,
+      "loss": 1.5379,
+      "step": 64
+    },
+    {
+      "epoch": 0.006662566625666257,
+      "grad_norm": 1.116843581199646,
+      "learning_rate": 7.11236842105263e-05,
+      "loss": 1.6653,
+      "step": 65
+    },
+    {
+      "epoch": 0.006765067650676507,
+      "grad_norm": 1.016861081123352,
+      "learning_rate": 7.059684210526315e-05,
+      "loss": 1.3905,
+      "step": 66
+    },
+    {
+      "epoch": 0.006867568675686757,
+      "grad_norm": 1.1021226644515991,
+      "learning_rate": 7.006999999999998e-05,
+      "loss": 1.6295,
+      "step": 67
+    },
+    {
+      "epoch": 0.006970069700697007,
+      "grad_norm": 1.1197590827941895,
+      "learning_rate": 6.954315789473684e-05,
+      "loss": 1.6643,
+      "step": 68
+    },
+    {
+      "epoch": 0.007072570725707257,
+      "grad_norm": 1.242423415184021,
+      "learning_rate": 6.901631578947368e-05,
+      "loss": 1.7367,
+      "step": 69
+    },
+    {
+      "epoch": 0.007175071750717507,
+      "grad_norm": 1.1498782634735107,
+      "learning_rate": 6.848947368421052e-05,
+      "loss": 1.6476,
+      "step": 70
+    },
+    {
+      "epoch": 0.007277572775727758,
+      "grad_norm": 1.174621820449829,
+      "learning_rate": 6.796263157894737e-05,
+      "loss": 2.0002,
+      "step": 71
+    },
+    {
+      "epoch": 0.007380073800738007,
+      "grad_norm": 1.1680294275283813,
+      "learning_rate": 6.74357894736842e-05,
+      "loss": 1.4689,
+      "step": 72
+    },
+    {
+      "epoch": 0.007482574825748258,
+      "grad_norm": 1.3079551458358765,
+      "learning_rate": 6.690894736842105e-05,
+      "loss": 1.5757,
+      "step": 73
+    },
+    {
+      "epoch": 0.007585075850758507,
+      "grad_norm": 1.2481780052185059,
+      "learning_rate": 6.638210526315788e-05,
+      "loss": 1.6238,
+      "step": 74
+    },
+    {
+      "epoch": 0.007687576875768758,
+      "grad_norm": 1.3879331350326538,
+      "learning_rate": 6.585526315789474e-05,
+      "loss": 1.9082,
+      "step": 75
+    },
+    {
+      "epoch": 0.007790077900779008,
+      "grad_norm": 1.6329386234283447,
+      "learning_rate": 6.532842105263157e-05,
+      "loss": 1.739,
+      "step": 76
+    },
+    {
+      "epoch": 0.007892578925789259,
+      "grad_norm": 1.4674111604690552,
+      "learning_rate": 6.480157894736842e-05,
+      "loss": 1.6045,
+      "step": 77
+    },
+    {
+      "epoch": 0.007995079950799507,
+      "grad_norm": 1.4382115602493286,
+      "learning_rate": 6.427473684210526e-05,
+      "loss": 1.4885,
+      "step": 78
+    },
+    {
+      "epoch": 0.008097580975809758,
+      "grad_norm": 1.4571962356567383,
+      "learning_rate": 6.37478947368421e-05,
+      "loss": 1.3451,
+      "step": 79
+    },
+    {
+      "epoch": 0.008200082000820008,
+      "grad_norm": 1.6193132400512695,
+      "learning_rate": 6.322105263157894e-05,
+      "loss": 1.8344,
+      "step": 80
+    },
+    {
+      "epoch": 0.008302583025830259,
+      "grad_norm": 1.673788070678711,
+      "learning_rate": 6.269421052631577e-05,
+      "loss": 1.4983,
+      "step": 81
+    },
+    {
+      "epoch": 0.008405084050840509,
+      "grad_norm": 1.7791383266448975,
+      "learning_rate": 6.216736842105263e-05,
+      "loss": 1.8181,
+      "step": 82
+    },
+    {
+      "epoch": 0.008507585075850758,
+      "grad_norm": 1.6379783153533936,
+      "learning_rate": 6.164052631578947e-05,
+      "loss": 1.5085,
+      "step": 83
+    },
+    {
+      "epoch": 0.008610086100861008,
+      "grad_norm": 2.0746712684631348,
+      "learning_rate": 6.111368421052631e-05,
+      "loss": 1.8331,
+      "step": 84
+    },
+    {
+      "epoch": 0.008712587125871259,
+      "grad_norm": 1.9016090631484985,
+      "learning_rate": 6.058684210526315e-05,
+      "loss": 1.6955,
+      "step": 85
+    },
+    {
+      "epoch": 0.008815088150881509,
+      "grad_norm": 2.0374410152435303,
+      "learning_rate": 6.005999999999999e-05,
+      "loss": 1.6262,
+      "step": 86
+    },
+    {
+      "epoch": 0.00891758917589176,
+      "grad_norm": 2.458918809890747,
+      "learning_rate": 5.953315789473684e-05,
+      "loss": 1.782,
+      "step": 87
+    },
+    {
+      "epoch": 0.009020090200902008,
+      "grad_norm": 2.420497179031372,
+      "learning_rate": 5.9006315789473676e-05,
+      "loss": 1.4256,
+      "step": 88
+    },
+    {
+      "epoch": 0.009122591225912259,
+      "grad_norm": 3.1010406017303467,
+      "learning_rate": 5.847947368421053e-05,
+      "loss": 1.5861,
+      "step": 89
+    },
+    {
+      "epoch": 0.00922509225092251,
+      "grad_norm": 2.9462192058563232,
+      "learning_rate": 5.795263157894737e-05,
+      "loss": 1.7643,
+      "step": 90
+    },
+    {
+      "epoch": 0.00932759327593276,
+      "grad_norm": 3.0921950340270996,
+      "learning_rate": 5.742578947368421e-05,
+      "loss": 1.7482,
+      "step": 91
+    },
+    {
+      "epoch": 0.00943009430094301,
+      "grad_norm": 3.2740588188171387,
+      "learning_rate": 5.6898947368421046e-05,
+      "loss": 1.7103,
+      "step": 92
+    },
+    {
+      "epoch": 0.009532595325953259,
+      "grad_norm": 3.282179594039917,
+      "learning_rate": 5.6372105263157886e-05,
+      "loss": 1.3958,
+      "step": 93
+    },
+    {
+      "epoch": 0.00963509635096351,
+      "grad_norm": 2.8050053119659424,
+      "learning_rate": 5.584526315789473e-05,
+      "loss": 1.0987,
+      "step": 94
+    },
+    {
+      "epoch": 0.00973759737597376,
+      "grad_norm": 3.167931079864502,
+      "learning_rate": 5.531842105263158e-05,
+      "loss": 1.0239,
+      "step": 95
+    },
+    {
+      "epoch": 0.00984009840098401,
+      "grad_norm": 4.01984167098999,
+      "learning_rate": 5.4791578947368424e-05,
+      "loss": 2.1924,
+      "step": 96
+    },
+    {
+      "epoch": 0.00994259942599426,
+      "grad_norm": 7.1500983238220215,
+      "learning_rate": 5.426473684210526e-05,
+      "loss": 1.6619,
+      "step": 97
+    },
+    {
+      "epoch": 0.01004510045100451,
+      "grad_norm": 3.5555317401885986,
+      "learning_rate": 5.37378947368421e-05,
+      "loss": 1.0794,
+      "step": 98
+    },
+    {
+      "epoch": 0.01014760147601476,
+      "grad_norm": 3.8048603534698486,
+      "learning_rate": 5.321105263157894e-05,
+      "loss": 1.4438,
+      "step": 99
+    },
+    {
+      "epoch": 0.01025010250102501,
+      "grad_norm": 6.511986255645752,
+      "learning_rate": 5.268421052631578e-05,
+      "loss": 2.3968,
+      "step": 100
+    },
+    {
+      "epoch": 0.01025010250102501,
+      "eval_loss": 1.6530330181121826,
+      "eval_runtime": 213.675,
+      "eval_samples_per_second": 19.225,
+      "eval_steps_per_second": 4.806,
+      "step": 100
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 3.572495321923584e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null