Training in progress, step 50, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +81 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8b745040145df648995c9311b4af089cc29a405d377f37c705af666568d7ad2d
 size 556856304

 version https://git-lfs.github.com/spec/v1
+oid sha256:de9a4be4e4d926be80de81eed81a6414b6caaec294d5f9369b3b8693f6a19b24
 size 556856304

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:36c11c4310ffe48dc3c327f48a40073f501b6d9f2af441c0cebf3ce9f5e7c00a
 size 21599060

 version https://git-lfs.github.com/spec/v1
+oid sha256:b59047ac41b505d071b4881081a5b000079fc2b7de3225f06633696cd3d5d5f7
 size 21599060

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9f22ee2bd42680b5f198191658fb24f22c66fd0968ab26c6e7ce687248687a73
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:0f5adca46d5768b2528d11214bf50c169d41422785e8b729921c5399a7fed5df
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:28418a35cb7e15ebbce37743b08fd366c25ee320167b307a3e449a74781d02de
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:439e51f57871ee9c2bc8b35458a0c03f9b948af7a0d15ffe5e1cf9789955c6c8
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.007259528130671506,
   "eval_steps": 13,
-  "global_step": 39,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -312,6 +312,83 @@
       "eval_samples_per_second": 55.962,
       "eval_steps_per_second": 27.993,
       "step": 39
     }
   ],
   "logging_steps": 1,
@@ -326,12 +403,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2546502362726400.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.009307087347014752,
   "eval_steps": 13,
+  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 55.962,
       "eval_steps_per_second": 27.993,
       "step": 39
+    },
+    {
+      "epoch": 0.007445669877611801,
+      "grad_norm": 8.265851974487305,
+      "learning_rate": 7.3223304703363135e-06,
+      "loss": 10.5586,
+      "step": 40
+    },
+    {
+      "epoch": 0.007631811624552096,
+      "grad_norm": 10.229063034057617,
+      "learning_rate": 5.989850859999227e-06,
+      "loss": 11.0341,
+      "step": 41
+    },
+    {
+      "epoch": 0.007817953371492391,
+      "grad_norm": 9.973093032836914,
+      "learning_rate": 4.7745751406263165e-06,
+      "loss": 11.297,
+      "step": 42
+    },
+    {
+      "epoch": 0.008004095118432687,
+      "grad_norm": 11.262483596801758,
+      "learning_rate": 3.6839958911476957e-06,
+      "loss": 11.0967,
+      "step": 43
+    },
+    {
+      "epoch": 0.008190236865372982,
+      "grad_norm": 7.972822666168213,
+      "learning_rate": 2.7248368952908053e-06,
+      "loss": 10.4,
+      "step": 44
+    },
+    {
+      "epoch": 0.008376378612313277,
+      "grad_norm": 9.389989852905273,
+      "learning_rate": 1.9030116872178316e-06,
+      "loss": 11.1129,
+      "step": 45
+    },
+    {
+      "epoch": 0.008562520359253572,
+      "grad_norm": 8.1710844039917,
+      "learning_rate": 1.2235870926211619e-06,
+      "loss": 10.1175,
+      "step": 46
+    },
+    {
+      "epoch": 0.008748662106193867,
+      "grad_norm": 10.404925346374512,
+      "learning_rate": 6.907519900580861e-07,
+      "loss": 11.9329,
+      "step": 47
+    },
+    {
+      "epoch": 0.008934803853134162,
+      "grad_norm": 8.423550605773926,
+      "learning_rate": 3.077914851215585e-07,
+      "loss": 11.0093,
+      "step": 48
+    },
+    {
+      "epoch": 0.009120945600074457,
+      "grad_norm": 10.052149772644043,
+      "learning_rate": 7.706665667180091e-08,
+      "loss": 12.0759,
+      "step": 49
+    },
+    {
+      "epoch": 0.009307087347014752,
+      "grad_norm": 9.366209030151367,
+      "learning_rate": 0.0,
+      "loss": 12.1312,
+      "step": 50
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 3264746618880000.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null