Training in progress, step 359, checkpoint

Browse files

Files changed (4) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +101 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4dff6b0dedb8cd82daf4930c2c298d702697c3a8d49a7cf3e70227e87220ea03
 size 60010048

 version https://git-lfs.github.com/spec/v1
+oid sha256:1cd7a11dd960decfde159b9ffedcb277804a1627b5b44f99755257d42961884c
 size 60010048

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a0b091d67757c0024511fb220800a0f1709b5b2e2e280a5eb53a82c2d0649560
 size 30428180

 version https://git-lfs.github.com/spec/v1
+oid sha256:85eceb4a829aa4047fbf635b04070cf32f5480f395bbe1d6b5df070c2e3a1aac
 size 30428180

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:248bdad94673d0171b6613a2054004ce1fbd7cc6609011663f62eb8bd70a3480
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:7dcd05dfbbdeba643c656ed11b36a8a6487d3151c9ac5ff333ebbd78351d6657
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.9465020576131687,
   "eval_steps": 500,
-  "global_step": 345,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2422,6 +2422,104 @@
       "learning_rate": 1.4393939393939396e-05,
       "loss": 1.1526,
       "step": 345
     }
   ],
   "logging_steps": 1,
@@ -2441,7 +2539,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4.21047247978709e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.9849108367626886,
   "eval_steps": 500,
+  "global_step": 359,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 1.4393939393939396e-05,
       "loss": 1.1526,
       "step": 345
+    },
+    {
+      "epoch": 0.9492455418381345,
+      "grad_norm": 0.14177238941192627,
+      "learning_rate": 1.3636363636363637e-05,
+      "loss": 1.132,
+      "step": 346
+    },
+    {
+      "epoch": 0.9519890260631001,
+      "grad_norm": 0.13835884630680084,
+      "learning_rate": 1.287878787878788e-05,
+      "loss": 1.1599,
+      "step": 347
+    },
+    {
+      "epoch": 0.9547325102880658,
+      "grad_norm": 0.14390669763088226,
+      "learning_rate": 1.2121212121212122e-05,
+      "loss": 1.15,
+      "step": 348
+    },
+    {
+      "epoch": 0.9574759945130316,
+      "grad_norm": 0.14811821281909943,
+      "learning_rate": 1.1363636363636365e-05,
+      "loss": 1.0759,
+      "step": 349
+    },
+    {
+      "epoch": 0.9602194787379973,
+      "grad_norm": 0.14959345757961273,
+      "learning_rate": 1.0606060606060607e-05,
+      "loss": 1.126,
+      "step": 350
+    },
+    {
+      "epoch": 0.9629629629629629,
+      "grad_norm": 0.14656995236873627,
+      "learning_rate": 9.848484848484848e-06,
+      "loss": 1.1341,
+      "step": 351
+    },
+    {
+      "epoch": 0.9657064471879286,
+      "grad_norm": 0.14695106446743011,
+      "learning_rate": 9.090909090909091e-06,
+      "loss": 1.1259,
+      "step": 352
+    },
+    {
+      "epoch": 0.9684499314128944,
+      "grad_norm": 0.14155460894107819,
+      "learning_rate": 8.333333333333334e-06,
+      "loss": 1.1503,
+      "step": 353
+    },
+    {
+      "epoch": 0.9711934156378601,
+      "grad_norm": 0.1382407397031784,
+      "learning_rate": 7.5757575757575764e-06,
+      "loss": 1.1417,
+      "step": 354
+    },
+    {
+      "epoch": 0.9739368998628258,
+      "grad_norm": 0.14089229702949524,
+      "learning_rate": 6.818181818181818e-06,
+      "loss": 1.1551,
+      "step": 355
+    },
+    {
+      "epoch": 0.9766803840877915,
+      "grad_norm": 0.14886945486068726,
+      "learning_rate": 6.060606060606061e-06,
+      "loss": 1.0973,
+      "step": 356
+    },
+    {
+      "epoch": 0.9794238683127572,
+      "grad_norm": 0.1485728621482849,
+      "learning_rate": 5.303030303030304e-06,
+      "loss": 1.1028,
+      "step": 357
+    },
+    {
+      "epoch": 0.9821673525377229,
+      "grad_norm": 0.1496025174856186,
+      "learning_rate": 4.5454545454545455e-06,
+      "loss": 1.0941,
+      "step": 358
+    },
+    {
+      "epoch": 0.9849108367626886,
+      "grad_norm": 0.1394403725862503,
+      "learning_rate": 3.7878787878787882e-06,
+      "loss": 1.1452,
+      "step": 359
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 4.3742608796698214e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null