Training in progress, epoch 1

Files changed (10) hide show

logs/events.out.tfevents.1709737131.1dcff7455174.4515.0 ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:d91e9148ba06f491d0300049693c2341f409577e770e2d96ea5a140e22454fc6
+size 4184

logs/events.out.tfevents.1709737211.1dcff7455174.7719.0 ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:9c9e523b45bc117c2c3cbadd311f9ddb670d13903eeffafd785984849a8ace2c
+size 5315

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7792eeef88f2b040b67cfb87d16e308ac052158fc8773d47734046de06043a16
 size 17549312

 version https://git-lfs.github.com/spec/v1
+oid sha256:c92adc7dcf01e1fbb05511c4493199b42d344e3981283ab521c477e538f9aceb
 size 17549312

run-0/checkpoint-96/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:436c1223e40cc411f216b7a6eb698829ee74318f91d2f9ee826e701f17df37dd
 size 17549312

 version https://git-lfs.github.com/spec/v1
+oid sha256:c92adc7dcf01e1fbb05511c4493199b42d344e3981283ab521c477e538f9aceb
 size 17549312

run-0/checkpoint-96/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:14bc0388b3bb04b006cead065a5f3631ae5c4ff9034a98aaaa7d3d7bb6d3b660
 size 35122746

 version https://git-lfs.github.com/spec/v1
+oid sha256:db3824b1c6f237bcf65a54ea4cca3aa03e4cfa3c81a3b6afc4e72e03c759249e
 size 35122746

run-0/checkpoint-96/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:98396641528716b08d59d05e7221ff89e68ef8805fd0936c83a7ab0df7e80e0c
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:43ddd3b1c41613cd7e1e1227d929645b76a450f62d1a6ce5575c2f7873d9d0b6
 size 1064

run-0/checkpoint-96/trainer_state.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "best_metric": 0.7436399217221135,
   "best_model_checkpoint": "tiny-bert-sst2-distilled/run-0/checkpoint-96",
   "epoch": 1.0,
   "eval_steps": 500,
@@ -10,36 +10,36 @@
   "log_history": [
     {
       "epoch": 1.0,
-      "grad_norm": 1.8004393577575684,
-      "learning_rate": 0.0007697157586205755,
-      "loss": 0.4935,
       "step": 96
     },
     {
       "epoch": 1.0,
-      "eval_accuracy": 0.7436399217221135,
-      "eval_f1": 0.7689594356261024,
-      "eval_loss": 0.448881596326828,
-      "eval_precision": 0.6998394863563403,
-      "eval_recall": 0.8532289628180039,
-      "eval_runtime": 28.6476,
-      "eval_samples_per_second": 35.675,
-      "eval_steps_per_second": 1.117,
       "step": 96
     }
   ],
   "logging_steps": 500,
-  "max_steps": 960,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 10,
   "save_steps": 500,
   "total_flos": 235695197280.0,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": {
-    "alpha": 0.735211359802956,
-    "learning_rate": 0.0008552397318006393,
-    "num_train_epochs": 10,
-    "temperature": 25
   }
 }

 {
+  "best_metric": 0.7514677103718199,
   "best_model_checkpoint": "tiny-bert-sst2-distilled/run-0/checkpoint-96",
   "epoch": 1.0,
   "eval_steps": 500,
   "log_history": [
     {
       "epoch": 1.0,
+      "grad_norm": 1.6740443706512451,
+      "learning_rate": 0.0001218354408608861,
+      "loss": 0.4816,
       "step": 96
     },
     {
       "epoch": 1.0,
+      "eval_accuracy": 0.7514677103718199,
+      "eval_f1": 0.7519531250000001,
+      "eval_loss": 0.41538161039352417,
+      "eval_precision": 0.7504873294346979,
+      "eval_recall": 0.7534246575342466,
+      "eval_runtime": 26.5175,
+      "eval_samples_per_second": 38.541,
+      "eval_steps_per_second": 1.207,
       "step": 96
     }
   ],
   "logging_steps": 500,
+  "max_steps": 480,
   "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
   "save_steps": 500,
   "total_flos": 235695197280.0,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": {
+    "alpha": 0.679174768290245,
+    "learning_rate": 0.00015229430107610762,
+    "num_train_epochs": 5,
+    "temperature": 27
   }
 }

run-0/checkpoint-96/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:23804eaaa0feed30486319d2982557f65dce708f7d788864c811c20b2e8c33b4
 size 4920

 version https://git-lfs.github.com/spec/v1
+oid sha256:1873f3a503b2d5b3f7200baa33f737ba864dd65edb3834d85c5b8e40b6b72f07
 size 4920

tokenizer.json CHANGED Viewed

@@ -2,7 +2,7 @@
   "version": "1.0",
   "truncation": {
     "direction": "Right",
-    "max_length": 33,
     "strategy": "LongestFirst",
     "stride": 0
   },

   "version": "1.0",
   "truncation": {
     "direction": "Right",
+    "max_length": 31,
     "strategy": "LongestFirst",
     "stride": 0
   },

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6ccea265ae9122da077ff8def664c83c587f85ed26bee202b170a564116cd542
 size 4920

 version https://git-lfs.github.com/spec/v1
+oid sha256:1873f3a503b2d5b3f7200baa33f737ba864dd65edb3834d85c5b8e40b6b72f07
 size 4920