xuancoblab2023 committed on Mar 24, 2024

Commit

c66bcd4

verified ·

1 Parent(s): 0edd31d

Training in progress, epoch 1

Browse files

Files changed (22) hide show

logs/events.out.tfevents.1711282118.73e3a81c01ef.4225.11 +2 -2
logs/events.out.tfevents.1711282685.73e3a81c01ef.4225.12 +3 -0
model.safetensors +1 -1
run-11/checkpoint-384/config.json +34 -0
run-11/checkpoint-384/model.safetensors +3 -0
run-11/checkpoint-384/optimizer.pt +3 -0
run-11/checkpoint-384/rng_state.pth +3 -0
run-11/checkpoint-384/scheduler.pt +3 -0
run-11/checkpoint-384/special_tokens_map.json +7 -0
run-11/checkpoint-384/tokenizer.json +0 -0
run-11/checkpoint-384/tokenizer_config.json +57 -0
run-11/checkpoint-384/trainer_state.json +67 -0
run-11/checkpoint-384/training_args.bin +3 -0
run-11/checkpoint-384/vocab.txt +0 -0
run-12/checkpoint-192/config.json +1 -1
run-12/checkpoint-192/model.safetensors +1 -1
run-12/checkpoint-192/optimizer.pt +1 -1
run-12/checkpoint-192/rng_state.pth +1 -1
run-12/checkpoint-192/scheduler.pt +1 -1
run-12/checkpoint-192/trainer_state.json +24 -79
run-12/checkpoint-192/training_args.bin +2 -2
training_args.bin +1 -1

logs/events.out.tfevents.1711282118.73e3a81c01ef.4225.11 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9672ca8369db0d1b2e24fce0a475388b89cbd90363b79d428c879e7a8976af4b
-size 5406

 version https://git-lfs.github.com/spec/v1
+oid sha256:6eea5e6e481b3227f9639ac37105278f19f495bc8ac3007f2491efe2f94afd7d
+size 6490

logs/events.out.tfevents.1711282685.73e3a81c01ef.4225.12 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:38639f8b81d295535291f4a763126613d20a17abbec540e2b0f02651fbef6b6f
+size 5407

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c69f969bceed1b54c23220f18ec5897c0ce3f2a2ad6c45a39a124fb62daf1064
 size 17549312

 version https://git-lfs.github.com/spec/v1
+oid sha256:fddfebc5a15e22a9cb6eacb3cac3a512030db6c96bc37a4e5c4bc4f1ceac2470
 size 17549312

run-11/checkpoint-384/config.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "_name_or_path": "google/bert_uncased_L-2_H-128_A-2",
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 128,
+  "id2label": {
+    "0": "negative",
+    "1": "positive"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 512,
+  "label2id": {
+    "negative": "0",
+    "positive": "1"
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 2,
+  "num_hidden_layers": 2,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "torch_dtype": "float32",
+  "transformers_version": "4.39.1",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}

run-11/checkpoint-384/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c9f76e5ca7dac9462ac2f5615b56794a86afc37b8b2d6a496dc56228176caab0
+size 17549312

run-11/checkpoint-384/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7a6af454b0b6d64b2a42e086e964574a846567f461ae49f1e4d2b6c0c7235921
+size 35122746

run-11/checkpoint-384/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d2558f72cda987826e5e7caf54cc6282fe335ebecbddfed7bb83f0184d1f54cc
+size 14054

run-11/checkpoint-384/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b1625719570c77e6147a70efc390d8fdd63e41ac6211a419770291abeb6ca70e
+size 1064

run-11/checkpoint-384/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-11/checkpoint-384/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-11/checkpoint-384/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,57 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

run-11/checkpoint-384/trainer_state.json ADDED Viewed

	@@ -0,0 +1,67 @@

+{
+  "best_metric": 0.7857142857142857,
+  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-11/checkpoint-384",
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 384,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "grad_norm": 3.698505163192749,
+      "learning_rate": 0.0004729463641993846,
+      "loss": 0.4978,
+      "step": 192
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.7465753424657534,
+      "eval_f1": 0.7821698906644239,
+      "eval_loss": 0.46408411860466003,
+      "eval_mcc": 0.5218028809356926,
+      "eval_precision": 0.6858407079646017,
+      "eval_recall": 0.9099804305283757,
+      "eval_runtime": 66.6598,
+      "eval_samples_per_second": 15.332,
+      "eval_steps_per_second": 0.48,
+      "step": 192
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 4.018568515777588,
+      "learning_rate": 0.0,
+      "loss": 0.4526,
+      "step": 384
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 0.7857142857142857,
+      "eval_f1": 0.7999999999999999,
+      "eval_loss": 0.43016862869262695,
+      "eval_mcc": 0.5773502691896257,
+      "eval_precision": 0.75,
+      "eval_recall": 0.8571428571428571,
+      "eval_runtime": 66.4246,
+      "eval_samples_per_second": 15.386,
+      "eval_steps_per_second": 0.482,
+      "step": 384
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 384,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 2,
+  "save_steps": 500,
+  "total_flos": 471390394560.0,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": {
+    "alpha": 0.7646810195593648,
+    "learning_rate": 0.0009458927283987692,
+    "num_train_epochs": 2,
+    "per_device_train_batch_size": 16,
+    "temperature": 27
+  }
+}

run-11/checkpoint-384/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:65c6400aa5bf6c65e8a3c85d0e6533fdb72aa1696c70cf3415b3377fc76b815a
+size 4984

run-11/checkpoint-384/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-12/checkpoint-192/config.json CHANGED Viewed

@@ -27,7 +27,7 @@
   "position_embedding_type": "absolute",
   "problem_type": "single_label_classification",
   "torch_dtype": "float32",
-  "transformers_version": "4.38.2",
   "type_vocab_size": 2,
   "use_cache": true,
   "vocab_size": 30522

   "position_embedding_type": "absolute",
   "problem_type": "single_label_classification",
   "torch_dtype": "float32",
+  "transformers_version": "4.39.1",
   "type_vocab_size": 2,
   "use_cache": true,
   "vocab_size": 30522

run-12/checkpoint-192/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:eb89bed9e1f4b3ca24a09b2ab853283ddb97a2d28a54f9a8c321bbb63821368e
 size 17549312

 version https://git-lfs.github.com/spec/v1
+oid sha256:fddfebc5a15e22a9cb6eacb3cac3a512030db6c96bc37a4e5c4bc4f1ceac2470
 size 17549312

run-12/checkpoint-192/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4180b4ec9100e5561ed1e6aabd5c182f142c745c2d34296099d0ee77328b0d40
 size 35122746

 version https://git-lfs.github.com/spec/v1
+oid sha256:3e81ecfd555d700deb004865edbcb68c826d130f8c8fc25f3ee99d60ed0986c1
 size 35122746

run-12/checkpoint-192/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1d267b33c0df8c744bb62ca8fc0e243d0ddded8c46ea62639c56f51d1dc7a548
 size 14054

 version https://git-lfs.github.com/spec/v1
+oid sha256:ee09ea0d216727b799a80771850b95d7d61b646360702c64b2ec889cdc725399
 size 14054

run-12/checkpoint-192/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:89765b76133dde54addbf058c2cd7573561d645f440f8ee45c7bc0ca64411cbf
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:ae55f8912cfe90bd83bf0281b3dcc94b61b7a669fc8193b18746eecfedd10840
 size 1064

run-12/checkpoint-192/trainer_state.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
-  "best_metric": 0.8131115459882583,
   "best_model_checkpoint": "tiny-bert-sst2-distilled/run-12/checkpoint-192",
-  "epoch": 4.0,
   "eval_steps": 500,
   "global_step": 192,
   "is_hyper_param_search": true,
@@ -10,93 +10,38 @@
   "log_history": [
     {
       "epoch": 1.0,
-      "grad_norm": 2.1590919494628906,
-      "learning_rate": 0.0007554421241694311,
-      "loss": 0.5878,
-      "step": 48
-    },
-    {
-      "epoch": 1.0,
-      "eval_accuracy": 0.7602739726027398,
-      "eval_f1": 0.7911338448422848,
-      "eval_loss": 0.49036356806755066,
-      "eval_precision": 0.7009063444108762,
-      "eval_recall": 0.9080234833659491,
-      "eval_runtime": 27.1211,
-      "eval_samples_per_second": 37.683,
-      "eval_steps_per_second": 0.59,
-      "step": 48
-    },
-    {
-      "epoch": 2.0,
-      "grad_norm": 5.101812362670898,
-      "learning_rate": 0.0006475218207166552,
-      "loss": 0.4815,
-      "step": 96
-    },
-    {
-      "epoch": 2.0,
-      "eval_accuracy": 0.7857142857142857,
-      "eval_f1": 0.8003646308113036,
-      "eval_loss": 0.45267680287361145,
-      "eval_precision": 0.7491467576791809,
-      "eval_recall": 0.8590998043052838,
-      "eval_runtime": 27.6158,
-      "eval_samples_per_second": 37.008,
-      "eval_steps_per_second": 0.579,
-      "step": 96
-    },
-    {
-      "epoch": 3.0,
-      "grad_norm": 3.0611319541931152,
-      "learning_rate": 0.0005396015172638793,
-      "loss": 0.4483,
-      "step": 144
-    },
-    {
-      "epoch": 3.0,
-      "eval_accuracy": 0.7896281800391389,
-      "eval_f1": 0.8151332760103182,
-      "eval_loss": 0.47112011909484863,
-      "eval_precision": 0.7269938650306749,
-      "eval_recall": 0.9275929549902152,
-      "eval_runtime": 28.0904,
-      "eval_samples_per_second": 36.383,
-      "eval_steps_per_second": 0.57,
-      "step": 144
-    },
-    {
-      "epoch": 4.0,
-      "grad_norm": 1.9773441553115845,
-      "learning_rate": 0.0004316812138111035,
-      "loss": 0.4206,
       "step": 192
     },
     {
-      "epoch": 4.0,
-      "eval_accuracy": 0.8131115459882583,
-      "eval_f1": 0.8168744007670182,
-      "eval_loss": 0.4279371500015259,
-      "eval_precision": 0.8007518796992481,
-      "eval_recall": 0.8336594911937377,
-      "eval_runtime": 27.1892,
-      "eval_samples_per_second": 37.588,
-      "eval_steps_per_second": 0.588,
       "step": 192
     }
   ],
   "logging_steps": 500,
-  "max_steps": 384,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 8,
   "save_steps": 500,
-  "total_flos": 942780789120.0,
-  "train_batch_size": 64,
   "trial_name": null,
   "trial_params": {
-    "alpha": 0.9354133816526636,
-    "learning_rate": 0.000863362427622207,
-    "num_train_epochs": 8,
-    "temperature": 15
   }
 }

 {
+  "best_metric": 0.7915851272015656,
   "best_model_checkpoint": "tiny-bert-sst2-distilled/run-12/checkpoint-192",
+  "epoch": 1.0,
   "eval_steps": 500,
   "global_step": 192,
   "is_hyper_param_search": true,
   "log_history": [
     {
       "epoch": 1.0,
+      "grad_norm": 0.9969735741615295,
+      "learning_rate": 0.0003519988357465585,
+      "loss": 0.4838,
       "step": 192
     },
     {
+      "epoch": 1.0,
+      "eval_accuracy": 0.7915851272015656,
+      "eval_f1": 0.8149435273675064,
+      "eval_loss": 0.4351368844509125,
+      "eval_mcc": 0.6026908465743466,
+      "eval_precision": 0.7328125,
+      "eval_recall": 0.9178082191780822,
+      "eval_runtime": 66.7179,
+      "eval_samples_per_second": 15.318,
+      "eval_steps_per_second": 0.48,
       "step": 192
     }
   ],
   "logging_steps": 500,
+  "max_steps": 1344,
   "num_input_tokens_seen": 0,
+  "num_train_epochs": 7,
   "save_steps": 500,
+  "total_flos": 235695197280.0,
+  "train_batch_size": 16,
   "trial_name": null,
   "trial_params": {
+    "alpha": 0.7654129557516562,
+    "learning_rate": 0.00041066530837098494,
+    "num_train_epochs": 7,
+    "per_device_train_batch_size": 16,
+    "temperature": 40
   }
 }

run-12/checkpoint-192/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:de3788bfc8a3541ae46e22029a32017f9c59ccb7742b5f55bae39d09e3690a65
-size 4920

 version https://git-lfs.github.com/spec/v1
+oid sha256:d85b6e817a8acea631d86d5c2c6470764f0dfa446cf60f01180ee7e29f4e646c
+size 4984

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:65c6400aa5bf6c65e8a3c85d0e6533fdb72aa1696c70cf3415b3377fc76b815a
 size 4984

 version https://git-lfs.github.com/spec/v1
+oid sha256:d85b6e817a8acea631d86d5c2c6470764f0dfa446cf60f01180ee7e29f4e646c
 size 4984