xuancoblab2023 committed on Mar 6, 2024

Commit

d819b41

verified ·

1 Parent(s): d24a2a9

Training in progress, epoch 2

Browse files

Files changed (26) hide show

logs/events.out.tfevents.1709742338.fcc57e6e3903.3035.3 +2 -2
model.safetensors +1 -1
run-3/checkpoint-176/config.json +34 -0
run-3/checkpoint-176/model.safetensors +3 -0
run-3/checkpoint-176/optimizer.pt +3 -0
run-3/checkpoint-176/rng_state.pth +3 -0
run-3/checkpoint-176/scheduler.pt +3 -0
run-3/checkpoint-176/special_tokens_map.json +7 -0
run-3/checkpoint-176/tokenizer.json +0 -0
run-3/checkpoint-176/tokenizer_config.json +57 -0
run-3/checkpoint-176/trainer_state.json +46 -0
run-3/checkpoint-176/training_args.bin +3 -0
run-3/checkpoint-176/vocab.txt +0 -0
run-3/checkpoint-352/config.json +34 -0
run-3/checkpoint-352/model.safetensors +3 -0
run-3/checkpoint-352/optimizer.pt +3 -0
run-3/checkpoint-352/rng_state.pth +3 -0
run-3/checkpoint-352/scheduler.pt +3 -0
run-3/checkpoint-352/special_tokens_map.json +7 -0
run-3/checkpoint-352/tokenizer.json +0 -0
run-3/checkpoint-352/tokenizer_config.json +57 -0
run-3/checkpoint-352/trainer_state.json +65 -0
run-3/checkpoint-352/training_args.bin +3 -0
run-3/checkpoint-352/vocab.txt +0 -0
tokenizer.json +1 -1
training_args.bin +1 -1

logs/events.out.tfevents.1709742338.fcc57e6e3903.3035.3 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9fc37f035c54ce47187cd97c64bdefab842600ca13b7b3b129c51201bf249d29
-size 5328

 version https://git-lfs.github.com/spec/v1
+oid sha256:e93aadbbfe3ced2c9914b0a9803b303f44e2911fe4db39dda063742b8c9f4b7e
+size 6011

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b72faf6103e6a3c3d6bdafbff0cbeaac121ce7de8cb2bb06a35f4382a46810f7
 size 17549312

 version https://git-lfs.github.com/spec/v1
+oid sha256:1974eec902e3411aa7bdd0f55ac57f6756f178ad876646ecf017e352ad9e898e
 size 17549312

run-3/checkpoint-176/config.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "_name_or_path": "google/bert_uncased_L-2_H-128_A-2",
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 128,
+  "id2label": {
+    "0": "negative",
+    "1": "positive"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 512,
+  "label2id": {
+    "negative": "0",
+    "positive": "1"
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 2,
+  "num_hidden_layers": 2,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "torch_dtype": "float32",
+  "transformers_version": "4.38.2",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}

run-3/checkpoint-176/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:97780c69f05de76f9d89d5b245a3979297519b2bb08ee6e324817933a1e9732d
+size 17549312

run-3/checkpoint-176/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7f8e333d05e84d805f18a38e78ce9501048bf03a54bec47e756aa8091e56d87d
+size 35122746

run-3/checkpoint-176/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cb717feef51d4e88669e8d1d471d0d708609e7a7f8d567ec759694a724e3fc78
+size 14054

run-3/checkpoint-176/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4df630553ac056afaca829e09299470a446e3edfbcc58c6ef47d35774499ff5a
+size 1064

run-3/checkpoint-176/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-3/checkpoint-176/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-3/checkpoint-176/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,57 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

run-3/checkpoint-176/trainer_state.json ADDED Viewed

	@@ -0,0 +1,46 @@

+{
+  "best_metric": 0.5148514851485149,
+  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-3/checkpoint-176",
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 176,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "grad_norm": 0.5431386232376099,
+      "learning_rate": 0.00012088388613301416,
+      "loss": 0.2133,
+      "step": 176
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.5148514851485149,
+      "eval_f1": 0.14035087719298245,
+      "eval_loss": 0.20053359866142273,
+      "eval_precision": 0.6060606060606061,
+      "eval_recall": 0.07936507936507936,
+      "eval_runtime": 15.0376,
+      "eval_samples_per_second": 33.582,
+      "eval_steps_per_second": 1.064,
+      "step": 176
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 1760,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "total_flos": 777843283140.0,
+  "train_batch_size": 54,
+  "trial_name": null,
+  "trial_params": {
+    "alpha": 0.239256796645401,
+    "learning_rate": 0.0001343154290366824,
+    "num_train_epochs": 10,
+    "per_device_train_batch_size": 54,
+    "temperature": 13
+  }
+}

run-3/checkpoint-176/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:71e3fa9f35ba1a109d4dbcaa99a0da7b79ffac3932fc22f17787de76b9701cac
+size 4920

run-3/checkpoint-176/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-3/checkpoint-352/config.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "_name_or_path": "google/bert_uncased_L-2_H-128_A-2",
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 128,
+  "id2label": {
+    "0": "negative",
+    "1": "positive"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 512,
+  "label2id": {
+    "negative": "0",
+    "positive": "1"
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 2,
+  "num_hidden_layers": 2,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "torch_dtype": "float32",
+  "transformers_version": "4.38.2",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}

run-3/checkpoint-352/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1974eec902e3411aa7bdd0f55ac57f6756f178ad876646ecf017e352ad9e898e
+size 17549312

run-3/checkpoint-352/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4fe56813aec20e9c530c799541711276e4122a893f587c682768f294add19ae8
+size 35122746

run-3/checkpoint-352/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8a77ce242917a7974aafb79775c4d65cace9e5b2e8fa33c1e308b9820377f088
+size 14054

run-3/checkpoint-352/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:002fe54a80031d1f95e02e2eee1d855cb07114f7aafaa809fb60363c389c81c4
+size 1064

run-3/checkpoint-352/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-3/checkpoint-352/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-3/checkpoint-352/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,57 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

run-3/checkpoint-352/trainer_state.json ADDED Viewed

	@@ -0,0 +1,65 @@

+{
+  "best_metric": 0.5148514851485149,
+  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-3/checkpoint-176",
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 352,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "grad_norm": 0.5431386232376099,
+      "learning_rate": 0.00012088388613301416,
+      "loss": 0.2133,
+      "step": 176
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.5148514851485149,
+      "eval_f1": 0.14035087719298245,
+      "eval_loss": 0.20053359866142273,
+      "eval_precision": 0.6060606060606061,
+      "eval_recall": 0.07936507936507936,
+      "eval_runtime": 15.0376,
+      "eval_samples_per_second": 33.582,
+      "eval_steps_per_second": 1.064,
+      "step": 176
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 0.2198910117149353,
+      "learning_rate": 0.00010745234322934593,
+      "loss": 0.2028,
+      "step": 352
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 0.5069306930693069,
+      "eval_f1": 0.1263157894736842,
+      "eval_loss": 0.20091117918491364,
+      "eval_precision": 0.5454545454545454,
+      "eval_recall": 0.07142857142857142,
+      "eval_runtime": 14.9354,
+      "eval_samples_per_second": 33.812,
+      "eval_steps_per_second": 1.071,
+      "step": 352
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 1760,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "total_flos": 1555686566280.0,
+  "train_batch_size": 54,
+  "trial_name": null,
+  "trial_params": {
+    "alpha": 0.239256796645401,
+    "learning_rate": 0.0001343154290366824,
+    "num_train_epochs": 10,
+    "per_device_train_batch_size": 54,
+    "temperature": 13
+  }
+}

run-3/checkpoint-352/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:71e3fa9f35ba1a109d4dbcaa99a0da7b79ffac3932fc22f17787de76b9701cac
+size 4920

run-3/checkpoint-352/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer.json CHANGED Viewed

@@ -2,7 +2,7 @@
   "version": "1.0",
   "truncation": {
     "direction": "Right",
-    "max_length": 31,
     "strategy": "LongestFirst",
     "stride": 0
   },

   "version": "1.0",
   "truncation": {
     "direction": "Right",
+    "max_length": 33,
     "strategy": "LongestFirst",
     "stride": 0
   },

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:28fccab1cf160e9a976a6e2833e6b2e52a870fecc8101aaab94f3f44ebbc487e
 size 4920

 version https://git-lfs.github.com/spec/v1
+oid sha256:71e3fa9f35ba1a109d4dbcaa99a0da7b79ffac3932fc22f17787de76b9701cac
 size 4920