xuancoblab2023 committed on Apr 20, 2024

Commit

3468edf

verified ·

1 Parent(s): 332f821

Training in progress, epoch 1

Browse files

Files changed (26) hide show

logs/events.out.tfevents.1713611082.a9446dbff3d4.7440.3 +2 -2
logs/events.out.tfevents.1713611132.a9446dbff3d4.7440.4 +3 -0
model.safetensors +1 -1
run-1/checkpoint-800/config.json +34 -0
run-1/checkpoint-800/model.safetensors +3 -0
run-1/checkpoint-800/optimizer.pt +3 -0
run-1/checkpoint-800/rng_state.pth +3 -0
run-1/checkpoint-800/scheduler.pt +3 -0
run-1/checkpoint-800/special_tokens_map.json +7 -0
run-1/checkpoint-800/tokenizer.json +0 -0
run-1/checkpoint-800/tokenizer_config.json +57 -0
run-1/checkpoint-800/trainer_state.json +126 -0
run-1/checkpoint-800/training_args.bin +3 -0
run-1/checkpoint-800/vocab.txt +0 -0
run-2/checkpoint-160/config.json +34 -0
run-2/checkpoint-160/model.safetensors +3 -0
run-2/checkpoint-160/optimizer.pt +3 -0
run-2/checkpoint-160/rng_state.pth +3 -0
run-2/checkpoint-160/scheduler.pt +3 -0
run-2/checkpoint-160/special_tokens_map.json +7 -0
run-2/checkpoint-160/tokenizer.json +0 -0
run-2/checkpoint-160/tokenizer_config.json +57 -0
run-2/checkpoint-160/trainer_state.json +46 -0
run-2/checkpoint-160/training_args.bin +3 -0
run-2/checkpoint-160/vocab.txt +0 -0
training_args.bin +1 -1

logs/events.out.tfevents.1713611082.a9446dbff3d4.7440.3 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:33f7fe348e090a15c367ea897dfde0f26f263d6e6a7efc49ecacefadfa746edb
-size 6211

 version https://git-lfs.github.com/spec/v1
+oid sha256:0c6239cb5af2132fcce11ef629b101dfbf31864a7c7b60f7c128242f6d3c2635
+size 8755

logs/events.out.tfevents.1713611132.a9446dbff3d4.7440.4 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d56a4b8dcfc1709c5df8174528b83d8cc15c4e7ead6663aff83ed933b5f060e3
+size 5482

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:50f2b0594ac5309d345714ef79b577867430219bd88635a6039be25de3c34c19
 size 17549312

 version https://git-lfs.github.com/spec/v1
+oid sha256:ed3b3d937d069fa8cc34c6258fb5007b1cb569cefda27a2b494d938e7afd1ac4
 size 17549312

run-1/checkpoint-800/config.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "_name_or_path": "google/bert_uncased_L-2_H-128_A-2",
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 128,
+  "id2label": {
+    "0": "negative",
+    "1": "positive"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 512,
+  "label2id": {
+    "negative": "0",
+    "positive": "1"
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 2,
+  "num_hidden_layers": 2,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "torch_dtype": "float32",
+  "transformers_version": "4.40.0",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}

run-1/checkpoint-800/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ffbf1cd9ebcbf7209e6f52eb0e88c68d037a3625bf3ba539ab9b8da2f9c67a79
+size 17549312

run-1/checkpoint-800/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4da5df282db642d7c528c74c75dbcfc550f60683939050fa2fe2f1bbed54c662
+size 35123898

run-1/checkpoint-800/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4ace9fbf746198b511b010eea6f5f472699d26cf6b3b3dc0129b41ac522dca49
+size 14308

run-1/checkpoint-800/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b38ebaa5035a46adf4f65ea3403412ce3180dd395abc57bffe2e98a3d3415ea8
+size 1064

run-1/checkpoint-800/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-1/checkpoint-800/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-1/checkpoint-800/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,57 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

run-1/checkpoint-800/trainer_state.json ADDED Viewed

	@@ -0,0 +1,126 @@

+{
+  "best_metric": 0.6666666666666666,
+  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-1/checkpoint-160",
+  "epoch": 5.0,
+  "eval_steps": 500,
+  "global_step": 800,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "grad_norm": 0.6969084143638611,
+      "learning_rate": 2.475738136709632e-05,
+      "loss": 0.064,
+      "step": 160
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.6666666666666666,
+      "eval_f1": 0.0,
+      "eval_loss": 0.04275359958410263,
+      "eval_mcc": 0.0,
+      "eval_precision": 0.0,
+      "eval_recall": 0.0,
+      "eval_runtime": 1.9263,
+      "eval_samples_per_second": 661.882,
+      "eval_steps_per_second": 20.765,
+      "step": 160
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 0.23619167506694794,
+      "learning_rate": 1.8568036025322237e-05,
+      "loss": 0.0425,
+      "step": 320
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 0.6666666666666666,
+      "eval_f1": 0.0,
+      "eval_loss": 0.038055840879678726,
+      "eval_mcc": 0.0,
+      "eval_precision": 0.0,
+      "eval_recall": 0.0,
+      "eval_runtime": 1.8737,
+      "eval_samples_per_second": 680.488,
+      "eval_steps_per_second": 21.349,
+      "step": 320
+    },
+    {
+      "epoch": 3.0,
+      "grad_norm": 0.1795702576637268,
+      "learning_rate": 1.237869068354816e-05,
+      "loss": 0.0382,
+      "step": 480
+    },
+    {
+      "epoch": 3.0,
+      "eval_accuracy": 0.6666666666666666,
+      "eval_f1": 0.009324009324009322,
+      "eval_loss": 0.03556740656495094,
+      "eval_mcc": 0.01983408759395048,
+      "eval_precision": 0.5,
+      "eval_recall": 0.004705882352941176,
+      "eval_runtime": 1.8538,
+      "eval_samples_per_second": 687.768,
+      "eval_steps_per_second": 21.577,
+      "step": 480
+    },
+    {
+      "epoch": 4.0,
+      "grad_norm": 0.33438435196876526,
+      "learning_rate": 6.18934534177408e-06,
+      "loss": 0.0364,
+      "step": 640
+    },
+    {
+      "epoch": 4.0,
+      "eval_accuracy": 0.6658823529411765,
+      "eval_f1": 0.013888888888888888,
+      "eval_loss": 0.0341472327709198,
+      "eval_mcc": 0.015010886840871084,
+      "eval_precision": 0.42857142857142855,
+      "eval_recall": 0.007058823529411765,
+      "eval_runtime": 1.8431,
+      "eval_samples_per_second": 691.757,
+      "eval_steps_per_second": 21.702,
+      "step": 640
+    },
+    {
+      "epoch": 5.0,
+      "grad_norm": 0.3762623369693756,
+      "learning_rate": 0.0,
+      "loss": 0.0359,
+      "step": 800
+    },
+    {
+      "epoch": 5.0,
+      "eval_accuracy": 0.6643137254901961,
+      "eval_f1": 0.013824884792626727,
+      "eval_loss": 0.033982668071985245,
+      "eval_mcc": 0.0,
+      "eval_precision": 0.3333333333333333,
+      "eval_recall": 0.007058823529411765,
+      "eval_runtime": 1.8546,
+      "eval_samples_per_second": 687.49,
+      "eval_steps_per_second": 21.568,
+      "step": 800
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 800,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
+  "save_steps": 500,
+  "total_flos": 1458777189600.0,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": {
+    "alpha": 0.031053442748151472,
+    "learning_rate": 3.09467267088704e-05,
+    "num_train_epochs": 5,
+    "temperature": 3
+  }
+}

run-1/checkpoint-800/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bb9f8cf4dc423dcb7868c518785a0253072977db3803f1a1b0ba4c814e989172
+size 5048

run-1/checkpoint-800/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-2/checkpoint-160/config.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "_name_or_path": "google/bert_uncased_L-2_H-128_A-2",
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 128,
+  "id2label": {
+    "0": "negative",
+    "1": "positive"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 512,
+  "label2id": {
+    "negative": "0",
+    "positive": "1"
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 2,
+  "num_hidden_layers": 2,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "torch_dtype": "float32",
+  "transformers_version": "4.40.0",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}

run-2/checkpoint-160/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ed3b3d937d069fa8cc34c6258fb5007b1cb569cefda27a2b494d938e7afd1ac4
+size 17549312

run-2/checkpoint-160/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8cba58376cc2ddad4eaff7a7ea1c892a277c3c7edf25d0211fda22c3839801bc
+size 35123898

run-2/checkpoint-160/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0792eaff268dd73c8e104b5060a487f4ef56535ad3b58888006338b8bc298137
+size 14308

run-2/checkpoint-160/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:05e632b422821fe0c077c20af55fc323ac5a5c20957f540a07e38fa504e54803
+size 1064

run-2/checkpoint-160/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-2/checkpoint-160/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-2/checkpoint-160/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,57 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

run-2/checkpoint-160/trainer_state.json ADDED Viewed

	@@ -0,0 +1,46 @@

+{
+  "best_metric": 0.6666666666666666,
+  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-2/checkpoint-160",
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 160,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "grad_norm": 0.8257476687431335,
+      "learning_rate": 9.75330205032834e-06,
+      "loss": 0.152,
+      "step": 160
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.6666666666666666,
+      "eval_f1": 0.0,
+      "eval_loss": 0.12596820294857025,
+      "eval_mcc": 0.0,
+      "eval_precision": 0.0,
+      "eval_recall": 0.0,
+      "eval_runtime": 1.8567,
+      "eval_samples_per_second": 686.692,
+      "eval_steps_per_second": 21.543,
+      "step": 160
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 320,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 2,
+  "save_steps": 500,
+  "total_flos": 291755437920.0,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": {
+    "alpha": 0.15677692354654504,
+    "learning_rate": 1.950660410065668e-05,
+    "num_train_epochs": 2,
+    "temperature": 16
+  }
+}

run-2/checkpoint-160/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:911e06bb20ede717ce9c1b860631dc491d7b93ed3f0156050c473bce5083d214
+size 5048

run-2/checkpoint-160/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bb9f8cf4dc423dcb7868c518785a0253072977db3803f1a1b0ba4c814e989172
 size 5048

 version https://git-lfs.github.com/spec/v1
+oid sha256:911e06bb20ede717ce9c1b860631dc491d7b93ed3f0156050c473bce5083d214
 size 5048