xuancoblab2023 committed on Apr 20, 2024

Commit

631d470

verified ·

1 Parent(s): a624889

Training in progress, epoch 1

Browse files

Files changed (37) hide show

logs/events.out.tfevents.1713611453.a9446dbff3d4.7440.12 +2 -2
logs/events.out.tfevents.1713611523.a9446dbff3d4.7440.13 +3 -0
model.safetensors +1 -1
run-10/checkpoint-800/config.json +34 -0
run-10/checkpoint-800/model.safetensors +3 -0
run-10/checkpoint-800/optimizer.pt +3 -0
run-10/checkpoint-800/rng_state.pth +3 -0
run-10/checkpoint-800/scheduler.pt +3 -0
run-10/checkpoint-800/special_tokens_map.json +7 -0
run-10/checkpoint-800/tokenizer.json +0 -0
run-10/checkpoint-800/tokenizer_config.json +57 -0
run-10/checkpoint-800/trainer_state.json +126 -0
run-10/checkpoint-800/training_args.bin +3 -0
run-10/checkpoint-800/vocab.txt +0 -0
run-10/checkpoint-960/config.json +34 -0
run-10/checkpoint-960/model.safetensors +3 -0
run-10/checkpoint-960/optimizer.pt +3 -0
run-10/checkpoint-960/rng_state.pth +3 -0
run-10/checkpoint-960/scheduler.pt +3 -0
run-10/checkpoint-960/special_tokens_map.json +7 -0
run-10/checkpoint-960/tokenizer.json +0 -0
run-10/checkpoint-960/tokenizer_config.json +57 -0
run-10/checkpoint-960/trainer_state.json +146 -0
run-10/checkpoint-960/training_args.bin +3 -0
run-10/checkpoint-960/vocab.txt +0 -0
run-11/checkpoint-160/config.json +34 -0
run-11/checkpoint-160/model.safetensors +3 -0
run-11/checkpoint-160/optimizer.pt +3 -0
run-11/checkpoint-160/rng_state.pth +3 -0
run-11/checkpoint-160/scheduler.pt +3 -0
run-11/checkpoint-160/special_tokens_map.json +7 -0
run-11/checkpoint-160/tokenizer.json +0 -0
run-11/checkpoint-160/tokenizer_config.json +57 -0
run-11/checkpoint-160/trainer_state.json +46 -0
run-11/checkpoint-160/training_args.bin +3 -0
run-11/checkpoint-160/vocab.txt +0 -0
training_args.bin +1 -1

logs/events.out.tfevents.1713611453.a9446dbff3d4.7440.12 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:077e622b1ef7a9f904e281c3128cf550a8d692a3b2093bb1b70ac0a5e022354b
-size 6212

 version https://git-lfs.github.com/spec/v1
+oid sha256:412285beccbfea2188cc08e4868178fd8262a8eee5a0b1c1908618b664d02288
+size 9862

logs/events.out.tfevents.1713611523.a9446dbff3d4.7440.13 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:61adc76c615590e29064e0e9141b6e11b054b70bc929f1821d383504302a8a24
+size 5483

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:03dfe0952f0a6030e34124601ff79f276e2ceb48a63399634f680d64e7b92c8a
 size 17549312

 version https://git-lfs.github.com/spec/v1
+oid sha256:32926685d21f549f57d363821973e2f801a85e48500642723d244e67b93d8faa
 size 17549312

run-10/checkpoint-800/config.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "_name_or_path": "google/bert_uncased_L-2_H-128_A-2",
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 128,
+  "id2label": {
+    "0": "negative",
+    "1": "positive"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 512,
+  "label2id": {
+    "negative": "0",
+    "positive": "1"
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 2,
+  "num_hidden_layers": 2,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "torch_dtype": "float32",
+  "transformers_version": "4.40.0",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}

run-10/checkpoint-800/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:442c7792459a5d6d1ca580493df77192d27154d24844fda019c92a1a2c4d3d6b
+size 17549312

run-10/checkpoint-800/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:913ba612823504d3c2680168a7310c6ecdcc268f45746aeb39d45148f05694e3
+size 35123898

run-10/checkpoint-800/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4ace9fbf746198b511b010eea6f5f472699d26cf6b3b3dc0129b41ac522dca49
+size 14308

run-10/checkpoint-800/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c0c76362307ad13296aa8e59bc95fed85b0e611763f49dc91837a93b4279649f
+size 1064

run-10/checkpoint-800/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-10/checkpoint-800/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-10/checkpoint-800/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,57 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

run-10/checkpoint-800/trainer_state.json ADDED Viewed

	@@ -0,0 +1,126 @@

+{
+  "best_metric": 0.7301960784313726,
+  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-10/checkpoint-800",
+  "epoch": 5.0,
+  "eval_steps": 500,
+  "global_step": 800,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "grad_norm": 2.012742280960083,
+      "learning_rate": 0.0007591954920690624,
+      "loss": 0.5869,
+      "step": 160
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.6666666666666666,
+      "eval_f1": 0.0,
+      "eval_loss": 0.5785399675369263,
+      "eval_mcc": 0.0,
+      "eval_precision": 0.0,
+      "eval_recall": 0.0,
+      "eval_runtime": 1.8697,
+      "eval_samples_per_second": 681.924,
+      "eval_steps_per_second": 21.394,
+      "step": 160
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 2.057482957839966,
+      "learning_rate": 0.0006642960555604296,
+      "loss": 0.5715,
+      "step": 320
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 0.676078431372549,
+      "eval_f1": 0.4411366711772666,
+      "eval_loss": 0.5550761222839355,
+      "eval_mcc": 0.22526645932553852,
+      "eval_precision": 0.5191082802547771,
+      "eval_recall": 0.3835294117647059,
+      "eval_runtime": 1.8672,
+      "eval_samples_per_second": 682.827,
+      "eval_steps_per_second": 21.422,
+      "step": 320
+    },
+    {
+      "epoch": 3.0,
+      "grad_norm": 2.645397663116455,
+      "learning_rate": 0.0005693966190517968,
+      "loss": 0.5736,
+      "step": 480
+    },
+    {
+      "epoch": 3.0,
+      "eval_accuracy": 0.7043137254901961,
+      "eval_f1": 0.28733459357277885,
+      "eval_loss": 0.5566152334213257,
+      "eval_mcc": 0.25125318174069416,
+      "eval_precision": 0.7307692307692307,
+      "eval_recall": 0.17882352941176471,
+      "eval_runtime": 1.8663,
+      "eval_samples_per_second": 683.154,
+      "eval_steps_per_second": 21.432,
+      "step": 480
+    },
+    {
+      "epoch": 4.0,
+      "grad_norm": 1.3315376043319702,
+      "learning_rate": 0.000474497182543164,
+      "loss": 0.5569,
+      "step": 640
+    },
+    {
+      "epoch": 4.0,
+      "eval_accuracy": 0.7113725490196079,
+      "eval_f1": 0.4121405750798722,
+      "eval_loss": 0.5367588400840759,
+      "eval_mcc": 0.2830725629191131,
+      "eval_precision": 0.6417910447761194,
+      "eval_recall": 0.3035294117647059,
+      "eval_runtime": 1.8722,
+      "eval_samples_per_second": 681.014,
+      "eval_steps_per_second": 21.365,
+      "step": 640
+    },
+    {
+      "epoch": 5.0,
+      "grad_norm": 1.5584187507629395,
+      "learning_rate": 0.0003795977460345312,
+      "loss": 0.5469,
+      "step": 800
+    },
+    {
+      "epoch": 5.0,
+      "eval_accuracy": 0.7301960784313726,
+      "eval_f1": 0.50997150997151,
+      "eval_loss": 0.5246202945709229,
+      "eval_mcc": 0.3496658305237892,
+      "eval_precision": 0.6462093862815884,
+      "eval_recall": 0.4211764705882353,
+      "eval_runtime": 1.8851,
+      "eval_samples_per_second": 676.35,
+      "eval_steps_per_second": 21.219,
+      "step": 800
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 1440,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 9,
+  "save_steps": 500,
+  "total_flos": 1458777189600.0,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": {
+    "alpha": 0.9116955099903541,
+    "learning_rate": 0.0008540949285776952,
+    "num_train_epochs": 9,
+    "temperature": 21
+  }
+}

run-10/checkpoint-800/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:34acf28a009a32e591bbfa0e1392d8f5ce60c738ac5de3b2d0a889c73580dc19
+size 5048

run-10/checkpoint-800/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-10/checkpoint-960/config.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "_name_or_path": "google/bert_uncased_L-2_H-128_A-2",
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 128,
+  "id2label": {
+    "0": "negative",
+    "1": "positive"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 512,
+  "label2id": {
+    "negative": "0",
+    "positive": "1"
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 2,
+  "num_hidden_layers": 2,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "torch_dtype": "float32",
+  "transformers_version": "4.40.0",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}

run-10/checkpoint-960/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:66f73965da9d37cb87fb28260d07b1861ad6dc8f1f68f4df0681fda5ec30a1e6
+size 17549312

run-10/checkpoint-960/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ee88a9602fa9870a896c72e0093fea956191d279be0245f994f701f908147b24
+size 35123898

run-10/checkpoint-960/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:04833703a2abb12fa47ad4211546498a7ae2dcd9a28a03549753cff4beb5c8aa
+size 14308

run-10/checkpoint-960/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b287167c5ed0ac61b129f7451c1d02b70ff2ea1e90c4038c6878ce104ddfe8f9
+size 1064

run-10/checkpoint-960/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-10/checkpoint-960/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-10/checkpoint-960/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,57 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

run-10/checkpoint-960/trainer_state.json ADDED Viewed

	@@ -0,0 +1,146 @@

+{
+  "best_metric": 0.7301960784313726,
+  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-10/checkpoint-800",
+  "epoch": 6.0,
+  "eval_steps": 500,
+  "global_step": 960,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "grad_norm": 2.012742280960083,
+      "learning_rate": 0.0007591954920690624,
+      "loss": 0.5869,
+      "step": 160
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.6666666666666666,
+      "eval_f1": 0.0,
+      "eval_loss": 0.5785399675369263,
+      "eval_mcc": 0.0,
+      "eval_precision": 0.0,
+      "eval_recall": 0.0,
+      "eval_runtime": 1.8697,
+      "eval_samples_per_second": 681.924,
+      "eval_steps_per_second": 21.394,
+      "step": 160
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 2.057482957839966,
+      "learning_rate": 0.0006642960555604296,
+      "loss": 0.5715,
+      "step": 320
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 0.676078431372549,
+      "eval_f1": 0.4411366711772666,
+      "eval_loss": 0.5550761222839355,
+      "eval_mcc": 0.22526645932553852,
+      "eval_precision": 0.5191082802547771,
+      "eval_recall": 0.3835294117647059,
+      "eval_runtime": 1.8672,
+      "eval_samples_per_second": 682.827,
+      "eval_steps_per_second": 21.422,
+      "step": 320
+    },
+    {
+      "epoch": 3.0,
+      "grad_norm": 2.645397663116455,
+      "learning_rate": 0.0005693966190517968,
+      "loss": 0.5736,
+      "step": 480
+    },
+    {
+      "epoch": 3.0,
+      "eval_accuracy": 0.7043137254901961,
+      "eval_f1": 0.28733459357277885,
+      "eval_loss": 0.5566152334213257,
+      "eval_mcc": 0.25125318174069416,
+      "eval_precision": 0.7307692307692307,
+      "eval_recall": 0.17882352941176471,
+      "eval_runtime": 1.8663,
+      "eval_samples_per_second": 683.154,
+      "eval_steps_per_second": 21.432,
+      "step": 480
+    },
+    {
+      "epoch": 4.0,
+      "grad_norm": 1.3315376043319702,
+      "learning_rate": 0.000474497182543164,
+      "loss": 0.5569,
+      "step": 640
+    },
+    {
+      "epoch": 4.0,
+      "eval_accuracy": 0.7113725490196079,
+      "eval_f1": 0.4121405750798722,
+      "eval_loss": 0.5367588400840759,
+      "eval_mcc": 0.2830725629191131,
+      "eval_precision": 0.6417910447761194,
+      "eval_recall": 0.3035294117647059,
+      "eval_runtime": 1.8722,
+      "eval_samples_per_second": 681.014,
+      "eval_steps_per_second": 21.365,
+      "step": 640
+    },
+    {
+      "epoch": 5.0,
+      "grad_norm": 1.5584187507629395,
+      "learning_rate": 0.0003795977460345312,
+      "loss": 0.5469,
+      "step": 800
+    },
+    {
+      "epoch": 5.0,
+      "eval_accuracy": 0.7301960784313726,
+      "eval_f1": 0.50997150997151,
+      "eval_loss": 0.5246202945709229,
+      "eval_mcc": 0.3496658305237892,
+      "eval_precision": 0.6462093862815884,
+      "eval_recall": 0.4211764705882353,
+      "eval_runtime": 1.8851,
+      "eval_samples_per_second": 676.35,
+      "eval_steps_per_second": 21.219,
+      "step": 800
+    },
+    {
+      "epoch": 6.0,
+      "grad_norm": 3.2840497493743896,
+      "learning_rate": 0.0002846983095258984,
+      "loss": 0.5382,
+      "step": 960
+    },
+    {
+      "epoch": 6.0,
+      "eval_accuracy": 0.7168627450980393,
+      "eval_f1": 0.5305591677503251,
+      "eval_loss": 0.5253874659538269,
+      "eval_mcc": 0.3348618979447744,
+      "eval_precision": 0.5930232558139535,
+      "eval_recall": 0.48,
+      "eval_runtime": 1.8686,
+      "eval_samples_per_second": 682.333,
+      "eval_steps_per_second": 21.407,
+      "step": 960
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 1440,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 9,
+  "save_steps": 500,
+  "total_flos": 1750532627520.0,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": {
+    "alpha": 0.9116955099903541,
+    "learning_rate": 0.0008540949285776952,
+    "num_train_epochs": 9,
+    "temperature": 21
+  }
+}

run-10/checkpoint-960/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:34acf28a009a32e591bbfa0e1392d8f5ce60c738ac5de3b2d0a889c73580dc19
+size 5048

run-10/checkpoint-960/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-11/checkpoint-160/config.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "_name_or_path": "google/bert_uncased_L-2_H-128_A-2",
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 128,
+  "id2label": {
+    "0": "negative",
+    "1": "positive"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 512,
+  "label2id": {
+    "negative": "0",
+    "positive": "1"
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 2,
+  "num_hidden_layers": 2,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "torch_dtype": "float32",
+  "transformers_version": "4.40.0",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}

run-11/checkpoint-160/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:32926685d21f549f57d363821973e2f801a85e48500642723d244e67b93d8faa
+size 17549312

run-11/checkpoint-160/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:32f0f04e4cab582205aab80c9983f31ca39914e90cc4f3740bdb71db100869de
+size 35123898

run-11/checkpoint-160/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0792eaff268dd73c8e104b5060a487f4ef56535ad3b58888006338b8bc298137
+size 14308

run-11/checkpoint-160/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:aee908b8d53c9a8b49b6b40cf931ea7418b1faf606083db5859b8874ee658031
+size 1064

run-11/checkpoint-160/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-11/checkpoint-160/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-11/checkpoint-160/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,57 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

run-11/checkpoint-160/trainer_state.json ADDED Viewed

	@@ -0,0 +1,46 @@

+{
+  "best_metric": 0.6666666666666666,
+  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-11/checkpoint-160",
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 160,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "grad_norm": 1.6721558570861816,
+      "learning_rate": 0.00018503019081556044,
+      "loss": 0.4415,
+      "step": 160
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.6666666666666666,
+      "eval_f1": 0.0,
+      "eval_loss": 0.4279707670211792,
+      "eval_mcc": 0.0,
+      "eval_precision": 0.0,
+      "eval_recall": 0.0,
+      "eval_runtime": 1.867,
+      "eval_samples_per_second": 682.915,
+      "eval_steps_per_second": 21.425,
+      "step": 160
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 1280,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 8,
+  "save_steps": 500,
+  "total_flos": 291755437920.0,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": {
+    "alpha": 0.6824602518835456,
+    "learning_rate": 0.00021146307521778334,
+    "num_train_epochs": 8,
+    "temperature": 21
+  }
+}

run-11/checkpoint-160/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:60cdc985018d73f19e5030e2edd86f80eaf7431ba2a593bc8e0e861bb44a658c
+size 5048

run-11/checkpoint-160/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:34acf28a009a32e591bbfa0e1392d8f5ce60c738ac5de3b2d0a889c73580dc19
 size 5048

 version https://git-lfs.github.com/spec/v1
+oid sha256:60cdc985018d73f19e5030e2edd86f80eaf7431ba2a593bc8e0e861bb44a658c
 size 5048