xuancoblab2023 commited on Apr 20, 2024

Commit

72a356d

verified ·

1 Parent(s): 6250445

Training in progress, epoch 1

Browse files

Files changed (37) hide show

logs/events.out.tfevents.1713611625.a9446dbff3d4.7440.15 +2 -2
logs/events.out.tfevents.1713611707.a9446dbff3d4.7440.16 +3 -0
model.safetensors +1 -1
run-13/checkpoint-1120/config.json +34 -0
run-13/checkpoint-1120/model.safetensors +3 -0
run-13/checkpoint-1120/optimizer.pt +3 -0
run-13/checkpoint-1120/rng_state.pth +3 -0
run-13/checkpoint-1120/scheduler.pt +3 -0
run-13/checkpoint-1120/special_tokens_map.json +7 -0
run-13/checkpoint-1120/tokenizer.json +0 -0
run-13/checkpoint-1120/tokenizer_config.json +57 -0
run-13/checkpoint-1120/trainer_state.json +166 -0
run-13/checkpoint-1120/training_args.bin +3 -0
run-13/checkpoint-1120/vocab.txt +0 -0
run-13/checkpoint-1280/config.json +34 -0
run-13/checkpoint-1280/model.safetensors +3 -0
run-13/checkpoint-1280/optimizer.pt +3 -0
run-13/checkpoint-1280/rng_state.pth +3 -0
run-13/checkpoint-1280/scheduler.pt +3 -0
run-13/checkpoint-1280/special_tokens_map.json +7 -0
run-13/checkpoint-1280/tokenizer.json +0 -0
run-13/checkpoint-1280/tokenizer_config.json +57 -0
run-13/checkpoint-1280/trainer_state.json +186 -0
run-13/checkpoint-1280/training_args.bin +3 -0
run-13/checkpoint-1280/vocab.txt +0 -0
run-14/checkpoint-160/config.json +34 -0
run-14/checkpoint-160/model.safetensors +3 -0
run-14/checkpoint-160/optimizer.pt +3 -0
run-14/checkpoint-160/rng_state.pth +3 -0
run-14/checkpoint-160/scheduler.pt +3 -0
run-14/checkpoint-160/special_tokens_map.json +7 -0
run-14/checkpoint-160/tokenizer.json +0 -0
run-14/checkpoint-160/tokenizer_config.json +57 -0
run-14/checkpoint-160/trainer_state.json +46 -0
run-14/checkpoint-160/training_args.bin +3 -0
run-14/checkpoint-160/vocab.txt +0 -0
training_args.bin +1 -1

logs/events.out.tfevents.1713611625.a9446dbff3d4.7440.15 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:06983d2ad03d21b35010f689a27c474f7ad7bf3dbc5783a05ba2461586a91e2e
-size 6211

 version https://git-lfs.github.com/spec/v1
+oid sha256:20fbf3cb59c706cdba8bd067419ff325edb77912e498c8b7b7bd6c1cb81c42c1
+size 10945

logs/events.out.tfevents.1713611707.a9446dbff3d4.7440.16 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:28043178bd882b340f3230feba3deb72bc4e78ca58adc105f0a62124530afefa
+size 5483

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:92c9f6092ec70603457429477d4808f8093c435713e779bb0e2b655287caa418
 size 17549312

 version https://git-lfs.github.com/spec/v1
+oid sha256:c0142724fa20ed611e1b33d0280d454e12ec1c71b9b71578e3db5fc76a38a438
 size 17549312

run-13/checkpoint-1120/config.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "_name_or_path": "google/bert_uncased_L-2_H-128_A-2",
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 128,
+  "id2label": {
+    "0": "negative",
+    "1": "positive"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 512,
+  "label2id": {
+    "negative": "0",
+    "positive": "1"
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 2,
+  "num_hidden_layers": 2,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "torch_dtype": "float32",
+  "transformers_version": "4.40.0",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}

run-13/checkpoint-1120/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:85344d367eede7437ae91a7fd4dc3238311ec14e8cb04011ecdc7dbd64d5591b
+size 17549312

run-13/checkpoint-1120/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c64244e69ca644d0cadd2dde3c2028a5a4b77fe79a30dce66a6148bc5f4cd1e1
+size 35123898

run-13/checkpoint-1120/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dff60335fa5283437e5a2cb72f0621ff75353a3210e4a78bc955f52e428c356c
+size 14308

run-13/checkpoint-1120/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a788154f87965c40ee6541d7b11959eab4d1b13ce68666f7a31632ba655b76d9
+size 1064

run-13/checkpoint-1120/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-13/checkpoint-1120/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-13/checkpoint-1120/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,57 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

run-13/checkpoint-1120/trainer_state.json ADDED Viewed

	@@ -0,0 +1,166 @@

+{
+  "best_metric": 0.7607843137254902,
+  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-13/checkpoint-1120",
+  "epoch": 7.0,
+  "eval_steps": 500,
+  "global_step": 1120,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "grad_norm": 1.6348164081573486,
+      "learning_rate": 0.00028293467755282096,
+      "loss": 0.4646,
+      "step": 160
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.668235294117647,
+      "eval_f1": 0.018561484918793503,
+      "eval_loss": 0.43796491622924805,
+      "eval_mcc": 0.04862166383263152,
+      "eval_precision": 0.6666666666666666,
+      "eval_recall": 0.009411764705882352,
+      "eval_runtime": 1.8631,
+      "eval_samples_per_second": 684.329,
+      "eval_steps_per_second": 21.469,
+      "step": 160
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 1.2706644535064697,
+      "learning_rate": 0.000242515437902418,
+      "loss": 0.4357,
+      "step": 320
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 0.7184313725490196,
+      "eval_f1": 0.37565217391304345,
+      "eval_loss": 0.4178254306316376,
+      "eval_mcc": 0.2995107121067069,
+      "eval_precision": 0.72,
+      "eval_recall": 0.2541176470588235,
+      "eval_runtime": 1.8716,
+      "eval_samples_per_second": 681.232,
+      "eval_steps_per_second": 21.372,
+      "step": 320
+    },
+    {
+      "epoch": 3.0,
+      "grad_norm": 2.069754123687744,
+      "learning_rate": 0.00020209619825201498,
+      "loss": 0.4247,
+      "step": 480
+    },
+    {
+      "epoch": 3.0,
+      "eval_accuracy": 0.7419607843137255,
+      "eval_f1": 0.5548037889039241,
+      "eval_loss": 0.4210284948348999,
+      "eval_mcc": 0.38745831003992626,
+      "eval_precision": 0.6528662420382165,
+      "eval_recall": 0.4823529411764706,
+      "eval_runtime": 1.8687,
+      "eval_samples_per_second": 682.3,
+      "eval_steps_per_second": 21.405,
+      "step": 480
+    },
+    {
+      "epoch": 4.0,
+      "grad_norm": 0.9811561703681946,
+      "learning_rate": 0.000161676958601612,
+      "loss": 0.4179,
+      "step": 640
+    },
+    {
+      "epoch": 4.0,
+      "eval_accuracy": 0.7349019607843137,
+      "eval_f1": 0.43666666666666665,
+      "eval_loss": 0.4131487309932709,
+      "eval_mcc": 0.3513388123150658,
+      "eval_precision": 0.7485714285714286,
+      "eval_recall": 0.30823529411764705,
+      "eval_runtime": 1.8772,
+      "eval_samples_per_second": 679.197,
+      "eval_steps_per_second": 21.308,
+      "step": 640
+    },
+    {
+      "epoch": 5.0,
+      "grad_norm": 1.1143430471420288,
+      "learning_rate": 0.000121257718951209,
+      "loss": 0.4113,
+      "step": 800
+    },
+    {
+      "epoch": 5.0,
+      "eval_accuracy": 0.7545098039215686,
+      "eval_f1": 0.5908496732026145,
+      "eval_loss": 0.4124383330345154,
+      "eval_mcc": 0.42389318898939016,
+      "eval_precision": 0.6647058823529411,
+      "eval_recall": 0.5317647058823529,
+      "eval_runtime": 1.8732,
+      "eval_samples_per_second": 680.66,
+      "eval_steps_per_second": 21.354,
+      "step": 800
+    },
+    {
+      "epoch": 6.0,
+      "grad_norm": 1.0988030433654785,
+      "learning_rate": 8.0838479300806e-05,
+      "loss": 0.4058,
+      "step": 960
+    },
+    {
+      "epoch": 6.0,
+      "eval_accuracy": 0.7529411764705882,
+      "eval_f1": 0.5493562231759658,
+      "eval_loss": 0.4068860113620758,
+      "eval_mcc": 0.40775530964263945,
+      "eval_precision": 0.7007299270072993,
+      "eval_recall": 0.45176470588235296,
+      "eval_runtime": 1.8719,
+      "eval_samples_per_second": 681.136,
+      "eval_steps_per_second": 21.369,
+      "step": 960
+    },
+    {
+      "epoch": 7.0,
+      "grad_norm": 1.373121738433838,
+      "learning_rate": 4.0419239650403e-05,
+      "loss": 0.407,
+      "step": 1120
+    },
+    {
+      "epoch": 7.0,
+      "eval_accuracy": 0.7607843137254902,
+      "eval_f1": 0.5960264900662252,
+      "eval_loss": 0.4111056327819824,
+      "eval_mcc": 0.43684887642861214,
+      "eval_precision": 0.6818181818181818,
+      "eval_recall": 0.5294117647058824,
+      "eval_runtime": 1.8826,
+      "eval_samples_per_second": 677.271,
+      "eval_steps_per_second": 21.248,
+      "step": 1120
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 1280,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 8,
+  "save_steps": 500,
+  "total_flos": 2042288065440.0,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": {
+    "alpha": 0.7266899336949371,
+    "learning_rate": 0.000323353917203224,
+    "num_train_epochs": 8,
+    "temperature": 24
+  }
+}

run-13/checkpoint-1120/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b75d1a13345b1b7409ffa5572fa151259b0166dcbee52e95898829d69dc6b9cd
+size 5048

run-13/checkpoint-1120/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-13/checkpoint-1280/config.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "_name_or_path": "google/bert_uncased_L-2_H-128_A-2",
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 128,
+  "id2label": {
+    "0": "negative",
+    "1": "positive"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 512,
+  "label2id": {
+    "negative": "0",
+    "positive": "1"
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 2,
+  "num_hidden_layers": 2,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "torch_dtype": "float32",
+  "transformers_version": "4.40.0",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}

run-13/checkpoint-1280/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b4725914f21b654da2a144be28990cf66590da4bba7e0da5df44d30bfeadb485
+size 17549312

run-13/checkpoint-1280/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:67db50ca76c00239e6dab82dbb4b16129282d06ffc80691cfe74e7efb90ea8cc
+size 35123898

run-13/checkpoint-1280/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:242da79f0f40d17a976877b7744318ca279c3e455ed5f3332156e384fbc450c4
+size 14308

run-13/checkpoint-1280/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:afad99148381fbd0ea38e76b99ebe31b1c6ba2f1dbe0b792550114cd3f1482b6
+size 1064

run-13/checkpoint-1280/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-13/checkpoint-1280/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-13/checkpoint-1280/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,57 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

run-13/checkpoint-1280/trainer_state.json ADDED Viewed

	@@ -0,0 +1,186 @@

+{
+  "best_metric": 0.7607843137254902,
+  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-13/checkpoint-1120",
+  "epoch": 8.0,
+  "eval_steps": 500,
+  "global_step": 1280,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "grad_norm": 1.6348164081573486,
+      "learning_rate": 0.00028293467755282096,
+      "loss": 0.4646,
+      "step": 160
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.668235294117647,
+      "eval_f1": 0.018561484918793503,
+      "eval_loss": 0.43796491622924805,
+      "eval_mcc": 0.04862166383263152,
+      "eval_precision": 0.6666666666666666,
+      "eval_recall": 0.009411764705882352,
+      "eval_runtime": 1.8631,
+      "eval_samples_per_second": 684.329,
+      "eval_steps_per_second": 21.469,
+      "step": 160
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 1.2706644535064697,
+      "learning_rate": 0.000242515437902418,
+      "loss": 0.4357,
+      "step": 320
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 0.7184313725490196,
+      "eval_f1": 0.37565217391304345,
+      "eval_loss": 0.4178254306316376,
+      "eval_mcc": 0.2995107121067069,
+      "eval_precision": 0.72,
+      "eval_recall": 0.2541176470588235,
+      "eval_runtime": 1.8716,
+      "eval_samples_per_second": 681.232,
+      "eval_steps_per_second": 21.372,
+      "step": 320
+    },
+    {
+      "epoch": 3.0,
+      "grad_norm": 2.069754123687744,
+      "learning_rate": 0.00020209619825201498,
+      "loss": 0.4247,
+      "step": 480
+    },
+    {
+      "epoch": 3.0,
+      "eval_accuracy": 0.7419607843137255,
+      "eval_f1": 0.5548037889039241,
+      "eval_loss": 0.4210284948348999,
+      "eval_mcc": 0.38745831003992626,
+      "eval_precision": 0.6528662420382165,
+      "eval_recall": 0.4823529411764706,
+      "eval_runtime": 1.8687,
+      "eval_samples_per_second": 682.3,
+      "eval_steps_per_second": 21.405,
+      "step": 480
+    },
+    {
+      "epoch": 4.0,
+      "grad_norm": 0.9811561703681946,
+      "learning_rate": 0.000161676958601612,
+      "loss": 0.4179,
+      "step": 640
+    },
+    {
+      "epoch": 4.0,
+      "eval_accuracy": 0.7349019607843137,
+      "eval_f1": 0.43666666666666665,
+      "eval_loss": 0.4131487309932709,
+      "eval_mcc": 0.3513388123150658,
+      "eval_precision": 0.7485714285714286,
+      "eval_recall": 0.30823529411764705,
+      "eval_runtime": 1.8772,
+      "eval_samples_per_second": 679.197,
+      "eval_steps_per_second": 21.308,
+      "step": 640
+    },
+    {
+      "epoch": 5.0,
+      "grad_norm": 1.1143430471420288,
+      "learning_rate": 0.000121257718951209,
+      "loss": 0.4113,
+      "step": 800
+    },
+    {
+      "epoch": 5.0,
+      "eval_accuracy": 0.7545098039215686,
+      "eval_f1": 0.5908496732026145,
+      "eval_loss": 0.4124383330345154,
+      "eval_mcc": 0.42389318898939016,
+      "eval_precision": 0.6647058823529411,
+      "eval_recall": 0.5317647058823529,
+      "eval_runtime": 1.8732,
+      "eval_samples_per_second": 680.66,
+      "eval_steps_per_second": 21.354,
+      "step": 800
+    },
+    {
+      "epoch": 6.0,
+      "grad_norm": 1.0988030433654785,
+      "learning_rate": 8.0838479300806e-05,
+      "loss": 0.4058,
+      "step": 960
+    },
+    {
+      "epoch": 6.0,
+      "eval_accuracy": 0.7529411764705882,
+      "eval_f1": 0.5493562231759658,
+      "eval_loss": 0.4068860113620758,
+      "eval_mcc": 0.40775530964263945,
+      "eval_precision": 0.7007299270072993,
+      "eval_recall": 0.45176470588235296,
+      "eval_runtime": 1.8719,
+      "eval_samples_per_second": 681.136,
+      "eval_steps_per_second": 21.369,
+      "step": 960
+    },
+    {
+      "epoch": 7.0,
+      "grad_norm": 1.373121738433838,
+      "learning_rate": 4.0419239650403e-05,
+      "loss": 0.407,
+      "step": 1120
+    },
+    {
+      "epoch": 7.0,
+      "eval_accuracy": 0.7607843137254902,
+      "eval_f1": 0.5960264900662252,
+      "eval_loss": 0.4111056327819824,
+      "eval_mcc": 0.43684887642861214,
+      "eval_precision": 0.6818181818181818,
+      "eval_recall": 0.5294117647058824,
+      "eval_runtime": 1.8826,
+      "eval_samples_per_second": 677.271,
+      "eval_steps_per_second": 21.248,
+      "step": 1120
+    },
+    {
+      "epoch": 8.0,
+      "grad_norm": 1.2871596813201904,
+      "learning_rate": 0.0,
+      "loss": 0.401,
+      "step": 1280
+    },
+    {
+      "epoch": 8.0,
+      "eval_accuracy": 0.7521568627450981,
+      "eval_f1": 0.5524079320113314,
+      "eval_loss": 0.4067133963108063,
+      "eval_mcc": 0.40673564442456084,
+      "eval_precision": 0.693950177935943,
+      "eval_recall": 0.4588235294117647,
+      "eval_runtime": 1.8786,
+      "eval_samples_per_second": 678.7,
+      "eval_steps_per_second": 21.293,
+      "step": 1280
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 1280,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 8,
+  "save_steps": 500,
+  "total_flos": 2334043503360.0,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": {
+    "alpha": 0.7266899336949371,
+    "learning_rate": 0.000323353917203224,
+    "num_train_epochs": 8,
+    "temperature": 24
+  }
+}

run-13/checkpoint-1280/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b75d1a13345b1b7409ffa5572fa151259b0166dcbee52e95898829d69dc6b9cd
+size 5048

run-13/checkpoint-1280/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-14/checkpoint-160/config.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "_name_or_path": "google/bert_uncased_L-2_H-128_A-2",
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 128,
+  "id2label": {
+    "0": "negative",
+    "1": "positive"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 512,
+  "label2id": {
+    "negative": "0",
+    "positive": "1"
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 2,
+  "num_hidden_layers": 2,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "torch_dtype": "float32",
+  "transformers_version": "4.40.0",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}

run-14/checkpoint-160/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c0142724fa20ed611e1b33d0280d454e12ec1c71b9b71578e3db5fc76a38a438
+size 17549312

run-14/checkpoint-160/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:06fc69a3c1845172b3302d6428a37b38d3099ba161328d68a4b57da6d70ede95
+size 35123898

run-14/checkpoint-160/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0792eaff268dd73c8e104b5060a487f4ef56535ad3b58888006338b8bc298137
+size 14308

run-14/checkpoint-160/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:df2f8590bbef83e155ed5b7585f819605e89c7ca41b5c1d2dd02f39a439c8a6d
+size 1064

run-14/checkpoint-160/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-14/checkpoint-160/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-14/checkpoint-160/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,57 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

run-14/checkpoint-160/trainer_state.json ADDED Viewed

	@@ -0,0 +1,46 @@

+{
+  "best_metric": 0.6666666666666666,
+  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-14/checkpoint-160",
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 160,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "grad_norm": 1.9069746732711792,
+      "learning_rate": 0.0001514416935217273,
+      "loss": 0.5085,
+      "step": 160
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.6666666666666666,
+      "eval_f1": 0.0,
+      "eval_loss": 0.4942961037158966,
+      "eval_mcc": 0.0,
+      "eval_precision": 0.0,
+      "eval_recall": 0.0,
+      "eval_runtime": 1.8701,
+      "eval_samples_per_second": 681.789,
+      "eval_steps_per_second": 21.389,
+      "step": 160
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 1280,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 8,
+  "save_steps": 500,
+  "total_flos": 291755437920.0,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": {
+    "alpha": 0.7926575748257565,
+    "learning_rate": 0.00017307622116768837,
+    "num_train_epochs": 8,
+    "temperature": 11
+  }
+}

run-14/checkpoint-160/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4b6b19052d4c806997afdfae7afcfc25427cf0da55d0d521079b159e1e5e6a52
+size 5048

run-14/checkpoint-160/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b75d1a13345b1b7409ffa5572fa151259b0166dcbee52e95898829d69dc6b9cd
 size 5048

 version https://git-lfs.github.com/spec/v1
+oid sha256:4b6b19052d4c806997afdfae7afcfc25427cf0da55d0d521079b159e1e5e6a52
 size 5048