xuancoblab2023 committed on Apr 20, 2024

Commit

62c0e11

verified ·

1 Parent(s): f656fa7

Training in progress, epoch 1

Browse files

Files changed (26) hide show

logs/events.out.tfevents.1713611290.a9446dbff3d4.7440.7 +2 -2
logs/events.out.tfevents.1713611332.a9446dbff3d4.7440.8 +3 -0
model.safetensors +1 -1
run-5/checkpoint-640/config.json +34 -0
run-5/checkpoint-640/model.safetensors +3 -0
run-5/checkpoint-640/optimizer.pt +3 -0
run-5/checkpoint-640/rng_state.pth +3 -0
run-5/checkpoint-640/scheduler.pt +3 -0
run-5/checkpoint-640/special_tokens_map.json +7 -0
run-5/checkpoint-640/tokenizer.json +0 -0
run-5/checkpoint-640/tokenizer_config.json +57 -0
run-5/checkpoint-640/trainer_state.json +106 -0
run-5/checkpoint-640/training_args.bin +3 -0
run-5/checkpoint-640/vocab.txt +0 -0
run-6/checkpoint-160/config.json +34 -0
run-6/checkpoint-160/model.safetensors +3 -0
run-6/checkpoint-160/optimizer.pt +3 -0
run-6/checkpoint-160/rng_state.pth +3 -0
run-6/checkpoint-160/scheduler.pt +3 -0
run-6/checkpoint-160/special_tokens_map.json +7 -0
run-6/checkpoint-160/tokenizer.json +0 -0
run-6/checkpoint-160/tokenizer_config.json +57 -0
run-6/checkpoint-160/trainer_state.json +46 -0
run-6/checkpoint-160/training_args.bin +3 -0
run-6/checkpoint-160/vocab.txt +0 -0
training_args.bin +1 -1

logs/events.out.tfevents.1713611290.a9446dbff3d4.7440.7 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a46310afb864e247633d7a7a05f1b94dd9d349ee5f408b7ecfe661c442a0fdff
-size 6212

 version https://git-lfs.github.com/spec/v1
+oid sha256:e3a335102ea542d12c51b41f0502658b3c2fd89f0f4787300c390b9aacc71e48
+size 8026

logs/events.out.tfevents.1713611332.a9446dbff3d4.7440.8 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:766c620d60f21e260986a54808ee98b4492ddf72373876e48f761679a36c1e4d
+size 5482

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f525e8451b4902afd8e971b39415474fef1509ada19ad59f421194a0eeaf7efe
 size 17549312

 version https://git-lfs.github.com/spec/v1
+oid sha256:87a9aa369fa4fe8e12c2ac8d6ca8981de3eebcba5b2a7e56b6a7417be034c719
 size 17549312

run-5/checkpoint-640/config.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "_name_or_path": "google/bert_uncased_L-2_H-128_A-2",
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 128,
+  "id2label": {
+    "0": "negative",
+    "1": "positive"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 512,
+  "label2id": {
+    "negative": "0",
+    "positive": "1"
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 2,
+  "num_hidden_layers": 2,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "torch_dtype": "float32",
+  "transformers_version": "4.40.0",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}

run-5/checkpoint-640/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8875c72079f2057d69b99aa9f33c11b77896e824f47ac88701155e0ed2aa32c6
+size 17549312

run-5/checkpoint-640/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b8bba1712f12b52b9421ffa19e2ddef39ad098100e95c7b6ad2f35bd1e2d64e2
+size 35123898

run-5/checkpoint-640/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fae3cb80286aa140b95db910a9642553ceeaeadd75e7dc192487af85f7f7af75
+size 14308

run-5/checkpoint-640/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1e930f47acf49913aa7b3e01d65e76e5191a55289fc5a131e9089604e3fbeeb5
+size 1064

run-5/checkpoint-640/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-5/checkpoint-640/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-5/checkpoint-640/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,57 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

run-5/checkpoint-640/trainer_state.json ADDED Viewed

	@@ -0,0 +1,106 @@

+{
+  "best_metric": 0.6674509803921569,
+  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-5/checkpoint-320",
+  "epoch": 4.0,
+  "eval_steps": 500,
+  "global_step": 640,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "grad_norm": 0.9986019134521484,
+      "learning_rate": 6.067732661916699e-05,
+      "loss": 0.3474,
+      "step": 160
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.6666666666666666,
+      "eval_f1": 0.0,
+      "eval_loss": 0.33498698472976685,
+      "eval_mcc": 0.0,
+      "eval_precision": 0.0,
+      "eval_recall": 0.0,
+      "eval_runtime": 1.8603,
+      "eval_samples_per_second": 685.372,
+      "eval_steps_per_second": 21.502,
+      "step": 160
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 0.9157262444496155,
+      "learning_rate": 4.045155107944466e-05,
+      "loss": 0.3312,
+      "step": 320
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 0.6674509803921569,
+      "eval_f1": 0.004694835680751173,
+      "eval_loss": 0.3251466453075409,
+      "eval_mcc": 0.03962144258751637,
+      "eval_precision": 1.0,
+      "eval_recall": 0.002352941176470588,
+      "eval_runtime": 1.8638,
+      "eval_samples_per_second": 684.076,
+      "eval_steps_per_second": 21.461,
+      "step": 320
+    },
+    {
+      "epoch": 3.0,
+      "grad_norm": 1.6788557767868042,
+      "learning_rate": 2.022577553972233e-05,
+      "loss": 0.3247,
+      "step": 480
+    },
+    {
+      "epoch": 3.0,
+      "eval_accuracy": 0.6666666666666666,
+      "eval_f1": 0.049217002237136466,
+      "eval_loss": 0.32314401865005493,
+      "eval_mcc": 0.046847973719895464,
+      "eval_precision": 0.5,
+      "eval_recall": 0.02588235294117647,
+      "eval_runtime": 1.8829,
+      "eval_samples_per_second": 677.158,
+      "eval_steps_per_second": 21.244,
+      "step": 480
+    },
+    {
+      "epoch": 4.0,
+      "grad_norm": 0.6379467844963074,
+      "learning_rate": 0.0,
+      "loss": 0.3227,
+      "step": 640
+    },
+    {
+      "epoch": 4.0,
+      "eval_accuracy": 0.6666666666666666,
+      "eval_f1": 0.018475750577367205,
+      "eval_loss": 0.3209414780139923,
+      "eval_mcc": 0.028093878027715367,
+      "eval_precision": 0.5,
+      "eval_recall": 0.009411764705882352,
+      "eval_runtime": 1.8827,
+      "eval_samples_per_second": 677.207,
+      "eval_steps_per_second": 21.246,
+      "step": 640
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 640,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 4,
+  "save_steps": 500,
+  "total_flos": 1167021751680.0,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": {
+    "alpha": 0.5122281577891578,
+    "learning_rate": 8.090310215888932e-05,
+    "num_train_epochs": 4,
+    "temperature": 14
+  }
+}

run-5/checkpoint-640/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1ffdc018305c7641913fb33119db0342b974e7cbd16a23cd7b923a830af0c23d
+size 5048

run-5/checkpoint-640/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-6/checkpoint-160/config.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "_name_or_path": "google/bert_uncased_L-2_H-128_A-2",
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 128,
+  "id2label": {
+    "0": "negative",
+    "1": "positive"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 512,
+  "label2id": {
+    "negative": "0",
+    "positive": "1"
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 2,
+  "num_hidden_layers": 2,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "torch_dtype": "float32",
+  "transformers_version": "4.40.0",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}

run-6/checkpoint-160/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:87a9aa369fa4fe8e12c2ac8d6ca8981de3eebcba5b2a7e56b6a7417be034c719
+size 17549312

run-6/checkpoint-160/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b5bc697643b513962a784b5cb3a6cde7fa5309c547c5b0fa691c8178b9e7fdf5
+size 35123898

run-6/checkpoint-160/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0792eaff268dd73c8e104b5060a487f4ef56535ad3b58888006338b8bc298137
+size 14308

run-6/checkpoint-160/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1297ea273aa9c8ab10d13a3075573818c096ced4a00d66b13402cfcad10574e7
+size 1064

run-6/checkpoint-160/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-6/checkpoint-160/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-6/checkpoint-160/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,57 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

run-6/checkpoint-160/trainer_state.json ADDED Viewed

	@@ -0,0 +1,46 @@

+{
+  "best_metric": 0.6666666666666666,
+  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-6/checkpoint-160",
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 160,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "grad_norm": 0.8079733848571777,
+      "learning_rate": 5.379067903396322e-05,
+      "loss": 0.0738,
+      "step": 160
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.6666666666666666,
+      "eval_f1": 0.0,
+      "eval_loss": 0.05636660009622574,
+      "eval_mcc": 0.0,
+      "eval_precision": 0.0,
+      "eval_recall": 0.0,
+      "eval_runtime": 1.8691,
+      "eval_samples_per_second": 682.132,
+      "eval_steps_per_second": 21.4,
+      "step": 160
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 960,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 6,
+  "save_steps": 500,
+  "total_flos": 291755437920.0,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": {
+    "alpha": 0.062310076275025716,
+    "learning_rate": 6.454881484075586e-05,
+    "num_train_epochs": 6,
+    "temperature": 27
+  }
+}

run-6/checkpoint-160/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:874377bb5cc66029b2e8d2a1579f2feeb10b9a486f35c6379123c12f1d35621c
+size 5048

run-6/checkpoint-160/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1ffdc018305c7641913fb33119db0342b974e7cbd16a23cd7b923a830af0c23d
 size 5048

 version https://git-lfs.github.com/spec/v1
+oid sha256:874377bb5cc66029b2e8d2a1579f2feeb10b9a486f35c6379123c12f1d35621c
 size 5048