HikasaHana commited on Apr 18, 2024

Commit

0013c3b

verified ·

1 Parent(s): 5e14204

Training in progress, epoch 4

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

model.safetensors +1 -1
run-3/checkpoint-1068/config.json +43 -0
run-3/checkpoint-1068/model.safetensors +3 -0
run-3/checkpoint-1068/optimizer.pt +3 -0
run-3/checkpoint-1068/rng_state.pth +3 -0
run-3/checkpoint-1068/scheduler.pt +3 -0
run-3/checkpoint-1068/special_tokens_map.json +37 -0
run-3/checkpoint-1068/tokenizer_config.json +57 -0
run-3/checkpoint-1068/trainer_state.json +76 -0
run-3/checkpoint-1068/training_args.bin +3 -0
run-3/checkpoint-1068/vocab.txt +0 -0
run-4/checkpoint-134/config.json +43 -0
run-4/checkpoint-134/model.safetensors +3 -0
run-4/checkpoint-134/optimizer.pt +3 -0
run-4/checkpoint-134/rng_state.pth +3 -0
run-4/checkpoint-134/scheduler.pt +3 -0
run-4/checkpoint-134/special_tokens_map.json +37 -0
run-4/checkpoint-134/tokenizer_config.json +57 -0
run-4/checkpoint-134/trainer_state.json +35 -0
run-4/checkpoint-134/training_args.bin +3 -0
run-4/checkpoint-134/vocab.txt +0 -0
run-4/checkpoint-268/config.json +43 -0
run-4/checkpoint-268/model.safetensors +3 -0
run-4/checkpoint-268/optimizer.pt +3 -0
run-4/checkpoint-268/rng_state.pth +3 -0
run-4/checkpoint-268/scheduler.pt +3 -0
run-4/checkpoint-268/special_tokens_map.json +37 -0
run-4/checkpoint-268/tokenizer_config.json +57 -0
run-4/checkpoint-268/trainer_state.json +44 -0
run-4/checkpoint-268/training_args.bin +3 -0
run-4/checkpoint-268/vocab.txt +0 -0
run-4/checkpoint-402/config.json +43 -0
run-4/checkpoint-402/model.safetensors +3 -0
run-4/checkpoint-402/optimizer.pt +3 -0
run-4/checkpoint-402/rng_state.pth +3 -0
run-4/checkpoint-402/scheduler.pt +3 -0
run-4/checkpoint-402/special_tokens_map.json +37 -0
run-4/checkpoint-402/tokenizer_config.json +57 -0
run-4/checkpoint-402/trainer_state.json +53 -0
run-4/checkpoint-402/training_args.bin +3 -0
run-4/checkpoint-402/vocab.txt +0 -0
run-4/checkpoint-536/config.json +43 -0
run-4/checkpoint-536/model.safetensors +3 -0
run-4/checkpoint-536/optimizer.pt +3 -0
run-4/checkpoint-536/rng_state.pth +3 -0
run-4/checkpoint-536/scheduler.pt +3 -0
run-4/checkpoint-536/special_tokens_map.json +37 -0
run-4/checkpoint-536/tokenizer_config.json +57 -0
run-4/checkpoint-536/trainer_state.json +69 -0
run-4/checkpoint-536/training_args.bin +3 -0

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:05281ccfdb04657dc1f21097d3ad9814c9a9b02f644351f16dffed288040b382
 size 409103316

 version https://git-lfs.github.com/spec/v1
+oid sha256:7e7180d1e19a898f1beb292a73f8386dde07cf28e7b65dcfb30d59b43d88e596
 size 409103316

run-3/checkpoint-1068/config.json ADDED Viewed

	@@ -0,0 +1,43 @@

+{
+  "_name_or_path": "hfl/chinese-bert-wwm-ext",
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "directionality": "bidi",
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "NEGATIVE",
+    "1": "POSITIVE",
+    "2": "NEUTRAL"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "NEGATIVE": 0,
+    "NEUTRAL": 2,
+    "POSITIVE": 1
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "output_past": true,
+  "pad_token_id": 0,
+  "pooler_fc_size": 768,
+  "pooler_num_attention_heads": 12,
+  "pooler_num_fc_layers": 3,
+  "pooler_size_per_head": 128,
+  "pooler_type": "first_token_transform",
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "torch_dtype": "float32",
+  "transformers_version": "4.38.2",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 21128
+}

run-3/checkpoint-1068/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2560b43fe84762a911d714a67fc5c2919fee7fafebcd308e914817d2dc8566ef
+size 409103316

run-3/checkpoint-1068/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:854f66ea0079d652fdf4f15eeae49e6b2a1b1c1adb91ab7348289dfc954ec4ae
+size 818327802

run-3/checkpoint-1068/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:250d95c13c6d803d5fd5b5d295b3c98a11c83777eb8bc2925a16ee9368fd5b57
+size 14244

run-3/checkpoint-1068/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8b015e282a908a0b0a7df9e3b39dbc5733ce7bd87981602ed5348ef6e624452a
+size 1064

run-3/checkpoint-1068/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,37 @@

+{
+  "cls_token": {
+    "content": "[CLS]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "[MASK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "[SEP]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

run-3/checkpoint-1068/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,57 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 1000000000000000019884624838656,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

run-3/checkpoint-1068/trainer_state.json ADDED Viewed

	@@ -0,0 +1,76 @@

+{
+  "best_metric": 0.6060042381286621,
+  "best_model_checkpoint": "BERT-WMM/run-3/checkpoint-534",
+  "epoch": 4.0,
+  "eval_steps": 500,
+  "global_step": 1068,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.7262910798122065,
+      "eval_loss": 0.6716192960739136,
+      "eval_runtime": 2.087,
+      "eval_samples_per_second": 1020.621,
+      "eval_steps_per_second": 64.208,
+      "step": 267
+    },
+    {
+      "epoch": 1.87,
+      "grad_norm": 8.963554382324219,
+      "learning_rate": 2.063992947537006e-05,
+      "loss": 0.5881,
+      "step": 500
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 0.7652582159624414,
+      "eval_loss": 0.6060042381286621,
+      "eval_runtime": 2.1035,
+      "eval_samples_per_second": 1012.591,
+      "eval_steps_per_second": 63.703,
+      "step": 534
+    },
+    {
+      "epoch": 3.0,
+      "eval_accuracy": 0.7680751173708921,
+      "eval_loss": 0.7311103343963623,
+      "eval_runtime": 1.9991,
+      "eval_samples_per_second": 1065.459,
+      "eval_steps_per_second": 67.029,
+      "step": 801
+    },
+    {
+      "epoch": 3.75,
+      "grad_norm": 5.572795391082764,
+      "learning_rate": 2.470977472403458e-06,
+      "loss": 0.2017,
+      "step": 1000
+    },
+    {
+      "epoch": 4.0,
+      "eval_accuracy": 0.7690140845070422,
+      "eval_loss": 0.9167852997779846,
+      "eval_runtime": 2.1137,
+      "eval_samples_per_second": 1007.72,
+      "eval_steps_per_second": 63.396,
+      "step": 1068
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 1068,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 4,
+  "save_steps": 500,
+  "total_flos": 697511614823352.0,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": {
+    "learning_rate": 3.8808881478336665e-05,
+    "num_train_epochs": 4,
+    "per_device_train_batch_size": 32,
+    "weight_decay": 1.7684940065509674e-05
+  }
+}

run-3/checkpoint-1068/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f6332b6cec121d165520e5db70de2b1562eb8b726bb5c9d5f52be871bd32d17a
+size 4856

run-3/checkpoint-1068/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-4/checkpoint-134/config.json ADDED Viewed

	@@ -0,0 +1,43 @@

+{
+  "_name_or_path": "hfl/chinese-bert-wwm-ext",
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "directionality": "bidi",
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "NEGATIVE",
+    "1": "POSITIVE",
+    "2": "NEUTRAL"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "NEGATIVE": 0,
+    "NEUTRAL": 2,
+    "POSITIVE": 1
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "output_past": true,
+  "pad_token_id": 0,
+  "pooler_fc_size": 768,
+  "pooler_num_attention_heads": 12,
+  "pooler_num_fc_layers": 3,
+  "pooler_size_per_head": 128,
+  "pooler_type": "first_token_transform",
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "torch_dtype": "float32",
+  "transformers_version": "4.38.2",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 21128
+}

run-4/checkpoint-134/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9c67fb7b9358f57bfdaef0c3589bb5904c67c8347682bd50405361ed65907e33
+size 409103316

run-4/checkpoint-134/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2ff7a2c56a2166efd74e24bd5712b8fd2fd7e799c8835c6efdc74ecaa1e64002
+size 818327802

run-4/checkpoint-134/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4942a00faeddfc94e42d2a294bdfa76898722e11695841daac4e4e6ccf109842
+size 14244

run-4/checkpoint-134/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9c5a4f9e43d7415e006adfefe55fd5f201e0050139566d60db3f9915f9f0e69d
+size 1064

run-4/checkpoint-134/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,37 @@

+{
+  "cls_token": {
+    "content": "[CLS]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "[MASK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "[SEP]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

run-4/checkpoint-134/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,57 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 1000000000000000019884624838656,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

run-4/checkpoint-134/trainer_state.json ADDED Viewed

	@@ -0,0 +1,35 @@

+{
+  "best_metric": 0.6610550880432129,
+  "best_model_checkpoint": "BERT-WMM/run-4/checkpoint-134",
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 134,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.7248826291079812,
+      "eval_loss": 0.6610550880432129,
+      "eval_runtime": 2.0768,
+      "eval_samples_per_second": 1025.597,
+      "eval_steps_per_second": 64.521,
+      "step": 134
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 804,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 6,
+  "save_steps": 500,
+  "total_flos": 0,
+  "train_batch_size": 64,
+  "trial_name": null,
+  "trial_params": {
+    "learning_rate": 1.1434279877264858e-05,
+    "num_train_epochs": 6,
+    "per_device_train_batch_size": 64,
+    "weight_decay": 0.007798418229484321
+  }
+}

run-4/checkpoint-134/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:edc8ba15acf69ccfa7cad78d9e146e493b9003672aa3b6919156015dfb1a7a16
+size 4856

run-4/checkpoint-134/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-4/checkpoint-268/config.json ADDED Viewed

	@@ -0,0 +1,43 @@

+{
+  "_name_or_path": "hfl/chinese-bert-wwm-ext",
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "directionality": "bidi",
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "NEGATIVE",
+    "1": "POSITIVE",
+    "2": "NEUTRAL"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "NEGATIVE": 0,
+    "NEUTRAL": 2,
+    "POSITIVE": 1
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "output_past": true,
+  "pad_token_id": 0,
+  "pooler_fc_size": 768,
+  "pooler_num_attention_heads": 12,
+  "pooler_num_fc_layers": 3,
+  "pooler_size_per_head": 128,
+  "pooler_type": "first_token_transform",
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "torch_dtype": "float32",
+  "transformers_version": "4.38.2",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 21128
+}

run-4/checkpoint-268/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dc9eb0941b8fb2df459d3d00b865513510c96290e47db2cfc7345527f4e296c0
+size 409103316

run-4/checkpoint-268/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:216c153733556ac4ce9db69c37cbcb2c1162e907d154a9eded2328d829e99e68
+size 818327802

run-4/checkpoint-268/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3710a75654d913cd9587a7637c761de20056b7c11c92135a182c6da49fe818d9
+size 14244

run-4/checkpoint-268/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f20e2d43cb43bcddd3be93609f4dfb28258ca14684d749f83306b1b2d248346f
+size 1064

run-4/checkpoint-268/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,37 @@

+{
+  "cls_token": {
+    "content": "[CLS]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "[MASK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "[SEP]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

run-4/checkpoint-268/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,57 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 1000000000000000019884624838656,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

run-4/checkpoint-268/trainer_state.json ADDED Viewed

	@@ -0,0 +1,44 @@

+{
+  "best_metric": 0.6047325730323792,
+  "best_model_checkpoint": "BERT-WMM/run-4/checkpoint-268",
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 268,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.7248826291079812,
+      "eval_loss": 0.6610550880432129,
+      "eval_runtime": 2.0768,
+      "eval_samples_per_second": 1025.597,
+      "eval_steps_per_second": 64.521,
+      "step": 134
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 0.755868544600939,
+      "eval_loss": 0.6047325730323792,
+      "eval_runtime": 2.0636,
+      "eval_samples_per_second": 1032.163,
+      "eval_steps_per_second": 64.934,
+      "step": 268
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 804,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 6,
+  "save_steps": 500,
+  "total_flos": 0,
+  "train_batch_size": 64,
+  "trial_name": null,
+  "trial_params": {
+    "learning_rate": 1.1434279877264858e-05,
+    "num_train_epochs": 6,
+    "per_device_train_batch_size": 64,
+    "weight_decay": 0.007798418229484321
+  }
+}

run-4/checkpoint-268/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:edc8ba15acf69ccfa7cad78d9e146e493b9003672aa3b6919156015dfb1a7a16
+size 4856

run-4/checkpoint-268/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-4/checkpoint-402/config.json ADDED Viewed

	@@ -0,0 +1,43 @@

+{
+  "_name_or_path": "hfl/chinese-bert-wwm-ext",
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "directionality": "bidi",
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "NEGATIVE",
+    "1": "POSITIVE",
+    "2": "NEUTRAL"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "NEGATIVE": 0,
+    "NEUTRAL": 2,
+    "POSITIVE": 1
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "output_past": true,
+  "pad_token_id": 0,
+  "pooler_fc_size": 768,
+  "pooler_num_attention_heads": 12,
+  "pooler_num_fc_layers": 3,
+  "pooler_size_per_head": 128,
+  "pooler_type": "first_token_transform",
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "torch_dtype": "float32",
+  "transformers_version": "4.38.2",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 21128
+}

run-4/checkpoint-402/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4fa184fe52664b1fd1422fb1186d2c131a277fe4340bbecf4f067a18ebfea8d4
+size 409103316

run-4/checkpoint-402/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a4aad271e6d658426d0d4432898d11ec71c1de8756433f91545e7345a21139f6
+size 818327802

run-4/checkpoint-402/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4fdcda6ce4f391ebd88ccae6650ee532b64877e7fcf08ae7e7f256cfe508f341
+size 14244

run-4/checkpoint-402/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7fe0f6a964ee8f879576dfe922482680498e583a2b9aa9084940f5fc6c0a74ee
+size 1064

run-4/checkpoint-402/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,37 @@

+{
+  "cls_token": {
+    "content": "[CLS]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "[MASK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "[SEP]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

run-4/checkpoint-402/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,57 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 1000000000000000019884624838656,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

run-4/checkpoint-402/trainer_state.json ADDED Viewed

	@@ -0,0 +1,53 @@

+{
+  "best_metric": 0.6047325730323792,
+  "best_model_checkpoint": "BERT-WMM/run-4/checkpoint-268",
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 402,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.7248826291079812,
+      "eval_loss": 0.6610550880432129,
+      "eval_runtime": 2.0768,
+      "eval_samples_per_second": 1025.597,
+      "eval_steps_per_second": 64.521,
+      "step": 134
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 0.755868544600939,
+      "eval_loss": 0.6047325730323792,
+      "eval_runtime": 2.0636,
+      "eval_samples_per_second": 1032.163,
+      "eval_steps_per_second": 64.934,
+      "step": 268
+    },
+    {
+      "epoch": 3.0,
+      "eval_accuracy": 0.7568075117370892,
+      "eval_loss": 0.6309530138969421,
+      "eval_runtime": 2.6635,
+      "eval_samples_per_second": 799.702,
+      "eval_steps_per_second": 50.31,
+      "step": 402
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 804,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 6,
+  "save_steps": 500,
+  "total_flos": 0,
+  "train_batch_size": 64,
+  "trial_name": null,
+  "trial_params": {
+    "learning_rate": 1.1434279877264858e-05,
+    "num_train_epochs": 6,
+    "per_device_train_batch_size": 64,
+    "weight_decay": 0.007798418229484321
+  }
+}

run-4/checkpoint-402/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:edc8ba15acf69ccfa7cad78d9e146e493b9003672aa3b6919156015dfb1a7a16
+size 4856

run-4/checkpoint-402/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-4/checkpoint-536/config.json ADDED Viewed

	@@ -0,0 +1,43 @@

+{
+  "_name_or_path": "hfl/chinese-bert-wwm-ext",
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "directionality": "bidi",
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "NEGATIVE",
+    "1": "POSITIVE",
+    "2": "NEUTRAL"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "NEGATIVE": 0,
+    "NEUTRAL": 2,
+    "POSITIVE": 1
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "output_past": true,
+  "pad_token_id": 0,
+  "pooler_fc_size": 768,
+  "pooler_num_attention_heads": 12,
+  "pooler_num_fc_layers": 3,
+  "pooler_size_per_head": 128,
+  "pooler_type": "first_token_transform",
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "torch_dtype": "float32",
+  "transformers_version": "4.38.2",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 21128
+}

run-4/checkpoint-536/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7e7180d1e19a898f1beb292a73f8386dde07cf28e7b65dcfb30d59b43d88e596
+size 409103316

run-4/checkpoint-536/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0945796164298b8dae5947303aaaf9d12dcf5f2fc2419842a33b25cbf85d840c
+size 818327802

run-4/checkpoint-536/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:68fc68a5a2897f6cb39e3e2ec857ff1fc3fd2c96caa5464a844d2408e486abe7
+size 14244

run-4/checkpoint-536/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:51915d7c9d6b8048cea791a1c7b08ecfce8e5f266f0131ad618c93aed2f4d5cf
+size 1064

run-4/checkpoint-536/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,37 @@

+{
+  "cls_token": {
+    "content": "[CLS]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "[MASK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "[SEP]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

run-4/checkpoint-536/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,57 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 1000000000000000019884624838656,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

run-4/checkpoint-536/trainer_state.json ADDED Viewed

	@@ -0,0 +1,69 @@

+{
+  "best_metric": 0.6047325730323792,
+  "best_model_checkpoint": "BERT-WMM/run-4/checkpoint-268",
+  "epoch": 4.0,
+  "eval_steps": 500,
+  "global_step": 536,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.7248826291079812,
+      "eval_loss": 0.6610550880432129,
+      "eval_runtime": 2.0768,
+      "eval_samples_per_second": 1025.597,
+      "eval_steps_per_second": 64.521,
+      "step": 134
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 0.755868544600939,
+      "eval_loss": 0.6047325730323792,
+      "eval_runtime": 2.0636,
+      "eval_samples_per_second": 1032.163,
+      "eval_steps_per_second": 64.934,
+      "step": 268
+    },
+    {
+      "epoch": 3.0,
+      "eval_accuracy": 0.7568075117370892,
+      "eval_loss": 0.6309530138969421,
+      "eval_runtime": 2.6635,
+      "eval_samples_per_second": 799.702,
+      "eval_steps_per_second": 50.31,
+      "step": 402
+    },
+    {
+      "epoch": 3.73,
+      "grad_norm": 9.669575691223145,
+      "learning_rate": 4.323409306826513e-06,
+      "loss": 0.5385,
+      "step": 500
+    },
+    {
+      "epoch": 4.0,
+      "eval_accuracy": 0.7577464788732394,
+      "eval_loss": 0.6676124334335327,
+      "eval_runtime": 2.0995,
+      "eval_samples_per_second": 1014.528,
+      "eval_steps_per_second": 63.825,
+      "step": 536
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 804,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 6,
+  "save_steps": 500,
+  "total_flos": 709335274032504.0,
+  "train_batch_size": 64,
+  "trial_name": null,
+  "trial_params": {
+    "learning_rate": 1.1434279877264858e-05,
+    "num_train_epochs": 6,
+    "per_device_train_batch_size": 64,
+    "weight_decay": 0.007798418229484321
+  }
+}

run-4/checkpoint-536/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:edc8ba15acf69ccfa7cad78d9e146e493b9003672aa3b6919156015dfb1a7a16
+size 4856