HikasaHana commited on Apr 18, 2024

Commit

664081b

verified ·

1 Parent(s): d67122c

Training in progress, epoch 1

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

model.safetensors +1 -1
run-11/checkpoint-533/config.json +43 -0
run-11/checkpoint-533/model.safetensors +3 -0
run-11/checkpoint-533/optimizer.pt +3 -0
run-11/checkpoint-533/rng_state.pth +3 -0
run-11/checkpoint-533/scheduler.pt +3 -0
run-11/checkpoint-533/special_tokens_map.json +37 -0
run-11/checkpoint-533/tokenizer_config.json +57 -0
run-11/checkpoint-533/trainer_state.json +39 -0
run-11/checkpoint-533/training_args.bin +3 -0
run-11/checkpoint-533/vocab.txt +0 -0
run-4/checkpoint-1066/config.json +43 -0
run-4/checkpoint-1066/model.safetensors +3 -0
run-4/checkpoint-1066/optimizer.pt +3 -0
run-4/checkpoint-1066/rng_state.pth +3 -0
run-4/checkpoint-1066/scheduler.pt +3 -0
run-4/checkpoint-1066/special_tokens_map.json +37 -0
run-4/checkpoint-1066/tokenizer_config.json +57 -0
run-4/checkpoint-1066/trainer_state.json +55 -0
run-4/checkpoint-1066/training_args.bin +3 -0
run-4/checkpoint-1066/vocab.txt +0 -0
run-4/checkpoint-1599/config.json +43 -0
run-4/checkpoint-1599/model.safetensors +3 -0
run-4/checkpoint-1599/optimizer.pt +3 -0
run-4/checkpoint-1599/rng_state.pth +3 -0
run-4/checkpoint-1599/scheduler.pt +3 -0
run-4/checkpoint-1599/special_tokens_map.json +37 -0
run-4/checkpoint-1599/tokenizer_config.json +57 -0
run-4/checkpoint-1599/trainer_state.json +71 -0
run-4/checkpoint-1599/training_args.bin +3 -0
run-4/checkpoint-1599/vocab.txt +0 -0
run-7/checkpoint-1066/config.json +43 -0
run-7/checkpoint-1066/model.safetensors +3 -0
run-7/checkpoint-1066/optimizer.pt +3 -0
run-7/checkpoint-1066/rng_state.pth +3 -0
run-7/checkpoint-1066/scheduler.pt +3 -0
run-7/checkpoint-1066/special_tokens_map.json +37 -0
run-7/checkpoint-1066/tokenizer_config.json +57 -0
run-7/checkpoint-1066/trainer_state.json +55 -0
run-7/checkpoint-1066/training_args.bin +3 -0
run-7/checkpoint-1066/vocab.txt +0 -0
run-7/checkpoint-1599/config.json +43 -0
run-7/checkpoint-1599/model.safetensors +3 -0
run-7/checkpoint-1599/optimizer.pt +3 -0
run-7/checkpoint-1599/rng_state.pth +3 -0
run-7/checkpoint-1599/scheduler.pt +3 -0
run-7/checkpoint-1599/special_tokens_map.json +37 -0
run-7/checkpoint-1599/tokenizer_config.json +57 -0
run-7/checkpoint-1599/trainer_state.json +71 -0
run-7/checkpoint-1599/training_args.bin +3 -0

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f266a6c66c15ba256ad0f5c6881bb173c4f07a0fde2aa3417c850674a0fa8ac7
 size 409103316

 version https://git-lfs.github.com/spec/v1
+oid sha256:dd996b7147ba6cf6f2b8a342d3542effbd5a3220b435041ab40b1f8ce1998aee
 size 409103316

run-11/checkpoint-533/config.json ADDED Viewed

	@@ -0,0 +1,43 @@

+{
+  "_name_or_path": "hfl/chinese-bert-wwm-ext",
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "directionality": "bidi",
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "NEGATIVE",
+    "1": "POSITIVE",
+    "2": "NEUTRAL"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "NEGATIVE": 0,
+    "NEUTRAL": 2,
+    "POSITIVE": 1
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "output_past": true,
+  "pad_token_id": 0,
+  "pooler_fc_size": 768,
+  "pooler_num_attention_heads": 12,
+  "pooler_num_fc_layers": 3,
+  "pooler_size_per_head": 128,
+  "pooler_type": "first_token_transform",
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "torch_dtype": "float32",
+  "transformers_version": "4.38.2",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 21128
+}

run-11/checkpoint-533/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dd996b7147ba6cf6f2b8a342d3542effbd5a3220b435041ab40b1f8ce1998aee
+size 409103316

run-11/checkpoint-533/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bfbc7abdb458c169f2da6058d24884383b1bdace0da728ba663f224c910b458f
+size 818327802

run-11/checkpoint-533/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4cd3c5a8822e67ef4af7188dd571a48877b29db4a131ffc031fd52e683869738
+size 14244

run-11/checkpoint-533/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:401fd72e9bcbd0f22a9b0280a80561cb3edac7d8510d49d3e9d3db14619f62a0
+size 1064

run-11/checkpoint-533/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,37 @@

+{
+  "cls_token": {
+    "content": "[CLS]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "[MASK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "[SEP]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

run-11/checkpoint-533/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,57 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 1000000000000000019884624838656,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

run-11/checkpoint-533/trainer_state.json ADDED Viewed

	@@ -0,0 +1,39 @@

+{
+  "best_metric": 0.6336334943771362,
+  "best_model_checkpoint": "BERT-WMM/run-11/checkpoint-533",
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 533,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.94,
+      "grad_norm": 10.442337036132812,
+      "learning_rate": 1.5945617993693323e-05,
+      "loss": 0.7201,
+      "step": 500
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.7488262910798122,
+      "eval_loss": 0.6336334943771362,
+      "eval_runtime": 2.089,
+      "eval_samples_per_second": 1019.614,
+      "eval_steps_per_second": 64.145,
+      "step": 533
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 1599,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "total_flos": 180039467227536.0,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": {
+    "learning_rate": 2.320022126652923e-05
+  }
+}

run-11/checkpoint-533/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4d3de9b98fb94c83891cf253de759cc55e8f7d096cede3d086d146fb14704cee
+size 4856

run-11/checkpoint-533/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-4/checkpoint-1066/config.json ADDED Viewed

	@@ -0,0 +1,43 @@

+{
+  "_name_or_path": "hfl/chinese-bert-wwm-ext",
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "directionality": "bidi",
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "NEGATIVE",
+    "1": "POSITIVE",
+    "2": "NEUTRAL"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "NEGATIVE": 0,
+    "NEUTRAL": 2,
+    "POSITIVE": 1
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "output_past": true,
+  "pad_token_id": 0,
+  "pooler_fc_size": 768,
+  "pooler_num_attention_heads": 12,
+  "pooler_num_fc_layers": 3,
+  "pooler_size_per_head": 128,
+  "pooler_type": "first_token_transform",
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "torch_dtype": "float32",
+  "transformers_version": "4.38.2",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 21128
+}

run-4/checkpoint-1066/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:32cfbb4d4c8c34f1c2ed54631773530d4d26b34b75258c54defc85e69e41d3d1
+size 409103316

run-4/checkpoint-1066/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dce2e2a94309bca8aefdc62792f2c5f3968ed5434de423391674807a48804176
+size 818327802

run-4/checkpoint-1066/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0e93670da55318d823f1e4c03808b51663a5b6c2853342674abb539144107925
+size 14244

run-4/checkpoint-1066/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8435fb26f37c7a19e41410418cae640f183a7e2091ccbf4f9abe1ef6d6a332c3
+size 1064

run-4/checkpoint-1066/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,37 @@

+{
+  "cls_token": {
+    "content": "[CLS]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "[MASK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "[SEP]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

run-4/checkpoint-1066/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,57 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 1000000000000000019884624838656,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

run-4/checkpoint-1066/trainer_state.json ADDED Viewed

	@@ -0,0 +1,55 @@

+{
+  "best_metric": 0.7426179647445679,
+  "best_model_checkpoint": "BERT-WMM/run-4/checkpoint-1066",
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 1066,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.94,
+      "grad_norm": 11.732254028320312,
+      "learning_rate": 9.02435978576971e-07,
+      "loss": 0.9819,
+      "step": 500
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.6502347417840375,
+      "eval_loss": 0.8438959121704102,
+      "eval_runtime": 2.0462,
+      "eval_samples_per_second": 1040.962,
+      "eval_steps_per_second": 65.488,
+      "step": 533
+    },
+    {
+      "epoch": 1.88,
+      "grad_norm": 25.85025978088379,
+      "learning_rate": 4.918645597521434e-07,
+      "loss": 0.8112,
+      "step": 1000
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 0.7023474178403756,
+      "eval_loss": 0.7426179647445679,
+      "eval_runtime": 2.1113,
+      "eval_samples_per_second": 1008.876,
+      "eval_steps_per_second": 63.469,
+      "step": 1066
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 1599,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "total_flos": 338261076519408.0,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": {
+    "learning_rate": 1.3130073974017986e-06
+  }
+}

run-4/checkpoint-1066/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:94cc8e8a51cd4af2cf5c7066ce44a0e4e3b97fb8a666dfb860c46b1031bf00ba
+size 4856

run-4/checkpoint-1066/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-4/checkpoint-1599/config.json ADDED Viewed

	@@ -0,0 +1,43 @@

+{
+  "_name_or_path": "hfl/chinese-bert-wwm-ext",
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "directionality": "bidi",
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "NEGATIVE",
+    "1": "POSITIVE",
+    "2": "NEUTRAL"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "NEGATIVE": 0,
+    "NEUTRAL": 2,
+    "POSITIVE": 1
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "output_past": true,
+  "pad_token_id": 0,
+  "pooler_fc_size": 768,
+  "pooler_num_attention_heads": 12,
+  "pooler_num_fc_layers": 3,
+  "pooler_size_per_head": 128,
+  "pooler_type": "first_token_transform",
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "torch_dtype": "float32",
+  "transformers_version": "4.38.2",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 21128
+}

run-4/checkpoint-1599/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:92bb089876b24a26fe5a22b71c0bf50d7d8202b58548c34a2ff4af0c0f2338f7
+size 409103316

run-4/checkpoint-1599/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ddfaa2570482133bf92e4f658eff570bd0c52ca1ec5ad6cfd93783fe66113d1a
+size 818327802

run-4/checkpoint-1599/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5a0297d8a44379421fcc37b5cd89b37859cf20f999ea1e0c6d33126202fb0483
+size 14244

run-4/checkpoint-1599/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8af4b43b3dabe62d75901e509b6904ea02f1b8ed33dbda81f1768ecb9807cb55
+size 1064

run-4/checkpoint-1599/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,37 @@

+{
+  "cls_token": {
+    "content": "[CLS]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "[MASK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "[SEP]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

run-4/checkpoint-1599/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,57 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 1000000000000000019884624838656,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

run-4/checkpoint-1599/trainer_state.json ADDED Viewed

	@@ -0,0 +1,71 @@

+{
+  "best_metric": 0.715109646320343,
+  "best_model_checkpoint": "BERT-WMM/run-4/checkpoint-1599",
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 1599,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.94,
+      "grad_norm": 11.732254028320312,
+      "learning_rate": 9.02435978576971e-07,
+      "loss": 0.9819,
+      "step": 500
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.6502347417840375,
+      "eval_loss": 0.8438959121704102,
+      "eval_runtime": 2.0462,
+      "eval_samples_per_second": 1040.962,
+      "eval_steps_per_second": 65.488,
+      "step": 533
+    },
+    {
+      "epoch": 1.88,
+      "grad_norm": 25.85025978088379,
+      "learning_rate": 4.918645597521434e-07,
+      "loss": 0.8112,
+      "step": 1000
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 0.7023474178403756,
+      "eval_loss": 0.7426179647445679,
+      "eval_runtime": 2.1113,
+      "eval_samples_per_second": 1008.876,
+      "eval_steps_per_second": 63.469,
+      "step": 1066
+    },
+    {
+      "epoch": 2.81,
+      "grad_norm": 10.392358779907227,
+      "learning_rate": 8.129314092731586e-08,
+      "loss": 0.7317,
+      "step": 1500
+    },
+    {
+      "epoch": 3.0,
+      "eval_accuracy": 0.7145539906103286,
+      "eval_loss": 0.715109646320343,
+      "eval_runtime": 2.0834,
+      "eval_samples_per_second": 1022.379,
+      "eval_steps_per_second": 64.319,
+      "step": 1599
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 1599,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "total_flos": 507646505902536.0,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": {
+    "learning_rate": 1.3130073974017986e-06
+  }
+}

run-4/checkpoint-1599/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:94cc8e8a51cd4af2cf5c7066ce44a0e4e3b97fb8a666dfb860c46b1031bf00ba
+size 4856

run-4/checkpoint-1599/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-7/checkpoint-1066/config.json ADDED Viewed

	@@ -0,0 +1,43 @@

+{
+  "_name_or_path": "hfl/chinese-bert-wwm-ext",
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "directionality": "bidi",
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "NEGATIVE",
+    "1": "POSITIVE",
+    "2": "NEUTRAL"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "NEGATIVE": 0,
+    "NEUTRAL": 2,
+    "POSITIVE": 1
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "output_past": true,
+  "pad_token_id": 0,
+  "pooler_fc_size": 768,
+  "pooler_num_attention_heads": 12,
+  "pooler_num_fc_layers": 3,
+  "pooler_size_per_head": 128,
+  "pooler_type": "first_token_transform",
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "torch_dtype": "float32",
+  "transformers_version": "4.38.2",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 21128
+}

run-7/checkpoint-1066/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7d687896574b3cfbcbd77dc675eb3ba460a5e0b2d02f7f67dd67a1c46be651a7
+size 409103316

run-7/checkpoint-1066/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1f926aa034e43bfd8358a06cee96ad5fdae50a44a683a1bcbcdded88a17a2859
+size 818327802

run-7/checkpoint-1066/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0e93670da55318d823f1e4c03808b51663a5b6c2853342674abb539144107925
+size 14244

run-7/checkpoint-1066/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6df23cc8585b78952fac85340f47228ba271da3c4c9a808084429b3cc9655377
+size 1064

run-7/checkpoint-1066/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,37 @@

+{
+  "cls_token": {
+    "content": "[CLS]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "[MASK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "[SEP]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

run-7/checkpoint-1066/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,57 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 1000000000000000019884624838656,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

run-7/checkpoint-1066/trainer_state.json ADDED Viewed

	@@ -0,0 +1,55 @@

+{
+  "best_metric": 0.6044057011604309,
+  "best_model_checkpoint": "BERT-WMM/run-7/checkpoint-533",
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 1066,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.94,
+      "grad_norm": 10.467696189880371,
+      "learning_rate": 1.2929689145457759e-05,
+      "loss": 0.7152,
+      "step": 500
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.747887323943662,
+      "eval_loss": 0.6044057011604309,
+      "eval_runtime": 2.0899,
+      "eval_samples_per_second": 1019.204,
+      "eval_steps_per_second": 64.119,
+      "step": 533
+    },
+    {
+      "epoch": 1.88,
+      "grad_norm": 12.011512756347656,
+      "learning_rate": 7.047210007396903e-06,
+      "loss": 0.456,
+      "step": 1000
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 0.7633802816901408,
+      "eval_loss": 0.6140081882476807,
+      "eval_runtime": 2.0772,
+      "eval_samples_per_second": 1025.443,
+      "eval_steps_per_second": 64.511,
+      "step": 1066
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 1599,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "total_flos": 348929503378848.0,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": {
+    "learning_rate": 1.8812168283518612e-05
+  }
+}

run-7/checkpoint-1066/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:614d198bdfc8a658c1a625b161214d455d51113aeb53105ef4726d8c0445a481
+size 4856

run-7/checkpoint-1066/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-7/checkpoint-1599/config.json ADDED Viewed

	@@ -0,0 +1,43 @@

+{
+  "_name_or_path": "hfl/chinese-bert-wwm-ext",
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "directionality": "bidi",
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "NEGATIVE",
+    "1": "POSITIVE",
+    "2": "NEUTRAL"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "NEGATIVE": 0,
+    "NEUTRAL": 2,
+    "POSITIVE": 1
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "output_past": true,
+  "pad_token_id": 0,
+  "pooler_fc_size": 768,
+  "pooler_num_attention_heads": 12,
+  "pooler_num_fc_layers": 3,
+  "pooler_size_per_head": 128,
+  "pooler_type": "first_token_transform",
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "torch_dtype": "float32",
+  "transformers_version": "4.38.2",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 21128
+}

run-7/checkpoint-1599/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a1ec85ae953a566720dcba253732a03d6d4ba0f9bff5fc676ee3b6d442dd1679
+size 409103316

run-7/checkpoint-1599/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:da9c051ae7296b94bc2eb25d5998875a2e9ef9302f3bdbd5b80c97ebcd70ce86
+size 818327802

run-7/checkpoint-1599/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5a0297d8a44379421fcc37b5cd89b37859cf20f999ea1e0c6d33126202fb0483
+size 14244

run-7/checkpoint-1599/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6dcf418105c6ad46f1cdfc8465bbe98ae72e4d2d2e43481489ab059617b93861
+size 1064

run-7/checkpoint-1599/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,37 @@

+{
+  "cls_token": {
+    "content": "[CLS]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "[MASK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "[SEP]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

run-7/checkpoint-1599/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,57 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 1000000000000000019884624838656,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

run-7/checkpoint-1599/trainer_state.json ADDED Viewed

	@@ -0,0 +1,71 @@

+{
+  "best_metric": 0.6044057011604309,
+  "best_model_checkpoint": "BERT-WMM/run-7/checkpoint-533",
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 1599,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.94,
+      "grad_norm": 10.467696189880371,
+      "learning_rate": 1.2929689145457759e-05,
+      "loss": 0.7152,
+      "step": 500
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.747887323943662,
+      "eval_loss": 0.6044057011604309,
+      "eval_runtime": 2.0899,
+      "eval_samples_per_second": 1019.204,
+      "eval_steps_per_second": 64.119,
+      "step": 533
+    },
+    {
+      "epoch": 1.88,
+      "grad_norm": 12.011512756347656,
+      "learning_rate": 7.047210007396903e-06,
+      "loss": 0.456,
+      "step": 1000
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 0.7633802816901408,
+      "eval_loss": 0.6140081882476807,
+      "eval_runtime": 2.0772,
+      "eval_samples_per_second": 1025.443,
+      "eval_steps_per_second": 64.511,
+      "step": 1066
+    },
+    {
+      "epoch": 2.81,
+      "grad_norm": 8.719986915588379,
+      "learning_rate": 1.1647308693360492e-06,
+      "loss": 0.286,
+      "step": 1500
+    },
+    {
+      "epoch": 3.0,
+      "eval_accuracy": 0.7615023474178404,
+      "eval_loss": 0.7435628771781921,
+      "eval_runtime": 2.9291,
+      "eval_samples_per_second": 727.196,
+      "eval_steps_per_second": 45.748,
+      "step": 1599
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 1599,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "total_flos": 518314932761976.0,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": {
+    "learning_rate": 1.8812168283518612e-05
+  }
+}

run-7/checkpoint-1599/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:614d198bdfc8a658c1a625b161214d455d51113aeb53105ef4726d8c0445a481
+size 4856