HikasaHana committed on Apr 18, 2024

Commit

678d666

verified ·

1 Parent(s): b2c6b1f

Training in progress, epoch 1

Browse files

Files changed (28) hide show

model.safetensors +1 -1
run-12/checkpoint-1066/config.json +43 -0
run-12/checkpoint-1066/model.safetensors +3 -0
run-12/checkpoint-1066/optimizer.pt +3 -0
run-12/checkpoint-1066/rng_state.pth +3 -0
run-12/checkpoint-1066/scheduler.pt +3 -0
run-12/checkpoint-1066/special_tokens_map.json +37 -0
run-12/checkpoint-1066/tokenizer_config.json +57 -0
run-12/checkpoint-1066/trainer_state.json +55 -0
run-12/checkpoint-1066/training_args.bin +3 -0
run-12/checkpoint-1066/vocab.txt +0 -0
run-12/checkpoint-1599/config.json +43 -0
run-12/checkpoint-1599/model.safetensors +3 -0
run-12/checkpoint-1599/optimizer.pt +3 -0
run-12/checkpoint-1599/rng_state.pth +3 -0
run-12/checkpoint-1599/scheduler.pt +3 -0
run-12/checkpoint-1599/special_tokens_map.json +37 -0
run-12/checkpoint-1599/tokenizer_config.json +57 -0
run-12/checkpoint-1599/trainer_state.json +71 -0
run-12/checkpoint-1599/training_args.bin +3 -0
run-12/checkpoint-1599/vocab.txt +0 -0
run-13/checkpoint-533/model.safetensors +1 -1
run-13/checkpoint-533/optimizer.pt +1 -1
run-13/checkpoint-533/scheduler.pt +1 -1
run-13/checkpoint-533/trainer_state.json +12 -15
run-13/checkpoint-533/training_args.bin +1 -1
runs/Apr18_16-31-07_544fc269209b/events.out.tfevents.1713459519.544fc269209b.792.13 +3 -0
training_args.bin +1 -1

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fe07259d9730d4e977f0ace3095ed9a12db8ca213a077aeeb4a5734c51a25244
 size 409103316

 version https://git-lfs.github.com/spec/v1
+oid sha256:70eeaf9350225dda5a0f2d7d25da6b9991bac7fadb137f197edf5640a325b3af
 size 409103316

run-12/checkpoint-1066/config.json ADDED Viewed

	@@ -0,0 +1,43 @@

+{
+  "_name_or_path": "hfl/chinese-bert-wwm-ext",
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "directionality": "bidi",
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "NEGATIVE",
+    "1": "POSITIVE",
+    "2": "NEUTRAL"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "NEGATIVE": 0,
+    "NEUTRAL": 2,
+    "POSITIVE": 1
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "output_past": true,
+  "pad_token_id": 0,
+  "pooler_fc_size": 768,
+  "pooler_num_attention_heads": 12,
+  "pooler_num_fc_layers": 3,
+  "pooler_size_per_head": 128,
+  "pooler_type": "first_token_transform",
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "torch_dtype": "float32",
+  "transformers_version": "4.38.2",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 21128
+}

run-12/checkpoint-1066/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c77f3d4b9fd1426ccb708631c63840f2f782b85a4177115d5f7ad195a765127f
+size 409103316

run-12/checkpoint-1066/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:930cfe7f801c357cd26151bafe7a7c79bce1942f6d8f49e81ca2361c3f240571
+size 818327802

run-12/checkpoint-1066/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0e93670da55318d823f1e4c03808b51663a5b6c2853342674abb539144107925
+size 14244

run-12/checkpoint-1066/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:01566d09b959fa7876a7b4c5f191b36630af280370c683efc42faf753dca04ea
+size 1064

run-12/checkpoint-1066/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,37 @@

+{
+  "cls_token": {
+    "content": "[CLS]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "[MASK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "[SEP]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

run-12/checkpoint-1066/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,57 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 1000000000000000019884624838656,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

run-12/checkpoint-1066/trainer_state.json ADDED Viewed

	@@ -0,0 +1,55 @@

+{
+  "best_metric": 0.6433334350585938,
+  "best_model_checkpoint": "BERT-WMM/run-12/checkpoint-1066",
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 1066,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.94,
+      "grad_norm": 10.519768714904785,
+      "learning_rate": 2.0153542741677195e-05,
+      "loss": 0.7357,
+      "step": 500
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.7380281690140845,
+      "eval_loss": 0.6585038304328918,
+      "eval_runtime": 2.092,
+      "eval_samples_per_second": 1018.141,
+      "eval_steps_per_second": 64.052,
+      "step": 533
+    },
+    {
+      "epoch": 1.88,
+      "grad_norm": 10.527291297912598,
+      "learning_rate": 1.0984506007520144e-05,
+      "loss": 0.4807,
+      "step": 1000
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 0.7624413145539906,
+      "eval_loss": 0.6433334350585938,
+      "eval_runtime": 2.1058,
+      "eval_samples_per_second": 1011.511,
+      "eval_steps_per_second": 63.635,
+      "step": 1066
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 1599,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "total_flos": 338261076519408.0,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": {
+    "learning_rate": 2.9322579475834245e-05
+  }
+}

run-12/checkpoint-1066/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b4cc0e168941911ceed1d2a5b00947af0590a3aef308b6c49e334538f60ccac1
+size 4856

run-12/checkpoint-1066/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-12/checkpoint-1599/config.json ADDED Viewed

	@@ -0,0 +1,43 @@

+{
+  "_name_or_path": "hfl/chinese-bert-wwm-ext",
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "directionality": "bidi",
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "NEGATIVE",
+    "1": "POSITIVE",
+    "2": "NEUTRAL"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "NEGATIVE": 0,
+    "NEUTRAL": 2,
+    "POSITIVE": 1
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "output_past": true,
+  "pad_token_id": 0,
+  "pooler_fc_size": 768,
+  "pooler_num_attention_heads": 12,
+  "pooler_num_fc_layers": 3,
+  "pooler_size_per_head": 128,
+  "pooler_type": "first_token_transform",
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "torch_dtype": "float32",
+  "transformers_version": "4.38.2",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 21128
+}

run-12/checkpoint-1599/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0afa6460faa921a394cb65651e8f781c01ef4ec980a2e5f506e92acb5f6f5fba
+size 409103316

run-12/checkpoint-1599/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ca678a0148195272928542e152c5453c9eeeaacd2cba9db27791580af03d7791
+size 818327802

run-12/checkpoint-1599/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5a0297d8a44379421fcc37b5cd89b37859cf20f999ea1e0c6d33126202fb0483
+size 14244

run-12/checkpoint-1599/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ca0187aba9001887cea0c1fea9f711f19a38e350f855f202624d90b355f15904
+size 1064

run-12/checkpoint-1599/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,37 @@

+{
+  "cls_token": {
+    "content": "[CLS]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "[MASK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "[SEP]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

run-12/checkpoint-1599/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,57 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 1000000000000000019884624838656,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

run-12/checkpoint-1599/trainer_state.json ADDED Viewed

	@@ -0,0 +1,71 @@

+{
+  "best_metric": 0.6433334350585938,
+  "best_model_checkpoint": "BERT-WMM/run-12/checkpoint-1066",
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 1599,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.94,
+      "grad_norm": 10.519768714904785,
+      "learning_rate": 2.0153542741677195e-05,
+      "loss": 0.7357,
+      "step": 500
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.7380281690140845,
+      "eval_loss": 0.6585038304328918,
+      "eval_runtime": 2.092,
+      "eval_samples_per_second": 1018.141,
+      "eval_steps_per_second": 64.052,
+      "step": 533
+    },
+    {
+      "epoch": 1.88,
+      "grad_norm": 10.527291297912598,
+      "learning_rate": 1.0984506007520144e-05,
+      "loss": 0.4807,
+      "step": 1000
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 0.7624413145539906,
+      "eval_loss": 0.6433334350585938,
+      "eval_runtime": 2.1058,
+      "eval_samples_per_second": 1011.511,
+      "eval_steps_per_second": 63.635,
+      "step": 1066
+    },
+    {
+      "epoch": 2.81,
+      "grad_norm": 16.033649444580078,
+      "learning_rate": 1.8154692733630958e-06,
+      "loss": 0.2957,
+      "step": 1500
+    },
+    {
+      "epoch": 3.0,
+      "eval_accuracy": 0.7713615023474178,
+      "eval_loss": 0.7535221576690674,
+      "eval_runtime": 2.0955,
+      "eval_samples_per_second": 1016.443,
+      "eval_steps_per_second": 63.945,
+      "step": 1599
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 1599,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "total_flos": 507646505902536.0,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": {
+    "learning_rate": 2.9322579475834245e-05
+  }
+}

run-12/checkpoint-1599/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b4cc0e168941911ceed1d2a5b00947af0590a3aef308b6c49e334538f60ccac1
+size 4856

run-12/checkpoint-1599/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-13/checkpoint-533/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:19cf132e1ec02e0c30d81009a761fc4520c92c64412183f266bf3de7c588a666
 size 409103316

 version https://git-lfs.github.com/spec/v1
+oid sha256:70eeaf9350225dda5a0f2d7d25da6b9991bac7fadb137f197edf5640a325b3af
 size 409103316

run-13/checkpoint-533/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3d2e1b0dfc1f51895712dcfe08b77fa0369aa05eaa30e73279457dc58f81cc6d
 size 818327802

 version https://git-lfs.github.com/spec/v1
+oid sha256:fbc34a4ada868fe18028722fdd3af34b640747388c1efb0592c4f7d7af2e9366
 size 818327802

run-13/checkpoint-533/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:229b4fbc616a55a6a6eb2b1116adb38304a2b6c1cedd5849bbec02d9163089aa
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:28833eb2e810e4dca2d9066862f3ae1fdc7b441f7af868505dbd8ae8ca72e062
 size 1064

run-13/checkpoint-533/trainer_state.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "best_metric": 0.6162846088409424,
   "best_model_checkpoint": "BERT-WMM/run-13/checkpoint-533",
   "epoch": 1.0,
   "eval_steps": 500,
@@ -10,33 +10,30 @@
   "log_history": [
     {
       "epoch": 0.94,
-      "grad_norm": 12.600181579589844,
-      "learning_rate": 1.4607577277616791e-05,
-      "loss": 0.7148,
       "step": 500
     },
     {
       "epoch": 1.0,
-      "eval_accuracy": 0.7577464788732394,
-      "eval_loss": 0.6162846088409424,
-      "eval_runtime": 1.9944,
-      "eval_samples_per_second": 1067.994,
-      "eval_steps_per_second": 67.188,
       "step": 533
     }
   ],
   "logging_steps": 500,
-  "max_steps": 2665,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 5,
   "save_steps": 500,
   "total_flos": 169371040368096.0,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": {
-    "learning_rate": 1.7981151706627596e-05,
-    "num_train_epochs": 5,
-    "per_device_train_batch_size": 16,
-    "weight_decay": 0.009594844383188954
   }
 }

 {
+  "best_metric": 0.6320850253105164,
   "best_model_checkpoint": "BERT-WMM/run-13/checkpoint-533",
   "epoch": 1.0,
   "eval_steps": 500,
   "log_history": [
     {
       "epoch": 0.94,
+      "grad_norm": 10.287070274353027,
+      "learning_rate": 2.5052467945332135e-05,
+      "loss": 0.738,
       "step": 500
     },
     {
       "epoch": 1.0,
+      "eval_accuracy": 0.7553990610328638,
+      "eval_loss": 0.6320850253105164,
+      "eval_runtime": 2.1232,
+      "eval_samples_per_second": 1003.214,
+      "eval_steps_per_second": 63.113,
       "step": 533
     }
   ],
   "logging_steps": 500,
+  "max_steps": 1599,
   "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
   "save_steps": 500,
   "total_flos": 169371040368096.0,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": {
+    "learning_rate": 3.645031505421846e-05
   }
 }

run-13/checkpoint-533/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:487692c351886fb721523721f90e7b355b077959171c7e7c7bf47d30d3da6baa
 size 4856

 version https://git-lfs.github.com/spec/v1
+oid sha256:25eca8561e5e43ced594151a61c7d90ab719b2866c7c317a196854a07d1bafd0
 size 4856

runs/Apr18_16-31-07_544fc269209b/events.out.tfevents.1713459519.544fc269209b.792.13 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2058b196438af8bea2636b30adcd926f3e696e5b78ad3df92d9603b40abf4821
+size 5961

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b4cc0e168941911ceed1d2a5b00947af0590a3aef308b6c49e334538f60ccac1
 size 4856

 version https://git-lfs.github.com/spec/v1
+oid sha256:25eca8561e5e43ced594151a61c7d90ab719b2866c7c317a196854a07d1bafd0
 size 4856