xuancoblab2023 commited on Mar 6, 2024

Commit

7abaf46

verified ·

1 Parent(s): bc28f7d

Training in progress, epoch 1

Browse files

Files changed (22) hide show

logs/events.out.tfevents.1709718096.adc675a344d5.67573.5 +2 -2
logs/events.out.tfevents.1709718664.adc675a344d5.67573.6 +3 -0
logs/events.out.tfevents.1709718777.adc675a344d5.67573.7 +3 -0
model.safetensors +1 -1
run-4/checkpoint-480/config.json +34 -0
run-4/checkpoint-480/model.safetensors +3 -0
run-4/checkpoint-480/optimizer.pt +3 -0
run-4/checkpoint-480/rng_state.pth +3 -0
run-4/checkpoint-480/scheduler.pt +3 -0
run-4/checkpoint-480/special_tokens_map.json +7 -0
run-4/checkpoint-480/tokenizer.json +0 -0
run-4/checkpoint-480/tokenizer_config.json +57 -0
run-4/checkpoint-480/trainer_state.json +121 -0
run-4/checkpoint-480/training_args.bin +3 -0
run-4/checkpoint-480/vocab.txt +0 -0
run-6/checkpoint-96/model.safetensors +1 -1
run-6/checkpoint-96/optimizer.pt +1 -1
run-6/checkpoint-96/scheduler.pt +1 -1
run-6/checkpoint-96/trainer_state.json +18 -18
run-6/checkpoint-96/training_args.bin +1 -1
tokenizer.json +1 -1
training_args.bin +1 -1

logs/events.out.tfevents.1709718096.adc675a344d5.67573.5 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c1a76573fcb36df9088e4f12fdddff1f1588cbda95ea5777c00266ed9b539456
-size 5997

 version https://git-lfs.github.com/spec/v1
+oid sha256:a474c75d807b3be28f576270f80d8dde938f6d3ba95d1fc2f6d758748ce7d589
+size 8400

logs/events.out.tfevents.1709718664.adc675a344d5.67573.6 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:838924cc3ab09db568f8c0edb7f4963397771f1a7e60c5e19e47aec960e53a5b
+size 5314

logs/events.out.tfevents.1709718777.adc675a344d5.67573.7 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2d745bcdeefaa5fcf0d992d30806763a0fd865c8f3f00622e1ea794f94f4a153
+size 5315

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1ea44d39603508a74d1eb35e54104733b6b06a24b07179d636be073910d892aa
 size 17549312

 version https://git-lfs.github.com/spec/v1
+oid sha256:490f0363ce94024364841fd7a0a06bf8589adbba7a9ab31207817f7711e2a787
 size 17549312

run-4/checkpoint-480/config.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "_name_or_path": "google/bert_uncased_L-2_H-128_A-2",
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 128,
+  "id2label": {
+    "0": "negative",
+    "1": "positive"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 512,
+  "label2id": {
+    "negative": "0",
+    "positive": "1"
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 2,
+  "num_hidden_layers": 2,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "torch_dtype": "float32",
+  "transformers_version": "4.38.2",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}

run-4/checkpoint-480/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bdb79fef73a1c1acbffe286a08bd27b24b15111131b9263da0048fa949fe8d1d
+size 17549312

run-4/checkpoint-480/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:066a971143d4ae463fe8dbf6e669d72f24304a7b9f50d0798d7738ca2c63f006
+size 35122746

run-4/checkpoint-480/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8d93f4cfe67413cd27374342b3b9f13806d5fb593a9dc59106a8b24dedb590a8
+size 14054

run-4/checkpoint-480/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0d3ab06c0aa0ea39478d2e18ab700a01d612f0fb16bc3eeb822d35ff7877fc65
+size 1064

run-4/checkpoint-480/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-4/checkpoint-480/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-4/checkpoint-480/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,57 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

run-4/checkpoint-480/trainer_state.json ADDED Viewed

	@@ -0,0 +1,121 @@

+{
+  "best_metric": 0.541095890410959,
+  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-4/checkpoint-96",
+  "epoch": 5.0,
+  "eval_steps": 500,
+  "global_step": 480,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "grad_norm": 0.4191925525665283,
+      "learning_rate": 0.00022738674133918383,
+      "loss": 0.098,
+      "step": 96
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.541095890410959,
+      "eval_f1": 0.20642978003384094,
+      "eval_loss": 0.08223184198141098,
+      "eval_precision": 0.7625,
+      "eval_recall": 0.11937377690802348,
+      "eval_runtime": 28.0826,
+      "eval_samples_per_second": 36.393,
+      "eval_steps_per_second": 1.139,
+      "step": 96
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 0.35244929790496826,
+      "learning_rate": 0.00017054005600438787,
+      "loss": 0.0833,
+      "step": 192
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 0.5371819960861057,
+      "eval_f1": 0.19145299145299144,
+      "eval_loss": 0.07941487431526184,
+      "eval_precision": 0.7567567567567568,
+      "eval_recall": 0.1095890410958904,
+      "eval_runtime": 28.0455,
+      "eval_samples_per_second": 36.441,
+      "eval_steps_per_second": 1.141,
+      "step": 192
+    },
+    {
+      "epoch": 3.0,
+      "grad_norm": 0.2497691810131073,
+      "learning_rate": 0.00011369337066959191,
+      "loss": 0.0813,
+      "step": 288
+    },
+    {
+      "epoch": 3.0,
+      "eval_accuracy": 0.5342465753424658,
+      "eval_f1": 0.18213058419243985,
+      "eval_loss": 0.07839526236057281,
+      "eval_precision": 0.7464788732394366,
+      "eval_recall": 0.10371819960861056,
+      "eval_runtime": 28.0524,
+      "eval_samples_per_second": 36.432,
+      "eval_steps_per_second": 1.141,
+      "step": 288
+    },
+    {
+      "epoch": 4.0,
+      "grad_norm": 0.2048983871936798,
+      "learning_rate": 5.684668533479596e-05,
+      "loss": 0.0805,
+      "step": 384
+    },
+    {
+      "epoch": 4.0,
+      "eval_accuracy": 0.5332681017612525,
+      "eval_f1": 0.17331022530329288,
+      "eval_loss": 0.07847526669502258,
+      "eval_precision": 0.7575757575757576,
+      "eval_recall": 0.09784735812133072,
+      "eval_runtime": 28.003,
+      "eval_samples_per_second": 36.496,
+      "eval_steps_per_second": 1.143,
+      "step": 384
+    },
+    {
+      "epoch": 5.0,
+      "grad_norm": 0.3579064607620239,
+      "learning_rate": 0.0,
+      "loss": 0.0801,
+      "step": 480
+    },
+    {
+      "epoch": 5.0,
+      "eval_accuracy": 0.538160469667319,
+      "eval_f1": 0.1945392491467577,
+      "eval_loss": 0.07800330221652985,
+      "eval_precision": 0.76,
+      "eval_recall": 0.11154598825831702,
+      "eval_runtime": 27.9144,
+      "eval_samples_per_second": 36.612,
+      "eval_steps_per_second": 1.146,
+      "step": 480
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 480,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
+  "save_steps": 500,
+  "total_flos": 1178475986400.0,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": {
+    "alpha": 0.09292461434001364,
+    "learning_rate": 0.0002842334266739798,
+    "num_train_epochs": 5,
+    "temperature": 2
+  }
+}

run-4/checkpoint-480/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f441eefbf68b7466d3f5396e1b086e4cb1ae1be01fffc857e92bd1ad437f6f6d
+size 4920

run-4/checkpoint-480/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-6/checkpoint-96/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c352e2bac7a45465ed2ddc39a8f061c65975878b3151f36a86114c66d7f88c44
 size 17549312

 version https://git-lfs.github.com/spec/v1
+oid sha256:490f0363ce94024364841fd7a0a06bf8589adbba7a9ab31207817f7711e2a787
 size 17549312

run-6/checkpoint-96/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:95a821e784c2c70a4b2609b9406269eab9081b1f00eee2c1b00baeaa97c9ba52
 size 35122746

 version https://git-lfs.github.com/spec/v1
+oid sha256:dc122370edfc08f3a6a1fc20310a190a25e55c7f69ffce66ba6af527318241e9
 size 35122746

run-6/checkpoint-96/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:856798b3561863f6a85cefb19b668f5d57b83cfdd60cb15a083e7c3adc63311b
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:b1f1b6a0bb6f0c88ef3bfde3dcfd387cb722657f776180861b739662f23b7101
 size 1064

run-6/checkpoint-96/trainer_state.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "best_metric": 0.5391389432485323,
   "best_model_checkpoint": "tiny-bert-sst2-distilled/run-6/checkpoint-96",
   "epoch": 1.0,
   "eval_steps": 500,
@@ -10,36 +10,36 @@
   "log_history": [
     {
       "epoch": 1.0,
-      "grad_norm": 0.47783181071281433,
-      "learning_rate": 0.00014230986748683228,
-      "loss": 0.0575,
       "step": 96
     },
     {
       "epoch": 1.0,
-      "eval_accuracy": 0.5391389432485323,
-      "eval_f1": 0.19487179487179485,
-      "eval_loss": 0.03508628159761429,
-      "eval_precision": 0.7702702702702703,
-      "eval_recall": 0.11154598825831702,
-      "eval_runtime": 29.4088,
-      "eval_samples_per_second": 34.751,
-      "eval_steps_per_second": 1.088,
       "step": 96
     }
   ],
   "logging_steps": 500,
-  "max_steps": 864,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 9,
   "save_steps": 500,
   "total_flos": 235695197280.0,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": {
-    "alpha": 0.026597019089590446,
-    "learning_rate": 0.00016009860092268632,
-    "num_train_epochs": 9,
-    "temperature": 12
   }
 }

 {
+  "best_metric": 0.761252446183953,
   "best_model_checkpoint": "tiny-bert-sst2-distilled/run-6/checkpoint-96",
   "epoch": 1.0,
   "eval_steps": 500,
   "log_history": [
     {
       "epoch": 1.0,
+      "grad_norm": 1.7334731817245483,
+      "learning_rate": 0.00019089508944155825,
+      "loss": 0.4515,
       "step": 96
     },
     {
       "epoch": 1.0,
+      "eval_accuracy": 0.761252446183953,
+      "eval_f1": 0.7676190476190476,
+      "eval_loss": 0.40420547127723694,
+      "eval_precision": 0.7476808905380334,
+      "eval_recall": 0.7886497064579256,
+      "eval_runtime": 28.5902,
+      "eval_samples_per_second": 35.747,
+      "eval_steps_per_second": 1.119,
       "step": 96
     }
   ],
   "logging_steps": 500,
+  "max_steps": 672,
   "num_input_tokens_seen": 0,
+  "num_train_epochs": 7,
   "save_steps": 500,
   "total_flos": 235695197280.0,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": {
+    "alpha": 0.6366473529685639,
+    "learning_rate": 0.00022271093768181798,
+    "num_train_epochs": 7,
+    "temperature": 14
   }
 }

run-6/checkpoint-96/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:27848829eb837b4e7e050e92d020fde804496616685e101b0644a01c57d5cd92
 size 4920

 version https://git-lfs.github.com/spec/v1
+oid sha256:11cd627d32c55435bd1d55cca3ffe9ac8a08ee3c4b43c65b7e3f22d01d249f85
 size 4920

tokenizer.json CHANGED Viewed

@@ -2,7 +2,7 @@
   "version": "1.0",
   "truncation": {
     "direction": "Right",
-    "max_length": 33,
     "strategy": "LongestFirst",
     "stride": 0
   },

   "version": "1.0",
   "truncation": {
     "direction": "Right",
+    "max_length": 31,
     "strategy": "LongestFirst",
     "stride": 0
   },

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:019a0c30743454578a20e039f50246add333dec6abb36273bd6bc2339b797b34
 size 4920

 version https://git-lfs.github.com/spec/v1
+oid sha256:11cd627d32c55435bd1d55cca3ffe9ac8a08ee3c4b43c65b7e3f22d01d249f85
 size 4920