xuancoblab2023 committed on Mar 6, 2024

Commit

a0e6abd

verified ·

1 Parent(s): a7780bd

Training in progress, epoch 1

Browse files

Files changed (47) hide show

logs/events.out.tfevents.1709719487.a2333cf40ebd.21010.62 +2 -2
logs/events.out.tfevents.1709720645.a2333cf40ebd.21010.63 +3 -0
model.safetensors +1 -1
run-5/checkpoint-402/config.json +34 -0
run-5/checkpoint-402/model.safetensors +3 -0
run-5/checkpoint-402/optimizer.pt +3 -0
run-5/checkpoint-402/rng_state.pth +3 -0
run-5/checkpoint-402/scheduler.pt +3 -0
run-5/checkpoint-402/special_tokens_map.json +7 -0
run-5/checkpoint-402/tokenizer.json +0 -0
run-5/checkpoint-402/tokenizer_config.json +57 -0
run-5/checkpoint-402/trainer_state.json +141 -0
run-5/checkpoint-402/training_args.bin +3 -0
run-5/checkpoint-402/vocab.txt +0 -0
run-5/checkpoint-670/config.json +34 -0
run-5/checkpoint-670/model.safetensors +3 -0
run-5/checkpoint-670/optimizer.pt +3 -0
run-5/checkpoint-670/rng_state.pth +3 -0
run-5/checkpoint-670/scheduler.pt +3 -0
run-5/checkpoint-670/special_tokens_map.json +7 -0
run-5/checkpoint-670/tokenizer.json +0 -0
run-5/checkpoint-670/tokenizer_config.json +57 -0
run-5/checkpoint-670/trainer_state.json +217 -0
run-5/checkpoint-670/training_args.bin +3 -0
run-5/checkpoint-670/vocab.txt +0 -0
run-6/checkpoint-93/config.json +34 -0
run-6/checkpoint-93/model.safetensors +3 -0
run-6/checkpoint-93/optimizer.pt +3 -0
run-6/checkpoint-93/rng_state.pth +3 -0
run-6/checkpoint-93/scheduler.pt +3 -0
run-6/checkpoint-93/special_tokens_map.json +7 -0
run-6/checkpoint-93/tokenizer.json +0 -0
run-6/checkpoint-93/tokenizer_config.json +57 -0
run-6/checkpoint-93/trainer_state.json +46 -0
run-6/checkpoint-93/training_args.bin +3 -0
run-6/checkpoint-93/vocab.txt +0 -0
run-8/checkpoint-96/model.safetensors +1 -1
run-8/checkpoint-96/optimizer.pt +1 -1
run-8/checkpoint-96/scheduler.pt +1 -1
run-8/checkpoint-96/trainer_state.json +18 -18
run-8/checkpoint-96/training_args.bin +1 -1
run-9/checkpoint-96/model.safetensors +1 -1
run-9/checkpoint-96/optimizer.pt +1 -1
run-9/checkpoint-96/scheduler.pt +1 -1
run-9/checkpoint-96/trainer_state.json +18 -18
run-9/checkpoint-96/training_args.bin +1 -1
training_args.bin +1 -1

logs/events.out.tfevents.1709719487.a2333cf40ebd.21010.62 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7c93f9efdd5269965a100fe7d527345579ba6b7b7aa96b7030bd3af44979c547
-size 5316

 version https://git-lfs.github.com/spec/v1
+oid sha256:cdbeac9d614ebb1fb53a9ebcc8f06ed9e51ae33afe4884fa169ab6bcd86b5bdf
+size 11817

logs/events.out.tfevents.1709720645.a2333cf40ebd.21010.63 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b791daf0e1b0ed68b1c0e62303e2d85234d8a88de6027c2c007c71d9e1278dcb
+size 5314

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b81abfa32ad5173708e5a7c5daeb714ab88b27f2272cc5f9512ce4882d1dceff
 size 17549312

 version https://git-lfs.github.com/spec/v1
+oid sha256:091f91bf8e06bfea46fc6caf25daa4a0d13a1d310b2b8d902cc3a58d34b2d3b4
 size 17549312

run-5/checkpoint-402/config.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "_name_or_path": "google/bert_uncased_L-2_H-128_A-2",
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 128,
+  "id2label": {
+    "0": "negative",
+    "1": "positive"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 512,
+  "label2id": {
+    "negative": "0",
+    "positive": "1"
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 2,
+  "num_hidden_layers": 2,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "torch_dtype": "float32",
+  "transformers_version": "4.38.2",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}

run-5/checkpoint-402/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:64a0fd11605c626e05c5c7fcd9020be8ae1860c723723c9983d472cdcbd40c9c
+size 17549312

run-5/checkpoint-402/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f00cdfd1c148cb9c87455a788842f729619b2b3c60d9c8105d9cf71b1233638b
+size 35122746

run-5/checkpoint-402/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8b8f6147105b5cb171058af11810256ae8ec293f62dafd322114780fb990c4a0
+size 14054

run-5/checkpoint-402/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f4bc4bb9a98d0edebaa2dcf33f9df0b7c6a37ebb34cd3c52ca4d87395ed1e1e2
+size 1064

run-5/checkpoint-402/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-5/checkpoint-402/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-5/checkpoint-402/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,57 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

run-5/checkpoint-402/trainer_state.json ADDED Viewed

	@@ -0,0 +1,141 @@

+{
+  "best_metric": 0.541095890410959,
+  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-5/checkpoint-402",
+  "epoch": 6.0,
+  "eval_steps": 500,
+  "global_step": 402,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "grad_norm": 0.22246789932250977,
+      "learning_rate": 1.5679734473172616e-05,
+      "loss": 0.2458,
+      "step": 67
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.5,
+      "eval_f1": 0.0,
+      "eval_loss": 0.1889893114566803,
+      "eval_precision": 0.0,
+      "eval_recall": 0.0,
+      "eval_runtime": 28.3905,
+      "eval_samples_per_second": 35.998,
+      "eval_steps_per_second": 1.127,
+      "step": 67
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 0.2654605507850647,
+      "learning_rate": 1.3937541753931215e-05,
+      "loss": 0.1861,
+      "step": 134
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 0.5,
+      "eval_f1": 0.0,
+      "eval_loss": 0.17826829850673676,
+      "eval_precision": 0.0,
+      "eval_recall": 0.0,
+      "eval_runtime": 29.9866,
+      "eval_samples_per_second": 34.082,
+      "eval_steps_per_second": 1.067,
+      "step": 134
+    },
+    {
+      "epoch": 3.0,
+      "grad_norm": 0.32397571206092834,
+      "learning_rate": 1.2195349034689811e-05,
+      "loss": 0.1759,
+      "step": 201
+    },
+    {
+      "epoch": 3.0,
+      "eval_accuracy": 0.5107632093933464,
+      "eval_f1": 0.07063197026022304,
+      "eval_loss": 0.16691070795059204,
+      "eval_precision": 0.7037037037037037,
+      "eval_recall": 0.03718199608610567,
+      "eval_runtime": 28.5351,
+      "eval_samples_per_second": 35.816,
+      "eval_steps_per_second": 1.121,
+      "step": 201
+    },
+    {
+      "epoch": 4.0,
+      "grad_norm": 0.6136437058448792,
+      "learning_rate": 1.045315631544841e-05,
+      "loss": 0.168,
+      "step": 268
+    },
+    {
+      "epoch": 4.0,
+      "eval_accuracy": 0.5362035225048923,
+      "eval_f1": 0.18556701030927833,
+      "eval_loss": 0.160459965467453,
+      "eval_precision": 0.7605633802816901,
+      "eval_recall": 0.10567514677103718,
+      "eval_runtime": 28.6402,
+      "eval_samples_per_second": 35.684,
+      "eval_steps_per_second": 1.117,
+      "step": 268
+    },
+    {
+      "epoch": 5.0,
+      "grad_norm": 0.3216162919998169,
+      "learning_rate": 8.710963596207009e-06,
+      "loss": 0.1647,
+      "step": 335
+    },
+    {
+      "epoch": 5.0,
+      "eval_accuracy": 0.5401174168297456,
+      "eval_f1": 0.20068027210884357,
+      "eval_loss": 0.1576094627380371,
+      "eval_precision": 0.7662337662337663,
+      "eval_recall": 0.11545988258317025,
+      "eval_runtime": 28.3765,
+      "eval_samples_per_second": 36.016,
+      "eval_steps_per_second": 1.128,
+      "step": 335
+    },
+    {
+      "epoch": 6.0,
+      "grad_norm": 0.45282211899757385,
+      "learning_rate": 6.968770876965607e-06,
+      "loss": 0.1625,
+      "step": 402
+    },
+    {
+      "epoch": 6.0,
+      "eval_accuracy": 0.541095890410959,
+      "eval_f1": 0.20373514431239387,
+      "eval_loss": 0.15556302666664124,
+      "eval_precision": 0.7692307692307693,
+      "eval_recall": 0.11741682974559686,
+      "eval_runtime": 28.8112,
+      "eval_samples_per_second": 35.472,
+      "eval_steps_per_second": 1.111,
+      "step": 402
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 670,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "total_flos": 1414171183680.0,
+  "train_batch_size": 46,
+  "trial_name": null,
+  "trial_params": {
+    "alpha": 0.19075569878013487,
+    "learning_rate": 1.7421927192414017e-05,
+    "num_train_epochs": 10,
+    "per_device_train_batch_size": 46,
+    "temperature": 24
+  }
+}

run-5/checkpoint-402/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:557dc8a5a76d4b478175e4b8394a3e21bc66975595c37bce97ba59c7190b899c
+size 4920

run-5/checkpoint-402/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-5/checkpoint-670/config.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "_name_or_path": "google/bert_uncased_L-2_H-128_A-2",
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 128,
+  "id2label": {
+    "0": "negative",
+    "1": "positive"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 512,
+  "label2id": {
+    "negative": "0",
+    "positive": "1"
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 2,
+  "num_hidden_layers": 2,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "torch_dtype": "float32",
+  "transformers_version": "4.38.2",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}

run-5/checkpoint-670/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:575ca98bf296b080a72c0d6dd52be9200676461b3e7e7d27907d42ff324aa3ab
+size 17549312

run-5/checkpoint-670/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b13e9195b025f7da017e1830d435fe8b7b8500a6e3509ac023d7f59503d1f1a4
+size 35122746

run-5/checkpoint-670/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3b9508485dbdb133df6b713b38533dd5eb9442565adc1f6b4e0a9cb742ecd19a
+size 14054

run-5/checkpoint-670/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6e1b91ab299d310fcb602a86ce4eef9cd66c51133a0e148af12d39b0ce6cae24
+size 1064

run-5/checkpoint-670/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-5/checkpoint-670/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-5/checkpoint-670/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,57 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

run-5/checkpoint-670/trainer_state.json ADDED Viewed

	@@ -0,0 +1,217 @@

+{
+  "best_metric": 0.541095890410959,
+  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-5/checkpoint-402",
+  "epoch": 10.0,
+  "eval_steps": 500,
+  "global_step": 670,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "grad_norm": 0.22246789932250977,
+      "learning_rate": 1.5679734473172616e-05,
+      "loss": 0.2458,
+      "step": 67
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.5,
+      "eval_f1": 0.0,
+      "eval_loss": 0.1889893114566803,
+      "eval_precision": 0.0,
+      "eval_recall": 0.0,
+      "eval_runtime": 28.3905,
+      "eval_samples_per_second": 35.998,
+      "eval_steps_per_second": 1.127,
+      "step": 67
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 0.2654605507850647,
+      "learning_rate": 1.3937541753931215e-05,
+      "loss": 0.1861,
+      "step": 134
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 0.5,
+      "eval_f1": 0.0,
+      "eval_loss": 0.17826829850673676,
+      "eval_precision": 0.0,
+      "eval_recall": 0.0,
+      "eval_runtime": 29.9866,
+      "eval_samples_per_second": 34.082,
+      "eval_steps_per_second": 1.067,
+      "step": 134
+    },
+    {
+      "epoch": 3.0,
+      "grad_norm": 0.32397571206092834,
+      "learning_rate": 1.2195349034689811e-05,
+      "loss": 0.1759,
+      "step": 201
+    },
+    {
+      "epoch": 3.0,
+      "eval_accuracy": 0.5107632093933464,
+      "eval_f1": 0.07063197026022304,
+      "eval_loss": 0.16691070795059204,
+      "eval_precision": 0.7037037037037037,
+      "eval_recall": 0.03718199608610567,
+      "eval_runtime": 28.5351,
+      "eval_samples_per_second": 35.816,
+      "eval_steps_per_second": 1.121,
+      "step": 201
+    },
+    {
+      "epoch": 4.0,
+      "grad_norm": 0.6136437058448792,
+      "learning_rate": 1.045315631544841e-05,
+      "loss": 0.168,
+      "step": 268
+    },
+    {
+      "epoch": 4.0,
+      "eval_accuracy": 0.5362035225048923,
+      "eval_f1": 0.18556701030927833,
+      "eval_loss": 0.160459965467453,
+      "eval_precision": 0.7605633802816901,
+      "eval_recall": 0.10567514677103718,
+      "eval_runtime": 28.6402,
+      "eval_samples_per_second": 35.684,
+      "eval_steps_per_second": 1.117,
+      "step": 268
+    },
+    {
+      "epoch": 5.0,
+      "grad_norm": 0.3216162919998169,
+      "learning_rate": 8.710963596207009e-06,
+      "loss": 0.1647,
+      "step": 335
+    },
+    {
+      "epoch": 5.0,
+      "eval_accuracy": 0.5401174168297456,
+      "eval_f1": 0.20068027210884357,
+      "eval_loss": 0.1576094627380371,
+      "eval_precision": 0.7662337662337663,
+      "eval_recall": 0.11545988258317025,
+      "eval_runtime": 28.3765,
+      "eval_samples_per_second": 36.016,
+      "eval_steps_per_second": 1.128,
+      "step": 335
+    },
+    {
+      "epoch": 6.0,
+      "grad_norm": 0.45282211899757385,
+      "learning_rate": 6.968770876965607e-06,
+      "loss": 0.1625,
+      "step": 402
+    },
+    {
+      "epoch": 6.0,
+      "eval_accuracy": 0.541095890410959,
+      "eval_f1": 0.20373514431239387,
+      "eval_loss": 0.15556302666664124,
+      "eval_precision": 0.7692307692307693,
+      "eval_recall": 0.11741682974559686,
+      "eval_runtime": 28.8112,
+      "eval_samples_per_second": 35.472,
+      "eval_steps_per_second": 1.111,
+      "step": 402
+    },
+    {
+      "epoch": 7.0,
+      "grad_norm": 0.3418940007686615,
+      "learning_rate": 5.226578157724205e-06,
+      "loss": 0.1611,
+      "step": 469
+    },
+    {
+      "epoch": 7.0,
+      "eval_accuracy": 0.541095890410959,
+      "eval_f1": 0.20373514431239387,
+      "eval_loss": 0.15463578701019287,
+      "eval_precision": 0.7692307692307693,
+      "eval_recall": 0.11741682974559686,
+      "eval_runtime": 28.8835,
+      "eval_samples_per_second": 35.384,
+      "eval_steps_per_second": 1.108,
+      "step": 469
+    },
+    {
+      "epoch": 8.0,
+      "grad_norm": 0.3419853448867798,
+      "learning_rate": 3.4843854384828036e-06,
+      "loss": 0.1596,
+      "step": 536
+    },
+    {
+      "epoch": 8.0,
+      "eval_accuracy": 0.541095890410959,
+      "eval_f1": 0.20373514431239387,
+      "eval_loss": 0.15391579270362854,
+      "eval_precision": 0.7692307692307693,
+      "eval_recall": 0.11741682974559686,
+      "eval_runtime": 28.6467,
+      "eval_samples_per_second": 35.676,
+      "eval_steps_per_second": 1.117,
+      "step": 536
+    },
+    {
+      "epoch": 9.0,
+      "grad_norm": 0.3018151521682739,
+      "learning_rate": 1.7421927192414018e-06,
+      "loss": 0.1589,
+      "step": 603
+    },
+    {
+      "epoch": 9.0,
+      "eval_accuracy": 0.541095890410959,
+      "eval_f1": 0.20373514431239387,
+      "eval_loss": 0.15353241562843323,
+      "eval_precision": 0.7692307692307693,
+      "eval_recall": 0.11741682974559686,
+      "eval_runtime": 28.7082,
+      "eval_samples_per_second": 35.6,
+      "eval_steps_per_second": 1.115,
+      "step": 603
+    },
+    {
+      "epoch": 10.0,
+      "grad_norm": 0.4547845721244812,
+      "learning_rate": 0.0,
+      "loss": 0.1592,
+      "step": 670
+    },
+    {
+      "epoch": 10.0,
+      "eval_accuracy": 0.541095890410959,
+      "eval_f1": 0.20373514431239387,
+      "eval_loss": 0.15339058637619019,
+      "eval_precision": 0.7692307692307693,
+      "eval_recall": 0.11741682974559686,
+      "eval_runtime": 28.3962,
+      "eval_samples_per_second": 35.991,
+      "eval_steps_per_second": 1.127,
+      "step": 670
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 670,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "total_flos": 2356951972800.0,
+  "train_batch_size": 46,
+  "trial_name": null,
+  "trial_params": {
+    "alpha": 0.19075569878013487,
+    "learning_rate": 1.7421927192414017e-05,
+    "num_train_epochs": 10,
+    "per_device_train_batch_size": 46,
+    "temperature": 24
+  }
+}

run-5/checkpoint-670/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:557dc8a5a76d4b478175e4b8394a3e21bc66975595c37bce97ba59c7190b899c
+size 4920

run-5/checkpoint-670/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-6/checkpoint-93/config.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "_name_or_path": "google/bert_uncased_L-2_H-128_A-2",
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 128,
+  "id2label": {
+    "0": "negative",
+    "1": "positive"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 512,
+  "label2id": {
+    "negative": "0",
+    "positive": "1"
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 2,
+  "num_hidden_layers": 2,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "torch_dtype": "float32",
+  "transformers_version": "4.38.2",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}

run-6/checkpoint-93/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:091f91bf8e06bfea46fc6caf25daa4a0d13a1d310b2b8d902cc3a58d34b2d3b4
+size 17549312

run-6/checkpoint-93/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2c4dc4e6a4705d6a4aa925ed18fcba0c5107d6b189857077c1e8b1548e5eb876
+size 35122746

run-6/checkpoint-93/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:13b8e331f3380aab23e9eb3e617f6b97a18048bd47a3ea8f14cf82dde08be706
+size 14054

run-6/checkpoint-93/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f80568e1169ca26ecc896bfbd4e7e169354eb0bf7282ec144987a58ee6ae0650
+size 1064

run-6/checkpoint-93/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-6/checkpoint-93/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-6/checkpoint-93/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,57 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

run-6/checkpoint-93/trainer_state.json ADDED Viewed

	@@ -0,0 +1,46 @@

+{
+  "best_metric": 0.5,
+  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-6/checkpoint-93",
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 93,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "grad_norm": 0.4286664128303528,
+      "learning_rate": 1.945409831472016e-05,
+      "loss": 0.4806,
+      "step": 93
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.5,
+      "eval_f1": 0.0,
+      "eval_loss": 0.4703535735607147,
+      "eval_precision": 0.0,
+      "eval_recall": 0.0,
+      "eval_runtime": 28.42,
+      "eval_samples_per_second": 35.961,
+      "eval_steps_per_second": 1.126,
+      "step": 93
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 837,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 9,
+  "save_steps": 500,
+  "total_flos": 235695197280.0,
+  "train_batch_size": 33,
+  "trial_name": null,
+  "trial_params": {
+    "alpha": 0.6122687021783514,
+    "learning_rate": 2.188586060406018e-05,
+    "num_train_epochs": 9,
+    "per_device_train_batch_size": 33,
+    "temperature": 14
+  }
+}

run-6/checkpoint-93/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e40b6510f54e1dab2cd156ebde5daec51272f5467eda7ac498712f745cbb3237
+size 4920

run-6/checkpoint-93/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-8/checkpoint-96/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:415159172ccea99175bbf0b16be3b73dce96c89652c91701080b51ac71fb25ea
 size 17549312

 version https://git-lfs.github.com/spec/v1
+oid sha256:84bec5042b87086a45dca7b3c56d04cf79d83ed1b52a762adc677152de33272c
 size 17549312

run-8/checkpoint-96/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ac5e0f8916248b91ce3c39f2a229a973b917d0137dfcefc765b085f79f699292
 size 35122746

 version https://git-lfs.github.com/spec/v1
+oid sha256:1da6291d75080430d6c4ab323e31c11d497e573a325f8f4db8e787327b6178a6
 size 35122746

run-8/checkpoint-96/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a987235109284c8b908892c9fe6cf9e664fed2bf7d9115e29e411d71c61a9b90
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:47d32b52b4cbac032624e075d8b37b40470a77d3ab11ecb407b7b2017aefcbcc
 size 1064

run-8/checkpoint-96/trainer_state.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "best_metric": 0.7602739726027398,
   "best_model_checkpoint": "tiny-bert-sst2-distilled/run-8/checkpoint-96",
   "epoch": 1.0,
   "eval_steps": 500,
@@ -10,36 +10,36 @@
   "log_history": [
     {
       "epoch": 1.0,
-      "grad_norm": 3.2297720909118652,
-      "learning_rate": 0.0003356185870926135,
-      "loss": 0.5529,
       "step": 96
     },
     {
       "epoch": 1.0,
-      "eval_accuracy": 0.7602739726027398,
-      "eval_f1": 0.7973531844499586,
-      "eval_loss": 0.4773445129394531,
-      "eval_precision": 0.6905444126074498,
-      "eval_recall": 0.9432485322896281,
-      "eval_runtime": 28.1534,
-      "eval_samples_per_second": 36.301,
-      "eval_steps_per_second": 1.137,
       "step": 96
     }
   ],
   "logging_steps": 500,
-  "max_steps": 576,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 6,
   "save_steps": 500,
   "total_flos": 235695197280.0,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": {
-    "alpha": 0.8957226795607325,
-    "learning_rate": 0.0004027423045111362,
-    "num_train_epochs": 6,
-    "temperature": 3
   }
 }

 {
+  "best_metric": 0.5,
   "best_model_checkpoint": "tiny-bert-sst2-distilled/run-8/checkpoint-96",
   "epoch": 1.0,
   "eval_steps": 500,
   "log_history": [
     {
       "epoch": 1.0,
+      "grad_norm": 0.41845789551734924,
+      "learning_rate": 1.3576479969391517e-05,
+      "loss": 0.1501,
       "step": 96
     },
     {
       "epoch": 1.0,
+      "eval_accuracy": 0.5,
+      "eval_f1": 0.0,
+      "eval_loss": 0.0910131111741066,
+      "eval_precision": 0.0,
+      "eval_recall": 0.0,
+      "eval_runtime": 28.1809,
+      "eval_samples_per_second": 36.266,
+      "eval_steps_per_second": 1.136,
       "step": 96
     }
   ],
   "logging_steps": 500,
+  "max_steps": 480,
   "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
   "save_steps": 500,
   "total_flos": 235695197280.0,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": {
+    "alpha": 0.06528729720326021,
+    "learning_rate": 1.6970599961739396e-05,
+    "num_train_epochs": 5,
+    "temperature": 4
   }
 }

run-8/checkpoint-96/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c62ca979c251b24426f83ee19cf958429aed20a5e41a6549bd0708ed8286fb21
 size 4920

 version https://git-lfs.github.com/spec/v1
+oid sha256:669b7645cf6dd2271363aa4a300b453105055ad2059ef2460a17a64f32148151
 size 4920

run-9/checkpoint-96/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:239dd84c1acbf3926d1b2ffcc0c8dbbb872ee0abbb6b9b92cf4944b98a755de2
 size 17549312

 version https://git-lfs.github.com/spec/v1
+oid sha256:3d8d6934d852376a5bc13fbfb17016d4c3adcd7b7bdd42a9aab8b9b5d9f62a1b
 size 17549312

run-9/checkpoint-96/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cb67d3ee336b55baf61cb096cd593c5a926ac525753c86191a6d55862e3338d9
 size 35122746

 version https://git-lfs.github.com/spec/v1
+oid sha256:45b565c370bd07ca4b61cdb8cbc27c9334a05f68332f9e1d6be3633edc32ad6c
 size 35122746

run-9/checkpoint-96/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e91027d86c4ea0d43b412d03e2ef80532fe534e4a3c1c0000c6005738343a2bd
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:50d2214d797b0d557f68148a57b0b4d53efab48f681a8a8f578594a80924ae16
 size 1064

run-9/checkpoint-96/trainer_state.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "best_metric": 0.7514677103718199,
   "best_model_checkpoint": "tiny-bert-sst2-distilled/run-9/checkpoint-96",
   "epoch": 1.0,
   "eval_steps": 500,
@@ -10,36 +10,36 @@
   "log_history": [
     {
       "epoch": 1.0,
-      "grad_norm": 2.0101799964904785,
-      "learning_rate": 5.9842495749639135e-05,
-      "loss": 0.6452,
       "step": 96
     },
     {
       "epoch": 1.0,
-      "eval_accuracy": 0.7514677103718199,
-      "eval_f1": 0.7806563039723661,
-      "eval_loss": 0.5494381785392761,
-      "eval_precision": 0.6986089644513137,
-      "eval_recall": 0.8845401174168297,
-      "eval_runtime": 28.605,
-      "eval_samples_per_second": 35.728,
-      "eval_steps_per_second": 1.119,
       "step": 96
     }
   ],
   "logging_steps": 500,
-  "max_steps": 384,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 4,
   "save_steps": 500,
   "total_flos": 235695197280.0,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": {
-    "alpha": 0.9656943950307342,
-    "learning_rate": 7.978999433285218e-05,
-    "num_train_epochs": 4,
-    "temperature": 18
   }
 }

 {
+  "best_metric": 0.50293542074364,
   "best_model_checkpoint": "tiny-bert-sst2-distilled/run-9/checkpoint-96",
   "epoch": 1.0,
   "eval_steps": 500,
   "log_history": [
     {
       "epoch": 1.0,
+      "grad_norm": 0.7869714498519897,
+      "learning_rate": 1.3073475217173054e-05,
+      "loss": 0.6637,
       "step": 96
     },
     {
       "epoch": 1.0,
+      "eval_accuracy": 0.50293542074364,
+      "eval_f1": 0.03422053231939163,
+      "eval_loss": 0.6569339036941528,
+      "eval_precision": 0.6,
+      "eval_recall": 0.01761252446183953,
+      "eval_runtime": 28.4278,
+      "eval_samples_per_second": 35.951,
+      "eval_steps_per_second": 1.126,
       "step": 96
     }
   ],
   "logging_steps": 500,
+  "max_steps": 768,
   "num_input_tokens_seen": 0,
+  "num_train_epochs": 8,
   "save_steps": 500,
   "total_flos": 235695197280.0,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": {
+    "alpha": 0.9372925528810219,
+    "learning_rate": 1.4941114533912061e-05,
+    "num_train_epochs": 8,
+    "temperature": 17
   }
 }

run-9/checkpoint-96/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bb87382f1b3aca8f77e985c5e3b04a5f9ce84af6f6fa934f965157269a10afa9
 size 4920

 version https://git-lfs.github.com/spec/v1
+oid sha256:4c88ab003cb8d443d320a7d2a50b7663500e4bdf0605d3f2691a06db8acbcf72
 size 4920

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bb87382f1b3aca8f77e985c5e3b04a5f9ce84af6f6fa934f965157269a10afa9
 size 4920

 version https://git-lfs.github.com/spec/v1
+oid sha256:e40b6510f54e1dab2cd156ebde5daec51272f5467eda7ac498712f745cbb3237
 size 4920