xuancoblab2023 committed on Mar 24, 2024

Commit

bc10741

verified ·

1 Parent(s): 062d1e9

Training in progress, epoch 1

Browse files

Files changed (33) hide show

logs/events.out.tfevents.1711273902.73e3a81c01ef.4225.3 +2 -2
logs/events.out.tfevents.1711275891.73e3a81c01ef.4225.4 +3 -0
model.safetensors +1 -1
run-3/checkpoint-1152/config.json +34 -0
run-3/checkpoint-1152/model.safetensors +3 -0
run-3/checkpoint-1152/optimizer.pt +3 -0
run-3/checkpoint-1152/rng_state.pth +3 -0
run-3/checkpoint-1152/scheduler.pt +3 -0
run-3/checkpoint-1152/special_tokens_map.json +7 -0
run-3/checkpoint-1152/tokenizer.json +0 -0
run-3/checkpoint-1152/tokenizer_config.json +57 -0
run-3/checkpoint-1152/trainer_state.json +147 -0
run-3/checkpoint-1152/training_args.bin +3 -0
run-3/checkpoint-1152/vocab.txt +0 -0
run-3/checkpoint-1344/config.json +34 -0
run-3/checkpoint-1344/model.safetensors +3 -0
run-3/checkpoint-1344/optimizer.pt +3 -0
run-3/checkpoint-1344/rng_state.pth +3 -0
run-3/checkpoint-1344/scheduler.pt +3 -0
run-3/checkpoint-1344/special_tokens_map.json +7 -0
run-3/checkpoint-1344/tokenizer.json +0 -0
run-3/checkpoint-1344/tokenizer_config.json +57 -0
run-3/checkpoint-1344/trainer_state.json +167 -0
run-3/checkpoint-1344/training_args.bin +3 -0
run-3/checkpoint-1344/vocab.txt +0 -0
run-4/checkpoint-192/config.json +1 -1
run-4/checkpoint-192/model.safetensors +1 -1
run-4/checkpoint-192/optimizer.pt +1 -1
run-4/checkpoint-192/rng_state.pth +1 -1
run-4/checkpoint-192/scheduler.pt +1 -1
run-4/checkpoint-192/trainer_state.json +24 -41
run-4/checkpoint-192/training_args.bin +2 -2
training_args.bin +1 -1

logs/events.out.tfevents.1711273902.73e3a81c01ef.4225.3 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:74e26e192d8c7f213d390797c7892456060b9a21f5cea17a996123c87d0b2be8
-size 6137

 version https://git-lfs.github.com/spec/v1
+oid sha256:bfdba282cf4a89fd87020e1a0abacd9ce361065be753b3efebfa4915e103d884
+size 10141

logs/events.out.tfevents.1711275891.73e3a81c01ef.4225.4 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:34d4250fabcfc496191e27fc78abb7caec110608b0400aac750fe46d2e3c8396
+size 5406

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:70a388d72e55b7953b67cdfa00ddc695e26206a74815a6d36483067f798bda2f
 size 17549312

 version https://git-lfs.github.com/spec/v1
+oid sha256:beddae8f631e85aed94420246e7c55fb026c1a0c8796f4c1e8f344dfaa4dfbfd
 size 17549312

run-3/checkpoint-1152/config.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "_name_or_path": "google/bert_uncased_L-2_H-128_A-2",
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 128,
+  "id2label": {
+    "0": "negative",
+    "1": "positive"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 512,
+  "label2id": {
+    "negative": "0",
+    "positive": "1"
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 2,
+  "num_hidden_layers": 2,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "torch_dtype": "float32",
+  "transformers_version": "4.39.1",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}

run-3/checkpoint-1152/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:13c916f6b784b4fc9e1da9af6da348f68516d337f785d2ae7904c5818b72ad2b
+size 17549312

run-3/checkpoint-1152/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9ec1fd8ca620f350b703329c26079a2bd7f18e85564af0de1c754e0ae516d06f
+size 35122746

run-3/checkpoint-1152/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d556941e171165349f251b7e66a50d0ff07f00ec3ebab16ee56553cc2ce38c40
+size 14054

run-3/checkpoint-1152/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8becf9ec5529f94aac4db7bc33b53b3100aa0310444842364b2c518cda585582
+size 1064

run-3/checkpoint-1152/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-3/checkpoint-1152/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-3/checkpoint-1152/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,57 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

run-3/checkpoint-1152/trainer_state.json ADDED Viewed

	@@ -0,0 +1,147 @@

+{
+  "best_metric": 0.7798434442270059,
+  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-3/checkpoint-1152",
+  "epoch": 6.0,
+  "eval_steps": 500,
+  "global_step": 1152,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "grad_norm": 0.7568773031234741,
+      "learning_rate": 0.00039540069434609415,
+      "loss": 0.3274,
+      "step": 192
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.5430528375733855,
+      "eval_f1": 0.20442930153321975,
+      "eval_loss": 0.30545827746391296,
+      "eval_mcc": 0.1640968755879385,
+      "eval_precision": 0.7894736842105263,
+      "eval_recall": 0.11741682974559686,
+      "eval_runtime": 66.7048,
+      "eval_samples_per_second": 15.321,
+      "eval_steps_per_second": 0.48,
+      "step": 192
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 1.822717547416687,
+      "learning_rate": 0.0003295005786217451,
+      "loss": 0.3076,
+      "step": 384
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 0.5459882583170255,
+      "eval_f1": 0.21355932203389827,
+      "eval_loss": 0.2995118200778961,
+      "eval_mcc": 0.17219814389174506,
+      "eval_precision": 0.7974683544303798,
+      "eval_recall": 0.1232876712328767,
+      "eval_runtime": 66.8,
+      "eval_samples_per_second": 15.299,
+      "eval_steps_per_second": 0.479,
+      "step": 384
+    },
+    {
+      "epoch": 3.0,
+      "grad_norm": 1.0308061838150024,
+      "learning_rate": 0.0002636004628973961,
+      "loss": 0.2967,
+      "step": 576
+    },
+    {
+      "epoch": 3.0,
+      "eval_accuracy": 0.5694716242661448,
+      "eval_f1": 0.29936305732484075,
+      "eval_loss": 0.2949042022228241,
+      "eval_mcc": 0.21819315809183495,
+      "eval_precision": 0.8034188034188035,
+      "eval_recall": 0.18395303326810175,
+      "eval_runtime": 66.6296,
+      "eval_samples_per_second": 15.339,
+      "eval_steps_per_second": 0.48,
+      "step": 576
+    },
+    {
+      "epoch": 4.0,
+      "grad_norm": 1.6645914316177368,
+      "learning_rate": 0.00019770034717304707,
+      "loss": 0.296,
+      "step": 768
+    },
+    {
+      "epoch": 4.0,
+      "eval_accuracy": 0.6183953033268101,
+      "eval_f1": 0.4298245614035087,
+      "eval_loss": 0.29466578364372253,
+      "eval_mcc": 0.3157246377506289,
+      "eval_precision": 0.8497109826589595,
+      "eval_recall": 0.2876712328767123,
+      "eval_runtime": 67.3924,
+      "eval_samples_per_second": 15.165,
+      "eval_steps_per_second": 0.475,
+      "step": 768
+    },
+    {
+      "epoch": 5.0,
+      "grad_norm": 1.67695951461792,
+      "learning_rate": 0.00013180023144869805,
+      "loss": 0.2906,
+      "step": 960
+    },
+    {
+      "epoch": 5.0,
+      "eval_accuracy": 0.7240704500978473,
+      "eval_f1": 0.6658767772511849,
+      "eval_loss": 0.29085618257522583,
+      "eval_mcc": 0.478083499336065,
+      "eval_precision": 0.8438438438438438,
+      "eval_recall": 0.5499021526418787,
+      "eval_runtime": 66.3348,
+      "eval_samples_per_second": 15.407,
+      "eval_steps_per_second": 0.482,
+      "step": 960
+    },
+    {
+      "epoch": 6.0,
+      "grad_norm": 0.9722081422805786,
+      "learning_rate": 6.590011572434902e-05,
+      "loss": 0.2869,
+      "step": 1152
+    },
+    {
+      "epoch": 6.0,
+      "eval_accuracy": 0.7798434442270059,
+      "eval_f1": 0.7583243823845327,
+      "eval_loss": 0.29121583700180054,
+      "eval_mcc": 0.5687784616694839,
+      "eval_precision": 0.8404761904761905,
+      "eval_recall": 0.6908023483365949,
+      "eval_runtime": 66.588,
+      "eval_samples_per_second": 15.348,
+      "eval_steps_per_second": 0.481,
+      "step": 1152
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 1344,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 7,
+  "save_steps": 500,
+  "total_flos": 1414171183680.0,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": {
+    "alpha": 0.4369516344275761,
+    "learning_rate": 0.00046130081007044317,
+    "num_train_epochs": 7,
+    "per_device_train_batch_size": 16,
+    "temperature": 15
+  }
+}

run-3/checkpoint-1152/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1984bbced85533f487884fbafe7fdded01dc8a3f7df5f2a96fbc5883290bb68c
+size 4984

run-3/checkpoint-1152/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-3/checkpoint-1344/config.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "_name_or_path": "google/bert_uncased_L-2_H-128_A-2",
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 128,
+  "id2label": {
+    "0": "negative",
+    "1": "positive"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 512,
+  "label2id": {
+    "negative": "0",
+    "positive": "1"
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 2,
+  "num_hidden_layers": 2,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "torch_dtype": "float32",
+  "transformers_version": "4.39.1",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}

run-3/checkpoint-1344/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:918ef759dc65f5be7216a6de3674f90680c17d4c6925e640dfac0dd9e2f132ab
+size 17549312

run-3/checkpoint-1344/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:116adeb0a15c5b5db4bcacbdd560820edc09bcb15e32ed09ff335b4ab83e1126
+size 35122746

run-3/checkpoint-1344/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5daa194f252973a6e638902bc960b7e07ea23884d291e0a3c2a48ccf865c60f8
+size 14054

run-3/checkpoint-1344/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:17ca7f5170db956d3a2fd78bc7b2881fbcdd477c5576d0238f3072962c92ebbe
+size 1064

run-3/checkpoint-1344/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-3/checkpoint-1344/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-3/checkpoint-1344/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,57 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

run-3/checkpoint-1344/trainer_state.json ADDED Viewed

	@@ -0,0 +1,167 @@

+{
+  "best_metric": 0.7798434442270059,
+  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-3/checkpoint-1152",
+  "epoch": 7.0,
+  "eval_steps": 500,
+  "global_step": 1344,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "grad_norm": 0.7568773031234741,
+      "learning_rate": 0.00039540069434609415,
+      "loss": 0.3274,
+      "step": 192
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.5430528375733855,
+      "eval_f1": 0.20442930153321975,
+      "eval_loss": 0.30545827746391296,
+      "eval_mcc": 0.1640968755879385,
+      "eval_precision": 0.7894736842105263,
+      "eval_recall": 0.11741682974559686,
+      "eval_runtime": 66.7048,
+      "eval_samples_per_second": 15.321,
+      "eval_steps_per_second": 0.48,
+      "step": 192
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 1.822717547416687,
+      "learning_rate": 0.0003295005786217451,
+      "loss": 0.3076,
+      "step": 384
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 0.5459882583170255,
+      "eval_f1": 0.21355932203389827,
+      "eval_loss": 0.2995118200778961,
+      "eval_mcc": 0.17219814389174506,
+      "eval_precision": 0.7974683544303798,
+      "eval_recall": 0.1232876712328767,
+      "eval_runtime": 66.8,
+      "eval_samples_per_second": 15.299,
+      "eval_steps_per_second": 0.479,
+      "step": 384
+    },
+    {
+      "epoch": 3.0,
+      "grad_norm": 1.0308061838150024,
+      "learning_rate": 0.0002636004628973961,
+      "loss": 0.2967,
+      "step": 576
+    },
+    {
+      "epoch": 3.0,
+      "eval_accuracy": 0.5694716242661448,
+      "eval_f1": 0.29936305732484075,
+      "eval_loss": 0.2949042022228241,
+      "eval_mcc": 0.21819315809183495,
+      "eval_precision": 0.8034188034188035,
+      "eval_recall": 0.18395303326810175,
+      "eval_runtime": 66.6296,
+      "eval_samples_per_second": 15.339,
+      "eval_steps_per_second": 0.48,
+      "step": 576
+    },
+    {
+      "epoch": 4.0,
+      "grad_norm": 1.6645914316177368,
+      "learning_rate": 0.00019770034717304707,
+      "loss": 0.296,
+      "step": 768
+    },
+    {
+      "epoch": 4.0,
+      "eval_accuracy": 0.6183953033268101,
+      "eval_f1": 0.4298245614035087,
+      "eval_loss": 0.29466578364372253,
+      "eval_mcc": 0.3157246377506289,
+      "eval_precision": 0.8497109826589595,
+      "eval_recall": 0.2876712328767123,
+      "eval_runtime": 67.3924,
+      "eval_samples_per_second": 15.165,
+      "eval_steps_per_second": 0.475,
+      "step": 768
+    },
+    {
+      "epoch": 5.0,
+      "grad_norm": 1.67695951461792,
+      "learning_rate": 0.00013180023144869805,
+      "loss": 0.2906,
+      "step": 960
+    },
+    {
+      "epoch": 5.0,
+      "eval_accuracy": 0.7240704500978473,
+      "eval_f1": 0.6658767772511849,
+      "eval_loss": 0.29085618257522583,
+      "eval_mcc": 0.478083499336065,
+      "eval_precision": 0.8438438438438438,
+      "eval_recall": 0.5499021526418787,
+      "eval_runtime": 66.3348,
+      "eval_samples_per_second": 15.407,
+      "eval_steps_per_second": 0.482,
+      "step": 960
+    },
+    {
+      "epoch": 6.0,
+      "grad_norm": 0.9722081422805786,
+      "learning_rate": 6.590011572434902e-05,
+      "loss": 0.2869,
+      "step": 1152
+    },
+    {
+      "epoch": 6.0,
+      "eval_accuracy": 0.7798434442270059,
+      "eval_f1": 0.7583243823845327,
+      "eval_loss": 0.29121583700180054,
+      "eval_mcc": 0.5687784616694839,
+      "eval_precision": 0.8404761904761905,
+      "eval_recall": 0.6908023483365949,
+      "eval_runtime": 66.588,
+      "eval_samples_per_second": 15.348,
+      "eval_steps_per_second": 0.481,
+      "step": 1152
+    },
+    {
+      "epoch": 7.0,
+      "grad_norm": 0.8847911953926086,
+      "learning_rate": 0.0,
+      "loss": 0.2834,
+      "step": 1344
+    },
+    {
+      "epoch": 7.0,
+      "eval_accuracy": 0.7328767123287672,
+      "eval_f1": 0.6799531066822977,
+      "eval_loss": 0.2881581485271454,
+      "eval_mcc": 0.4935253301118275,
+      "eval_precision": 0.847953216374269,
+      "eval_recall": 0.5675146771037182,
+      "eval_runtime": 66.664,
+      "eval_samples_per_second": 15.331,
+      "eval_steps_per_second": 0.48,
+      "step": 1344
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 1344,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 7,
+  "save_steps": 500,
+  "total_flos": 1649866380960.0,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": {
+    "alpha": 0.4369516344275761,
+    "learning_rate": 0.00046130081007044317,
+    "num_train_epochs": 7,
+    "per_device_train_batch_size": 16,
+    "temperature": 15
+  }
+}

run-3/checkpoint-1344/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1984bbced85533f487884fbafe7fdded01dc8a3f7df5f2a96fbc5883290bb68c
+size 4984

run-3/checkpoint-1344/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-4/checkpoint-192/config.json CHANGED Viewed

@@ -27,7 +27,7 @@
   "position_embedding_type": "absolute",
   "problem_type": "single_label_classification",
   "torch_dtype": "float32",
-  "transformers_version": "4.38.2",
   "type_vocab_size": 2,
   "use_cache": true,
   "vocab_size": 30522

   "position_embedding_type": "absolute",
   "problem_type": "single_label_classification",
   "torch_dtype": "float32",
+  "transformers_version": "4.39.1",
   "type_vocab_size": 2,
   "use_cache": true,
   "vocab_size": 30522

run-4/checkpoint-192/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3ed76d0615eeacc08a9a395555f9a13d817f19b9602e4d02539c6d4372101621
 size 17549312

 version https://git-lfs.github.com/spec/v1
+oid sha256:beddae8f631e85aed94420246e7c55fb026c1a0c8796f4c1e8f344dfaa4dfbfd
 size 17549312

run-4/checkpoint-192/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c19a1826c8d0aca72647ad3fdca18d14b502b67f180863a57adc3a64c0a8584d
 size 35122746

 version https://git-lfs.github.com/spec/v1
+oid sha256:9bca37b48a52cc8b1452a086607d87d718c6edd0659a2c5cbc9f1c815e538485
 size 35122746

run-4/checkpoint-192/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5c58c0607d97c986e9d4c30b0aa74b33350e0c7e86978aaaaadda4ebb78f21ad
 size 14054

 version https://git-lfs.github.com/spec/v1
+oid sha256:ee09ea0d216727b799a80771850b95d7d61b646360702c64b2ec889cdc725399
 size 14054

run-4/checkpoint-192/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2a1067aad6aa353293eb8df7988e7c2da561db0b9e5ab179cc37f0386f34f018
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:01c139076401e29154c78e9e5cc74e074010d381a1d1305758d3f37231a3f172
 size 1064

run-4/checkpoint-192/trainer_state.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
-  "best_metric": 0.7896281800391389,
   "best_model_checkpoint": "tiny-bert-sst2-distilled/run-4/checkpoint-192",
-  "epoch": 2.0,
   "eval_steps": 500,
   "global_step": 192,
   "is_hyper_param_search": true,
@@ -10,55 +10,38 @@
   "log_history": [
     {
       "epoch": 1.0,
-      "grad_norm": 1.90240478515625,
-      "learning_rate": 0.0008040462940429662,
-      "loss": 0.4563,
-      "step": 96
-    },
-    {
-      "epoch": 1.0,
-      "eval_accuracy": 0.7446183953033269,
-      "eval_f1": 0.7473378509196515,
-      "eval_loss": 0.415781170129776,
-      "eval_precision": 0.7394636015325671,
-      "eval_recall": 0.7553816046966731,
-      "eval_runtime": 25.3619,
-      "eval_samples_per_second": 40.297,
-      "eval_steps_per_second": 1.262,
-      "step": 96
-    },
-    {
-      "epoch": 2.0,
-      "grad_norm": 0.7548394799232483,
-      "learning_rate": 0.0007147078169270811,
-      "loss": 0.4243,
       "step": 192
     },
     {
-      "epoch": 2.0,
-      "eval_accuracy": 0.7896281800391389,
-      "eval_f1": 0.7969782813975448,
-      "eval_loss": 0.39640527963638306,
-      "eval_precision": 0.7700729927007299,
-      "eval_recall": 0.8258317025440313,
-      "eval_runtime": 25.8877,
-      "eval_samples_per_second": 39.478,
-      "eval_steps_per_second": 1.236,
       "step": 192
     }
   ],
   "logging_steps": 500,
-  "max_steps": 960,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 10,
   "save_steps": 500,
-  "total_flos": 471390394560.0,
-  "train_batch_size": 32,
   "trial_name": null,
   "trial_params": {
-    "alpha": 0.6601531096042508,
-    "learning_rate": 0.0008933847711588513,
-    "num_train_epochs": 10,
-    "temperature": 4
   }
 }

 {
+  "best_metric": 0.525440313111546,
   "best_model_checkpoint": "tiny-bert-sst2-distilled/run-4/checkpoint-192",
+  "epoch": 1.0,
   "eval_steps": 500,
   "global_step": 192,
   "is_hyper_param_search": true,
   "log_history": [
     {
       "epoch": 1.0,
+      "grad_norm": 0.896482527256012,
+      "learning_rate": 2.4429947101690063e-05,
+      "loss": 0.3642,
       "step": 192
     },
     {
+      "epoch": 1.0,
+      "eval_accuracy": 0.525440313111546,
+      "eval_f1": 0.14762741652021089,
+      "eval_loss": 0.33992624282836914,
+      "eval_mcc": 0.1099564814603399,
+      "eval_precision": 0.7241379310344828,
+      "eval_recall": 0.0821917808219178,
+      "eval_runtime": 66.5832,
+      "eval_samples_per_second": 15.349,
+      "eval_steps_per_second": 0.481,
       "step": 192
     }
   ],
   "logging_steps": 500,
+  "max_steps": 1728,
   "num_input_tokens_seen": 0,
+  "num_train_epochs": 9,
   "save_steps": 500,
+  "total_flos": 235695197280.0,
+  "train_batch_size": 16,
   "trial_name": null,
   "trial_params": {
+    "alpha": 0.4423735218743273,
+    "learning_rate": 2.748369048940132e-05,
+    "num_train_epochs": 9,
+    "per_device_train_batch_size": 16,
+    "temperature": 18
   }
 }

run-4/checkpoint-192/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:05a27237c59bc1684c5bc675b662fc3c971fc4d8663bd9fafef0f646ea921a93
-size 4920

 version https://git-lfs.github.com/spec/v1
+oid sha256:c47c874c1c72dc81a89e5bda294da8bfbc1cbebbfcc825e670082b4e15f0e50a
+size 4984

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1984bbced85533f487884fbafe7fdded01dc8a3f7df5f2a96fbc5883290bb68c
 size 4984

 version https://git-lfs.github.com/spec/v1
+oid sha256:c47c874c1c72dc81a89e5bda294da8bfbc1cbebbfcc825e670082b4e15f0e50a
 size 4984