xuancoblab2023 commited on
Commit
def0b1d
·
verified ·
1 Parent(s): 0de5f20

Training in progress, epoch 1

Browse files
Files changed (35) hide show
  1. logs/events.out.tfevents.1710425174.4cb0d5d7fb51.369.0 +3 -0
  2. logs/events.out.tfevents.1710425892.4cb0d5d7fb51.5918.0 +3 -0
  3. logs/events.out.tfevents.1710434095.4cb0d5d7fb51.5918.1 +3 -0
  4. model.safetensors +1 -1
  5. run-0/checkpoint-1782/config.json +34 -0
  6. run-0/checkpoint-1782/model.safetensors +3 -0
  7. run-0/checkpoint-1782/optimizer.pt +3 -0
  8. run-0/checkpoint-1782/rng_state.pth +3 -0
  9. run-0/checkpoint-1782/scheduler.pt +3 -0
  10. run-0/checkpoint-1782/special_tokens_map.json +7 -0
  11. run-0/checkpoint-1782/tokenizer.json +0 -0
  12. run-0/checkpoint-1782/tokenizer_config.json +57 -0
  13. run-0/checkpoint-1782/trainer_state.json +122 -0
  14. run-0/checkpoint-1782/training_args.bin +3 -0
  15. run-0/checkpoint-1782/vocab.txt +0 -0
  16. run-0/checkpoint-2079/config.json +34 -0
  17. run-0/checkpoint-2079/model.safetensors +3 -0
  18. run-0/checkpoint-2079/optimizer.pt +3 -0
  19. run-0/checkpoint-2079/rng_state.pth +3 -0
  20. run-0/checkpoint-2079/scheduler.pt +3 -0
  21. run-0/checkpoint-2079/special_tokens_map.json +7 -0
  22. run-0/checkpoint-2079/tokenizer.json +0 -0
  23. run-0/checkpoint-2079/tokenizer_config.json +57 -0
  24. run-0/checkpoint-2079/trainer_state.json +138 -0
  25. run-0/checkpoint-2079/training_args.bin +3 -0
  26. run-0/checkpoint-2079/vocab.txt +0 -0
  27. run-1/checkpoint-297/model.safetensors +1 -1
  28. run-1/checkpoint-297/optimizer.pt +1 -1
  29. run-1/checkpoint-297/rng_state.pth +1 -1
  30. run-1/checkpoint-297/scheduler.pt +1 -1
  31. run-1/checkpoint-297/tokenizer.json +1 -1
  32. run-1/checkpoint-297/trainer_state.json +19 -61
  33. run-1/checkpoint-297/training_args.bin +2 -2
  34. tokenizer.json +1 -1
  35. training_args.bin +1 -1
logs/events.out.tfevents.1710425174.4cb0d5d7fb51.369.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6774434efae0697cfede9134a6783675bec591bdaf77fc9a7098b8aa51c03f4
3
+ size 4184
logs/events.out.tfevents.1710425892.4cb0d5d7fb51.5918.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a7320cc2f1c05909e739c57b342aa3a7226dd8af56e11e6d4ee9daffbfe8215
3
+ size 8736
logs/events.out.tfevents.1710434095.4cb0d5d7fb51.5918.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:026d7c7b4ff6d5b241f21c4d927c92019c5434ea2c4d35a0321fdafed4d6dee5
3
+ size 5178
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d37c360175c163911096dea965479fbd6880c91653c4ee3ac72c0beb708fd5da
3
  size 17549312
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf78f191f8ec19931f9cd904378746a6b65295ee17e7e52a6a382fd0d4c8a6dd
3
  size 17549312
run-0/checkpoint-1782/config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/bert_uncased_L-2_H-128_A-2",
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 128,
11
+ "id2label": {
12
+ "0": "negative",
13
+ "1": "positive"
14
+ },
15
+ "initializer_range": 0.02,
16
+ "intermediate_size": 512,
17
+ "label2id": {
18
+ "negative": "0",
19
+ "positive": "1"
20
+ },
21
+ "layer_norm_eps": 1e-12,
22
+ "max_position_embeddings": 512,
23
+ "model_type": "bert",
24
+ "num_attention_heads": 2,
25
+ "num_hidden_layers": 2,
26
+ "pad_token_id": 0,
27
+ "position_embedding_type": "absolute",
28
+ "problem_type": "single_label_classification",
29
+ "torch_dtype": "float32",
30
+ "transformers_version": "4.38.2",
31
+ "type_vocab_size": 2,
32
+ "use_cache": true,
33
+ "vocab_size": 30522
34
+ }
run-0/checkpoint-1782/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0bcd66c4c990c188df0c7f38f13626d4e1b07d8ea5c83b02ae9479a6de6c6a72
3
+ size 17549312
run-0/checkpoint-1782/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b04dc1645677358f2f419f5a53d294ca59d04a32e753cea261c45d7d776d9dd2
3
+ size 35122746
run-0/checkpoint-1782/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee696d9318370c8d16ff7888fb5ee35c97fd7bd51ddd72203fc319df161c4dd1
3
+ size 14054
run-0/checkpoint-1782/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c66fa3be451e42a6a44a297a37139cd92903b66779648a5144184672ea1a354a
3
+ size 1064
run-0/checkpoint-1782/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
run-0/checkpoint-1782/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
run-0/checkpoint-1782/tokenizer_config.json ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": true,
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 512,
50
+ "never_split": null,
51
+ "pad_token": "[PAD]",
52
+ "sep_token": "[SEP]",
53
+ "strip_accents": null,
54
+ "tokenize_chinese_chars": true,
55
+ "tokenizer_class": "BertTokenizer",
56
+ "unk_token": "[UNK]"
57
+ }
run-0/checkpoint-1782/trainer_state.json ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.5247524752475248,
3
+ "best_model_checkpoint": "tiny-bert-sst2-distilled/run-0/checkpoint-1782",
4
+ "epoch": 6.0,
5
+ "eval_steps": 500,
6
+ "global_step": 1782,
7
+ "is_hyper_param_search": true,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "grad_norm": 0.8027682304382324,
14
+ "learning_rate": 0.0002796956441517031,
15
+ "loss": 0.2656,
16
+ "step": 297
17
+ },
18
+ {
19
+ "epoch": 1.0,
20
+ "eval_accuracy": 0.5128712871287129,
21
+ "eval_loss": 0.27001720666885376,
22
+ "eval_runtime": 55.7019,
23
+ "eval_samples_per_second": 9.066,
24
+ "eval_steps_per_second": 0.287,
25
+ "step": 297
26
+ },
27
+ {
28
+ "epoch": 2.0,
29
+ "grad_norm": 0.2776716649532318,
30
+ "learning_rate": 0.00023307970345975263,
31
+ "loss": 0.2582,
32
+ "step": 594
33
+ },
34
+ {
35
+ "epoch": 2.0,
36
+ "eval_accuracy": 0.5108910891089109,
37
+ "eval_loss": 0.25422945618629456,
38
+ "eval_runtime": 56.0184,
39
+ "eval_samples_per_second": 9.015,
40
+ "eval_steps_per_second": 0.286,
41
+ "step": 594
42
+ },
43
+ {
44
+ "epoch": 3.0,
45
+ "grad_norm": 0.49428918957710266,
46
+ "learning_rate": 0.0001864637627678021,
47
+ "loss": 0.255,
48
+ "step": 891
49
+ },
50
+ {
51
+ "epoch": 3.0,
52
+ "eval_accuracy": 0.5168316831683168,
53
+ "eval_loss": 0.2529114782810211,
54
+ "eval_runtime": 56.4466,
55
+ "eval_samples_per_second": 8.947,
56
+ "eval_steps_per_second": 0.283,
57
+ "step": 891
58
+ },
59
+ {
60
+ "epoch": 4.0,
61
+ "grad_norm": 0.3203761875629425,
62
+ "learning_rate": 0.00013984782207585156,
63
+ "loss": 0.2523,
64
+ "step": 1188
65
+ },
66
+ {
67
+ "epoch": 4.0,
68
+ "eval_accuracy": 0.5148514851485149,
69
+ "eval_loss": 0.2504001557826996,
70
+ "eval_runtime": 56.4336,
71
+ "eval_samples_per_second": 8.949,
72
+ "eval_steps_per_second": 0.284,
73
+ "step": 1188
74
+ },
75
+ {
76
+ "epoch": 5.0,
77
+ "grad_norm": 0.3831195831298828,
78
+ "learning_rate": 9.323188138390105e-05,
79
+ "loss": 0.2499,
80
+ "step": 1485
81
+ },
82
+ {
83
+ "epoch": 5.0,
84
+ "eval_accuracy": 0.5188118811881188,
85
+ "eval_loss": 0.24836017191410065,
86
+ "eval_runtime": 58.52,
87
+ "eval_samples_per_second": 8.63,
88
+ "eval_steps_per_second": 0.273,
89
+ "step": 1485
90
+ },
91
+ {
92
+ "epoch": 6.0,
93
+ "grad_norm": 0.5231035947799683,
94
+ "learning_rate": 4.661594069195052e-05,
95
+ "loss": 0.2486,
96
+ "step": 1782
97
+ },
98
+ {
99
+ "epoch": 6.0,
100
+ "eval_accuracy": 0.5247524752475248,
101
+ "eval_loss": 0.24778248369693756,
102
+ "eval_runtime": 56.1683,
103
+ "eval_samples_per_second": 8.991,
104
+ "eval_steps_per_second": 0.285,
105
+ "step": 1782
106
+ }
107
+ ],
108
+ "logging_steps": 500,
109
+ "max_steps": 2079,
110
+ "num_input_tokens_seen": 0,
111
+ "num_train_epochs": 7,
112
+ "save_steps": 500,
113
+ "total_flos": 4667059698840.0,
114
+ "train_batch_size": 32,
115
+ "trial_name": null,
116
+ "trial_params": {
117
+ "alpha": 0.3139802761523254,
118
+ "learning_rate": 0.0003263115848436537,
119
+ "num_train_epochs": 7,
120
+ "temperature": 8
121
+ }
122
+ }
run-0/checkpoint-1782/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66890374d28b0a8287d00c772a9c1c12834f9338c43f525c95c7228993d7f197
3
+ size 4984
run-0/checkpoint-1782/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
run-0/checkpoint-2079/config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/bert_uncased_L-2_H-128_A-2",
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 128,
11
+ "id2label": {
12
+ "0": "negative",
13
+ "1": "positive"
14
+ },
15
+ "initializer_range": 0.02,
16
+ "intermediate_size": 512,
17
+ "label2id": {
18
+ "negative": "0",
19
+ "positive": "1"
20
+ },
21
+ "layer_norm_eps": 1e-12,
22
+ "max_position_embeddings": 512,
23
+ "model_type": "bert",
24
+ "num_attention_heads": 2,
25
+ "num_hidden_layers": 2,
26
+ "pad_token_id": 0,
27
+ "position_embedding_type": "absolute",
28
+ "problem_type": "single_label_classification",
29
+ "torch_dtype": "float32",
30
+ "transformers_version": "4.38.2",
31
+ "type_vocab_size": 2,
32
+ "use_cache": true,
33
+ "vocab_size": 30522
34
+ }
run-0/checkpoint-2079/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3098a15679860f575e848c5a7576e11b45e65bf521cd91a6fbe675d4ace61ca9
3
+ size 17549312
run-0/checkpoint-2079/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ccbaa42e566d4f27196035bf7ee60a779e9fede03183ac7d442dfd5d20ef67e
3
+ size 35122746
run-0/checkpoint-2079/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ddf4483b0cc001197871d638833a4a3bbdd0c0a8ea4710051f83b263dbe74c2
3
+ size 14054
run-0/checkpoint-2079/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7fba327764f45428e92d040851d74cba465a132e33a42f5628421e8f28e93207
3
+ size 1064
run-0/checkpoint-2079/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
run-0/checkpoint-2079/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
run-0/checkpoint-2079/tokenizer_config.json ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": true,
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 512,
50
+ "never_split": null,
51
+ "pad_token": "[PAD]",
52
+ "sep_token": "[SEP]",
53
+ "strip_accents": null,
54
+ "tokenize_chinese_chars": true,
55
+ "tokenizer_class": "BertTokenizer",
56
+ "unk_token": "[UNK]"
57
+ }
run-0/checkpoint-2079/trainer_state.json ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.5247524752475248,
3
+ "best_model_checkpoint": "tiny-bert-sst2-distilled/run-0/checkpoint-1782",
4
+ "epoch": 7.0,
5
+ "eval_steps": 500,
6
+ "global_step": 2079,
7
+ "is_hyper_param_search": true,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "grad_norm": 0.8027682304382324,
14
+ "learning_rate": 0.0002796956441517031,
15
+ "loss": 0.2656,
16
+ "step": 297
17
+ },
18
+ {
19
+ "epoch": 1.0,
20
+ "eval_accuracy": 0.5128712871287129,
21
+ "eval_loss": 0.27001720666885376,
22
+ "eval_runtime": 55.7019,
23
+ "eval_samples_per_second": 9.066,
24
+ "eval_steps_per_second": 0.287,
25
+ "step": 297
26
+ },
27
+ {
28
+ "epoch": 2.0,
29
+ "grad_norm": 0.2776716649532318,
30
+ "learning_rate": 0.00023307970345975263,
31
+ "loss": 0.2582,
32
+ "step": 594
33
+ },
34
+ {
35
+ "epoch": 2.0,
36
+ "eval_accuracy": 0.5108910891089109,
37
+ "eval_loss": 0.25422945618629456,
38
+ "eval_runtime": 56.0184,
39
+ "eval_samples_per_second": 9.015,
40
+ "eval_steps_per_second": 0.286,
41
+ "step": 594
42
+ },
43
+ {
44
+ "epoch": 3.0,
45
+ "grad_norm": 0.49428918957710266,
46
+ "learning_rate": 0.0001864637627678021,
47
+ "loss": 0.255,
48
+ "step": 891
49
+ },
50
+ {
51
+ "epoch": 3.0,
52
+ "eval_accuracy": 0.5168316831683168,
53
+ "eval_loss": 0.2529114782810211,
54
+ "eval_runtime": 56.4466,
55
+ "eval_samples_per_second": 8.947,
56
+ "eval_steps_per_second": 0.283,
57
+ "step": 891
58
+ },
59
+ {
60
+ "epoch": 4.0,
61
+ "grad_norm": 0.3203761875629425,
62
+ "learning_rate": 0.00013984782207585156,
63
+ "loss": 0.2523,
64
+ "step": 1188
65
+ },
66
+ {
67
+ "epoch": 4.0,
68
+ "eval_accuracy": 0.5148514851485149,
69
+ "eval_loss": 0.2504001557826996,
70
+ "eval_runtime": 56.4336,
71
+ "eval_samples_per_second": 8.949,
72
+ "eval_steps_per_second": 0.284,
73
+ "step": 1188
74
+ },
75
+ {
76
+ "epoch": 5.0,
77
+ "grad_norm": 0.3831195831298828,
78
+ "learning_rate": 9.323188138390105e-05,
79
+ "loss": 0.2499,
80
+ "step": 1485
81
+ },
82
+ {
83
+ "epoch": 5.0,
84
+ "eval_accuracy": 0.5188118811881188,
85
+ "eval_loss": 0.24836017191410065,
86
+ "eval_runtime": 58.52,
87
+ "eval_samples_per_second": 8.63,
88
+ "eval_steps_per_second": 0.273,
89
+ "step": 1485
90
+ },
91
+ {
92
+ "epoch": 6.0,
93
+ "grad_norm": 0.5231035947799683,
94
+ "learning_rate": 4.661594069195052e-05,
95
+ "loss": 0.2486,
96
+ "step": 1782
97
+ },
98
+ {
99
+ "epoch": 6.0,
100
+ "eval_accuracy": 0.5247524752475248,
101
+ "eval_loss": 0.24778248369693756,
102
+ "eval_runtime": 56.1683,
103
+ "eval_samples_per_second": 8.991,
104
+ "eval_steps_per_second": 0.285,
105
+ "step": 1782
106
+ },
107
+ {
108
+ "epoch": 7.0,
109
+ "grad_norm": 0.4036513566970825,
110
+ "learning_rate": 0.0,
111
+ "loss": 0.2466,
112
+ "step": 2079
113
+ },
114
+ {
115
+ "epoch": 7.0,
116
+ "eval_accuracy": 0.5247524752475248,
117
+ "eval_loss": 0.24736006557941437,
118
+ "eval_runtime": 54.9354,
119
+ "eval_samples_per_second": 9.193,
120
+ "eval_steps_per_second": 0.291,
121
+ "step": 2079
122
+ }
123
+ ],
124
+ "logging_steps": 500,
125
+ "max_steps": 2079,
126
+ "num_input_tokens_seen": 0,
127
+ "num_train_epochs": 7,
128
+ "save_steps": 500,
129
+ "total_flos": 5444902981980.0,
130
+ "train_batch_size": 32,
131
+ "trial_name": null,
132
+ "trial_params": {
133
+ "alpha": 0.3139802761523254,
134
+ "learning_rate": 0.0003263115848436537,
135
+ "num_train_epochs": 7,
136
+ "temperature": 8
137
+ }
138
+ }
run-0/checkpoint-2079/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66890374d28b0a8287d00c772a9c1c12834f9338c43f525c95c7228993d7f197
3
+ size 4984
run-0/checkpoint-2079/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
run-1/checkpoint-297/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ea0112e85d86eb2b488ac7806696395cdc2d93066660543d5a796d9cb413f740
3
  size 17549312
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf78f191f8ec19931f9cd904378746a6b65295ee17e7e52a6a382fd0d4c8a6dd
3
  size 17549312
run-1/checkpoint-297/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:05e6e32e811aa3cd3523492b4f2973fd30aa324f91218a2a2ae62fbe22606b61
3
  size 35122746
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:acd6c9baeefb8f102b3b39c4118c7136ac58f455f69bd550edb68349840eb054
3
  size 35122746
run-1/checkpoint-297/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e1636667e228312c6c014885f930bd4ecaadaec1e01e8acfbfaaae3bd47d2d61
3
  size 14054
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f414017d19e8a66d09e6a16c0bca909eff6c9e5541f54da3f0dba2607378e04d
3
  size 14054
run-1/checkpoint-297/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:886375653aaeba7f27a6b77deb23f8953f50b87b17eec91bd8fec82461326ad2
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:883a13d0ce4b22682af6eb4d99c939f3ce2e855ed8d1ade27c75e0163ba553ff
3
  size 1064
run-1/checkpoint-297/tokenizer.json CHANGED
@@ -2,7 +2,7 @@
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
- "max_length": 31,
6
  "strategy": "LongestFirst",
7
  "stride": 0
8
  },
 
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
+ "max_length": 33,
6
  "strategy": "LongestFirst",
7
  "stride": 0
8
  },
run-1/checkpoint-297/trainer_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "best_metric": 0.7886497064579256,
3
  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-1/checkpoint-297",
4
- "epoch": 3.0,
5
  "eval_steps": 500,
6
  "global_step": 297,
7
  "is_hyper_param_search": true,
@@ -10,75 +10,33 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "grad_norm": 0.8574622273445129,
14
- "learning_rate": 0.0005047460143242413,
15
- "loss": 0.4026,
16
- "step": 99
17
- },
18
- {
19
- "epoch": 1.0,
20
- "eval_accuracy": 0.5684931506849316,
21
- "eval_f1": 0.28985507246376807,
22
- "eval_loss": 0.3736521005630493,
23
- "eval_precision": 0.8181818181818182,
24
- "eval_recall": 0.1761252446183953,
25
- "eval_runtime": 30.2349,
26
- "eval_samples_per_second": 33.802,
27
- "eval_steps_per_second": 1.058,
28
- "step": 99
29
- },
30
- {
31
- "epoch": 2.0,
32
- "grad_norm": 1.298500657081604,
33
- "learning_rate": 0.00025237300716212063,
34
- "loss": 0.3707,
35
- "step": 198
36
- },
37
- {
38
- "epoch": 2.0,
39
- "eval_accuracy": 0.7602739726027398,
40
- "eval_f1": 0.7231638418079097,
41
- "eval_loss": 0.3502894341945648,
42
- "eval_precision": 0.8556149732620321,
43
- "eval_recall": 0.6262230919765166,
44
- "eval_runtime": 30.2651,
45
- "eval_samples_per_second": 33.768,
46
- "eval_steps_per_second": 1.057,
47
- "step": 198
48
- },
49
- {
50
- "epoch": 3.0,
51
- "grad_norm": 1.427898645401001,
52
- "learning_rate": 0.0,
53
- "loss": 0.3525,
54
  "step": 297
55
  },
56
  {
57
- "epoch": 3.0,
58
- "eval_accuracy": 0.7886497064579256,
59
- "eval_f1": 0.7782340862422998,
60
- "eval_loss": 0.3467503488063812,
61
- "eval_precision": 0.8185745140388769,
62
- "eval_recall": 0.7416829745596869,
63
- "eval_runtime": 29.9113,
64
- "eval_samples_per_second": 34.168,
65
- "eval_steps_per_second": 1.07,
66
  "step": 297
67
  }
68
  ],
69
  "logging_steps": 500,
70
- "max_steps": 297,
71
  "num_input_tokens_seen": 0,
72
- "num_train_epochs": 3,
73
  "save_steps": 500,
74
- "total_flos": 707085591840.0,
75
- "train_batch_size": 31,
76
  "trial_name": null,
77
  "trial_params": {
78
- "alpha": 0.571532167699631,
79
- "learning_rate": 0.000757119021486362,
80
- "num_train_epochs": 3,
81
- "per_device_train_batch_size": 31,
82
- "temperature": 19
83
  }
84
  }
 
1
  {
2
+ "best_metric": 0.5841584158415841,
3
  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-1/checkpoint-297",
4
+ "epoch": 1.0,
5
  "eval_steps": 500,
6
  "global_step": 297,
7
  "is_hyper_param_search": true,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "grad_norm": 1.3426679372787476,
14
+ "learning_rate": 0.0003560526201910554,
15
+ "loss": 0.6371,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  "step": 297
17
  },
18
  {
19
+ "epoch": 1.0,
20
+ "eval_accuracy": 0.5841584158415841,
21
+ "eval_loss": 0.637409508228302,
22
+ "eval_runtime": 56.7978,
23
+ "eval_samples_per_second": 8.891,
24
+ "eval_steps_per_second": 0.282,
 
 
 
25
  "step": 297
26
  }
27
  ],
28
  "logging_steps": 500,
29
+ "max_steps": 1188,
30
  "num_input_tokens_seen": 0,
31
+ "num_train_epochs": 4,
32
  "save_steps": 500,
33
+ "total_flos": 777843283140.0,
34
+ "train_batch_size": 32,
35
  "trial_name": null,
36
  "trial_params": {
37
+ "alpha": 0.900582717199523,
38
+ "learning_rate": 0.0004747368269214072,
39
+ "num_train_epochs": 4,
40
+ "temperature": 18
 
41
  }
42
  }
run-1/checkpoint-297/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c9a22962199e8db5aed7f91cc38b31e6e9016a67071067e10647b0bbadfffa51
3
- size 4920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6f636dbab8b5e516bcd85051f9f0732a28727ed1675f1d1f3076c7baf2be402
3
+ size 4984
tokenizer.json CHANGED
@@ -2,7 +2,7 @@
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
- "max_length": 31,
6
  "strategy": "LongestFirst",
7
  "stride": 0
8
  },
 
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
+ "max_length": 33,
6
  "strategy": "LongestFirst",
7
  "stride": 0
8
  },
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:80dc70ad7da501d435234fbefa375c80fa3a6718b03d738d8b00028f0713e645
3
  size 4984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6f636dbab8b5e516bcd85051f9f0732a28727ed1675f1d1f3076c7baf2be402
3
  size 4984