xuancoblab2023 committed on
Commit 850bdc8 · verified · 1 Parent(s): 7628d5b

Training in progress, epoch 1

Browse files
logs/events.out.tfevents.1711293435.73e3a81c01ef.4225.22 CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:d018368c8d23742a0bf21ddbebca05f04def17cdc750fb9362b5ef6b6071a448
- size 6866
+ oid sha256:07e3d5e969a283a35ec44cc78c0b7ab7022225e4c713687aeaf28cc6375a3f6a
+ size 7950
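These event logs, like every large binary in this commit, are tracked with Git LFS, so the diff only rewrites the three-line pointer file: the spec version, the SHA-256 object id, and the size in bytes. As a hedged illustration (the helper below is hypothetical, not part of the repo), a pointer file can be parsed like this:

```python
# Hypothetical helper: parse a Git LFS pointer file into a dict of its fields.
# Works only on the three-line pointer, not on a blob already resolved by `git lfs pull`.
def read_lfs_pointer(path: str) -> dict:
    fields = {}
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields

pointer = read_lfs_pointer("model.safetensors")  # any LFS-tracked file in this commit
print(pointer.get("oid"), pointer.get("size"))
```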
logs/events.out.tfevents.1711294573.73e3a81c01ef.4225.23 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:24d3b6cb52817ca63af63d91ae17ce4d9d3ce27aeb399ab6fd6980e3ea9989f3
+ size 5406
model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:609a49676de3f288302c9ea32b4c05014e9da1acd55839559c592513cd7750e9
+ oid sha256:5232039d97d4db99e7ba6271deeedb0c6527fc47119f8e18fad59ab2cd36f0ae
  size 17549312
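The top-level model.safetensors keeps the same 17,549,312-byte size but points at a new LFS object, i.e. fresh weights from the current epoch. A minimal sketch of inspecting those weights once the object is pulled locally (assumes the safetensors package is installed):

```python
# Sketch: peek at the tensors inside the updated weights file
# (assumes a local checkout with LFS objects pulled and `safetensors` installed).
from safetensors.torch import load_file

state_dict = load_file("model.safetensors")
for name, tensor in list(state_dict.items())[:5]:
    print(name, tuple(tensor.shape), tensor.dtype)
```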
run-22/checkpoint-768/config.json CHANGED
@@ -27,7 +27,7 @@
  "position_embedding_type": "absolute",
  "problem_type": "single_label_classification",
  "torch_dtype": "float32",
- "transformers_version": "4.38.2",
+ "transformers_version": "4.39.1",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
run-22/checkpoint-768/model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:1b57cafbb8e48fb5903d46ff2e9249ecf67abede5242cf13bf2be71f485980b4
+ oid sha256:2660522f3f61ad11613ebd20dc63c24a1df03f2e77a3458f5313438bb64b72a3
  size 17549312
run-22/checkpoint-768/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:bc25cb8b5e9caa5eb64f5a05755f30df44c467da9ded5af1dd782659b5afdce9
+ oid sha256:1f352f7dfe2bed7e58160bf262f617600c06c6a128e81171aa0da47cb47d3888
  size 35122746
run-22/checkpoint-768/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:33d55a7a7e131cd196205e2fe9c7e3f3a3d61ed6d79c468c68d4163c6b46138d
+ oid sha256:1ed72394efd052c27a025cbbb902950bb9336778a32545e8b891d3c9c80c35d9
  size 14054
run-22/checkpoint-768/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:194b2bcad0b65c198819c73bdb4afeb3cb08e5349e435e338a3c60c1c604d53f
+ oid sha256:87d0a7398c4812feab42cc83f5a64621597262a04f434af87b31905dd2d6350a
  size 1064
run-22/checkpoint-768/trainer_state.json CHANGED
@@ -1,7 +1,7 @@
  {
- "best_metric": 0.8395303326810176,
+ "best_metric": 0.8365949119373777,
  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-22/checkpoint-768",
- "epoch": 8.0,
+ "epoch": 4.0,
  "eval_steps": 500,
  "global_step": 768,
  "is_hyper_param_search": true,
@@ -10,169 +10,98 @@
  "log_history": [
  {
  "epoch": 1.0,
- "grad_norm": 2.464465618133545,
- "learning_rate": 0.0001794635836369996,
- "loss": 0.565,
- "step": 96
+ "grad_norm": 4.364559650421143,
+ "learning_rate": 0.0002529924510479742,
+ "loss": 0.5381,
+ "step": 192
  },
  {
  "epoch": 1.0,
- "eval_accuracy": 0.7788649706457925,
- "eval_f1": 0.8078231292517007,
- "eval_loss": 0.4691426157951355,
- "eval_precision": 0.7142857142857143,
- "eval_recall": 0.9295499021526419,
- "eval_runtime": 25.0337,
- "eval_samples_per_second": 40.825,
- "eval_steps_per_second": 1.278,
- "step": 96
- },
- {
- "epoch": 2.0,
- "grad_norm": 4.053184509277344,
- "learning_rate": 0.00015703063568237466,
- "loss": 0.4676,
+ "eval_accuracy": 0.7896281800391389,
+ "eval_f1": 0.8071748878923767,
+ "eval_loss": 0.45070555806159973,
+ "eval_mcc": 0.5890946801852007,
+ "eval_precision": 0.7450331125827815,
+ "eval_recall": 0.8806262230919765,
+ "eval_runtime": 67.3745,
+ "eval_samples_per_second": 15.169,
+ "eval_steps_per_second": 0.475,
  "step": 192
  },
  {
  "epoch": 2.0,
- "eval_accuracy": 0.7984344422700587,
- "eval_f1": 0.8170515097690941,
- "eval_loss": 0.4367915391921997,
- "eval_precision": 0.7479674796747967,
- "eval_recall": 0.9001956947162426,
- "eval_runtime": 25.2941,
- "eval_samples_per_second": 40.405,
- "eval_steps_per_second": 1.265,
- "step": 192
- },
- {
- "epoch": 3.0,
- "grad_norm": 2.9524621963500977,
- "learning_rate": 0.00013459768772774972,
- "loss": 0.4358,
- "step": 288
- },
- {
- "epoch": 3.0,
- "eval_accuracy": 0.8209393346379648,
- "eval_f1": 0.8334849863512284,
- "eval_loss": 0.4263817071914673,
- "eval_precision": 0.7789115646258503,
- "eval_recall": 0.8962818003913894,
- "eval_runtime": 24.9367,
- "eval_samples_per_second": 40.984,
- "eval_steps_per_second": 1.283,
- "step": 288
- },
- {
- "epoch": 4.0,
- "grad_norm": 6.636496067047119,
- "learning_rate": 0.00011216473977312478,
- "loss": 0.4174,
+ "grad_norm": 9.345165252685547,
+ "learning_rate": 0.0001686616340319828,
+ "loss": 0.4554,
  "step": 384
  },
  {
- "epoch": 4.0,
- "eval_accuracy": 0.8160469667318982,
- "eval_f1": 0.8181818181818182,
- "eval_loss": 0.41675594449043274,
- "eval_precision": 0.8087954110898662,
- "eval_recall": 0.8277886497064579,
- "eval_runtime": 24.9785,
- "eval_samples_per_second": 40.915,
- "eval_steps_per_second": 1.281,
+ "epoch": 2.0,
+ "eval_accuracy": 0.8043052837573386,
+ "eval_f1": 0.8127340823970037,
+ "eval_loss": 0.4277065694332123,
+ "eval_mcc": 0.6110916014068072,
+ "eval_precision": 0.7791741472172352,
+ "eval_recall": 0.8493150684931506,
+ "eval_runtime": 67.2822,
+ "eval_samples_per_second": 15.19,
+ "eval_steps_per_second": 0.476,
  "step": 384
  },
  {
- "epoch": 5.0,
- "grad_norm": 2.0662448406219482,
- "learning_rate": 8.97317918184998e-05,
- "loss": 0.4077,
- "step": 480
- },
- {
- "epoch": 5.0,
- "eval_accuracy": 0.8356164383561644,
- "eval_f1": 0.8502673796791443,
- "eval_loss": 0.40537795424461365,
- "eval_precision": 0.7806873977086743,
- "eval_recall": 0.9334637964774951,
- "eval_runtime": 25.8814,
- "eval_samples_per_second": 39.488,
- "eval_steps_per_second": 1.236,
- "step": 480
- },
- {
- "epoch": 6.0,
- "grad_norm": 4.637610912322998,
- "learning_rate": 6.729884386387486e-05,
- "loss": 0.4004,
+ "epoch": 3.0,
+ "grad_norm": 6.078808307647705,
+ "learning_rate": 8.43308170159914e-05,
+ "loss": 0.418,
  "step": 576
  },
  {
- "epoch": 6.0,
- "eval_accuracy": 0.8258317025440313,
- "eval_f1": 0.842756183745583,
- "eval_loss": 0.40995147824287415,
- "eval_precision": 0.7681159420289855,
- "eval_recall": 0.9334637964774951,
- "eval_runtime": 25.8126,
- "eval_samples_per_second": 39.593,
- "eval_steps_per_second": 1.24,
+ "epoch": 3.0,
+ "eval_accuracy": 0.8238747553816047,
+ "eval_f1": 0.8387096774193549,
+ "eval_loss": 0.4266253113746643,
+ "eval_mcc": 0.6589952419988697,
+ "eval_precision": 0.7735537190082644,
+ "eval_recall": 0.9158512720156555,
+ "eval_runtime": 67.2428,
+ "eval_samples_per_second": 15.199,
+ "eval_steps_per_second": 0.476,
  "step": 576
  },
  {
- "epoch": 7.0,
- "grad_norm": 3.4253664016723633,
- "learning_rate": 4.48658959092499e-05,
- "loss": 0.391,
- "step": 672
- },
- {
- "epoch": 7.0,
- "eval_accuracy": 0.8326810176125244,
- "eval_f1": 0.8488063660477454,
- "eval_loss": 0.40309804677963257,
- "eval_precision": 0.7741935483870968,
- "eval_recall": 0.9393346379647749,
- "eval_runtime": 26.0268,
- "eval_samples_per_second": 39.267,
- "eval_steps_per_second": 1.23,
- "step": 672
- },
- {
- "epoch": 8.0,
- "grad_norm": 4.7185893058776855,
- "learning_rate": 2.243294795462495e-05,
- "loss": 0.3869,
+ "epoch": 4.0,
+ "grad_norm": 2.4011917114257812,
+ "learning_rate": 0.0,
+ "loss": 0.3997,
  "step": 768
  },
  {
- "epoch": 8.0,
- "eval_accuracy": 0.8395303326810176,
- "eval_f1": 0.851985559566787,
- "eval_loss": 0.39594146609306335,
- "eval_precision": 0.7906197654941374,
- "eval_recall": 0.923679060665362,
- "eval_runtime": 25.1887,
- "eval_samples_per_second": 40.574,
- "eval_steps_per_second": 1.27,
+ "epoch": 4.0,
+ "eval_accuracy": 0.8365949119373777,
+ "eval_f1": 0.8477666362807658,
+ "eval_loss": 0.41222792863845825,
+ "eval_mcc": 0.680559953194408,
+ "eval_precision": 0.7935153583617748,
+ "eval_recall": 0.9099804305283757,
+ "eval_runtime": 67.3829,
+ "eval_samples_per_second": 15.167,
+ "eval_steps_per_second": 0.475,
  "step": 768
  }
  ],
  "logging_steps": 500,
- "max_steps": 864,
+ "max_steps": 768,
  "num_input_tokens_seen": 0,
- "num_train_epochs": 9,
+ "num_train_epochs": 4,
  "save_steps": 500,
- "total_flos": 1885561578240.0,
- "train_batch_size": 32,
+ "total_flos": 942780789120.0,
+ "train_batch_size": 16,
  "trial_name": null,
  "trial_params": {
- "alpha": 0.8956145079239534,
- "learning_rate": 0.00020189653159162458,
- "num_train_epochs": 9,
- "temperature": 8
+ "alpha": 0.885980659186716,
+ "learning_rate": 0.0003373232680639656,
+ "num_train_epochs": 4,
+ "per_device_train_batch_size": 16,
+ "temperature": 44
  }
  }
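The rewritten trainer_state.json reflects a different hyperparameter-search trial: four epochs of 192 steps at batch size 16 replace eight epochs of 96 steps at batch size 32, the learning rate and distillation settings (alpha, temperature) change, and an eval_mcc metric is now logged alongside the others. A small sketch of summarizing such a file, assuming a local checkout:

```python
# Sketch: summarize a checkpoint's trainer_state.json (local path assumed).
import json

with open("run-22/checkpoint-768/trainer_state.json", encoding="utf-8") as f:
    state = json.load(f)

print("best metric:", state["best_metric"])
for entry in state["log_history"]:
    if "eval_accuracy" in entry:
        print(f"epoch {entry['epoch']}: acc={entry['eval_accuracy']:.4f}, mcc={entry['eval_mcc']:.4f}")
```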
run-22/checkpoint-768/training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a59435df03122e4e47861365c1b2756f99313581e8b38e82a031f16aa354557d
- size 4920
+ oid sha256:8c24fdecf1c90c419bf77f10ab570330e1370392bd9d70f58780b22b36367b96
+ size 4984
run-23/checkpoint-192/config.json ADDED
@@ -0,0 +1,34 @@
+ {
+ "_name_or_path": "google/bert_uncased_L-2_H-128_A-2",
+ "architectures": [
+ "BertForSequenceClassification"
+ ],
+ "attention_probs_dropout_prob": 0.1,
+ "classifier_dropout": null,
+ "hidden_act": "gelu",
+ "hidden_dropout_prob": 0.1,
+ "hidden_size": 128,
+ "id2label": {
+ "0": "negative",
+ "1": "positive"
+ },
+ "initializer_range": 0.02,
+ "intermediate_size": 512,
+ "label2id": {
+ "negative": "0",
+ "positive": "1"
+ },
+ "layer_norm_eps": 1e-12,
+ "max_position_embeddings": 512,
+ "model_type": "bert",
+ "num_attention_heads": 2,
+ "num_hidden_layers": 2,
+ "pad_token_id": 0,
+ "position_embedding_type": "absolute",
+ "problem_type": "single_label_classification",
+ "torch_dtype": "float32",
+ "transformers_version": "4.39.1",
+ "type_vocab_size": 2,
+ "use_cache": true,
+ "vocab_size": 30522
+ }
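This added config describes the new checkpoint itself: google/bert_uncased_L-2_H-128_A-2 (2 layers, hidden size 128, 2 attention heads) fine-tuned as a single-label classifier with negative/positive labels. A hedged sketch of running the checkpoint on one sentence, assuming the directory below is checked out locally with its LFS objects (the example sentence is arbitrary):

```python
# Sketch: run the newly added checkpoint on one (arbitrary) sentence.
# Assumes a local checkout of run-23/checkpoint-192 with its LFS objects pulled.
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

ckpt = "run-23/checkpoint-192"
tokenizer = AutoTokenizer.from_pretrained(ckpt)
model = AutoModelForSequenceClassification.from_pretrained(ckpt)

inputs = tokenizer("a gripping, beautifully shot film", return_tensors="pt")
with torch.no_grad():
    logits = model(**inputs).logits
pred = logits.argmax(dim=-1).item()
print(model.config.id2label[pred])  # "negative" or "positive", per the config above
```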
run-23/checkpoint-192/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5232039d97d4db99e7ba6271deeedb0c6527fc47119f8e18fad59ab2cd36f0ae
+ size 17549312
run-23/checkpoint-192/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c157f8b1e6c757c6a1627b714248b67fb274338e597948892131f94097ac049b
+ size 35122746
run-23/checkpoint-192/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ee09ea0d216727b799a80771850b95d7d61b646360702c64b2ec889cdc725399
+ size 14054
run-23/checkpoint-192/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4b516bf36e657d8bce07aedbe1849132b97b28a9794479afcb0e8a12f37e729a
+ size 1064
run-23/checkpoint-192/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
+ {
+ "cls_token": "[CLS]",
+ "mask_token": "[MASK]",
+ "pad_token": "[PAD]",
+ "sep_token": "[SEP]",
+ "unk_token": "[UNK]"
+ }
run-23/checkpoint-192/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
run-23/checkpoint-192/tokenizer_config.json ADDED
@@ -0,0 +1,57 @@
+ {
+ "added_tokens_decoder": {
+ "0": {
+ "content": "[PAD]",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "100": {
+ "content": "[UNK]",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "101": {
+ "content": "[CLS]",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "102": {
+ "content": "[SEP]",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "103": {
+ "content": "[MASK]",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "clean_up_tokenization_spaces": true,
+ "cls_token": "[CLS]",
+ "do_basic_tokenize": true,
+ "do_lower_case": true,
+ "mask_token": "[MASK]",
+ "model_max_length": 512,
+ "never_split": null,
+ "pad_token": "[PAD]",
+ "sep_token": "[SEP]",
+ "strip_accents": null,
+ "tokenize_chinese_chars": true,
+ "tokenizer_class": "BertTokenizer",
+ "unk_token": "[UNK]"
+ }
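The tokenizer files added alongside the checkpoint describe a standard uncased WordPiece BertTokenizer: lowercasing enabled, model_max_length 512, and the usual five special tokens. A quick sketch of loading it on its own (local path assumed):

```python
# Sketch: load just the tokenizer from the added files (local path assumed).
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("run-23/checkpoint-192")
print(type(tokenizer).__name__, tokenizer.model_max_length)  # BertTokenizer(Fast), 512
print(tokenizer.tokenize("Training in progress"))            # lowercased WordPiece pieces
```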
run-23/checkpoint-192/trainer_state.json ADDED
@@ -0,0 +1,47 @@
+ {
+ "best_metric": 0.7935420743639922,
+ "best_model_checkpoint": "tiny-bert-sst2-distilled/run-23/checkpoint-192",
+ "epoch": 1.0,
+ "eval_steps": 500,
+ "global_step": 192,
+ "is_hyper_param_search": true,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 1.0,
+ "grad_norm": 5.60397481918335,
+ "learning_rate": 0.00021040106446320055,
+ "loss": 0.5452,
+ "step": 192
+ },
+ {
+ "epoch": 1.0,
+ "eval_accuracy": 0.7935420743639922,
+ "eval_f1": 0.809049773755656,
+ "eval_loss": 0.44930505752563477,
+ "eval_mcc": 0.5949851791037805,
+ "eval_precision": 0.7525252525252525,
+ "eval_recall": 0.8747553816046967,
+ "eval_runtime": 66.8274,
+ "eval_samples_per_second": 15.293,
+ "eval_steps_per_second": 0.479,
+ "step": 192
+ }
+ ],
+ "logging_steps": 500,
+ "max_steps": 768,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 4,
+ "save_steps": 500,
+ "total_flos": 235695197280.0,
+ "train_batch_size": 16,
+ "trial_name": null,
+ "trial_params": {
+ "alpha": 0.9076998236532954,
+ "learning_rate": 0.0002805347526176007,
+ "num_train_epochs": 4,
+ "per_device_train_batch_size": 16,
+ "temperature": 32
+ }
+ }
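The trial's single logged evaluation reports accuracy, F1, loss, MCC, precision, and recall; those eval_* keys are simply whatever the Trainer's compute_metrics callback returns, with an eval_ prefix added. The actual callback is not part of this commit, so the following is only a plausible sketch of how such a metric set is commonly produced with scikit-learn:

```python
# Plausible sketch only (the repo's actual callback is not in this commit):
# a compute_metrics function that would yield the eval_* keys seen above.
import numpy as np
from sklearn.metrics import (accuracy_score, f1_score, matthews_corrcoef,
                             precision_score, recall_score)

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    preds = np.argmax(logits, axis=-1)
    return {
        "accuracy": accuracy_score(labels, preds),
        "f1": f1_score(labels, preds),
        "mcc": matthews_corrcoef(labels, preds),
        "precision": precision_score(labels, preds),
        "recall": recall_score(labels, preds),
    }
```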
run-23/checkpoint-192/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6b1388c03a76bdbf0955d3ee662bb7d2ee1c5be7b9ed87ead1481458332e1acc
+ size 4984
run-23/checkpoint-192/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:3f62d580257060cdaf08ca2f6bebff447fe36fd6a4ed09c360f3a936d71824ec
- size 4920
+ oid sha256:6b1388c03a76bdbf0955d3ee662bb7d2ee1c5be7b9ed87ead1481458332e1acc
+ size 4984
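training_args.bin is a pickled TrainingArguments object, which is why its size shifts (4,920 to 4,984 bytes) as the trial settings and library version change. A hedged sketch of inspecting it; note that torch.load unpickles arbitrary objects, so only do this on trusted files, and recent torch versions need weights_only=False for non-tensor pickles:

```python
# Sketch: inspect the pickled TrainingArguments (trusted file assumed;
# torch.load unpickles arbitrary objects, and newer torch needs weights_only=False).
import torch

args = torch.load("training_args.bin", weights_only=False)
print(args.num_train_epochs, args.per_device_train_batch_size, args.learning_rate)
```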