Training in progress, epoch 13
This view is limited to 50 files because it contains too many changes. See raw diff.
- logs/events.out.tfevents.1713595624.1376c752d37a.9760.67 +3 -0
- logs/events.out.tfevents.1713595759.1376c752d37a.9760.68 +3 -0
- logs/events.out.tfevents.1713595776.1376c752d37a.9760.69 +3 -0
- logs/events.out.tfevents.1713596116.1376c752d37a.9760.70 +3 -0
- logs/events.out.tfevents.1713596133.1376c752d37a.9760.71 +3 -0
- logs/events.out.tfevents.1713596166.1376c752d37a.9760.72 +3 -0
- logs/events.out.tfevents.1713596184.1376c752d37a.9760.73 +3 -0
- logs/events.out.tfevents.1713596200.1376c752d37a.9760.74 +3 -0
- model.safetensors +1 -1
- run-14/checkpoint-1284/config.json +34 -0
- run-14/checkpoint-1284/model.safetensors +3 -0
- run-14/checkpoint-1284/optimizer.pt +3 -0
- run-14/checkpoint-1284/rng_state.pth +3 -0
- run-14/checkpoint-1284/scheduler.pt +3 -0
- run-14/checkpoint-1284/special_tokens_map.json +7 -0
- run-14/checkpoint-1284/tokenizer.json +0 -0
- run-14/checkpoint-1284/tokenizer_config.json +57 -0
- run-14/checkpoint-1284/trainer_state.json +146 -0
- run-14/checkpoint-1284/training_args.bin +3 -0
- run-14/checkpoint-1284/vocab.txt +0 -0
- run-14/checkpoint-1498/config.json +34 -0
- run-14/checkpoint-1498/model.safetensors +3 -0
- run-14/checkpoint-1498/optimizer.pt +3 -0
- run-14/checkpoint-1498/rng_state.pth +3 -0
- run-14/checkpoint-1498/scheduler.pt +3 -0
- run-14/checkpoint-1498/special_tokens_map.json +7 -0
- run-14/checkpoint-1498/tokenizer.json +0 -0
- run-14/checkpoint-1498/tokenizer_config.json +57 -0
- run-14/checkpoint-1498/trainer_state.json +166 -0
- run-14/checkpoint-1498/training_args.bin +3 -0
- run-14/checkpoint-1498/vocab.txt +0 -0
- run-16/checkpoint-3210/config.json +34 -0
- run-16/checkpoint-3210/model.safetensors +3 -0
- run-16/checkpoint-3210/optimizer.pt +3 -0
- run-16/checkpoint-3210/rng_state.pth +3 -0
- run-16/checkpoint-3210/scheduler.pt +3 -0
- run-16/checkpoint-3210/special_tokens_map.json +7 -0
- run-16/checkpoint-3210/tokenizer.json +0 -0
- run-16/checkpoint-3210/tokenizer_config.json +57 -0
- run-16/checkpoint-3210/trainer_state.json +326 -0
- run-16/checkpoint-3210/training_args.bin +3 -0
- run-16/checkpoint-3210/vocab.txt +0 -0
- run-16/checkpoint-4280/config.json +34 -0
- run-16/checkpoint-4280/model.safetensors +3 -0
- run-16/checkpoint-4280/optimizer.pt +3 -0
- run-16/checkpoint-4280/rng_state.pth +3 -0
- run-16/checkpoint-4280/scheduler.pt +3 -0
- run-16/checkpoint-4280/special_tokens_map.json +7 -0
- run-16/checkpoint-4280/tokenizer.json +0 -0
- run-16/checkpoint-4280/tokenizer_config.json +57 -0
logs/events.out.tfevents.1713595624.1376c752d37a.9760.67
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9471d8100c934e0561d83237fa24f2b779a468ff1f2aac19db1bcbd7e230e2c0
+size 10593
logs/events.out.tfevents.1713595759.1376c752d37a.9760.68
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:05bdb97b0471e55adac8159b5240ce3fc5e25c07bde3cafd16545bc83139e207
+size 5482
logs/events.out.tfevents.1713595776.1376c752d37a.9760.69
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7e1552d9d326b18eceeb0340b41c83ba0d8e80a6c862ec6dac857d17cda90f70
+size 19707
logs/events.out.tfevents.1713596116.1376c752d37a.9760.70
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f1ad9b83d4ff7cc1ee9b9c3f8de2ca558e9c1328e977cab6a4b31263fb293ab9
+size 5484
logs/events.out.tfevents.1713596133.1376c752d37a.9760.71
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:89f191f6e03081cbd8cab8ae26b3bbe1477eda3d195df54b0a68be900ced4df2
+size 6214
logs/events.out.tfevents.1713596166.1376c752d37a.9760.72
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0a0817846a218d061e4e148b26c97cb67f56a7d0c8b704847673b39151a6b249
+size 5483
logs/events.out.tfevents.1713596184.1376c752d37a.9760.73
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e4a559d2071fc197766043391895be999e5ac349acfdc2c3cb33019bf4262965
+size 5484
logs/events.out.tfevents.1713596200.1376c752d37a.9760.74
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:39710bfa127476bb3d17efd14c8e253ab645f3f7a9560e5e03583f6880a1cc76
+size 14598
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:b28623363f40c4182e06f4f9bb646edf4f4ec476dfce17dca95ebcd5c5daa494
 size 17549312
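
Note: the three-line files in this commit (logs, safetensors, optimizer/scheduler/RNG states, training_args.bin) are Git LFS pointer files, not the binaries themselves; only the version line, the sha256 oid, and the byte size live in Git. A minimal sketch for reading such a pointer (the filename below is one of the paths from this diff; the helper itself is illustrative, not part of the repository):

```python
# Minimal sketch: parse a Git LFS pointer file into a dict of its fields.
# Assumes the standard three-line "version / oid / size" layout shown above.
def parse_lfs_pointer(path):
    fields = {}
    with open(path, "r", encoding="utf-8") as fh:
        for line in fh:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields

# Example (path from this commit):
# parse_lfs_pointer("logs/events.out.tfevents.1713595624.1376c752d37a.9760.67")
# -> {"version": "https://git-lfs.github.com/spec/v1",
#     "oid": "sha256:9471d8100c934e0561d83237fa24f2b779a468ff1f2aac19db1bcbd7e230e2c0",
#     "size": "10593"}
```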
run-14/checkpoint-1284/config.json
ADDED
@@ -0,0 +1,34 @@
+{
+  "_name_or_path": "google/bert_uncased_L-2_H-128_A-2",
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 128,
+  "id2label": {
+    "0": "negative",
+    "1": "positive"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 512,
+  "label2id": {
+    "negative": "0",
+    "positive": "1"
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 2,
+  "num_hidden_layers": 2,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "torch_dtype": "float32",
+  "transformers_version": "4.40.0",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}
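
The config above describes a 2-layer, 128-hidden BERT sequence classifier with negative/positive labels, and the checkpoint directory also carries the tokenizer files, so it can be loaded directly with transformers. A small sketch, assuming a local clone of the repository (the directory name comes from this diff; the input sentence is just an illustrative example):

```python
# Sketch: load one of the hyperparameter-search checkpoints from this commit for inference.
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch

ckpt = "run-14/checkpoint-1284"  # contains config.json, model.safetensors and tokenizer files
tokenizer = AutoTokenizer.from_pretrained(ckpt)
model = AutoModelForSequenceClassification.from_pretrained(ckpt)
model.eval()

inputs = tokenizer("a charming and often affecting journey", return_tensors="pt")
with torch.no_grad():
    logits = model(**inputs).logits
pred = logits.argmax(dim=-1).item()
print(model.config.id2label[pred])  # "negative" or "positive", per the config above
```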
run-14/checkpoint-1284/model.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:df25dea33c789fa3c6139fba7e8e839a29117c8b1ddbe95b1d5355d8fad7f8b8
+size 17549312
run-14/checkpoint-1284/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:df912a86ceba8ad0a5777efa4d0f13c5d6b18b4dd64df201aeecfac4db6d9a5a
+size 35123898
run-14/checkpoint-1284/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:faa74fb48b8739c352726086ee20186281632dbf682ba20839e3522db3e3b091
+size 14308
run-14/checkpoint-1284/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:434949fd6ef2b41ba226cf137e81eeb4c37214948ccdebb368a844ff3b1b16a6
+size 1064
run-14/checkpoint-1284/special_tokens_map.json
ADDED
@@ -0,0 +1,7 @@
+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}
run-14/checkpoint-1284/tokenizer.json
ADDED
The diff for this file is too large to render. See raw diff.
run-14/checkpoint-1284/tokenizer_config.json
ADDED
@@ -0,0 +1,57 @@
+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}
run-14/checkpoint-1284/trainer_state.json
ADDED
@@ -0,0 +1,146 @@
+{
+  "best_metric": 0.7439953134153485,
+  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-14/checkpoint-1284",
+  "epoch": 6.0,
+  "eval_steps": 500,
+  "global_step": 1284,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "grad_norm": 4.230797290802002,
+      "learning_rate": 0.0004542691429405582,
+      "loss": 0.5244,
+      "step": 214
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.7065026362038664,
+      "eval_f1": 0.3634053367217281,
+      "eval_loss": 0.49707961082458496,
+      "eval_mcc": 0.26187368637682734,
+      "eval_precision": 0.6559633027522935,
+      "eval_recall": 0.2513181019332162,
+      "eval_runtime": 3.1642,
+      "eval_samples_per_second": 539.474,
+      "eval_steps_per_second": 17.066,
+      "step": 214
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 1.7037988901138306,
+      "learning_rate": 0.0004303602406805288,
+      "loss": 0.4856,
+      "step": 428
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 0.7047451669595782,
+      "eval_f1": 0.5019762845849802,
+      "eval_loss": 0.4853743612766266,
+      "eval_mcc": 0.3014395863411175,
+      "eval_precision": 0.5733634311512416,
+      "eval_recall": 0.44639718804920914,
+      "eval_runtime": 3.2868,
+      "eval_samples_per_second": 519.356,
+      "eval_steps_per_second": 16.43,
+      "step": 428
+    },
+    {
+      "epoch": 3.0,
+      "grad_norm": 1.7830451726913452,
+      "learning_rate": 0.0004064513384204994,
+      "loss": 0.4758,
+      "step": 642
+    },
+    {
+      "epoch": 3.0,
+      "eval_accuracy": 0.7018160515524312,
+      "eval_f1": 0.25909752547307136,
+      "eval_loss": 0.4858837425708771,
+      "eval_mcc": 0.24331468344161114,
+      "eval_precision": 0.7542372881355932,
+      "eval_recall": 0.15641476274165203,
+      "eval_runtime": 3.1648,
+      "eval_samples_per_second": 539.369,
+      "eval_steps_per_second": 17.063,
+      "step": 642
+    },
+    {
+      "epoch": 4.0,
+      "grad_norm": 1.6830062866210938,
+      "learning_rate": 0.0003825424361604701,
+      "loss": 0.4724,
+      "step": 856
+    },
+    {
+      "epoch": 4.0,
+      "eval_accuracy": 0.7275922671353251,
+      "eval_f1": 0.4015444015444016,
+      "eval_loss": 0.47459593415260315,
+      "eval_mcc": 0.32925006262083517,
+      "eval_precision": 0.75,
+      "eval_recall": 0.2741652021089631,
+      "eval_runtime": 3.7803,
+      "eval_samples_per_second": 451.553,
+      "eval_steps_per_second": 14.285,
+      "step": 856
+    },
+    {
+      "epoch": 5.0,
+      "grad_norm": 4.760202407836914,
+      "learning_rate": 0.0003586335339004407,
+      "loss": 0.4614,
+      "step": 1070
+    },
+    {
+      "epoch": 5.0,
+      "eval_accuracy": 0.7340363210310487,
+      "eval_f1": 0.4733178654292344,
+      "eval_loss": 0.4721404016017914,
+      "eval_mcc": 0.35044282313920905,
+      "eval_precision": 0.6962457337883959,
+      "eval_recall": 0.3585237258347979,
+      "eval_runtime": 3.1258,
+      "eval_samples_per_second": 546.093,
+      "eval_steps_per_second": 17.275,
+      "step": 1070
+    },
+    {
+      "epoch": 6.0,
+      "grad_norm": 2.5421228408813477,
+      "learning_rate": 0.0003347246316404113,
+      "loss": 0.4617,
+      "step": 1284
+    },
+    {
+      "epoch": 6.0,
+      "eval_accuracy": 0.7439953134153485,
+      "eval_f1": 0.5346112886048987,
+      "eval_loss": 0.4643152058124542,
+      "eval_mcc": 0.38505007336259955,
+      "eval_precision": 0.6783783783783783,
+      "eval_recall": 0.44112478031634444,
+      "eval_runtime": 3.278,
+      "eval_samples_per_second": 520.745,
+      "eval_steps_per_second": 16.473,
+      "step": 1284
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 4280,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 20,
+  "save_steps": 500,
+  "total_flos": 3148653986640.0,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": {
+    "alpha": 0.8544594605661776,
+    "learning_rate": 0.0004781780452005876,
+    "num_train_epochs": 20,
+    "temperature": 15
+  }
+}
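
The repository name in best_model_checkpoint ("tiny-bert-sst2-distilled") and the alpha/temperature entries in trial_params suggest these trials tune a knowledge-distillation objective. The actual training code is not part of this diff, so the following is only an assumed, common formulation of such a loss using those two parameter names:

```python
# Assumed distillation objective implied by the alpha/temperature trial params above.
# NOT taken from this repository's training code; default values below are the
# trial_params of run-14 shown in this trainer_state.json.
import torch.nn.functional as F

def distillation_loss(student_logits, teacher_logits, labels,
                      alpha=0.8544594605661776, temperature=15.0):
    # Hard-label term: ordinary cross-entropy against the gold SST-2 labels.
    ce = F.cross_entropy(student_logits, labels)
    # Soft-label term: KL divergence between temperature-scaled distributions,
    # scaled by T^2 to keep gradient magnitudes comparable.
    kd = F.kl_div(
        F.log_softmax(student_logits / temperature, dim=-1),
        F.softmax(teacher_logits / temperature, dim=-1),
        reduction="batchmean",
    ) * (temperature ** 2)
    return alpha * ce + (1.0 - alpha) * kd
```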
run-14/checkpoint-1284/training_args.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d21d17ecb931e756a0d7102c83f58b746c9a153fe99764512b52a17889afc5fc
+size 5048
run-14/checkpoint-1284/vocab.txt
ADDED
The diff for this file is too large to render. See raw diff.
run-14/checkpoint-1498/config.json
ADDED
@@ -0,0 +1,34 @@
+{
+  "_name_or_path": "google/bert_uncased_L-2_H-128_A-2",
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 128,
+  "id2label": {
+    "0": "negative",
+    "1": "positive"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 512,
+  "label2id": {
+    "negative": "0",
+    "positive": "1"
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 2,
+  "num_hidden_layers": 2,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "torch_dtype": "float32",
+  "transformers_version": "4.40.0",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}
run-14/checkpoint-1498/model.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c5c83ba341df9345237126c6df1e0016197aa480321ad5838dd4e9e38979a787
+size 17549312
run-14/checkpoint-1498/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ed69720deef187c386bf14b782480574cbf526b0ba24f14e1cb6c4f56b260680
+size 35123898
run-14/checkpoint-1498/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8ec7baa77763623ab6f5c0a3afb24b20f15950a2841a41419e9c31b1269a80a7
+size 14308
run-14/checkpoint-1498/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fe765b62f628ab006f73f78d810024e12e2ce029306993c0a9c99c8d0b3ff67f
+size 1064
run-14/checkpoint-1498/special_tokens_map.json
ADDED
@@ -0,0 +1,7 @@
+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}
run-14/checkpoint-1498/tokenizer.json
ADDED
The diff for this file is too large to render. See raw diff.
run-14/checkpoint-1498/tokenizer_config.json
ADDED
@@ -0,0 +1,57 @@
+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}
run-14/checkpoint-1498/trainer_state.json
ADDED
@@ -0,0 +1,166 @@
+{
+  "best_metric": 0.7439953134153485,
+  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-14/checkpoint-1284",
+  "epoch": 7.0,
+  "eval_steps": 500,
+  "global_step": 1498,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "grad_norm": 4.230797290802002,
+      "learning_rate": 0.0004542691429405582,
+      "loss": 0.5244,
+      "step": 214
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.7065026362038664,
+      "eval_f1": 0.3634053367217281,
+      "eval_loss": 0.49707961082458496,
+      "eval_mcc": 0.26187368637682734,
+      "eval_precision": 0.6559633027522935,
+      "eval_recall": 0.2513181019332162,
+      "eval_runtime": 3.1642,
+      "eval_samples_per_second": 539.474,
+      "eval_steps_per_second": 17.066,
+      "step": 214
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 1.7037988901138306,
+      "learning_rate": 0.0004303602406805288,
+      "loss": 0.4856,
+      "step": 428
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 0.7047451669595782,
+      "eval_f1": 0.5019762845849802,
+      "eval_loss": 0.4853743612766266,
+      "eval_mcc": 0.3014395863411175,
+      "eval_precision": 0.5733634311512416,
+      "eval_recall": 0.44639718804920914,
+      "eval_runtime": 3.2868,
+      "eval_samples_per_second": 519.356,
+      "eval_steps_per_second": 16.43,
+      "step": 428
+    },
+    {
+      "epoch": 3.0,
+      "grad_norm": 1.7830451726913452,
+      "learning_rate": 0.0004064513384204994,
+      "loss": 0.4758,
+      "step": 642
+    },
+    {
+      "epoch": 3.0,
+      "eval_accuracy": 0.7018160515524312,
+      "eval_f1": 0.25909752547307136,
+      "eval_loss": 0.4858837425708771,
+      "eval_mcc": 0.24331468344161114,
+      "eval_precision": 0.7542372881355932,
+      "eval_recall": 0.15641476274165203,
+      "eval_runtime": 3.1648,
+      "eval_samples_per_second": 539.369,
+      "eval_steps_per_second": 17.063,
+      "step": 642
+    },
+    {
+      "epoch": 4.0,
+      "grad_norm": 1.6830062866210938,
+      "learning_rate": 0.0003825424361604701,
+      "loss": 0.4724,
+      "step": 856
+    },
+    {
+      "epoch": 4.0,
+      "eval_accuracy": 0.7275922671353251,
+      "eval_f1": 0.4015444015444016,
+      "eval_loss": 0.47459593415260315,
+      "eval_mcc": 0.32925006262083517,
+      "eval_precision": 0.75,
+      "eval_recall": 0.2741652021089631,
+      "eval_runtime": 3.7803,
+      "eval_samples_per_second": 451.553,
+      "eval_steps_per_second": 14.285,
+      "step": 856
+    },
+    {
+      "epoch": 5.0,
+      "grad_norm": 4.760202407836914,
+      "learning_rate": 0.0003586335339004407,
+      "loss": 0.4614,
+      "step": 1070
+    },
+    {
+      "epoch": 5.0,
+      "eval_accuracy": 0.7340363210310487,
+      "eval_f1": 0.4733178654292344,
+      "eval_loss": 0.4721404016017914,
+      "eval_mcc": 0.35044282313920905,
+      "eval_precision": 0.6962457337883959,
+      "eval_recall": 0.3585237258347979,
+      "eval_runtime": 3.1258,
+      "eval_samples_per_second": 546.093,
+      "eval_steps_per_second": 17.275,
+      "step": 1070
+    },
+    {
+      "epoch": 6.0,
+      "grad_norm": 2.5421228408813477,
+      "learning_rate": 0.0003347246316404113,
+      "loss": 0.4617,
+      "step": 1284
+    },
+    {
+      "epoch": 6.0,
+      "eval_accuracy": 0.7439953134153485,
+      "eval_f1": 0.5346112886048987,
+      "eval_loss": 0.4643152058124542,
+      "eval_mcc": 0.38505007336259955,
+      "eval_precision": 0.6783783783783783,
+      "eval_recall": 0.44112478031634444,
+      "eval_runtime": 3.278,
+      "eval_samples_per_second": 520.745,
+      "eval_steps_per_second": 16.473,
+      "step": 1284
+    },
+    {
+      "epoch": 7.0,
+      "grad_norm": 3.2574965953826904,
+      "learning_rate": 0.0003108157293803819,
+      "loss": 0.4555,
+      "step": 1498
+    },
+    {
+      "epoch": 7.0,
+      "eval_accuracy": 0.7416520210896309,
+      "eval_f1": 0.4562268803945746,
+      "eval_loss": 0.46802204847335815,
+      "eval_mcc": 0.3717088012670463,
+      "eval_precision": 0.7644628099173554,
+      "eval_recall": 0.3251318101933216,
+      "eval_runtime": 3.1628,
+      "eval_samples_per_second": 539.712,
+      "eval_steps_per_second": 17.073,
+      "step": 1498
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 4280,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 20,
+  "save_steps": 500,
+  "total_flos": 3673429651080.0,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": {
+    "alpha": 0.8544594605661776,
+    "learning_rate": 0.0004781780452005876,
+    "num_train_epochs": 20,
+    "temperature": 15
+  }
+}
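
Both run-14 and run-16 checkpoints set "is_hyper_param_search": true and record a trial_params block with alpha, learning_rate, num_train_epochs and temperature, i.e. they were produced by a Trainer hyperparameter search over those four values. The search space, backend and objective actually used are not shown in this diff; the sketch below is only one plausible way such a search is launched, with an assumed Optuna space and an already-constructed `trainer`:

```python
# Sketch of a Trainer hyperparameter search matching the trial_params seen above.
# Only the parameter names come from this diff; ranges, backend and objective are assumptions.
# Note: for custom params such as alpha/temperature to have any effect, the Trainer
# (or its loss) must read them, e.g. a distillation Trainer subclass.
def optuna_hp_space(trial):
    return {
        "alpha": trial.suggest_float("alpha", 0.5, 1.0),
        "learning_rate": trial.suggest_float("learning_rate", 1e-5, 1e-3, log=True),
        "num_train_epochs": trial.suggest_int("num_train_epochs", 5, 20),
        "temperature": trial.suggest_int("temperature", 1, 50),
    }

# best_trial = trainer.hyperparameter_search(
#     direction="maximize",   # e.g. maximize eval_accuracy
#     backend="optuna",
#     hp_space=optuna_hp_space,
#     n_trials=20,
# )
```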
run-14/checkpoint-1498/training_args.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d21d17ecb931e756a0d7102c83f58b746c9a153fe99764512b52a17889afc5fc
+size 5048
run-14/checkpoint-1498/vocab.txt
ADDED
The diff for this file is too large to render. See raw diff.
run-16/checkpoint-3210/config.json
ADDED
@@ -0,0 +1,34 @@
+{
+  "_name_or_path": "google/bert_uncased_L-2_H-128_A-2",
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 128,
+  "id2label": {
+    "0": "negative",
+    "1": "positive"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 512,
+  "label2id": {
+    "negative": "0",
+    "positive": "1"
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 2,
+  "num_hidden_layers": 2,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "torch_dtype": "float32",
+  "transformers_version": "4.40.0",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}
run-16/checkpoint-3210/model.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1794227aab6c8d418f3822b89efba6e0d1e03751a74dfc9440e762b28ce224e7
+size 17549312
run-16/checkpoint-3210/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7f067f7bb8c4d166c28277e8ed0c02f05d819b0e2d6048bac11598bfbb0417b2
+size 35123898
run-16/checkpoint-3210/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d81d96bfaf26d44cf1a0c72fb1dd07a53054407225c96066b3cf891d113419f6
+size 14308
run-16/checkpoint-3210/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:16867dee2a8ccb217940321e4e2b4d5bc2422ef1bd429064848e1bcd894f0c52
+size 1064
run-16/checkpoint-3210/special_tokens_map.json
ADDED
@@ -0,0 +1,7 @@
+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}
run-16/checkpoint-3210/tokenizer.json
ADDED
The diff for this file is too large to render. See raw diff.
run-16/checkpoint-3210/tokenizer_config.json
ADDED
@@ -0,0 +1,57 @@
+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}
run-16/checkpoint-3210/trainer_state.json
ADDED
@@ -0,0 +1,326 @@
+{
+  "best_metric": 0.7955477445811365,
+  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-16/checkpoint-3210",
+  "epoch": 15.0,
+  "eval_steps": 500,
+  "global_step": 3210,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "grad_norm": 7.164041519165039,
+      "learning_rate": 0.0004107110800529193,
+      "loss": 0.5976,
+      "step": 214
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.7182190978324546,
+      "eval_f1": 0.49208025343189016,
+      "eval_loss": 0.5573095679283142,
+      "eval_mcc": 0.32024447377810733,
+      "eval_precision": 0.6164021164021164,
+      "eval_recall": 0.4094903339191564,
+      "eval_runtime": 3.1533,
+      "eval_samples_per_second": 541.343,
+      "eval_steps_per_second": 17.125,
+      "step": 214
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 2.810741424560547,
+      "learning_rate": 0.00038909470741855514,
+      "loss": 0.535,
+      "step": 428
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 0.7299355594610427,
+      "eval_f1": 0.5719591457753018,
+      "eval_loss": 0.5309674143791199,
+      "eval_mcc": 0.3769093274057024,
+      "eval_precision": 0.6062992125984252,
+      "eval_recall": 0.5413005272407733,
+      "eval_runtime": 3.2455,
+      "eval_samples_per_second": 525.953,
+      "eval_steps_per_second": 16.638,
+      "step": 428
+    },
+    {
+      "epoch": 3.0,
+      "grad_norm": 2.5110363960266113,
+      "learning_rate": 0.00036747833478419095,
+      "loss": 0.51,
+      "step": 642
+    },
+    {
+      "epoch": 3.0,
+      "eval_accuracy": 0.7527826596367897,
+      "eval_f1": 0.5290178571428571,
+      "eval_loss": 0.5084466934204102,
+      "eval_mcc": 0.4042060476032326,
+      "eval_precision": 0.7247706422018348,
+      "eval_recall": 0.4165202108963093,
+      "eval_runtime": 3.1369,
+      "eval_samples_per_second": 544.167,
+      "eval_steps_per_second": 17.214,
+      "step": 642
+    },
+    {
+      "epoch": 4.0,
+      "grad_norm": 2.1826329231262207,
+      "learning_rate": 0.0003458619621498268,
+      "loss": 0.4995,
+      "step": 856
+    },
+    {
+      "epoch": 4.0,
+      "eval_accuracy": 0.7451669595782073,
+      "eval_f1": 0.49241540256709454,
+      "eval_loss": 0.5046694278717041,
+      "eval_mcc": 0.38160728386386483,
+      "eval_precision": 0.7326388888888888,
+      "eval_recall": 0.37082601054481545,
+      "eval_runtime": 3.3202,
+      "eval_samples_per_second": 514.123,
+      "eval_steps_per_second": 16.264,
+      "step": 856
+    },
+    {
+      "epoch": 5.0,
+      "grad_norm": 6.0804123878479,
+      "learning_rate": 0.0003242455895154626,
+      "loss": 0.4853,
+      "step": 1070
+    },
+    {
+      "epoch": 5.0,
+      "eval_accuracy": 0.7574692442882249,
+      "eval_f1": 0.6393728222996515,
+      "eval_loss": 0.4947313964366913,
+      "eval_mcc": 0.45673224487908876,
+      "eval_precision": 0.6338514680483592,
+      "eval_recall": 0.6449912126537786,
+      "eval_runtime": 3.1788,
+      "eval_samples_per_second": 536.989,
+      "eval_steps_per_second": 16.987,
+      "step": 1070
+    },
+    {
+      "epoch": 6.0,
+      "grad_norm": 2.1514229774475098,
+      "learning_rate": 0.0003026292168810984,
+      "loss": 0.4724,
+      "step": 1284
+    },
+    {
+      "epoch": 6.0,
+      "eval_accuracy": 0.7650849443468073,
+      "eval_f1": 0.6598812553011025,
+      "eval_loss": 0.477894127368927,
+      "eval_mcc": 0.4814730312998762,
+      "eval_precision": 0.6377049180327868,
+      "eval_recall": 0.6836555360281195,
+      "eval_runtime": 3.2099,
+      "eval_samples_per_second": 531.785,
+      "eval_steps_per_second": 16.823,
+      "step": 1284
+    },
+    {
+      "epoch": 7.0,
+      "grad_norm": 5.599643230438232,
+      "learning_rate": 0.0002810128442467343,
+      "loss": 0.46,
+      "step": 1498
+    },
+    {
+      "epoch": 7.0,
+      "eval_accuracy": 0.773286467486819,
+      "eval_f1": 0.5798045602605862,
+      "eval_loss": 0.47905173897743225,
+      "eval_mcc": 0.45971678817188244,
+      "eval_precision": 0.7585227272727273,
+      "eval_recall": 0.46924428822495606,
+      "eval_runtime": 3.1625,
+      "eval_samples_per_second": 539.761,
+      "eval_steps_per_second": 17.075,
+      "step": 1498
+    },
+    {
+      "epoch": 8.0,
+      "grad_norm": 3.2664639949798584,
+      "learning_rate": 0.0002593964716123701,
+      "loss": 0.4428,
+      "step": 1712
+    },
+    {
+      "epoch": 8.0,
+      "eval_accuracy": 0.7885178676039836,
+      "eval_f1": 0.6386386386386386,
+      "eval_loss": 0.4652605950832367,
+      "eval_mcc": 0.5028818263800968,
+      "eval_precision": 0.7418604651162791,
+      "eval_recall": 0.5606326889279437,
+      "eval_runtime": 3.2551,
+      "eval_samples_per_second": 524.404,
+      "eval_steps_per_second": 16.589,
+      "step": 1712
+    },
+    {
+      "epoch": 9.0,
+      "grad_norm": 5.06269645690918,
+      "learning_rate": 0.00023778009897800593,
+      "loss": 0.4442,
+      "step": 1926
+    },
+    {
+      "epoch": 9.0,
+      "eval_accuracy": 0.7773872290568249,
+      "eval_f1": 0.6008403361344538,
+      "eval_loss": 0.47534701228141785,
+      "eval_mcc": 0.4716667178372095,
+      "eval_precision": 0.7467362924281984,
+      "eval_recall": 0.5026362038664324,
+      "eval_runtime": 3.1553,
+      "eval_samples_per_second": 541.002,
+      "eval_steps_per_second": 17.114,
+      "step": 1926
+    },
+    {
+      "epoch": 10.0,
+      "grad_norm": 5.0700459480285645,
+      "learning_rate": 0.00021616372634364174,
+      "loss": 0.4307,
+      "step": 2140
+    },
+    {
+      "epoch": 10.0,
+      "eval_accuracy": 0.7838312829525483,
+      "eval_f1": 0.6666666666666666,
+      "eval_loss": 0.47036686539649963,
+      "eval_mcc": 0.5073397319926002,
+      "eval_precision": 0.6858736059479554,
+      "eval_recall": 0.648506151142355,
+      "eval_runtime": 3.3854,
+      "eval_samples_per_second": 504.231,
+      "eval_steps_per_second": 15.951,
+      "step": 2140
+    },
+    {
+      "epoch": 11.0,
+      "grad_norm": 6.991399765014648,
+      "learning_rate": 0.00019454735370927757,
+      "loss": 0.4243,
+      "step": 2354
+    },
+    {
+      "epoch": 11.0,
+      "eval_accuracy": 0.7867603983596954,
+      "eval_f1": 0.636,
+      "eval_loss": 0.4695989191532135,
+      "eval_mcc": 0.49868088180967785,
+      "eval_precision": 0.7378190255220418,
+      "eval_recall": 0.5588752196836555,
+      "eval_runtime": 3.1405,
+      "eval_samples_per_second": 543.539,
+      "eval_steps_per_second": 17.195,
+      "step": 2354
+    },
+    {
+      "epoch": 12.0,
+      "grad_norm": 5.208732604980469,
+      "learning_rate": 0.0001729309810749134,
+      "loss": 0.4198,
+      "step": 2568
+    },
+    {
+      "epoch": 12.0,
+      "eval_accuracy": 0.7896895137668424,
+      "eval_f1": 0.666046511627907,
+      "eval_loss": 0.45460793375968933,
+      "eval_mcc": 0.5152127188733497,
+      "eval_precision": 0.7075098814229249,
+      "eval_recall": 0.6291739894551845,
+      "eval_runtime": 3.8567,
+      "eval_samples_per_second": 442.612,
+      "eval_steps_per_second": 14.002,
+      "step": 2568
+    },
+    {
+      "epoch": 13.0,
+      "grad_norm": 16.56888771057129,
+      "learning_rate": 0.0001513146084405492,
+      "loss": 0.4199,
+      "step": 2782
+    },
+    {
+      "epoch": 13.0,
+      "eval_accuracy": 0.7908611599297012,
+      "eval_f1": 0.6641580432737535,
+      "eval_loss": 0.46026891469955444,
+      "eval_mcc": 0.5161067927608803,
+      "eval_precision": 0.7145748987854251,
+      "eval_recall": 0.6203866432337434,
+      "eval_runtime": 3.1632,
+      "eval_samples_per_second": 539.639,
+      "eval_steps_per_second": 17.071,
+      "step": 2782
+    },
+    {
+      "epoch": 14.0,
+      "grad_norm": 6.45969820022583,
+      "learning_rate": 0.00012969823580618505,
+      "loss": 0.4135,
+      "step": 2996
+    },
+    {
+      "epoch": 14.0,
+      "eval_accuracy": 0.789103690685413,
+      "eval_f1": 0.6622889305816135,
+      "eval_loss": 0.4559510350227356,
+      "eval_mcc": 0.5124487501229456,
+      "eval_precision": 0.710261569416499,
+      "eval_recall": 0.6203866432337434,
+      "eval_runtime": 3.2281,
+      "eval_samples_per_second": 528.794,
+      "eval_steps_per_second": 16.728,
+      "step": 2996
+    },
+    {
+      "epoch": 15.0,
+      "grad_norm": 1.7556620836257935,
+      "learning_rate": 0.00010808186317182087,
+      "loss": 0.4066,
+      "step": 3210
+    },
+    {
+      "epoch": 15.0,
+      "eval_accuracy": 0.7955477445811365,
+      "eval_f1": 0.6666666666666667,
+      "eval_loss": 0.4586973190307617,
+      "eval_mcc": 0.5249372343364972,
+      "eval_precision": 0.7301255230125523,
+      "eval_recall": 0.6133567662565905,
+      "eval_runtime": 3.1701,
+      "eval_samples_per_second": 538.473,
+      "eval_steps_per_second": 17.034,
+      "step": 3210
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 4280,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 20,
+  "save_steps": 500,
+  "total_flos": 7871634966600.0,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": {
+    "alpha": 0.9950627760257441,
+    "learning_rate": 0.0004323274526872835,
+    "num_train_epochs": 20,
+    "temperature": 49
+  }
+}
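
Each trainer_state.json above records one training entry and one evaluation entry per epoch in log_history, plus the best metric and checkpoint for the trial. A small sketch for pulling the accuracy curve out of such a file (the path is the run-16 checkpoint from this diff, relative to a local clone):

```python
# Sketch: print the per-epoch eval accuracy recorded in a trainer_state.json log_history.
import json

with open("run-16/checkpoint-3210/trainer_state.json") as fh:
    state = json.load(fh)

for entry in state["log_history"]:
    if "eval_accuracy" in entry:  # skip the training-loss entries
        print(f"epoch {entry['epoch']:>4}: accuracy={entry['eval_accuracy']:.4f}")

print("best:", state["best_metric"], "at", state["best_model_checkpoint"])
```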
run-16/checkpoint-3210/training_args.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ff195ba85c606fb318a57c554ce7a249220632fcc5b586bd0765e5c39f5b3318
+size 5048
run-16/checkpoint-3210/vocab.txt
ADDED
The diff for this file is too large to render. See raw diff.
run-16/checkpoint-4280/config.json
ADDED
@@ -0,0 +1,34 @@
+{
+  "_name_or_path": "google/bert_uncased_L-2_H-128_A-2",
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 128,
+  "id2label": {
+    "0": "negative",
+    "1": "positive"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 512,
+  "label2id": {
+    "negative": "0",
+    "positive": "1"
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 2,
+  "num_hidden_layers": 2,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "torch_dtype": "float32",
+  "transformers_version": "4.40.0",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}
run-16/checkpoint-4280/model.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3add293f38ca0b80637e05887ba0d309c2237445c6827c13264b78c6919ebd15
+size 17549312
run-16/checkpoint-4280/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9ec8c06b32062d5b576876e101466e010adc48d50f73dfee20779cc7303ebca9
+size 35123898
run-16/checkpoint-4280/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fedd0769bd89f675594ef0b80df3dcdb1022bf556ed34e9977e3b0698bd0ffa6
+size 14308
run-16/checkpoint-4280/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:64b21394f492680851e1ec282e48b1372dc0b0c4ef3b0510796d5ab835cf6453
+size 1064
run-16/checkpoint-4280/special_tokens_map.json
ADDED
@@ -0,0 +1,7 @@
+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}
run-16/checkpoint-4280/tokenizer.json
ADDED
The diff for this file is too large to render. See raw diff.
run-16/checkpoint-4280/tokenizer_config.json
ADDED
@@ -0,0 +1,57 @@
+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}