xuancoblab2023 commited on
Commit
8d1c664
·
verified ·
1 Parent(s): 505b478

Training in progress, epoch 1

Browse files
logs/events.out.tfevents.1709875287.6c1c016b0e3e.7263.10 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d2bcab083891fd620f61ecea3f0e974684edae28e1d635b7db83b0e20171e340
3
- size 5984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4327b87919dbbe94f8e79a4891c578c3a1b7e1ea3caa619c6a632bd7c54f00b9
3
+ size 9753
logs/events.out.tfevents.1709876067.6c1c016b0e3e.7263.11 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e6d98aed345117a010060b30bf0c50ede7b829e269a95a23a3f2d664634f193
3
+ size 5314
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c776f2b49c9ca0ef99506874999e86d97cc1a9217be90a32ee5b80456d1e3d6e
3
  size 17549312
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:361284d6cbbbf0747d1d2496b9712460a833e47055619c1ce3d78588bf871550
3
  size 17549312
run-10/checkpoint-288/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:90544cd346f84f4589af99a78110877a58e704cb23b6f374ebdfba5b1f7654c5
3
  size 17549312
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16fde600bac51204144e3af5549aadff6874a092e8bba58ee66b59cf759a6f1c
3
  size 17549312
run-10/checkpoint-288/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:72c3fc2822ded408ee92a83ac988187d1edba9601e41f58ea728289e3b9381cb
3
  size 35122746
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2537703686efc188dccb3cb6511447099a3ded3581554b578e100ed57db6d64d
3
  size 35122746
run-10/checkpoint-288/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ba3c2bb1d96a39a28ae1b66f234e351e3c13c700fc97347efd6308798b151790
3
  size 14054
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dcc277e7d1d83522a853f43efa19a0dd29e8896ab414cd166ac88116bce74f64
3
  size 14054
run-10/checkpoint-288/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2c115d3f4c35a47d3adb59e1f87f96436fbc076b7e6a682ba8005683cfea2d74
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:733baf3bbf0bc4e04a501520755b8d276b20695f85c3038a12c240464c860b0c
3
  size 1064
run-10/checkpoint-288/trainer_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "best_metric": 0.8297455968688845,
3
  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-10/checkpoint-288",
4
- "epoch": 3.0,
5
  "eval_steps": 500,
6
  "global_step": 288,
7
  "is_hyper_param_search": true,
@@ -10,74 +10,131 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "grad_norm": 1.9170680046081543,
14
- "learning_rate": 0.0005212663608648181,
15
- "loss": 0.5641,
16
- "step": 96
17
  },
18
  {
19
  "epoch": 1.0,
20
- "eval_accuracy": 0.799412915851272,
21
- "eval_f1": 0.8197009674582234,
22
- "eval_loss": 0.47259676456451416,
23
- "eval_precision": 0.744408945686901,
24
- "eval_recall": 0.9119373776908023,
25
- "eval_runtime": 30.8424,
26
- "eval_samples_per_second": 33.136,
27
- "eval_steps_per_second": 1.038,
 
 
 
 
 
 
 
28
  "step": 96
29
  },
30
  {
31
  "epoch": 2.0,
32
- "grad_norm": 2.4848690032958984,
33
- "learning_rate": 0.00026063318043240905,
34
- "loss": 0.456,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  "step": 192
36
  },
37
  {
38
- "epoch": 2.0,
39
- "eval_accuracy": 0.824853228962818,
40
- "eval_f1": 0.8406055209260909,
41
- "eval_loss": 0.4349122643470764,
42
- "eval_precision": 0.7712418300653595,
43
- "eval_recall": 0.923679060665362,
44
- "eval_runtime": 31.1515,
45
- "eval_samples_per_second": 32.807,
46
- "eval_steps_per_second": 1.027,
47
  "step": 192
48
  },
49
  {
50
- "epoch": 3.0,
51
- "grad_norm": 4.3911333084106445,
52
- "learning_rate": 0.0,
53
- "loss": 0.4063,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
  "step": 288
55
  },
56
  {
57
- "epoch": 3.0,
58
- "eval_accuracy": 0.8297455968688845,
59
- "eval_f1": 0.8421052631578947,
60
- "eval_loss": 0.42057597637176514,
61
- "eval_precision": 0.7851099830795262,
62
- "eval_recall": 0.9080234833659491,
63
- "eval_runtime": 32.1366,
64
- "eval_samples_per_second": 31.802,
65
- "eval_steps_per_second": 0.996,
66
  "step": 288
67
  }
68
  ],
69
  "logging_steps": 500,
70
- "max_steps": 288,
71
  "num_input_tokens_seen": 0,
72
- "num_train_epochs": 3,
73
  "save_steps": 500,
74
- "total_flos": 707085591840.0,
75
- "train_batch_size": 32,
76
  "trial_name": null,
77
  "trial_params": {
78
- "alpha": 0.9610512963031814,
79
- "learning_rate": 0.0007818995412972273,
80
- "num_train_epochs": 3,
81
- "temperature": 24
82
  }
83
  }
 
1
  {
2
+ "best_metric": 0.8140900195694716,
3
  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-10/checkpoint-288",
4
+ "epoch": 6.0,
5
  "eval_steps": 500,
6
  "global_step": 288,
7
  "is_hyper_param_search": true,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "grad_norm": 1.5607351064682007,
14
+ "learning_rate": 0.0008549093212842401,
15
+ "loss": 0.5996,
16
+ "step": 48
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "eval_accuracy": 0.7632093933463796,
21
+ "eval_f1": 0.7949152542372881,
22
+ "eval_loss": 0.502604067325592,
23
+ "eval_precision": 0.7010463378176383,
24
+ "eval_recall": 0.9178082191780822,
25
+ "eval_runtime": 27.7883,
26
+ "eval_samples_per_second": 36.778,
27
+ "eval_steps_per_second": 0.576,
28
+ "step": 48
29
+ },
30
+ {
31
+ "epoch": 2.0,
32
+ "grad_norm": 3.883798122406006,
33
+ "learning_rate": 0.0007124244344035335,
34
+ "loss": 0.4919,
35
  "step": 96
36
  },
37
  {
38
  "epoch": 2.0,
39
+ "eval_accuracy": 0.7847358121330724,
40
+ "eval_f1": 0.8066783831282953,
41
+ "eval_loss": 0.4441834092140198,
42
+ "eval_precision": 0.7320574162679426,
43
+ "eval_recall": 0.898238747553816,
44
+ "eval_runtime": 27.0257,
45
+ "eval_samples_per_second": 37.816,
46
+ "eval_steps_per_second": 0.592,
47
+ "step": 96
48
+ },
49
+ {
50
+ "epoch": 3.0,
51
+ "grad_norm": 3.711146116256714,
52
+ "learning_rate": 0.0005699395475228268,
53
+ "loss": 0.449,
54
+ "step": 144
55
+ },
56
+ {
57
+ "epoch": 3.0,
58
+ "eval_accuracy": 0.799412915851272,
59
+ "eval_f1": 0.8222029488291415,
60
+ "eval_loss": 0.46845388412475586,
61
+ "eval_precision": 0.7383177570093458,
62
+ "eval_recall": 0.9275929549902152,
63
+ "eval_runtime": 27.3696,
64
+ "eval_samples_per_second": 37.341,
65
+ "eval_steps_per_second": 0.585,
66
+ "step": 144
67
+ },
68
+ {
69
+ "epoch": 4.0,
70
+ "grad_norm": 4.223331451416016,
71
+ "learning_rate": 0.0004274546606421201,
72
+ "loss": 0.4144,
73
  "step": 192
74
  },
75
  {
76
+ "epoch": 4.0,
77
+ "eval_accuracy": 0.8003913894324853,
78
+ "eval_f1": 0.796812749003984,
79
+ "eval_loss": 0.45222949981689453,
80
+ "eval_precision": 0.8113590263691683,
81
+ "eval_recall": 0.7827788649706457,
82
+ "eval_runtime": 27.0074,
83
+ "eval_samples_per_second": 37.841,
84
+ "eval_steps_per_second": 0.592,
85
  "step": 192
86
  },
87
  {
88
+ "epoch": 5.0,
89
+ "grad_norm": 2.6906895637512207,
90
+ "learning_rate": 0.0002849697737614134,
91
+ "loss": 0.3883,
92
+ "step": 240
93
+ },
94
+ {
95
+ "epoch": 5.0,
96
+ "eval_accuracy": 0.8033268101761253,
97
+ "eval_f1": 0.8280581693755347,
98
+ "eval_loss": 0.4314705431461334,
99
+ "eval_precision": 0.7355623100303952,
100
+ "eval_recall": 0.9471624266144814,
101
+ "eval_runtime": 27.3713,
102
+ "eval_samples_per_second": 37.338,
103
+ "eval_steps_per_second": 0.585,
104
+ "step": 240
105
+ },
106
+ {
107
+ "epoch": 6.0,
108
+ "grad_norm": 2.0857512950897217,
109
+ "learning_rate": 0.0001424848868807067,
110
+ "loss": 0.3715,
111
  "step": 288
112
  },
113
  {
114
+ "epoch": 6.0,
115
+ "eval_accuracy": 0.8140900195694716,
116
+ "eval_f1": 0.8327464788732395,
117
+ "eval_loss": 0.4203811585903168,
118
+ "eval_precision": 0.7568,
119
+ "eval_recall": 0.9256360078277887,
120
+ "eval_runtime": 27.3476,
121
+ "eval_samples_per_second": 37.371,
122
+ "eval_steps_per_second": 0.585,
123
  "step": 288
124
  }
125
  ],
126
  "logging_steps": 500,
127
+ "max_steps": 336,
128
  "num_input_tokens_seen": 0,
129
+ "num_train_epochs": 7,
130
  "save_steps": 500,
131
+ "total_flos": 1414171183680.0,
132
+ "train_batch_size": 64,
133
  "trial_name": null,
134
  "trial_params": {
135
+ "alpha": 0.9873851004059778,
136
+ "learning_rate": 0.0009973942081649468,
137
+ "num_train_epochs": 7,
138
+ "temperature": 16
139
  }
140
  }
run-10/checkpoint-288/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9c48cb920a64a7322ad1d6d741321a643be4ed10af970a24a154150a0def2990
3
  size 4920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8cb76553ba5d5fd16067059456c55e07ddbd2f0f51720f91f3d3e0bb7e3a6405
3
  size 4920
run-10/checkpoint-336/config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/bert_uncased_L-2_H-128_A-2",
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 128,
11
+ "id2label": {
12
+ "0": "negative",
13
+ "1": "positive"
14
+ },
15
+ "initializer_range": 0.02,
16
+ "intermediate_size": 512,
17
+ "label2id": {
18
+ "negative": "0",
19
+ "positive": "1"
20
+ },
21
+ "layer_norm_eps": 1e-12,
22
+ "max_position_embeddings": 512,
23
+ "model_type": "bert",
24
+ "num_attention_heads": 2,
25
+ "num_hidden_layers": 2,
26
+ "pad_token_id": 0,
27
+ "position_embedding_type": "absolute",
28
+ "problem_type": "single_label_classification",
29
+ "torch_dtype": "float32",
30
+ "transformers_version": "4.38.2",
31
+ "type_vocab_size": 2,
32
+ "use_cache": true,
33
+ "vocab_size": 30522
34
+ }
run-10/checkpoint-336/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db8fa0c6189436d5f7ed954aecc452f28c14a2f60bae8edc6e3af80084c3c2eb
3
+ size 17549312
run-10/checkpoint-336/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05006656683bf23068bad41417e56cd397242a22b1831ffd9d753f9ee90bcb0e
3
+ size 35122746
run-10/checkpoint-336/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30720e851cc54860df5aa89a6892389c4264a6061ff42a4eef7ff950f504b083
3
+ size 14054
run-10/checkpoint-336/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f8a700295948ab1cf174647dfa8e4b564d6690fbaaafa7ad04ce093ef700034
3
+ size 1064
run-10/checkpoint-336/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
run-10/checkpoint-336/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
run-10/checkpoint-336/tokenizer_config.json ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": true,
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 512,
50
+ "never_split": null,
51
+ "pad_token": "[PAD]",
52
+ "sep_token": "[SEP]",
53
+ "strip_accents": null,
54
+ "tokenize_chinese_chars": true,
55
+ "tokenizer_class": "BertTokenizer",
56
+ "unk_token": "[UNK]"
57
+ }
run-10/checkpoint-336/trainer_state.json ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.824853228962818,
3
+ "best_model_checkpoint": "tiny-bert-sst2-distilled/run-10/checkpoint-336",
4
+ "epoch": 7.0,
5
+ "eval_steps": 500,
6
+ "global_step": 336,
7
+ "is_hyper_param_search": true,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "grad_norm": 1.5607351064682007,
14
+ "learning_rate": 0.0008549093212842401,
15
+ "loss": 0.5996,
16
+ "step": 48
17
+ },
18
+ {
19
+ "epoch": 1.0,
20
+ "eval_accuracy": 0.7632093933463796,
21
+ "eval_f1": 0.7949152542372881,
22
+ "eval_loss": 0.502604067325592,
23
+ "eval_precision": 0.7010463378176383,
24
+ "eval_recall": 0.9178082191780822,
25
+ "eval_runtime": 27.7883,
26
+ "eval_samples_per_second": 36.778,
27
+ "eval_steps_per_second": 0.576,
28
+ "step": 48
29
+ },
30
+ {
31
+ "epoch": 2.0,
32
+ "grad_norm": 3.883798122406006,
33
+ "learning_rate": 0.0007124244344035335,
34
+ "loss": 0.4919,
35
+ "step": 96
36
+ },
37
+ {
38
+ "epoch": 2.0,
39
+ "eval_accuracy": 0.7847358121330724,
40
+ "eval_f1": 0.8066783831282953,
41
+ "eval_loss": 0.4441834092140198,
42
+ "eval_precision": 0.7320574162679426,
43
+ "eval_recall": 0.898238747553816,
44
+ "eval_runtime": 27.0257,
45
+ "eval_samples_per_second": 37.816,
46
+ "eval_steps_per_second": 0.592,
47
+ "step": 96
48
+ },
49
+ {
50
+ "epoch": 3.0,
51
+ "grad_norm": 3.711146116256714,
52
+ "learning_rate": 0.0005699395475228268,
53
+ "loss": 0.449,
54
+ "step": 144
55
+ },
56
+ {
57
+ "epoch": 3.0,
58
+ "eval_accuracy": 0.799412915851272,
59
+ "eval_f1": 0.8222029488291415,
60
+ "eval_loss": 0.46845388412475586,
61
+ "eval_precision": 0.7383177570093458,
62
+ "eval_recall": 0.9275929549902152,
63
+ "eval_runtime": 27.3696,
64
+ "eval_samples_per_second": 37.341,
65
+ "eval_steps_per_second": 0.585,
66
+ "step": 144
67
+ },
68
+ {
69
+ "epoch": 4.0,
70
+ "grad_norm": 4.223331451416016,
71
+ "learning_rate": 0.0004274546606421201,
72
+ "loss": 0.4144,
73
+ "step": 192
74
+ },
75
+ {
76
+ "epoch": 4.0,
77
+ "eval_accuracy": 0.8003913894324853,
78
+ "eval_f1": 0.796812749003984,
79
+ "eval_loss": 0.45222949981689453,
80
+ "eval_precision": 0.8113590263691683,
81
+ "eval_recall": 0.7827788649706457,
82
+ "eval_runtime": 27.0074,
83
+ "eval_samples_per_second": 37.841,
84
+ "eval_steps_per_second": 0.592,
85
+ "step": 192
86
+ },
87
+ {
88
+ "epoch": 5.0,
89
+ "grad_norm": 2.6906895637512207,
90
+ "learning_rate": 0.0002849697737614134,
91
+ "loss": 0.3883,
92
+ "step": 240
93
+ },
94
+ {
95
+ "epoch": 5.0,
96
+ "eval_accuracy": 0.8033268101761253,
97
+ "eval_f1": 0.8280581693755347,
98
+ "eval_loss": 0.4314705431461334,
99
+ "eval_precision": 0.7355623100303952,
100
+ "eval_recall": 0.9471624266144814,
101
+ "eval_runtime": 27.3713,
102
+ "eval_samples_per_second": 37.338,
103
+ "eval_steps_per_second": 0.585,
104
+ "step": 240
105
+ },
106
+ {
107
+ "epoch": 6.0,
108
+ "grad_norm": 2.0857512950897217,
109
+ "learning_rate": 0.0001424848868807067,
110
+ "loss": 0.3715,
111
+ "step": 288
112
+ },
113
+ {
114
+ "epoch": 6.0,
115
+ "eval_accuracy": 0.8140900195694716,
116
+ "eval_f1": 0.8327464788732395,
117
+ "eval_loss": 0.4203811585903168,
118
+ "eval_precision": 0.7568,
119
+ "eval_recall": 0.9256360078277887,
120
+ "eval_runtime": 27.3476,
121
+ "eval_samples_per_second": 37.371,
122
+ "eval_steps_per_second": 0.585,
123
+ "step": 288
124
+ },
125
+ {
126
+ "epoch": 7.0,
127
+ "grad_norm": 2.2891719341278076,
128
+ "learning_rate": 0.0,
129
+ "loss": 0.3626,
130
+ "step": 336
131
+ },
132
+ {
133
+ "epoch": 7.0,
134
+ "eval_accuracy": 0.824853228962818,
135
+ "eval_f1": 0.8359303391384051,
136
+ "eval_loss": 0.4121144413948059,
137
+ "eval_precision": 0.7862068965517242,
138
+ "eval_recall": 0.8923679060665362,
139
+ "eval_runtime": 30.2756,
140
+ "eval_samples_per_second": 33.757,
141
+ "eval_steps_per_second": 0.528,
142
+ "step": 336
143
+ }
144
+ ],
145
+ "logging_steps": 500,
146
+ "max_steps": 336,
147
+ "num_input_tokens_seen": 0,
148
+ "num_train_epochs": 7,
149
+ "save_steps": 500,
150
+ "total_flos": 1649866380960.0,
151
+ "train_batch_size": 64,
152
+ "trial_name": null,
153
+ "trial_params": {
154
+ "alpha": 0.9873851004059778,
155
+ "learning_rate": 0.0009973942081649468,
156
+ "num_train_epochs": 7,
157
+ "temperature": 16
158
+ }
159
+ }
run-10/checkpoint-336/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8cb76553ba5d5fd16067059456c55e07ddbd2f0f51720f91f3d3e0bb7e3a6405
3
+ size 4920
run-10/checkpoint-336/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
run-11/checkpoint-48/config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/bert_uncased_L-2_H-128_A-2",
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 128,
11
+ "id2label": {
12
+ "0": "negative",
13
+ "1": "positive"
14
+ },
15
+ "initializer_range": 0.02,
16
+ "intermediate_size": 512,
17
+ "label2id": {
18
+ "negative": "0",
19
+ "positive": "1"
20
+ },
21
+ "layer_norm_eps": 1e-12,
22
+ "max_position_embeddings": 512,
23
+ "model_type": "bert",
24
+ "num_attention_heads": 2,
25
+ "num_hidden_layers": 2,
26
+ "pad_token_id": 0,
27
+ "position_embedding_type": "absolute",
28
+ "problem_type": "single_label_classification",
29
+ "torch_dtype": "float32",
30
+ "transformers_version": "4.38.2",
31
+ "type_vocab_size": 2,
32
+ "use_cache": true,
33
+ "vocab_size": 30522
34
+ }
run-11/checkpoint-48/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:361284d6cbbbf0747d1d2496b9712460a833e47055619c1ce3d78588bf871550
3
+ size 17549312
run-11/checkpoint-48/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c25644a65bdf41de7ee0309c13f675afc49f167008ba62ccb37e4e30760af0c
3
+ size 35122746
run-11/checkpoint-48/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f731098665929ed6a815501631b54240caaac6508207c5c55cf0fe36ad39b17
3
+ size 14054
run-11/checkpoint-48/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:678e7fd168382ae4d233368ea59a020ab00987ed0913f4b94fa8eb4e73fa2007
3
+ size 1064
run-11/checkpoint-48/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
run-11/checkpoint-48/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
run-11/checkpoint-48/tokenizer_config.json ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": true,
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 512,
50
+ "never_split": null,
51
+ "pad_token": "[PAD]",
52
+ "sep_token": "[SEP]",
53
+ "strip_accents": null,
54
+ "tokenize_chinese_chars": true,
55
+ "tokenizer_class": "BertTokenizer",
56
+ "unk_token": "[UNK]"
57
+ }
run-11/checkpoint-48/trainer_state.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.7524461839530333,
3
+ "best_model_checkpoint": "tiny-bert-sst2-distilled/run-11/checkpoint-48",
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 48,
7
+ "is_hyper_param_search": true,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "grad_norm": 2.152926206588745,
14
+ "learning_rate": 0.0007816128679887146,
15
+ "loss": 0.591,
16
+ "step": 48
17
+ },
18
+ {
19
+ "epoch": 1.0,
20
+ "eval_accuracy": 0.7524461839530333,
21
+ "eval_f1": 0.7944760357432982,
22
+ "eval_loss": 0.4880000948905945,
23
+ "eval_precision": 0.6791666666666667,
24
+ "eval_recall": 0.9569471624266145,
25
+ "eval_runtime": 29.551,
26
+ "eval_samples_per_second": 34.584,
27
+ "eval_steps_per_second": 0.541,
28
+ "step": 48
29
+ }
30
+ ],
31
+ "logging_steps": 500,
32
+ "max_steps": 336,
33
+ "num_input_tokens_seen": 0,
34
+ "num_train_epochs": 7,
35
+ "save_steps": 500,
36
+ "total_flos": 235695197280.0,
37
+ "train_batch_size": 64,
38
+ "trial_name": null,
39
+ "trial_params": {
40
+ "alpha": 0.9971844266637861,
41
+ "learning_rate": 0.0009118816793201671,
42
+ "num_train_epochs": 7,
43
+ "temperature": 16
44
+ }
45
+ }
run-11/checkpoint-48/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a283f7ca8fa712756294b7fd6d3247bcb44ee70c9bb9ae961204f34de91039c
3
+ size 4920
run-11/checkpoint-48/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8cb76553ba5d5fd16067059456c55e07ddbd2f0f51720f91f3d3e0bb7e3a6405
3
  size 4920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a283f7ca8fa712756294b7fd6d3247bcb44ee70c9bb9ae961204f34de91039c
3
  size 4920