xuancoblab2023 committed on
Commit 18c59ec · verified · 1 Parent(s): 2e74145

Training in progress, epoch 1

logs/events.out.tfevents.1713611228.a9446dbff3d4.7440.6 CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:161b9590117795e641238dbac929a15742b18a64c71c8dd87c4f862fb45d992b
- size 6213
+ oid sha256:1cd5f4ef6f503bbeea619b625ca70bb17949889eca0ee7298a6005c753f0367e
+ size 9487
logs/events.out.tfevents.1713611290.a9446dbff3d4.7440.7 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:89002f3b5587b2cd8b8b4c3aa110bbd0b302ed4ad939519f75fdab1c4f2d1d65
+ size 5482
model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:0931b44185dc698bb65363708841216b9c6ddc90939480171e6ffcb248ed7413
+ oid sha256:dce6f22498e13b48d747633e47b75424c85f9718e6954a26e7dabeb1bd1f9f68
  size 17549312
run-4/checkpoint-960/config.json CHANGED
@@ -27,7 +27,7 @@
  "position_embedding_type": "absolute",
  "problem_type": "single_label_classification",
  "torch_dtype": "float32",
- "transformers_version": "4.38.2",
+ "transformers_version": "4.40.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
run-4/checkpoint-960/model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:c9cb480c01b2955ab4659777f56d6468bbadc585e4291f6207b02514eb20e61e
+ oid sha256:803979e64c4edd3b8ba91d42e52a6b0829ae1c9b4780949b82cda4ed909e5e16
  size 17549312
run-4/checkpoint-960/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:da3592e83ad14aebe9c980b18c0dc1ac9c15623e49972208fc3fb854289268a0
- size 35122746
+ oid sha256:876c395bed9fe84ba07e5cda2b5c9a4dbec31579c3134bd1130045f1b0e624e5
+ size 35123898
run-4/checkpoint-960/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:6bc70fadfc325a9e3454d9d9f633805a30a7b6edb6e2dafe2e6da349874f75d0
- size 14054
+ oid sha256:04833703a2abb12fa47ad4211546498a7ae2dcd9a28a03549753cff4beb5c8aa
+ size 14308
run-4/checkpoint-960/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:377fe6940de29d4b1b362bdb64bc5a7963c0c1099ce7d7c87c76a44e0533c320
+ oid sha256:2d377d0cb04c26cce9c5555565bd6ca5a1df431c694a93ec3e91cbcb8e72bc5c
  size 1064
run-4/checkpoint-960/trainer_state.json CHANGED
@@ -1,7 +1,7 @@
  {
- "best_metric": 0.8258317025440313,
- "best_model_checkpoint": "tiny-bert-sst2-distilled/run-4/checkpoint-960",
- "epoch": 10.0,
+ "best_metric": 0.6674509803921569,
+ "best_model_checkpoint": "tiny-bert-sst2-distilled/run-4/checkpoint-160",
+ "epoch": 6.0,
  "eval_steps": 500,
  "global_step": 960,
  "is_hyper_param_search": true,
@@ -10,207 +10,137 @@
  "log_history": [
  {
  "epoch": 1.0,
- "grad_norm": 1.90240478515625,
- "learning_rate": 0.0008040462940429662,
- "loss": 0.4563,
- "step": 96
+ "grad_norm": 0.30153390765190125,
+ "learning_rate": 0.0001806546260556293,
+ "loss": 0.1164,
+ "step": 160
  },
  {
  "epoch": 1.0,
- "eval_accuracy": 0.7446183953033269,
- "eval_f1": 0.7473378509196515,
- "eval_loss": 0.415781170129776,
- "eval_precision": 0.7394636015325671,
- "eval_recall": 0.7553816046966731,
- "eval_runtime": 25.3619,
- "eval_samples_per_second": 40.297,
- "eval_steps_per_second": 1.262,
- "step": 96
+ "eval_accuracy": 0.6674509803921569,
+ "eval_f1": 0.03636363636363636,
+ "eval_loss": 0.10561967641115189,
+ "eval_mcc": 0.04629100498862757,
+ "eval_precision": 0.5333333333333333,
+ "eval_recall": 0.018823529411764704,
+ "eval_runtime": 1.857,
+ "eval_samples_per_second": 686.581,
+ "eval_steps_per_second": 21.54,
+ "step": 160
  },
  {
  "epoch": 2.0,
- "grad_norm": 0.7548394799232483,
- "learning_rate": 0.0007147078169270811,
- "loss": 0.4243,
- "step": 192
+ "grad_norm": 0.2800346910953522,
+ "learning_rate": 0.00014452370084450344,
+ "loss": 0.1067,
+ "step": 320
  },
  {
  "epoch": 2.0,
- "eval_accuracy": 0.7896281800391389,
- "eval_f1": 0.7969782813975448,
- "eval_loss": 0.39640527963638306,
- "eval_precision": 0.7700729927007299,
- "eval_recall": 0.8258317025440313,
- "eval_runtime": 25.8877,
- "eval_samples_per_second": 39.478,
- "eval_steps_per_second": 1.236,
- "step": 192
+ "eval_accuracy": 0.6658823529411765,
+ "eval_f1": 0.049107142857142856,
+ "eval_loss": 0.10315733402967453,
+ "eval_mcc": 0.04166956048662825,
+ "eval_precision": 0.4782608695652174,
+ "eval_recall": 0.02588235294117647,
+ "eval_runtime": 1.8766,
+ "eval_samples_per_second": 679.417,
+ "eval_steps_per_second": 21.315,
+ "step": 320
  },
  {
  "epoch": 3.0,
- "grad_norm": 2.091404914855957,
- "learning_rate": 0.0006253693398111959,
- "loss": 0.4007,
- "step": 288
+ "grad_norm": 0.29303914308547974,
+ "learning_rate": 0.00010839277563337758,
+ "loss": 0.1049,
+ "step": 480
  },
  {
  "epoch": 3.0,
- "eval_accuracy": 0.8052837573385518,
- "eval_f1": 0.8240495137046862,
- "eval_loss": 0.39981088042259216,
- "eval_precision": 0.7516129032258064,
- "eval_recall": 0.9119373776908023,
- "eval_runtime": 25.886,
- "eval_samples_per_second": 39.481,
- "eval_steps_per_second": 1.236,
- "step": 288
+ "eval_accuracy": 0.6603921568627451,
+ "eval_f1": 0.06881720430107525,
+ "eval_loss": 0.10350044816732407,
+ "eval_mcc": 0.02545139051903111,
+ "eval_precision": 0.4,
+ "eval_recall": 0.03764705882352941,
+ "eval_runtime": 1.8927,
+ "eval_samples_per_second": 673.655,
+ "eval_steps_per_second": 21.134,
+ "step": 480
  },
  {
  "epoch": 4.0,
- "grad_norm": 1.994834542274475,
- "learning_rate": 0.0005360308626953108,
- "loss": 0.3871,
- "step": 384
+ "grad_norm": 0.34386146068573,
+ "learning_rate": 7.226185042225172e-05,
+ "loss": 0.1043,
+ "step": 640
  },
  {
  "epoch": 4.0,
- "eval_accuracy": 0.8082191780821918,
- "eval_f1": 0.8175046554934823,
- "eval_loss": 0.38285842537879944,
- "eval_precision": 0.7797513321492007,
- "eval_recall": 0.8590998043052838,
- "eval_runtime": 25.8573,
- "eval_samples_per_second": 39.525,
- "eval_steps_per_second": 1.238,
- "step": 384
+ "eval_accuracy": 0.6666666666666666,
+ "eval_f1": 0.0534521158129176,
+ "eval_loss": 0.1024671271443367,
+ "eval_mcc": 0.04897021068743918,
+ "eval_precision": 0.5,
+ "eval_recall": 0.02823529411764706,
+ "eval_runtime": 1.8897,
+ "eval_samples_per_second": 674.694,
+ "eval_steps_per_second": 21.167,
+ "step": 640
  },
  {
  "epoch": 5.0,
- "grad_norm": 0.7219040393829346,
- "learning_rate": 0.00044669238557942565,
- "loss": 0.378,
- "step": 480
+ "grad_norm": 0.266721248626709,
+ "learning_rate": 3.613092521112586e-05,
+ "loss": 0.1039,
+ "step": 800
  },
  {
  "epoch": 5.0,
- "eval_accuracy": 0.773972602739726,
- "eval_f1": 0.7640449438202248,
- "eval_loss": 0.39512935280799866,
- "eval_precision": 0.7991452991452992,
- "eval_recall": 0.7318982387475538,
- "eval_runtime": 25.8711,
- "eval_samples_per_second": 39.503,
- "eval_steps_per_second": 1.237,
- "step": 480
+ "eval_accuracy": 0.6643137254901961,
+ "eval_f1": 0.06550218340611354,
+ "eval_loss": 0.10257755219936371,
+ "eval_mcc": 0.04191297423079544,
+ "eval_precision": 0.45454545454545453,
+ "eval_recall": 0.03529411764705882,
+ "eval_runtime": 1.872,
+ "eval_samples_per_second": 681.081,
+ "eval_steps_per_second": 21.367,
+ "step": 800
  },
  {
  "epoch": 6.0,
- "grad_norm": 1.2725290060043335,
- "learning_rate": 0.00035735390846354054,
- "loss": 0.3759,
- "step": 576
- },
- {
- "epoch": 6.0,
- "eval_accuracy": 0.815068493150685,
- "eval_f1": 0.8270814272644098,
- "eval_loss": 0.3832918703556061,
- "eval_precision": 0.7766323024054983,
- "eval_recall": 0.8845401174168297,
- "eval_runtime": 26.2709,
- "eval_samples_per_second": 38.902,
- "eval_steps_per_second": 1.218,
- "step": 576
- },
- {
- "epoch": 7.0,
- "grad_norm": 0.7257367968559265,
- "learning_rate": 0.0002680154313476554,
- "loss": 0.3679,
- "step": 672
- },
- {
- "epoch": 7.0,
- "eval_accuracy": 0.815068493150685,
- "eval_f1": 0.8280254777070064,
- "eval_loss": 0.37927353382110596,
- "eval_precision": 0.7738095238095238,
- "eval_recall": 0.8904109589041096,
- "eval_runtime": 25.5612,
- "eval_samples_per_second": 39.982,
- "eval_steps_per_second": 1.252,
- "step": 672
- },
- {
- "epoch": 8.0,
- "grad_norm": 1.1571022272109985,
- "learning_rate": 0.00017867695423177027,
- "loss": 0.3609,
- "step": 768
- },
- {
- "epoch": 8.0,
- "eval_accuracy": 0.8238747553816047,
- "eval_f1": 0.832089552238806,
- "eval_loss": 0.37364885210990906,
- "eval_precision": 0.7950089126559715,
- "eval_recall": 0.87279843444227,
- "eval_runtime": 25.5421,
- "eval_samples_per_second": 40.012,
- "eval_steps_per_second": 1.253,
- "step": 768
- },
- {
- "epoch": 9.0,
- "grad_norm": 0.6674935817718506,
- "learning_rate": 8.933847711588513e-05,
- "loss": 0.3556,
- "step": 864
- },
- {
- "epoch": 9.0,
- "eval_accuracy": 0.8238747553816047,
- "eval_f1": 0.8314606741573034,
- "eval_loss": 0.3790241479873657,
- "eval_precision": 0.7971274685816876,
- "eval_recall": 0.8688845401174168,
- "eval_runtime": 26.0614,
- "eval_samples_per_second": 39.215,
- "eval_steps_per_second": 1.228,
- "step": 864
- },
- {
- "epoch": 10.0,
- "grad_norm": 1.1517527103424072,
+ "grad_norm": 0.17371755838394165,
  "learning_rate": 0.0,
- "loss": 0.3558,
+ "loss": 0.1031,
  "step": 960
  },
  {
- "epoch": 10.0,
- "eval_accuracy": 0.8258317025440313,
- "eval_f1": 0.8327067669172932,
- "eval_loss": 0.3765825033187866,
- "eval_precision": 0.8010849909584087,
- "eval_recall": 0.8669275929549902,
- "eval_runtime": 25.5387,
- "eval_samples_per_second": 40.018,
- "eval_steps_per_second": 1.253,
+ "epoch": 6.0,
+ "eval_accuracy": 0.6666666666666666,
+ "eval_f1": 0.06593406593406594,
+ "eval_loss": 0.10186242312192917,
+ "eval_mcc": 0.05488212999484517,
+ "eval_precision": 0.5,
+ "eval_recall": 0.03529411764705882,
+ "eval_runtime": 1.8824,
+ "eval_samples_per_second": 677.342,
+ "eval_steps_per_second": 21.25,
  "step": 960
  }
  ],
  "logging_steps": 500,
  "max_steps": 960,
  "num_input_tokens_seen": 0,
- "num_train_epochs": 10,
+ "num_train_epochs": 6,
  "save_steps": 500,
- "total_flos": 2356951972800.0,
+ "total_flos": 1750532627520.0,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": {
- "alpha": 0.6601531096042508,
- "learning_rate": 0.0008933847711588513,
- "num_train_epochs": 10,
- "temperature": 4
+ "alpha": 0.1455909339678303,
+ "learning_rate": 0.00021678555126675516,
+ "num_train_epochs": 6,
+ "temperature": 19
  }
  }
run-4/checkpoint-960/training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:05a27237c59bc1684c5bc675b662fc3c971fc4d8663bd9fafef0f646ea921a93
- size 4920
+ oid sha256:68048bf7148340488c5af59583501491a10f4f2cdd872d35119499ea279b6aeb
+ size 5048
run-5/checkpoint-160/config.json ADDED
@@ -0,0 +1,34 @@
+ {
+ "_name_or_path": "google/bert_uncased_L-2_H-128_A-2",
+ "architectures": [
+ "BertForSequenceClassification"
+ ],
+ "attention_probs_dropout_prob": 0.1,
+ "classifier_dropout": null,
+ "hidden_act": "gelu",
+ "hidden_dropout_prob": 0.1,
+ "hidden_size": 128,
+ "id2label": {
+ "0": "negative",
+ "1": "positive"
+ },
+ "initializer_range": 0.02,
+ "intermediate_size": 512,
+ "label2id": {
+ "negative": "0",
+ "positive": "1"
+ },
+ "layer_norm_eps": 1e-12,
+ "max_position_embeddings": 512,
+ "model_type": "bert",
+ "num_attention_heads": 2,
+ "num_hidden_layers": 2,
+ "pad_token_id": 0,
+ "position_embedding_type": "absolute",
+ "problem_type": "single_label_classification",
+ "torch_dtype": "float32",
+ "transformers_version": "4.40.0",
+ "type_vocab_size": 2,
+ "use_cache": true,
+ "vocab_size": 30522
+ }
run-5/checkpoint-160/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dce6f22498e13b48d747633e47b75424c85f9718e6954a26e7dabeb1bd1f9f68
+ size 17549312
run-5/checkpoint-160/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:77beb9d65438b02224e983e17af7c98bb455daf477224089ecac7fe4b6645190
+ size 35123898
run-5/checkpoint-160/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0792eaff268dd73c8e104b5060a487f4ef56535ad3b58888006338b8bc298137
+ size 14308
run-5/checkpoint-160/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1b35db4b790bda6fadd7de9231a2496b5d26bea6bc04e62c995352ce39529ef1
+ size 1064
run-5/checkpoint-160/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
+ {
+ "cls_token": "[CLS]",
+ "mask_token": "[MASK]",
+ "pad_token": "[PAD]",
+ "sep_token": "[SEP]",
+ "unk_token": "[UNK]"
+ }
run-5/checkpoint-160/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
run-5/checkpoint-160/tokenizer_config.json ADDED
@@ -0,0 +1,57 @@
+ {
+ "added_tokens_decoder": {
+ "0": {
+ "content": "[PAD]",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "100": {
+ "content": "[UNK]",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "101": {
+ "content": "[CLS]",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "102": {
+ "content": "[SEP]",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "103": {
+ "content": "[MASK]",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "clean_up_tokenization_spaces": true,
+ "cls_token": "[CLS]",
+ "do_basic_tokenize": true,
+ "do_lower_case": true,
+ "mask_token": "[MASK]",
+ "model_max_length": 512,
+ "never_split": null,
+ "pad_token": "[PAD]",
+ "sep_token": "[SEP]",
+ "strip_accents": null,
+ "tokenize_chinese_chars": true,
+ "tokenizer_class": "BertTokenizer",
+ "unk_token": "[UNK]"
+ }
run-5/checkpoint-160/trainer_state.json ADDED
@@ -0,0 +1,46 @@
+ {
+ "best_metric": 0.6666666666666666,
+ "best_model_checkpoint": "tiny-bert-sst2-distilled/run-5/checkpoint-160",
+ "epoch": 1.0,
+ "eval_steps": 500,
+ "global_step": 160,
+ "is_hyper_param_search": true,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 1.0,
+ "grad_norm": 0.9986019134521484,
+ "learning_rate": 6.067732661916699e-05,
+ "loss": 0.3474,
+ "step": 160
+ },
+ {
+ "epoch": 1.0,
+ "eval_accuracy": 0.6666666666666666,
+ "eval_f1": 0.0,
+ "eval_loss": 0.33498698472976685,
+ "eval_mcc": 0.0,
+ "eval_precision": 0.0,
+ "eval_recall": 0.0,
+ "eval_runtime": 1.8603,
+ "eval_samples_per_second": 685.372,
+ "eval_steps_per_second": 21.502,
+ "step": 160
+ }
+ ],
+ "logging_steps": 500,
+ "max_steps": 640,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 4,
+ "save_steps": 500,
+ "total_flos": 291755437920.0,
+ "train_batch_size": 32,
+ "trial_name": null,
+ "trial_params": {
+ "alpha": 0.5122281577891578,
+ "learning_rate": 8.090310215888932e-05,
+ "num_train_epochs": 4,
+ "temperature": 14
+ }
+ }
run-5/checkpoint-160/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1ffdc018305c7641913fb33119db0342b974e7cbd16a23cd7b923a830af0c23d
+ size 5048
run-5/checkpoint-160/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:68048bf7148340488c5af59583501491a10f4f2cdd872d35119499ea279b6aeb
+ oid sha256:1ffdc018305c7641913fb33119db0342b974e7cbd16a23cd7b923a830af0c23d
  size 5048