nailiamirzakhmedova commited on
Commit
92b663c
·
verified ·
1 Parent(s): ebadc3d

upload checkpoint

Browse files
config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/electra-large-discriminator",
3
+ "architectures": [
4
+ "ElectraForTokenClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "embedding_size": 1024,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 1024,
12
+ "id2label": {
13
+ "0": "B-Value",
14
+ "1": "I-Value",
15
+ "2": "B-Rhetorical",
16
+ "3": "I-Rhetorical",
17
+ "4": "B-Policy",
18
+ "5": "I-Policy",
19
+ "6": "B-Testimony",
20
+ "7": "I-Testimony",
21
+ "8": "B-Fact",
22
+ "9": "I-Fact",
23
+ "10": "O"
24
+ },
25
+ "initializer_range": 0.02,
26
+ "intermediate_size": 4096,
27
+ "label2id": {
28
+ "B-Fact": 8,
29
+ "B-Policy": 4,
30
+ "B-Rhetorical": 2,
31
+ "B-Testimony": 6,
32
+ "B-Value": 0,
33
+ "I-Fact": 9,
34
+ "I-Policy": 5,
35
+ "I-Rhetorical": 3,
36
+ "I-Testimony": 7,
37
+ "I-Value": 1,
38
+ "O": 10
39
+ },
40
+ "layer_norm_eps": 1e-12,
41
+ "max_position_embeddings": 512,
42
+ "model_type": "electra",
43
+ "num_attention_heads": 16,
44
+ "num_hidden_layers": 24,
45
+ "pad_token_id": 0,
46
+ "position_embedding_type": "absolute",
47
+ "summary_activation": "gelu",
48
+ "summary_last_dropout": 0.1,
49
+ "summary_type": "first",
50
+ "summary_use_proj": true,
51
+ "torch_dtype": "float32",
52
+ "transformers_version": "4.28.0",
53
+ "type_vocab_size": 2,
54
+ "use_cache": true,
55
+ "vocab_size": 30522
56
+ }
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4124981aff2725e0f7473627166c089a1d840dca77bc075515b263a99c235949
3
+ size 2673058117
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b517abc163f44b6d10d19ef623748445806502b8c38cd831c6609867d79bea8
3
+ size 1336553581
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6cfe0e350a2f4a45becf30b20f10937f77ff255d808315a9d2e604eee24faafb
3
+ size 14575
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:26fda3babc655c4d4878776d8c39929f31b08be1444c387e1824c93bcb6713ff
3
+ size 627
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": true,
3
+ "clean_up_tokenization_spaces": true,
4
+ "cls_token": "[CLS]",
5
+ "do_lower_case": false,
6
+ "mask_token": "[MASK]",
7
+ "model_max_length": 512,
8
+ "pad_token": "[PAD]",
9
+ "sep_token": "[SEP]",
10
+ "strip_accents": null,
11
+ "tokenize_chinese_chars": true,
12
+ "tokenizer_class": "ElectraTokenizer",
13
+ "unk_token": "[UNK]"
14
+ }
trainer_state.json ADDED
@@ -0,0 +1,1078 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 86.0,
5
+ "global_step": 2752,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 1.0,
12
+ "eval_accuracy": 0.6175584724649537,
13
+ "eval_f1": 0.0769764216366158,
14
+ "eval_loss": 1.250605583190918,
15
+ "eval_precision": 0.06442251886244922,
16
+ "eval_recall": 0.09560723514211886,
17
+ "eval_runtime": 3.5402,
18
+ "eval_samples_per_second": 26.553,
19
+ "eval_steps_per_second": 3.39,
20
+ "step": 32
21
+ },
22
+ {
23
+ "epoch": 2.0,
24
+ "eval_accuracy": 0.6520655932770609,
25
+ "eval_f1": 0.18170731707317075,
26
+ "eval_loss": 1.0688396692276,
27
+ "eval_precision": 0.14063237376120813,
28
+ "eval_recall": 0.2566752799310939,
29
+ "eval_runtime": 3.2469,
30
+ "eval_samples_per_second": 28.95,
31
+ "eval_steps_per_second": 3.696,
32
+ "step": 64
33
+ },
34
+ {
35
+ "epoch": 3.0,
36
+ "eval_accuracy": 0.733239132859852,
37
+ "eval_f1": 0.26462128475551294,
38
+ "eval_loss": 0.8916715383529663,
39
+ "eval_precision": 0.21036585365853658,
40
+ "eval_recall": 0.35658914728682173,
41
+ "eval_runtime": 3.5217,
42
+ "eval_samples_per_second": 26.692,
43
+ "eval_steps_per_second": 3.407,
44
+ "step": 96
45
+ },
46
+ {
47
+ "epoch": 4.0,
48
+ "eval_accuracy": 0.7502695868813446,
49
+ "eval_f1": 0.31227821149751595,
50
+ "eval_loss": 0.8893136382102966,
51
+ "eval_precision": 0.2655401327700664,
52
+ "eval_recall": 0.3789836347975883,
53
+ "eval_runtime": 3.1549,
54
+ "eval_samples_per_second": 29.795,
55
+ "eval_steps_per_second": 3.804,
56
+ "step": 128
57
+ },
58
+ {
59
+ "epoch": 5.0,
60
+ "eval_accuracy": 0.7515338563938572,
61
+ "eval_f1": 0.3259361997226075,
62
+ "eval_loss": 0.8177661299705505,
63
+ "eval_precision": 0.2727800348229832,
64
+ "eval_recall": 0.40482342807924204,
65
+ "eval_runtime": 3.1577,
66
+ "eval_samples_per_second": 29.768,
67
+ "eval_steps_per_second": 3.8,
68
+ "step": 160
69
+ },
70
+ {
71
+ "epoch": 6.0,
72
+ "eval_accuracy": 0.7685271260179228,
73
+ "eval_f1": 0.3471241170534813,
74
+ "eval_loss": 0.818898618221283,
75
+ "eval_precision": 0.2847682119205298,
76
+ "eval_recall": 0.4444444444444444,
77
+ "eval_runtime": 3.5275,
78
+ "eval_samples_per_second": 26.648,
79
+ "eval_steps_per_second": 3.402,
80
+ "step": 192
81
+ },
82
+ {
83
+ "epoch": 7.0,
84
+ "eval_accuracy": 0.7693451827613134,
85
+ "eval_f1": 0.39416058394160586,
86
+ "eval_loss": 0.8939387798309326,
87
+ "eval_precision": 0.34198860037998735,
88
+ "eval_recall": 0.46511627906976744,
89
+ "eval_runtime": 4.3875,
90
+ "eval_samples_per_second": 21.424,
91
+ "eval_steps_per_second": 2.735,
92
+ "step": 224
93
+ },
94
+ {
95
+ "epoch": 8.0,
96
+ "eval_accuracy": 0.7565537500464805,
97
+ "eval_f1": 0.3823943661971831,
98
+ "eval_loss": 0.8892739415168762,
99
+ "eval_precision": 0.323406789755807,
100
+ "eval_recall": 0.46770025839793283,
101
+ "eval_runtime": 3.5968,
102
+ "eval_samples_per_second": 26.135,
103
+ "eval_steps_per_second": 3.336,
104
+ "step": 256
105
+ },
106
+ {
107
+ "epoch": 9.0,
108
+ "eval_accuracy": 0.7693451827613134,
109
+ "eval_f1": 0.41654247391952315,
110
+ "eval_loss": 0.9633013010025024,
111
+ "eval_precision": 0.3670387393302692,
112
+ "eval_recall": 0.48148148148148145,
113
+ "eval_runtime": 3.1245,
114
+ "eval_samples_per_second": 30.085,
115
+ "eval_steps_per_second": 3.841,
116
+ "step": 288
117
+ },
118
+ {
119
+ "epoch": 10.0,
120
+ "eval_accuracy": 0.765478005428922,
121
+ "eval_f1": 0.3988900450919181,
122
+ "eval_loss": 0.9256911873817444,
123
+ "eval_precision": 0.3339140534262485,
124
+ "eval_recall": 0.49526270456503013,
125
+ "eval_runtime": 3.0684,
126
+ "eval_samples_per_second": 30.635,
127
+ "eval_steps_per_second": 3.911,
128
+ "step": 320
129
+ },
130
+ {
131
+ "epoch": 11.0,
132
+ "eval_accuracy": 0.7651061614546536,
133
+ "eval_f1": 0.4374537379718727,
134
+ "eval_loss": 1.0472618341445923,
135
+ "eval_precision": 0.3835171966255678,
136
+ "eval_recall": 0.5090439276485789,
137
+ "eval_runtime": 15.8515,
138
+ "eval_samples_per_second": 5.93,
139
+ "eval_steps_per_second": 0.757,
140
+ "step": 352
141
+ },
142
+ {
143
+ "epoch": 12.0,
144
+ "eval_accuracy": 0.7513107500092961,
145
+ "eval_f1": 0.4116819141449683,
146
+ "eval_loss": 1.0901930332183838,
147
+ "eval_precision": 0.34800713860797144,
148
+ "eval_recall": 0.5038759689922481,
149
+ "eval_runtime": 3.3377,
150
+ "eval_samples_per_second": 28.163,
151
+ "eval_steps_per_second": 3.595,
152
+ "step": 384
153
+ },
154
+ {
155
+ "epoch": 13.0,
156
+ "eval_accuracy": 0.7735842040679731,
157
+ "eval_f1": 0.4424198250728863,
158
+ "eval_loss": 1.0164769887924194,
159
+ "eval_precision": 0.38344914718888184,
160
+ "eval_recall": 0.5228251507321274,
161
+ "eval_runtime": 3.1559,
162
+ "eval_samples_per_second": 29.785,
163
+ "eval_steps_per_second": 3.802,
164
+ "step": 416
165
+ },
166
+ {
167
+ "epoch": 14.0,
168
+ "eval_accuracy": 0.7737329416576805,
169
+ "eval_f1": 0.46205860255447034,
170
+ "eval_loss": 1.0768243074417114,
171
+ "eval_precision": 0.4097268487674883,
172
+ "eval_recall": 0.5297157622739018,
173
+ "eval_runtime": 3.0915,
174
+ "eval_samples_per_second": 30.406,
175
+ "eval_steps_per_second": 3.882,
176
+ "step": 448
177
+ },
178
+ {
179
+ "epoch": 15.0,
180
+ "eval_accuracy": 0.7817647715018778,
181
+ "eval_f1": 0.4762264150943396,
182
+ "eval_loss": 1.1149170398712158,
183
+ "eval_precision": 0.4237743451981195,
184
+ "eval_recall": 0.5434969853574505,
185
+ "eval_runtime": 3.2505,
186
+ "eval_samples_per_second": 28.919,
187
+ "eval_steps_per_second": 3.692,
188
+ "step": 480
189
+ },
190
+ {
191
+ "epoch": 15.62,
192
+ "learning_rate": 1.6875e-05,
193
+ "loss": 0.4087,
194
+ "step": 500
195
+ },
196
+ {
197
+ "epoch": 16.0,
198
+ "eval_accuracy": 0.7796452608485479,
199
+ "eval_f1": 0.4698749526335733,
200
+ "eval_loss": 1.1298694610595703,
201
+ "eval_precision": 0.41948579161028415,
202
+ "eval_recall": 0.5340223944875108,
203
+ "eval_runtime": 3.0471,
204
+ "eval_samples_per_second": 30.849,
205
+ "eval_steps_per_second": 3.938,
206
+ "step": 512
207
+ },
208
+ {
209
+ "epoch": 17.0,
210
+ "eval_accuracy": 0.7883464098464285,
211
+ "eval_f1": 0.4673040152963671,
212
+ "eval_loss": 1.1149996519088745,
213
+ "eval_precision": 0.4202200825309491,
214
+ "eval_recall": 0.5262704565030146,
215
+ "eval_runtime": 3.1689,
216
+ "eval_samples_per_second": 29.664,
217
+ "eval_steps_per_second": 3.787,
218
+ "step": 544
219
+ },
220
+ {
221
+ "epoch": 18.0,
222
+ "eval_accuracy": 0.7801286580150969,
223
+ "eval_f1": 0.45127436281859074,
224
+ "eval_loss": 1.1859441995620728,
225
+ "eval_precision": 0.39946914399469147,
226
+ "eval_recall": 0.5185185185185185,
227
+ "eval_runtime": 7.3417,
228
+ "eval_samples_per_second": 12.803,
229
+ "eval_steps_per_second": 1.634,
230
+ "step": 576
231
+ },
232
+ {
233
+ "epoch": 19.0,
234
+ "eval_accuracy": 0.7609043245454208,
235
+ "eval_f1": 0.4414210128495843,
236
+ "eval_loss": 1.2266000509262085,
237
+ "eval_precision": 0.39326599326599326,
238
+ "eval_recall": 0.5030146425495263,
239
+ "eval_runtime": 3.4668,
240
+ "eval_samples_per_second": 27.114,
241
+ "eval_steps_per_second": 3.461,
242
+ "step": 608
243
+ },
244
+ {
245
+ "epoch": 20.0,
246
+ "eval_accuracy": 0.7837355445655003,
247
+ "eval_f1": 0.4707198806415516,
248
+ "eval_loss": 1.1946580410003662,
249
+ "eval_precision": 0.4151315789473684,
250
+ "eval_recall": 0.5434969853574505,
251
+ "eval_runtime": 3.1596,
252
+ "eval_samples_per_second": 29.75,
253
+ "eval_steps_per_second": 3.798,
254
+ "step": 640
255
+ },
256
+ {
257
+ "epoch": 21.0,
258
+ "eval_accuracy": 0.7890900977949652,
259
+ "eval_f1": 0.4974698326196964,
260
+ "eval_loss": 1.2461358308792114,
261
+ "eval_precision": 0.4538352272727273,
262
+ "eval_recall": 0.5503875968992248,
263
+ "eval_runtime": 3.1747,
264
+ "eval_samples_per_second": 29.609,
265
+ "eval_steps_per_second": 3.78,
266
+ "step": 672
267
+ },
268
+ {
269
+ "epoch": 22.0,
270
+ "eval_accuracy": 0.7804261331945116,
271
+ "eval_f1": 0.4870506378044067,
272
+ "eval_loss": 1.2504911422729492,
273
+ "eval_precision": 0.4417952314165498,
274
+ "eval_recall": 0.5426356589147286,
275
+ "eval_runtime": 3.1491,
276
+ "eval_samples_per_second": 29.849,
277
+ "eval_steps_per_second": 3.811,
278
+ "step": 704
279
+ },
280
+ {
281
+ "epoch": 23.0,
282
+ "eval_accuracy": 0.7844420481166102,
283
+ "eval_f1": 0.4932249322493225,
284
+ "eval_loss": 1.287372350692749,
285
+ "eval_precision": 0.4479606188466948,
286
+ "eval_recall": 0.5486649440137812,
287
+ "eval_runtime": 3.7236,
288
+ "eval_samples_per_second": 25.244,
289
+ "eval_steps_per_second": 3.223,
290
+ "step": 736
291
+ },
292
+ {
293
+ "epoch": 24.0,
294
+ "eval_accuracy": 0.7877886438850259,
295
+ "eval_f1": 0.49941107184923444,
296
+ "eval_loss": 1.277133584022522,
297
+ "eval_precision": 0.4588744588744589,
298
+ "eval_recall": 0.5478036175710594,
299
+ "eval_runtime": 3.2052,
300
+ "eval_samples_per_second": 29.328,
301
+ "eval_steps_per_second": 3.744,
302
+ "step": 768
303
+ },
304
+ {
305
+ "epoch": 25.0,
306
+ "eval_accuracy": 0.7892760197820994,
307
+ "eval_f1": 0.4747201852566576,
308
+ "eval_loss": 1.2719131708145142,
309
+ "eval_precision": 0.43006993006993005,
310
+ "eval_recall": 0.5297157622739018,
311
+ "eval_runtime": 3.0524,
312
+ "eval_samples_per_second": 30.795,
313
+ "eval_steps_per_second": 3.931,
314
+ "step": 800
315
+ },
316
+ {
317
+ "epoch": 26.0,
318
+ "eval_accuracy": 0.7896478637563679,
319
+ "eval_f1": 0.49407265774378584,
320
+ "eval_loss": 1.2584308385849,
321
+ "eval_precision": 0.4442916093535076,
322
+ "eval_recall": 0.5564168819982773,
323
+ "eval_runtime": 3.7701,
324
+ "eval_samples_per_second": 24.933,
325
+ "eval_steps_per_second": 3.183,
326
+ "step": 832
327
+ },
328
+ {
329
+ "epoch": 27.0,
330
+ "eval_accuracy": 0.7868590339493549,
331
+ "eval_f1": 0.4920077972709551,
332
+ "eval_loss": 1.3040637969970703,
333
+ "eval_precision": 0.4494301994301994,
334
+ "eval_recall": 0.5434969853574505,
335
+ "eval_runtime": 3.4169,
336
+ "eval_samples_per_second": 27.51,
337
+ "eval_steps_per_second": 3.512,
338
+ "step": 864
339
+ },
340
+ {
341
+ "epoch": 28.0,
342
+ "eval_accuracy": 0.7880861190644406,
343
+ "eval_f1": 0.4916158536585366,
344
+ "eval_loss": 1.3579275608062744,
345
+ "eval_precision": 0.4408749145591251,
346
+ "eval_recall": 0.5555555555555556,
347
+ "eval_runtime": 3.4487,
348
+ "eval_samples_per_second": 27.256,
349
+ "eval_steps_per_second": 3.48,
350
+ "step": 896
351
+ },
352
+ {
353
+ "epoch": 29.0,
354
+ "eval_accuracy": 0.7839958353474882,
355
+ "eval_f1": 0.501557632398754,
356
+ "eval_loss": 1.3749990463256836,
357
+ "eval_precision": 0.4577114427860697,
358
+ "eval_recall": 0.5546942291128337,
359
+ "eval_runtime": 3.131,
360
+ "eval_samples_per_second": 30.022,
361
+ "eval_steps_per_second": 3.833,
362
+ "step": 928
363
+ },
364
+ {
365
+ "epoch": 30.0,
366
+ "eval_accuracy": 0.7871936935261964,
367
+ "eval_f1": 0.4903883876029816,
368
+ "eval_loss": 1.3733103275299072,
369
+ "eval_precision": 0.45028818443804036,
370
+ "eval_recall": 0.5383290267011197,
371
+ "eval_runtime": 3.4968,
372
+ "eval_samples_per_second": 26.882,
373
+ "eval_steps_per_second": 3.432,
374
+ "step": 960
375
+ },
376
+ {
377
+ "epoch": 31.0,
378
+ "eval_accuracy": 0.7819135090915852,
379
+ "eval_f1": 0.5099260412611912,
380
+ "eval_loss": 1.395755410194397,
381
+ "eval_precision": 0.46519886363636365,
382
+ "eval_recall": 0.5641688199827735,
383
+ "eval_runtime": 3.2027,
384
+ "eval_samples_per_second": 29.351,
385
+ "eval_steps_per_second": 3.747,
386
+ "step": 992
387
+ },
388
+ {
389
+ "epoch": 31.25,
390
+ "learning_rate": 1.375e-05,
391
+ "loss": 0.0138,
392
+ "step": 1000
393
+ },
394
+ {
395
+ "epoch": 32.0,
396
+ "eval_accuracy": 0.7822481686684267,
397
+ "eval_f1": 0.49709639953542384,
398
+ "eval_loss": 1.4415804147720337,
399
+ "eval_precision": 0.45147679324894513,
400
+ "eval_recall": 0.5529715762273901,
401
+ "eval_runtime": 3.081,
402
+ "eval_samples_per_second": 30.51,
403
+ "eval_steps_per_second": 3.895,
404
+ "step": 1024
405
+ },
406
+ {
407
+ "epoch": 33.0,
408
+ "eval_accuracy": 0.7877886438850259,
409
+ "eval_f1": 0.49035187287173665,
410
+ "eval_loss": 1.384263277053833,
411
+ "eval_precision": 0.43724696356275305,
412
+ "eval_recall": 0.5581395348837209,
413
+ "eval_runtime": 3.7036,
414
+ "eval_samples_per_second": 25.381,
415
+ "eval_steps_per_second": 3.24,
416
+ "step": 1056
417
+ },
418
+ {
419
+ "epoch": 34.0,
420
+ "eval_accuracy": 0.7796080764511211,
421
+ "eval_f1": 0.490521327014218,
422
+ "eval_loss": 1.4162341356277466,
423
+ "eval_precision": 0.45295404814004375,
424
+ "eval_recall": 0.5348837209302325,
425
+ "eval_runtime": 3.1016,
426
+ "eval_samples_per_second": 30.307,
427
+ "eval_steps_per_second": 3.869,
428
+ "step": 1088
429
+ },
430
+ {
431
+ "epoch": 35.0,
432
+ "eval_accuracy": 0.7906146580894656,
433
+ "eval_f1": 0.5060240963855421,
434
+ "eval_loss": 1.380549669265747,
435
+ "eval_precision": 0.46104815864022664,
436
+ "eval_recall": 0.5607235142118863,
437
+ "eval_runtime": 3.5552,
438
+ "eval_samples_per_second": 26.44,
439
+ "eval_steps_per_second": 3.375,
440
+ "step": 1120
441
+ },
442
+ {
443
+ "epoch": 36.0,
444
+ "eval_accuracy": 0.7868590339493549,
445
+ "eval_f1": 0.4811210587777345,
446
+ "eval_loss": 1.4221726655960083,
447
+ "eval_precision": 0.43892045454545453,
448
+ "eval_recall": 0.5322997416020672,
449
+ "eval_runtime": 3.1728,
450
+ "eval_samples_per_second": 29.627,
451
+ "eval_steps_per_second": 3.782,
452
+ "step": 1152
453
+ },
454
+ {
455
+ "epoch": 37.0,
456
+ "eval_accuracy": 0.7899453389357826,
457
+ "eval_f1": 0.502147598594299,
458
+ "eval_loss": 1.407759189605713,
459
+ "eval_precision": 0.4592857142857143,
460
+ "eval_recall": 0.553832902670112,
461
+ "eval_runtime": 3.1234,
462
+ "eval_samples_per_second": 30.096,
463
+ "eval_steps_per_second": 3.842,
464
+ "step": 1184
465
+ },
466
+ {
467
+ "epoch": 38.0,
468
+ "eval_accuracy": 0.7908377644740266,
469
+ "eval_f1": 0.4872389791183295,
470
+ "eval_loss": 1.4327783584594727,
471
+ "eval_precision": 0.4421052631578947,
472
+ "eval_recall": 0.5426356589147286,
473
+ "eval_runtime": 3.409,
474
+ "eval_samples_per_second": 27.574,
475
+ "eval_steps_per_second": 3.52,
476
+ "step": 1216
477
+ },
478
+ {
479
+ "epoch": 39.0,
480
+ "eval_accuracy": 0.7851113672702934,
481
+ "eval_f1": 0.4913227921326649,
482
+ "eval_loss": 1.4441440105438232,
483
+ "eval_precision": 0.4448324022346369,
484
+ "eval_recall": 0.5486649440137812,
485
+ "eval_runtime": 3.2678,
486
+ "eval_samples_per_second": 28.766,
487
+ "eval_steps_per_second": 3.672,
488
+ "step": 1248
489
+ },
490
+ {
491
+ "epoch": 40.0,
492
+ "eval_accuracy": 0.7912839772431488,
493
+ "eval_f1": 0.5036864571206829,
494
+ "eval_loss": 1.4089561700820923,
495
+ "eval_precision": 0.4583333333333333,
496
+ "eval_recall": 0.5590008613264428,
497
+ "eval_runtime": 3.1449,
498
+ "eval_samples_per_second": 29.89,
499
+ "eval_steps_per_second": 3.816,
500
+ "step": 1280
501
+ },
502
+ {
503
+ "epoch": 41.0,
504
+ "eval_accuracy": 0.7902428141151973,
505
+ "eval_f1": 0.503858024691358,
506
+ "eval_loss": 1.4232546091079712,
507
+ "eval_precision": 0.45632424877707894,
508
+ "eval_recall": 0.5624461670973299,
509
+ "eval_runtime": 8.2173,
510
+ "eval_samples_per_second": 11.439,
511
+ "eval_steps_per_second": 1.46,
512
+ "step": 1312
513
+ },
514
+ {
515
+ "epoch": 42.0,
516
+ "eval_accuracy": 0.7909493176663073,
517
+ "eval_f1": 0.49484536082474234,
518
+ "eval_loss": 1.4232605695724487,
519
+ "eval_precision": 0.4444444444444444,
520
+ "eval_recall": 0.5581395348837209,
521
+ "eval_runtime": 3.0349,
522
+ "eval_samples_per_second": 30.973,
523
+ "eval_steps_per_second": 3.954,
524
+ "step": 1344
525
+ },
526
+ {
527
+ "epoch": 43.0,
528
+ "eval_accuracy": 0.7923995091659539,
529
+ "eval_f1": 0.5162324649298597,
530
+ "eval_loss": 1.4146913290023804,
531
+ "eval_precision": 0.4827586206896552,
532
+ "eval_recall": 0.5546942291128337,
533
+ "eval_runtime": 3.288,
534
+ "eval_samples_per_second": 28.589,
535
+ "eval_steps_per_second": 3.65,
536
+ "step": 1376
537
+ },
538
+ {
539
+ "epoch": 44.0,
540
+ "eval_accuracy": 0.7880117502695869,
541
+ "eval_f1": 0.4955684007707129,
542
+ "eval_loss": 1.4241943359375,
543
+ "eval_precision": 0.44839609483960946,
544
+ "eval_recall": 0.553832902670112,
545
+ "eval_runtime": 6.1717,
546
+ "eval_samples_per_second": 15.231,
547
+ "eval_steps_per_second": 1.944,
548
+ "step": 1408
549
+ },
550
+ {
551
+ "epoch": 45.0,
552
+ "eval_accuracy": 0.7880861190644406,
553
+ "eval_f1": 0.49018853405155827,
554
+ "eval_loss": 1.421156644821167,
555
+ "eval_precision": 0.4429763560500695,
556
+ "eval_recall": 0.5486649440137812,
557
+ "eval_runtime": 3.1226,
558
+ "eval_samples_per_second": 30.103,
559
+ "eval_steps_per_second": 3.843,
560
+ "step": 1440
561
+ },
562
+ {
563
+ "epoch": 46.0,
564
+ "eval_accuracy": 0.7936637786784665,
565
+ "eval_f1": 0.5089463220675944,
566
+ "eval_loss": 1.424131989479065,
567
+ "eval_precision": 0.4726735598227474,
568
+ "eval_recall": 0.5512489233419466,
569
+ "eval_runtime": 3.3347,
570
+ "eval_samples_per_second": 28.189,
571
+ "eval_steps_per_second": 3.599,
572
+ "step": 1472
573
+ },
574
+ {
575
+ "epoch": 46.88,
576
+ "learning_rate": 1.0625e-05,
577
+ "loss": 0.0034,
578
+ "step": 1500
579
+ },
580
+ {
581
+ "epoch": 47.0,
582
+ "eval_accuracy": 0.7858550552188301,
583
+ "eval_f1": 0.5065398335315101,
584
+ "eval_loss": 1.443221926689148,
585
+ "eval_precision": 0.46916299559471364,
586
+ "eval_recall": 0.5503875968992248,
587
+ "eval_runtime": 3.0632,
588
+ "eval_samples_per_second": 30.687,
589
+ "eval_steps_per_second": 3.917,
590
+ "step": 1504
591
+ },
592
+ {
593
+ "epoch": 48.0,
594
+ "eval_accuracy": 0.784739523296025,
595
+ "eval_f1": 0.49553398058252424,
596
+ "eval_loss": 1.4684823751449585,
597
+ "eval_precision": 0.4512022630834512,
598
+ "eval_recall": 0.549526270456503,
599
+ "eval_runtime": 3.2701,
600
+ "eval_samples_per_second": 28.745,
601
+ "eval_steps_per_second": 3.67,
602
+ "step": 1536
603
+ },
604
+ {
605
+ "epoch": 49.0,
606
+ "eval_accuracy": 0.7949280481909791,
607
+ "eval_f1": 0.5152113789016199,
608
+ "eval_loss": 1.4661859273910522,
609
+ "eval_precision": 0.4759124087591241,
610
+ "eval_recall": 0.5615848406546081,
611
+ "eval_runtime": 3.3592,
612
+ "eval_samples_per_second": 27.983,
613
+ "eval_steps_per_second": 3.572,
614
+ "step": 1568
615
+ },
616
+ {
617
+ "epoch": 50.0,
618
+ "eval_accuracy": 0.7894619417692337,
619
+ "eval_f1": 0.506033476060724,
620
+ "eval_loss": 1.5200412273406982,
621
+ "eval_precision": 0.4616477272727273,
622
+ "eval_recall": 0.5598621877691645,
623
+ "eval_runtime": 3.1539,
624
+ "eval_samples_per_second": 29.805,
625
+ "eval_steps_per_second": 3.805,
626
+ "step": 1600
627
+ },
628
+ {
629
+ "epoch": 51.0,
630
+ "eval_accuracy": 0.7907262112817461,
631
+ "eval_f1": 0.5102848101265823,
632
+ "eval_loss": 1.49446702003479,
633
+ "eval_precision": 0.4718361375274323,
634
+ "eval_recall": 0.5555555555555556,
635
+ "eval_runtime": 6.9973,
636
+ "eval_samples_per_second": 13.434,
637
+ "eval_steps_per_second": 1.715,
638
+ "step": 1632
639
+ },
640
+ {
641
+ "epoch": 52.0,
642
+ "eval_accuracy": 0.7916558212174172,
643
+ "eval_f1": 0.5182511030886483,
644
+ "eval_loss": 1.5040817260742188,
645
+ "eval_precision": 0.484984984984985,
646
+ "eval_recall": 0.5564168819982773,
647
+ "eval_runtime": 8.0818,
648
+ "eval_samples_per_second": 11.631,
649
+ "eval_steps_per_second": 1.485,
650
+ "step": 1664
651
+ },
652
+ {
653
+ "epoch": 53.0,
654
+ "eval_accuracy": 0.7928829063325029,
655
+ "eval_f1": 0.5159489633173845,
656
+ "eval_loss": 1.5629594326019287,
657
+ "eval_precision": 0.4803266518188567,
658
+ "eval_recall": 0.5572782084409992,
659
+ "eval_runtime": 3.423,
660
+ "eval_samples_per_second": 27.462,
661
+ "eval_steps_per_second": 3.506,
662
+ "step": 1696
663
+ },
664
+ {
665
+ "epoch": 54.0,
666
+ "eval_accuracy": 0.7840702041423419,
667
+ "eval_f1": 0.5010117361392149,
668
+ "eval_loss": 1.5059171915054321,
669
+ "eval_precision": 0.4725190839694656,
670
+ "eval_recall": 0.533161068044789,
671
+ "eval_runtime": 3.1466,
672
+ "eval_samples_per_second": 29.874,
673
+ "eval_steps_per_second": 3.814,
674
+ "step": 1728
675
+ },
676
+ {
677
+ "epoch": 55.0,
678
+ "eval_accuracy": 0.790763395679173,
679
+ "eval_f1": 0.5078616352201258,
680
+ "eval_loss": 1.5826448202133179,
681
+ "eval_precision": 0.4671005061460593,
682
+ "eval_recall": 0.5564168819982773,
683
+ "eval_runtime": 3.1003,
684
+ "eval_samples_per_second": 30.319,
685
+ "eval_steps_per_second": 3.871,
686
+ "step": 1760
687
+ },
688
+ {
689
+ "epoch": 56.0,
690
+ "eval_accuracy": 0.791246792845722,
691
+ "eval_f1": 0.520757758968158,
692
+ "eval_loss": 1.5211557149887085,
693
+ "eval_precision": 0.4893939393939394,
694
+ "eval_recall": 0.5564168819982773,
695
+ "eval_runtime": 3.5151,
696
+ "eval_samples_per_second": 26.742,
697
+ "eval_steps_per_second": 3.414,
698
+ "step": 1792
699
+ },
700
+ {
701
+ "epoch": 57.0,
702
+ "eval_accuracy": 0.7894619417692337,
703
+ "eval_f1": 0.5172550575168584,
704
+ "eval_loss": 1.5194625854492188,
705
+ "eval_precision": 0.47941176470588237,
706
+ "eval_recall": 0.5615848406546081,
707
+ "eval_runtime": 3.1721,
708
+ "eval_samples_per_second": 29.634,
709
+ "eval_steps_per_second": 3.783,
710
+ "step": 1824
711
+ },
712
+ {
713
+ "epoch": 58.0,
714
+ "eval_accuracy": 0.7864871899750865,
715
+ "eval_f1": 0.5015600624024961,
716
+ "eval_loss": 1.5568981170654297,
717
+ "eval_precision": 0.45830363506771205,
718
+ "eval_recall": 0.553832902670112,
719
+ "eval_runtime": 3.0367,
720
+ "eval_samples_per_second": 30.954,
721
+ "eval_steps_per_second": 3.952,
722
+ "step": 1856
723
+ },
724
+ {
725
+ "epoch": 59.0,
726
+ "eval_accuracy": 0.789610679358941,
727
+ "eval_f1": 0.5186953062848051,
728
+ "eval_loss": 1.5378462076187134,
729
+ "eval_precision": 0.4818920916481892,
730
+ "eval_recall": 0.5615848406546081,
731
+ "eval_runtime": 3.0368,
732
+ "eval_samples_per_second": 30.954,
733
+ "eval_steps_per_second": 3.952,
734
+ "step": 1888
735
+ },
736
+ {
737
+ "epoch": 60.0,
738
+ "eval_accuracy": 0.7838842821552077,
739
+ "eval_f1": 0.490257209664848,
740
+ "eval_loss": 1.5430467128753662,
741
+ "eval_precision": 0.44768683274021354,
742
+ "eval_recall": 0.5417743324720069,
743
+ "eval_runtime": 3.2599,
744
+ "eval_samples_per_second": 28.835,
745
+ "eval_steps_per_second": 3.681,
746
+ "step": 1920
747
+ },
748
+ {
749
+ "epoch": 61.0,
750
+ "eval_accuracy": 0.7890529133975384,
751
+ "eval_f1": 0.5021816739389131,
752
+ "eval_loss": 1.5567139387130737,
753
+ "eval_precision": 0.46544117647058825,
754
+ "eval_recall": 0.5452196382428941,
755
+ "eval_runtime": 3.4648,
756
+ "eval_samples_per_second": 27.13,
757
+ "eval_steps_per_second": 3.463,
758
+ "step": 1952
759
+ },
760
+ {
761
+ "epoch": 62.0,
762
+ "eval_accuracy": 0.7909865020637341,
763
+ "eval_f1": 0.5128824476650564,
764
+ "eval_loss": 1.5516618490219116,
765
+ "eval_precision": 0.48148148148148145,
766
+ "eval_recall": 0.5486649440137812,
767
+ "eval_runtime": 3.3346,
768
+ "eval_samples_per_second": 28.19,
769
+ "eval_steps_per_second": 3.599,
770
+ "step": 1984
771
+ },
772
+ {
773
+ "epoch": 62.5,
774
+ "learning_rate": 7.500000000000001e-06,
775
+ "loss": 0.0017,
776
+ "step": 2000
777
+ },
778
+ {
779
+ "epoch": 63.0,
780
+ "eval_accuracy": 0.7902056297177704,
781
+ "eval_f1": 0.508203281312525,
782
+ "eval_loss": 1.5522751808166504,
783
+ "eval_precision": 0.47458893871449925,
784
+ "eval_recall": 0.5469422911283376,
785
+ "eval_runtime": 3.095,
786
+ "eval_samples_per_second": 30.372,
787
+ "eval_steps_per_second": 3.877,
788
+ "step": 2016
789
+ },
790
+ {
791
+ "epoch": 64.0,
792
+ "eval_accuracy": 0.787044955936489,
793
+ "eval_f1": 0.4970691676436107,
794
+ "eval_loss": 1.5659950971603394,
795
+ "eval_precision": 0.45493562231759654,
796
+ "eval_recall": 0.5478036175710594,
797
+ "eval_runtime": 3.7257,
798
+ "eval_samples_per_second": 25.23,
799
+ "eval_steps_per_second": 3.221,
800
+ "step": 2048
801
+ },
802
+ {
803
+ "epoch": 65.0,
804
+ "eval_accuracy": 0.791246792845722,
805
+ "eval_f1": 0.5023219814241486,
806
+ "eval_loss": 1.5194514989852905,
807
+ "eval_precision": 0.4560787069571328,
808
+ "eval_recall": 0.5590008613264428,
809
+ "eval_runtime": 3.1193,
810
+ "eval_samples_per_second": 30.135,
811
+ "eval_steps_per_second": 3.847,
812
+ "step": 2080
813
+ },
814
+ {
815
+ "epoch": 66.0,
816
+ "eval_accuracy": 0.7909493176663073,
817
+ "eval_f1": 0.504884720593982,
818
+ "eval_loss": 1.5329976081848145,
819
+ "eval_precision": 0.46208869814020026,
820
+ "eval_recall": 0.5564168819982773,
821
+ "eval_runtime": 3.2791,
822
+ "eval_samples_per_second": 28.666,
823
+ "eval_steps_per_second": 3.66,
824
+ "step": 2112
825
+ },
826
+ {
827
+ "epoch": 67.0,
828
+ "eval_accuracy": 0.7897222325512215,
829
+ "eval_f1": 0.5118296529968455,
830
+ "eval_loss": 1.5393718481063843,
831
+ "eval_precision": 0.472,
832
+ "eval_recall": 0.5590008613264428,
833
+ "eval_runtime": 3.3565,
834
+ "eval_samples_per_second": 28.006,
835
+ "eval_steps_per_second": 3.575,
836
+ "step": 2144
837
+ },
838
+ {
839
+ "epoch": 68.0,
840
+ "eval_accuracy": 0.7929572751273566,
841
+ "eval_f1": 0.5202220459952419,
842
+ "eval_loss": 1.5860378742218018,
843
+ "eval_precision": 0.4819985304922851,
844
+ "eval_recall": 0.5650301464254953,
845
+ "eval_runtime": 3.0372,
846
+ "eval_samples_per_second": 30.949,
847
+ "eval_steps_per_second": 3.951,
848
+ "step": 2176
849
+ },
850
+ {
851
+ "epoch": 69.0,
852
+ "eval_accuracy": 0.7916186368199903,
853
+ "eval_f1": 0.5208667736757624,
854
+ "eval_loss": 1.622610092163086,
855
+ "eval_precision": 0.48760330578512395,
856
+ "eval_recall": 0.5590008613264428,
857
+ "eval_runtime": 3.1363,
858
+ "eval_samples_per_second": 29.972,
859
+ "eval_steps_per_second": 3.826,
860
+ "step": 2208
861
+ },
862
+ {
863
+ "epoch": 70.0,
864
+ "eval_accuracy": 0.7935150410887591,
865
+ "eval_f1": 0.5205158264947246,
866
+ "eval_loss": 1.613077163696289,
867
+ "eval_precision": 0.47639484978540775,
868
+ "eval_recall": 0.5736434108527132,
869
+ "eval_runtime": 3.4181,
870
+ "eval_samples_per_second": 27.501,
871
+ "eval_steps_per_second": 3.511,
872
+ "step": 2240
873
+ },
874
+ {
875
+ "epoch": 71.0,
876
+ "eval_accuracy": 0.7962666864983453,
877
+ "eval_f1": 0.5341365461847388,
878
+ "eval_loss": 1.617754340171814,
879
+ "eval_precision": 0.5003762227238525,
880
+ "eval_recall": 0.5727820844099913,
881
+ "eval_runtime": 3.9965,
882
+ "eval_samples_per_second": 23.521,
883
+ "eval_steps_per_second": 3.003,
884
+ "step": 2272
885
+ },
886
+ {
887
+ "epoch": 72.0,
888
+ "eval_accuracy": 0.7960063957163575,
889
+ "eval_f1": 0.5309876049580169,
890
+ "eval_loss": 1.6110599040985107,
891
+ "eval_precision": 0.4955223880597015,
892
+ "eval_recall": 0.5719207579672696,
893
+ "eval_runtime": 3.0526,
894
+ "eval_samples_per_second": 30.794,
895
+ "eval_steps_per_second": 3.931,
896
+ "step": 2304
897
+ },
898
+ {
899
+ "epoch": 73.0,
900
+ "eval_accuracy": 0.7905774736920388,
901
+ "eval_f1": 0.5087025316455697,
902
+ "eval_loss": 1.5602927207946777,
903
+ "eval_precision": 0.4703730797366496,
904
+ "eval_recall": 0.553832902670112,
905
+ "eval_runtime": 3.378,
906
+ "eval_samples_per_second": 27.827,
907
+ "eval_steps_per_second": 3.552,
908
+ "step": 2336
909
+ },
910
+ {
911
+ "epoch": 74.0,
912
+ "eval_accuracy": 0.7928085375376492,
913
+ "eval_f1": 0.5181962025316456,
914
+ "eval_loss": 1.555016040802002,
915
+ "eval_precision": 0.479151426481346,
916
+ "eval_recall": 0.5641688199827735,
917
+ "eval_runtime": 3.0403,
918
+ "eval_samples_per_second": 30.918,
919
+ "eval_steps_per_second": 3.947,
920
+ "step": 2368
921
+ },
922
+ {
923
+ "epoch": 75.0,
924
+ "eval_accuracy": 0.7963782396906258,
925
+ "eval_f1": 0.5212933753943217,
926
+ "eval_loss": 1.5542960166931152,
927
+ "eval_precision": 0.4807272727272727,
928
+ "eval_recall": 0.5693367786391043,
929
+ "eval_runtime": 3.1539,
930
+ "eval_samples_per_second": 29.805,
931
+ "eval_steps_per_second": 3.805,
932
+ "step": 2400
933
+ },
934
+ {
935
+ "epoch": 76.0,
936
+ "eval_accuracy": 0.7952998921652474,
937
+ "eval_f1": 0.5285996055226825,
938
+ "eval_loss": 1.560102939605713,
939
+ "eval_precision": 0.487627365356623,
940
+ "eval_recall": 0.5770887166236004,
941
+ "eval_runtime": 3.3272,
942
+ "eval_samples_per_second": 28.252,
943
+ "eval_steps_per_second": 3.607,
944
+ "step": 2432
945
+ },
946
+ {
947
+ "epoch": 77.0,
948
+ "eval_accuracy": 0.7926597999479419,
949
+ "eval_f1": 0.5247999999999999,
950
+ "eval_loss": 1.559518575668335,
951
+ "eval_precision": 0.4899178491411501,
952
+ "eval_recall": 0.5650301464254953,
953
+ "eval_runtime": 3.1588,
954
+ "eval_samples_per_second": 29.758,
955
+ "eval_steps_per_second": 3.799,
956
+ "step": 2464
957
+ },
958
+ {
959
+ "epoch": 78.0,
960
+ "eval_accuracy": 0.7940356226527349,
961
+ "eval_f1": 0.533066132264529,
962
+ "eval_loss": 1.5632045269012451,
963
+ "eval_precision": 0.4985007496251874,
964
+ "eval_recall": 0.5727820844099913,
965
+ "eval_runtime": 3.5803,
966
+ "eval_samples_per_second": 26.255,
967
+ "eval_steps_per_second": 3.352,
968
+ "step": 2496
969
+ },
970
+ {
971
+ "epoch": 78.12,
972
+ "learning_rate": 4.3750000000000005e-06,
973
+ "loss": 0.0011,
974
+ "step": 2500
975
+ },
976
+ {
977
+ "epoch": 79.0,
978
+ "eval_accuracy": 0.7931431971144908,
979
+ "eval_f1": 0.5266084193804608,
980
+ "eval_loss": 1.5693398714065552,
981
+ "eval_precision": 0.4885777450257922,
982
+ "eval_recall": 0.5710594315245479,
983
+ "eval_runtime": 3.2284,
984
+ "eval_samples_per_second": 29.117,
985
+ "eval_steps_per_second": 3.717,
986
+ "step": 2528
987
+ },
988
+ {
989
+ "epoch": 80.0,
990
+ "eval_accuracy": 0.7927341687427956,
991
+ "eval_f1": 0.5263575108997226,
992
+ "eval_loss": 1.5728816986083984,
993
+ "eval_precision": 0.48751835535976507,
994
+ "eval_recall": 0.5719207579672696,
995
+ "eval_runtime": 3.0942,
996
+ "eval_samples_per_second": 30.38,
997
+ "eval_steps_per_second": 3.878,
998
+ "step": 2560
999
+ },
1000
+ {
1001
+ "epoch": 81.0,
1002
+ "eval_accuracy": 0.7929200907299297,
1003
+ "eval_f1": 0.5305313243457573,
1004
+ "eval_loss": 1.5773258209228516,
1005
+ "eval_precision": 0.49155033063923587,
1006
+ "eval_recall": 0.5762273901808785,
1007
+ "eval_runtime": 3.5248,
1008
+ "eval_samples_per_second": 26.668,
1009
+ "eval_steps_per_second": 3.404,
1010
+ "step": 2592
1011
+ },
1012
+ {
1013
+ "epoch": 82.0,
1014
+ "eval_accuracy": 0.7932919347041981,
1015
+ "eval_f1": 0.5318382058470164,
1016
+ "eval_loss": 1.579202651977539,
1017
+ "eval_precision": 0.49700598802395207,
1018
+ "eval_recall": 0.5719207579672696,
1019
+ "eval_runtime": 3.0984,
1020
+ "eval_samples_per_second": 30.338,
1021
+ "eval_steps_per_second": 3.873,
1022
+ "step": 2624
1023
+ },
1024
+ {
1025
+ "epoch": 83.0,
1026
+ "eval_accuracy": 0.793329119101625,
1027
+ "eval_f1": 0.5278780585639792,
1028
+ "eval_loss": 1.58578622341156,
1029
+ "eval_precision": 0.493993993993994,
1030
+ "eval_recall": 0.5667527993109388,
1031
+ "eval_runtime": 3.0353,
1032
+ "eval_samples_per_second": 30.969,
1033
+ "eval_steps_per_second": 3.954,
1034
+ "step": 2656
1035
+ },
1036
+ {
1037
+ "epoch": 84.0,
1038
+ "eval_accuracy": 0.7925110623582344,
1039
+ "eval_f1": 0.528965241709948,
1040
+ "eval_loss": 1.584820032119751,
1041
+ "eval_precision": 0.4932935916542474,
1042
+ "eval_recall": 0.570198105081826,
1043
+ "eval_runtime": 3.3844,
1044
+ "eval_samples_per_second": 27.774,
1045
+ "eval_steps_per_second": 3.546,
1046
+ "step": 2688
1047
+ },
1048
+ {
1049
+ "epoch": 85.0,
1050
+ "eval_accuracy": 0.7922135871788197,
1051
+ "eval_f1": 0.5333866453418633,
1052
+ "eval_loss": 1.5820199251174927,
1053
+ "eval_precision": 0.49776119402985075,
1054
+ "eval_recall": 0.5745047372954349,
1055
+ "eval_runtime": 3.2337,
1056
+ "eval_samples_per_second": 29.069,
1057
+ "eval_steps_per_second": 3.711,
1058
+ "step": 2720
1059
+ },
1060
+ {
1061
+ "epoch": 86.0,
1062
+ "eval_accuracy": 0.7920648495891124,
1063
+ "eval_f1": 0.5324830609804703,
1064
+ "eval_loss": 1.5860569477081299,
1065
+ "eval_precision": 0.49554896142433236,
1066
+ "eval_recall": 0.5753660637381568,
1067
+ "eval_runtime": 3.1113,
1068
+ "eval_samples_per_second": 30.212,
1069
+ "eval_steps_per_second": 3.857,
1070
+ "step": 2752
1071
+ }
1072
+ ],
1073
+ "max_steps": 3200,
1074
+ "num_train_epochs": 100,
1075
+ "total_flos": 1.8890572813041664e+16,
1076
+ "trial_name": null,
1077
+ "trial_params": null
1078
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb1e434b531988ca004c6ae71f60286e85ab7b5eca3cbdd0940f28e887686c64
3
+ size 3643
vocab.txt ADDED
The diff for this file is too large to render. See raw diff