chaojiang06 committed on
Commit
a66e2ed
·
1 Parent(s): 252a896

Upload 15 files

Browse files
README.md ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ tags:
4
+ - generated_from_trainer
5
+ metrics:
6
+ - accuracy
7
+ model-index:
8
+ - name: tst-translation353
9
+ results: []
10
+ ---
11
+
12
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
13
+ should probably proofread and complete it, then remove this comment. -->
14
+
15
+ # tst-translation353
16
+
17
+ This model is a fine-tuned version of [t5-base](https://huggingface.co/t5-base) on an unknown dataset.
18
+ It achieves the following results on the evaluation set (best checkpoint by accuracy, epoch 8 / step 840):
19
+ - Loss: 0.1457
20
+ - Accuracy: 0.6826
21
+
22
+ ## Model description
23
+
24
+ More information needed
25
+
26
+ ## Intended uses & limitations
27
+
28
+ More information needed
29
+
30
+ ## Training and evaluation data
31
+
32
+ More information needed
33
+
34
+ ## Training procedure
35
+
36
+ ### Training hyperparameters
37
+
38
+ The following hyperparameters were used during training:
39
+ - learning_rate: 5e-05
40
+ - train_batch_size: 12
41
+ - eval_batch_size: 12
42
+ - seed: 42
43
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
44
+ - lr_scheduler_type: linear
45
+ - num_epochs: 10.0
46
+
47
+ ### Training results
48
+
49
+ | Training Loss | Epoch | Step | Validation Loss | Accuracy |
50
+ |:-------------:|:-----:|:----:|:---------------:|:--------:|
51
+ | No log | 1.0 | 105 | 0.3043 | 0.2991 |
52
+ | No log | 2.0 | 210 | 0.2653 | 0.3311 |
53
+ | No log | 3.0 | 315 | 0.2475 | 0.4726 |
54
+ | No log | 4.0 | 420 | 0.1737 | 0.6096 |
55
+ | 0.5112 | 5.0 | 525 | 0.1660 | 0.6256 |
56
+ | 0.5112 | 6.0 | 630 | 0.1499 | 0.6575 |
57
+ | 0.5112 | 7.0 | 735 | 0.1497 | 0.6438 |
58
+ | 0.5112 | 8.0 | 840 | 0.1457 | 0.6826 |
59
+ | 0.5112 | 9.0 | 945 | 0.1470 | 0.6781 |
60
+ | 0.151 | 10.0 | 1050 | 0.1428 | 0.6781 |
61
+
62
+
63
+ ### Framework versions
64
+
65
+ - Transformers 4.17.0
66
+ - Pytorch 1.11.0+cu113
67
+ - Datasets 1.17.0
68
+ - Tokenizers 0.11.6
added_tokens.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"[REF]": 32102, "[MATH]": 32100, "[EQUATION]": 32101, "[CITATION]": 32103}
all_results.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 10.0,
3
+ "eval_accuracy": 0.682648401826484,
4
+ "eval_loss": 0.14565615355968475,
5
+ "eval_runtime": 15.3703,
6
+ "eval_samples": 438,
7
+ "eval_samples_per_second": 28.496,
8
+ "eval_steps_per_second": 2.407,
9
+ "predict_accuracy": 0.686046511627907,
10
+ "predict_loss": 0.14548401534557343,
11
+ "predict_runtime": 6.6827,
12
+ "predict_samples": 430,
13
+ "predict_samples_per_second": 64.345,
14
+ "predict_steps_per_second": 5.387,
15
+ "train_loss": 0.32115679332188196,
16
+ "train_runtime": 336.3145,
17
+ "train_samples": 1254,
18
+ "train_samples_per_second": 37.287,
19
+ "train_steps_per_second": 3.122
20
+ }
config.json ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "t5-base",
3
+ "architectures": [
4
+ "T5ForConditionalGeneration"
5
+ ],
6
+ "d_ff": 3072,
7
+ "d_kv": 64,
8
+ "d_model": 768,
9
+ "decoder_start_token_id": 0,
10
+ "dropout_rate": 0.1,
11
+ "eos_token_id": 1,
12
+ "feed_forward_proj": "relu",
13
+ "initializer_factor": 1.0,
14
+ "is_encoder_decoder": true,
15
+ "layer_norm_epsilon": 1e-06,
16
+ "model_type": "t5",
17
+ "n_positions": 512,
18
+ "num_decoder_layers": 12,
19
+ "num_heads": 12,
20
+ "num_layers": 12,
21
+ "output_past": true,
22
+ "pad_token_id": 0,
23
+ "relative_attention_num_buckets": 32,
24
+ "task_specific_params": {
25
+ "summarization": {
26
+ "early_stopping": true,
27
+ "length_penalty": 2.0,
28
+ "max_length": 200,
29
+ "min_length": 30,
30
+ "no_repeat_ngram_size": 3,
31
+ "num_beams": 4,
32
+ "prefix": "summarize: "
33
+ },
34
+ "translation_en_to_de": {
35
+ "early_stopping": true,
36
+ "max_length": 300,
37
+ "num_beams": 4,
38
+ "prefix": "translate English to German: "
39
+ },
40
+ "translation_en_to_fr": {
41
+ "early_stopping": true,
42
+ "max_length": 300,
43
+ "num_beams": 4,
44
+ "prefix": "translate English to French: "
45
+ },
46
+ "translation_en_to_ro": {
47
+ "early_stopping": true,
48
+ "max_length": 300,
49
+ "num_beams": 4,
50
+ "prefix": "translate English to Romanian: "
51
+ }
52
+ },
53
+ "torch_dtype": "float32",
54
+ "transformers_version": "4.17.0",
55
+ "use_cache": true,
56
+ "vocab_size": 32104
57
+ }
eval_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 10.0,
3
+ "eval_accuracy": 0.682648401826484,
4
+ "eval_loss": 0.14565615355968475,
5
+ "eval_runtime": 15.3703,
6
+ "eval_samples": 438,
7
+ "eval_samples_per_second": 28.496,
8
+ "eval_steps_per_second": 2.407
9
+ }
generated_predictions.txt ADDED
@@ -0,0 +1,430 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Format
2
+ Content
3
+ Lang-accurate-spefific
4
+ Content
5
+ Content
6
+ Improve-grammar-Typo
7
+ Content
8
+ Format
9
+ Format
10
+ Lang-accurate-spefific
11
+ Format
12
+ Improve-grammar-Typo
13
+ Improve-grammar-Typo
14
+ Content
15
+ Format
16
+ Content
17
+ Improve-grammar-Typo
18
+ Lang-accurate-spefific
19
+ Format
20
+ Format
21
+ Content
22
+ Improve-grammar-Typo
23
+ Lang-accurate-spefific
24
+ Improve-grammar-Typo
25
+ Improve-grammar-Typo
26
+ Format
27
+ Improve-grammar-Typo
28
+ Improve-grammar-Typo
29
+ Content
30
+ Improve-grammar-Typo
31
+ Format
32
+ Improve-grammar-Typo
33
+ Format
34
+ Lang-accurate-spefific
35
+ Content
36
+ Format
37
+ Improve-grammar-Typo
38
+ Format
39
+ Format
40
+ Lang-professional-Improve-style
41
+ Lang-accurate-spefific
42
+ Format
43
+ Content
44
+ Content
45
+ Format
46
+ Content
47
+ Format
48
+ Format
49
+ Format
50
+ Format
51
+ Improve-grammar-Typo
52
+ Format
53
+ Improve-grammar-Typo
54
+ Content
55
+ Format
56
+ Lang-accurate-spefific
57
+ Improve-grammar-Typo
58
+ Lang-improve-readability-Simplify
59
+ Content
60
+ Lang-accurate-spefific
61
+ Format
62
+ Content
63
+ Lang-accurate-spefific
64
+ Content
65
+ Lang-accurate-spefific
66
+ Lang-accurate-spefific
67
+ Lang-accurate-spefific
68
+ Format
69
+ Lang-accurate-spefific
70
+ Lang-accurate-spefific
71
+ Lang-improve-readability-Simplify
72
+ Content
73
+ Improve-grammar-Typo
74
+ Improve-grammar-Typo
75
+ Improve-grammar-Typo
76
+ Content
77
+ Improve-grammar-Typo
78
+ Format
79
+ Format
80
+ Improve-grammar-Typo
81
+ Format
82
+ Lang-accurate-spefific
83
+ Lang-accurate-spefific
84
+ Lang-accurate-spefific
85
+ Improve-grammar-Typo
86
+ Format
87
+ Content
88
+ Content
89
+ Format
90
+ Format
91
+ Improve-grammar-Typo
92
+ Improve-grammar-Typo
93
+ Content
94
+ Lang-accurate-spefific
95
+ Improve-grammar-Typo
96
+ Format
97
+ Content
98
+ Improve-grammar-Typo
99
+ Improve-grammar-Typo
100
+ Content
101
+ Format
102
+ Content
103
+ Content
104
+ Lang-professional-Improve-style
105
+ Content
106
+ Content
107
+ Content
108
+ Lang-professional-Improve-style
109
+ Improve-grammar-Typo
110
+ Format
111
+ Format
112
+ Format
113
+ Format
114
+ Format
115
+ Improve-grammar-Typo
116
+ Content
117
+ Improve-grammar-Typo
118
+ Improve-grammar-Typo
119
+ Format
120
+ Format
121
+ Improve-grammar-Typo
122
+ Content
123
+ Content
124
+ Format
125
+ Improve-grammar-Typo
126
+ Improve-grammar-Typo
127
+ Improve-grammar-Typo
128
+ Content
129
+ Improve-grammar-Typo
130
+ Content
131
+ Improve-grammar-Typo
132
+ Format
133
+ Format
134
+ Content
135
+ Content
136
+ Format
137
+ Lang-accurate-spefific
138
+ Content
139
+ Improve-grammar-Typo
140
+ Content
141
+ Content
142
+ Content
143
+ Improve-grammar-Typo
144
+ Content
145
+ Format
146
+ Improve-grammar-Typo
147
+ Improve-grammar-Typo
148
+ Lang-professional-Improve-style
149
+ Content
150
+ Content
151
+ Format
152
+ Content
153
+ Lang-improve-readability-Simplify
154
+ Content
155
+ Improve-grammar-Typo
156
+ Improve-grammar-Typo
157
+ Improve-grammar-Typo
158
+ Content
159
+ Content
160
+ Format
161
+ Improve-grammar-Typo
162
+ Format
163
+ Improve-grammar-Typo
164
+ Content
165
+ Content
166
+ Content
167
+ Improve-grammar-Typo
168
+ Content
169
+ Lang-accurate-spefific
170
+ Format
171
+ Content
172
+ Lang-accurate-spefific
173
+ Format
174
+ Format
175
+ Format
176
+ Lang-accurate-spefific
177
+ Format
178
+ Content
179
+ Content
180
+ Content
181
+ Content
182
+ Lang-accurate-spefific
183
+ Lang-professional-Improve-style
184
+ Format
185
+ Improve-grammar-Typo
186
+ Content
187
+ Improve-grammar-Typo
188
+ Format
189
+ Content
190
+ Content
191
+ Improve-grammar-Typo
192
+ Content
193
+ Format
194
+ Improve-grammar-Typo
195
+ Format
196
+ Improve-grammar-Typo
197
+ Lang-accurate-spefific
198
+ Improve-grammar-Typo
199
+ Lang-professional-Improve-style
200
+ Format
201
+ Improve-grammar-Typo
202
+ Lang-accurate-spefific
203
+ Improve-grammar-Typo
204
+ Improve-grammar-Typo
205
+ Format
206
+ Content
207
+ Format
208
+ Improve-grammar-Typo
209
+ Format
210
+ Improve-grammar-Typo
211
+ Content
212
+ Content
213
+ Improve-grammar-Typo
214
+ Improve-grammar-Typo
215
+ Format
216
+ Improve-grammar-Typo
217
+ Content
218
+ Content
219
+ Lang-accurate-spefific
220
+ Improve-grammar-Typo
221
+ Format
222
+ Content
223
+ Improve-grammar-Typo
224
+ Format
225
+ Format
226
+ Improve-grammar-Typo
227
+ Improve-grammar-Typo
228
+ Improve-grammar-Typo
229
+ Format
230
+ Improve-grammar-Typo
231
+ Format
232
+ Content
233
+ Lang-accurate-spefific
234
+ Content
235
+ Improve-grammar-Typo
236
+ Improve-grammar-Typo
237
+ Content
238
+ Content
239
+ Content
240
+ Content
241
+ Improve-grammar-Typo
242
+ Content
243
+ Format
244
+ Lang-improve-readability-Simplify
245
+ Lang-accurate-spefific
246
+ Lang-improve-readability-Simplify
247
+ Lang-accurate-spefific
248
+ Lang-accurate-spefific
249
+ Lang-accurate-spefific
250
+ Improve-grammar-Typo
251
+ Format
252
+ Improve-grammar-Typo
253
+ Improve-grammar-Typo
254
+ Improve-grammar-Typo
255
+ Content
256
+ Content
257
+ Lang-professional-Improve-style
258
+ Improve-grammar-Typo
259
+ Lang-accurate-spefific
260
+ Format
261
+ Improve-grammar-Typo
262
+ Improve-grammar-Typo
263
+ Improve-grammar-Typo
264
+ Improve-grammar-Typo
265
+ Improve-grammar-Typo
266
+ Improve-grammar-Typo
267
+ Content
268
+ Lang-accurate-spefific
269
+ Improve-grammar-Typo
270
+ Lang-improve-readability-Simplify
271
+ Content
272
+ Improve-grammar-Typo
273
+ Format
274
+ Improve-grammar-Typo
275
+ Lang-accurate-spefific
276
+ Content
277
+ Improve-grammar-Typo
278
+ Improve-grammar-Typo
279
+ Content
280
+ Format
281
+ Content
282
+ Improve-grammar-Typo
283
+ Format
284
+ Lang-professional-Improve-style
285
+ Content
286
+ Lang-accurate-spefific
287
+ Improve-grammar-Typo
288
+ Lang-accurate-spefific
289
+ Format
290
+ Content
291
+ Improve-grammar-Typo
292
+ Content
293
+ Content
294
+ Lang-accurate-spefific
295
+ Improve-grammar-Typo
296
+ Format
297
+ Content
298
+ Content
299
+ Improve-grammar-Typo
300
+ Improve-grammar-Typo
301
+ Improve-grammar-Typo
302
+ Improve-grammar-Typo
303
+ Improve-grammar-Typo
304
+ Improve-grammar-Typo
305
+ Improve-grammar-Typo
306
+ Improve-grammar-Typo
307
+ Content
308
+ Content
309
+ Improve-grammar-Typo
310
+ Content
311
+ Format
312
+ Format
313
+ Improve-grammar-Typo
314
+ Format
315
+ Content
316
+ Format
317
+ Content
318
+ Lang-improve-readability-Simplify
319
+ Content
320
+ Improve-grammar-Typo
321
+ Improve-grammar-Typo
322
+ Improve-grammar-Typo
323
+ Lang-accurate-spefific
324
+ Improve-grammar-Typo
325
+ Format
326
+ Content
327
+ Improve-grammar-Typo
328
+ Content
329
+ Lang-accurate-spefific
330
+ Lang-professional-Improve-style
331
+ Lang-improve-readability-Simplify
332
+ Lang-professional-Improve-style
333
+ Lang-improve-readability-Simplify
334
+ Lang-professional-Improve-style
335
+ Lang-professional-Improve-style
336
+ Content
337
+ Content
338
+ Lang-accurate-spefific
339
+ Format
340
+ Content
341
+ Lang-accurate-spefific
342
+ Improve-grammar-Typo
343
+ Lang-accurate-spefific
344
+ Content
345
+ Lang-accurate-spefific
346
+ Content
347
+ Content
348
+ Content
349
+ Improve-grammar-Typo
350
+ Improve-grammar-Typo
351
+ Improve-grammar-Typo
352
+ Improve-grammar-Typo
353
+ Content
354
+ Format
355
+ Format
356
+ Lang-professional-Improve-style
357
+ Lang-professional-Improve-style
358
+ Lang-improve-readability-Simplify
359
+ Content
360
+ Content
361
+ Content
362
+ Content
363
+ Lang-professional-Improve-style
364
+ Content
365
+ Lang-professional-Improve-style
366
+ Content
367
+ Format
368
+ Improve-grammar-Typo
369
+ Improve-grammar-Typo
370
+ Content
371
+ Format
372
+ Lang-accurate-spefific
373
+ Improve-grammar-Typo
374
+ Improve-grammar-Typo
375
+ Improve-grammar-Typo
376
+ Content
377
+ Content
378
+ Improve-grammar-Typo
379
+ Improve-grammar-Typo
380
+ Lang-improve-readability-Simplify
381
+ Improve-grammar-Typo
382
+ Improve-grammar-Typo
383
+ Improve-grammar-Typo
384
+ Format
385
+ Content
386
+ Lang-accurate-spefific
387
+ Format
388
+ Format
389
+ Content
390
+ Format
391
+ Improve-grammar-Typo
392
+ Improve-grammar-Typo
393
+ Improve-grammar-Typo
394
+ Improve-grammar-Typo
395
+ Improve-grammar-Typo
396
+ Improve-grammar-Typo
397
+ Content
398
+ Improve-grammar-Typo
399
+ Content
400
+ Improve-grammar-Typo
401
+ Lang-professional-Improve-style
402
+ Improve-grammar-Typo
403
+ Improve-grammar-Typo
404
+ Improve-grammar-Typo
405
+ Improve-grammar-Typo
406
+ Improve-grammar-Typo
407
+ Improve-grammar-Typo
408
+ Improve-grammar-Typo
409
+ Lang-accurate-spefific
410
+ Format
411
+ Improve-grammar-Typo
412
+ Lang-accurate-spefific
413
+ Improve-grammar-Typo
414
+ Improve-grammar-Typo
415
+ Improve-grammar-Typo
416
+ Improve-grammar-Typo
417
+ Lang-improve-readability-Simplify
418
+ Improve-grammar-Typo
419
+ Improve-grammar-Typo
420
+ Improve-grammar-Typo
421
+ Improve-grammar-Typo
422
+ Improve-grammar-Typo
423
+ Improve-grammar-Typo
424
+ Improve-grammar-Typo
425
+ Content
426
+ Format
427
+ Format
428
+ Format
429
+ Format
430
+ Format
predict_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "predict_accuracy": 0.686046511627907,
3
+ "predict_loss": 0.14548401534557343,
4
+ "predict_runtime": 6.6827,
5
+ "predict_samples": 430,
6
+ "predict_samples_per_second": 64.345,
7
+ "predict_steps_per_second": 5.387
8
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b36741381b22cad8ba447a0c87693f71b1e183699dd7539f506b4e32aa18f9eb
3
+ size 891657151
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>", "additional_special_tokens": ["<extra_id_0>", "<extra_id_1>", "<extra_id_2>", "<extra_id_3>", "<extra_id_4>", "<extra_id_5>", "<extra_id_6>", "<extra_id_7>", "<extra_id_8>", "<extra_id_9>", "<extra_id_10>", "<extra_id_11>", "<extra_id_12>", "<extra_id_13>", "<extra_id_14>", "<extra_id_15>", "<extra_id_16>", "<extra_id_17>", "<extra_id_18>", "<extra_id_19>", "<extra_id_20>", "<extra_id_21>", "<extra_id_22>", "<extra_id_23>", "<extra_id_24>", "<extra_id_25>", "<extra_id_26>", "<extra_id_27>", "<extra_id_28>", "<extra_id_29>", "<extra_id_30>", "<extra_id_31>", "<extra_id_32>", "<extra_id_33>", "<extra_id_34>", "<extra_id_35>", "<extra_id_36>", "<extra_id_37>", "<extra_id_38>", "<extra_id_39>", "<extra_id_40>", "<extra_id_41>", "<extra_id_42>", "<extra_id_43>", "<extra_id_44>", "<extra_id_45>", "<extra_id_46>", "<extra_id_47>", "<extra_id_48>", "<extra_id_49>", "<extra_id_50>", "<extra_id_51>", "<extra_id_52>", "<extra_id_53>", "<extra_id_54>", "<extra_id_55>", "<extra_id_56>", "<extra_id_57>", "<extra_id_58>", "<extra_id_59>", "<extra_id_60>", "<extra_id_61>", "<extra_id_62>", "<extra_id_63>", "<extra_id_64>", "<extra_id_65>", "<extra_id_66>", "<extra_id_67>", "<extra_id_68>", "<extra_id_69>", "<extra_id_70>", "<extra_id_71>", "<extra_id_72>", "<extra_id_73>", "<extra_id_74>", "<extra_id_75>", "<extra_id_76>", "<extra_id_77>", "<extra_id_78>", "<extra_id_79>", "<extra_id_80>", "<extra_id_81>", "<extra_id_82>", "<extra_id_83>", "<extra_id_84>", "<extra_id_85>", "<extra_id_86>", "<extra_id_87>", "<extra_id_88>", "<extra_id_89>", "<extra_id_90>", "<extra_id_91>", "<extra_id_92>", "<extra_id_93>", "<extra_id_94>", "<extra_id_95>", "<extra_id_96>", "<extra_id_97>", "<extra_id_98>", "<extra_id_99>"]}
spiece.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86
3
+ size 791656
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>", "extra_ids": 100, "additional_special_tokens": ["<extra_id_0>", "<extra_id_1>", "<extra_id_2>", "<extra_id_3>", "<extra_id_4>", "<extra_id_5>", "<extra_id_6>", "<extra_id_7>", "<extra_id_8>", "<extra_id_9>", "<extra_id_10>", "<extra_id_11>", "<extra_id_12>", "<extra_id_13>", "<extra_id_14>", "<extra_id_15>", "<extra_id_16>", "<extra_id_17>", "<extra_id_18>", "<extra_id_19>", "<extra_id_20>", "<extra_id_21>", "<extra_id_22>", "<extra_id_23>", "<extra_id_24>", "<extra_id_25>", "<extra_id_26>", "<extra_id_27>", "<extra_id_28>", "<extra_id_29>", "<extra_id_30>", "<extra_id_31>", "<extra_id_32>", "<extra_id_33>", "<extra_id_34>", "<extra_id_35>", "<extra_id_36>", "<extra_id_37>", "<extra_id_38>", "<extra_id_39>", "<extra_id_40>", "<extra_id_41>", "<extra_id_42>", "<extra_id_43>", "<extra_id_44>", "<extra_id_45>", "<extra_id_46>", "<extra_id_47>", "<extra_id_48>", "<extra_id_49>", "<extra_id_50>", "<extra_id_51>", "<extra_id_52>", "<extra_id_53>", "<extra_id_54>", "<extra_id_55>", "<extra_id_56>", "<extra_id_57>", "<extra_id_58>", "<extra_id_59>", "<extra_id_60>", "<extra_id_61>", "<extra_id_62>", "<extra_id_63>", "<extra_id_64>", "<extra_id_65>", "<extra_id_66>", "<extra_id_67>", "<extra_id_68>", "<extra_id_69>", "<extra_id_70>", "<extra_id_71>", "<extra_id_72>", "<extra_id_73>", "<extra_id_74>", "<extra_id_75>", "<extra_id_76>", "<extra_id_77>", "<extra_id_78>", "<extra_id_79>", "<extra_id_80>", "<extra_id_81>", "<extra_id_82>", "<extra_id_83>", "<extra_id_84>", "<extra_id_85>", "<extra_id_86>", "<extra_id_87>", "<extra_id_88>", "<extra_id_89>", "<extra_id_90>", "<extra_id_91>", "<extra_id_92>", "<extra_id_93>", "<extra_id_94>", "<extra_id_95>", "<extra_id_96>", "<extra_id_97>", "<extra_id_98>", "<extra_id_99>"], "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "t5-base", "tokenizer_class": "T5Tokenizer"}
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 10.0,
3
+ "train_loss": 0.32115679332188196,
4
+ "train_runtime": 336.3145,
5
+ "train_samples": 1254,
6
+ "train_samples_per_second": 37.287,
7
+ "train_steps_per_second": 3.122
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.682648401826484,
3
+ "best_model_checkpoint": "tmp/tst-translation353/checkpoint-840",
4
+ "epoch": 10.0,
5
+ "global_step": 1050,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 1.0,
12
+ "eval_accuracy": 0.2990867579908676,
13
+ "eval_loss": 0.3042815625667572,
14
+ "eval_runtime": 6.3082,
15
+ "eval_samples_per_second": 69.434,
16
+ "eval_steps_per_second": 5.865,
17
+ "step": 105
18
+ },
19
+ {
20
+ "epoch": 2.0,
21
+ "eval_accuracy": 0.3310502283105023,
22
+ "eval_loss": 0.2653220593929291,
23
+ "eval_runtime": 6.6274,
24
+ "eval_samples_per_second": 66.089,
25
+ "eval_steps_per_second": 5.583,
26
+ "step": 210
27
+ },
28
+ {
29
+ "epoch": 3.0,
30
+ "eval_accuracy": 0.4726027397260274,
31
+ "eval_loss": 0.24747495353221893,
32
+ "eval_runtime": 6.6413,
33
+ "eval_samples_per_second": 65.951,
34
+ "eval_steps_per_second": 5.571,
35
+ "step": 315
36
+ },
37
+ {
38
+ "epoch": 4.0,
39
+ "eval_accuracy": 0.6095890410958904,
40
+ "eval_loss": 0.17373643815517426,
41
+ "eval_runtime": 10.6832,
42
+ "eval_samples_per_second": 40.999,
43
+ "eval_steps_per_second": 3.463,
44
+ "step": 420
45
+ },
46
+ {
47
+ "epoch": 4.76,
48
+ "learning_rate": 2.6190476190476192e-05,
49
+ "loss": 0.5112,
50
+ "step": 500
51
+ },
52
+ {
53
+ "epoch": 5.0,
54
+ "eval_accuracy": 0.6255707762557078,
55
+ "eval_loss": 0.16598360240459442,
56
+ "eval_runtime": 6.8539,
57
+ "eval_samples_per_second": 63.906,
58
+ "eval_steps_per_second": 5.398,
59
+ "step": 525
60
+ },
61
+ {
62
+ "epoch": 6.0,
63
+ "eval_accuracy": 0.6575342465753424,
64
+ "eval_loss": 0.1499105989933014,
65
+ "eval_runtime": 7.0098,
66
+ "eval_samples_per_second": 62.484,
67
+ "eval_steps_per_second": 5.278,
68
+ "step": 630
69
+ },
70
+ {
71
+ "epoch": 7.0,
72
+ "eval_accuracy": 0.6438356164383562,
73
+ "eval_loss": 0.14968065917491913,
74
+ "eval_runtime": 7.4429,
75
+ "eval_samples_per_second": 58.848,
76
+ "eval_steps_per_second": 4.971,
77
+ "step": 735
78
+ },
79
+ {
80
+ "epoch": 8.0,
81
+ "eval_accuracy": 0.682648401826484,
82
+ "eval_loss": 0.14565615355968475,
83
+ "eval_runtime": 6.9321,
84
+ "eval_samples_per_second": 63.184,
85
+ "eval_steps_per_second": 5.337,
86
+ "step": 840
87
+ },
88
+ {
89
+ "epoch": 9.0,
90
+ "eval_accuracy": 0.678082191780822,
91
+ "eval_loss": 0.14702993631362915,
92
+ "eval_runtime": 8.9981,
93
+ "eval_samples_per_second": 48.677,
94
+ "eval_steps_per_second": 4.112,
95
+ "step": 945
96
+ },
97
+ {
98
+ "epoch": 9.52,
99
+ "learning_rate": 2.3809523809523808e-06,
100
+ "loss": 0.151,
101
+ "step": 1000
102
+ },
103
+ {
104
+ "epoch": 10.0,
105
+ "eval_accuracy": 0.678082191780822,
106
+ "eval_loss": 0.14281360805034637,
107
+ "eval_runtime": 7.0161,
108
+ "eval_samples_per_second": 62.428,
109
+ "eval_steps_per_second": 5.274,
110
+ "step": 1050
111
+ },
112
+ {
113
+ "epoch": 10.0,
114
+ "step": 1050,
115
+ "total_flos": 2385811380510720.0,
116
+ "train_loss": 0.32115679332188196,
117
+ "train_runtime": 336.3145,
118
+ "train_samples_per_second": 37.287,
119
+ "train_steps_per_second": 3.122
120
+ }
121
+ ],
122
+ "max_steps": 1050,
123
+ "num_train_epochs": 10,
124
+ "total_flos": 2385811380510720.0,
125
+ "trial_name": null,
126
+ "trial_params": null
127
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8e8e5707c72d33aa403209a25598f3b35020fdad3d4e44af24df1a2fa358774
3
+ size 3183