learn3r committed on
Commit
c06f4cb
·
1 Parent(s): 6925f1b

End of training

Browse files
Files changed (5) hide show
  1. README.md +21 -8
  2. all_results.json +18 -0
  3. eval_results.json +13 -0
  4. train_results.json +8 -0
  5. trainer_state.json +578 -0
README.md CHANGED
@@ -1,11 +1,24 @@
1
  ---
 
2
  tags:
3
  - generated_from_trainer
 
 
4
  metrics:
5
  - rouge
6
  model-index:
7
  - name: longt5_xl_summ_screen_memsum_bp_30
8
- results: []
 
 
 
 
 
 
 
 
 
 
9
  ---
10
 
11
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -13,14 +26,14 @@ should probably proofread and complete it, then remove this comment. -->
13
 
14
  # longt5_xl_summ_screen_memsum_bp_30
15
 
16
- This model was trained from scratch on an unknown dataset.
17
  It achieves the following results on the evaluation set:
18
- - Loss: 2.9815
19
- - Rouge1: 46.6542
20
- - Rouge2: 17.8515
21
- - Rougel: 28.146
22
- - Rougelsum: 45.0274
23
- - Gen Len: 337.4822
24
 
25
  ## Model description
26
 
 
1
  ---
2
+ base_model: longt5_xl_summ_screen_memsum_bp_20/checkpoint-140
3
  tags:
4
  - generated_from_trainer
5
+ datasets:
6
+ - learn3r/summ_screen_fd_memsum_bp
7
  metrics:
8
  - rouge
9
  model-index:
10
  - name: longt5_xl_summ_screen_memsum_bp_30
11
+ results:
12
+ - task:
13
+ name: Summarization
14
+ type: summarization
15
+ dataset:
16
+ name: learn3r/summ_screen_fd_memsum_bp
17
+ type: learn3r/summ_screen_fd_memsum_bp
18
+ metrics:
19
+ - name: Rouge1
20
+ type: rouge
21
+ value: 47.1842
22
  ---
23
 
24
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
26
 
27
  # longt5_xl_summ_screen_memsum_bp_30
28
 
29
+ This model is a fine-tuned version of [longt5_xl_summ_screen_memsum_bp_20/checkpoint-140](https://huggingface.co/longt5_xl_summ_screen_memsum_bp_20/checkpoint-140) on the learn3r/summ_screen_fd_memsum_bp dataset.
30
  It achieves the following results on the evaluation set:
31
+ - Loss: 2.6817
32
+ - Rouge1: 47.1842
33
+ - Rouge2: 18.22
34
+ - Rougel: 28.4626
35
+ - Rougelsum: 45.5778
36
+ - Gen Len: 308.9083
37
 
38
  ## Model description
39
 
all_results.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 9.74,
3
+ "eval_gen_len": 308.9082840236686,
4
+ "eval_loss": 2.681736707687378,
5
+ "eval_rouge1": 47.1842,
6
+ "eval_rouge2": 18.22,
7
+ "eval_rougeL": 28.4626,
8
+ "eval_rougeLsum": 45.5778,
9
+ "eval_runtime": 769.5222,
10
+ "eval_samples": 338,
11
+ "eval_samples_per_second": 0.439,
12
+ "eval_steps_per_second": 0.056,
13
+ "train_loss": 0.045875947869249756,
14
+ "train_runtime": 15096.5898,
15
+ "train_samples": 3673,
16
+ "train_samples_per_second": 2.433,
17
+ "train_steps_per_second": 0.009
18
+ }
eval_results.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 9.74,
3
+ "eval_gen_len": 308.9082840236686,
4
+ "eval_loss": 2.681736707687378,
5
+ "eval_rouge1": 47.1842,
6
+ "eval_rouge2": 18.22,
7
+ "eval_rougeL": 28.4626,
8
+ "eval_rougeLsum": 45.5778,
9
+ "eval_runtime": 769.5222,
10
+ "eval_samples": 338,
11
+ "eval_samples_per_second": 0.439,
12
+ "eval_steps_per_second": 0.056
13
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 9.74,
3
+ "train_loss": 0.045875947869249756,
4
+ "train_runtime": 15096.5898,
5
+ "train_samples": 3673,
6
+ "train_samples_per_second": 2.433,
7
+ "train_steps_per_second": 0.009
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,578 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 2.681736707687378,
3
+ "best_model_checkpoint": "longt5_xl_summ_screen_memsum_bp_30/checkpoint-100",
4
+ "epoch": 9.73913043478261,
5
+ "eval_steps": 500,
6
+ "global_step": 140,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.14,
13
+ "learning_rate": 0.001,
14
+ "loss": 0.0739,
15
+ "step": 2
16
+ },
17
+ {
18
+ "epoch": 0.28,
19
+ "learning_rate": 0.001,
20
+ "loss": 0.0732,
21
+ "step": 4
22
+ },
23
+ {
24
+ "epoch": 0.42,
25
+ "learning_rate": 0.001,
26
+ "loss": 0.0657,
27
+ "step": 6
28
+ },
29
+ {
30
+ "epoch": 0.56,
31
+ "learning_rate": 0.001,
32
+ "loss": 0.0699,
33
+ "step": 8
34
+ },
35
+ {
36
+ "epoch": 0.7,
37
+ "learning_rate": 0.001,
38
+ "loss": 0.0667,
39
+ "step": 10
40
+ },
41
+ {
42
+ "epoch": 0.83,
43
+ "learning_rate": 0.001,
44
+ "loss": 0.0705,
45
+ "step": 12
46
+ },
47
+ {
48
+ "epoch": 0.97,
49
+ "learning_rate": 0.001,
50
+ "loss": 0.0707,
51
+ "step": 14
52
+ },
53
+ {
54
+ "epoch": 0.97,
55
+ "eval_gen_len": 453.5295857988166,
56
+ "eval_loss": 2.709702730178833,
57
+ "eval_rouge1": 41.4751,
58
+ "eval_rouge2": 15.5831,
59
+ "eval_rougeL": 25.1976,
60
+ "eval_rougeLsum": 39.9229,
61
+ "eval_runtime": 757.2273,
62
+ "eval_samples_per_second": 0.446,
63
+ "eval_steps_per_second": 0.057,
64
+ "step": 14
65
+ },
66
+ {
67
+ "epoch": 1.11,
68
+ "learning_rate": 0.001,
69
+ "loss": 0.0649,
70
+ "step": 16
71
+ },
72
+ {
73
+ "epoch": 1.25,
74
+ "learning_rate": 0.001,
75
+ "loss": 0.0535,
76
+ "step": 18
77
+ },
78
+ {
79
+ "epoch": 1.39,
80
+ "learning_rate": 0.001,
81
+ "loss": 0.0715,
82
+ "step": 20
83
+ },
84
+ {
85
+ "epoch": 1.53,
86
+ "learning_rate": 0.001,
87
+ "loss": 0.0731,
88
+ "step": 22
89
+ },
90
+ {
91
+ "epoch": 1.67,
92
+ "learning_rate": 0.001,
93
+ "loss": 0.0931,
94
+ "step": 24
95
+ },
96
+ {
97
+ "epoch": 1.81,
98
+ "learning_rate": 0.001,
99
+ "loss": 0.0583,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 1.95,
104
+ "learning_rate": 0.001,
105
+ "loss": 0.0608,
106
+ "step": 28
107
+ },
108
+ {
109
+ "epoch": 1.95,
110
+ "eval_gen_len": 387.41715976331363,
111
+ "eval_loss": 2.7271180152893066,
112
+ "eval_rouge1": 45.691,
113
+ "eval_rouge2": 17.905,
114
+ "eval_rougeL": 27.9519,
115
+ "eval_rougeLsum": 43.8787,
116
+ "eval_runtime": 765.753,
117
+ "eval_samples_per_second": 0.441,
118
+ "eval_steps_per_second": 0.056,
119
+ "step": 28
120
+ },
121
+ {
122
+ "epoch": 2.09,
123
+ "learning_rate": 0.001,
124
+ "loss": 0.0521,
125
+ "step": 30
126
+ },
127
+ {
128
+ "epoch": 2.23,
129
+ "learning_rate": 0.001,
130
+ "loss": 0.0396,
131
+ "step": 32
132
+ },
133
+ {
134
+ "epoch": 2.37,
135
+ "learning_rate": 0.001,
136
+ "loss": 0.0505,
137
+ "step": 34
138
+ },
139
+ {
140
+ "epoch": 2.5,
141
+ "learning_rate": 0.001,
142
+ "loss": 0.0496,
143
+ "step": 36
144
+ },
145
+ {
146
+ "epoch": 2.64,
147
+ "learning_rate": 0.001,
148
+ "loss": 0.0591,
149
+ "step": 38
150
+ },
151
+ {
152
+ "epoch": 2.78,
153
+ "learning_rate": 0.001,
154
+ "loss": 0.061,
155
+ "step": 40
156
+ },
157
+ {
158
+ "epoch": 2.92,
159
+ "learning_rate": 0.001,
160
+ "loss": 0.0851,
161
+ "step": 42
162
+ },
163
+ {
164
+ "epoch": 2.99,
165
+ "eval_gen_len": 261.56804733727813,
166
+ "eval_loss": 3.0001442432403564,
167
+ "eval_rouge1": 47.1647,
168
+ "eval_rouge2": 17.8993,
169
+ "eval_rougeL": 28.7561,
170
+ "eval_rougeLsum": 45.661,
171
+ "eval_runtime": 691.7097,
172
+ "eval_samples_per_second": 0.489,
173
+ "eval_steps_per_second": 0.062,
174
+ "step": 43
175
+ },
176
+ {
177
+ "epoch": 3.06,
178
+ "learning_rate": 0.001,
179
+ "loss": 0.0519,
180
+ "step": 44
181
+ },
182
+ {
183
+ "epoch": 3.2,
184
+ "learning_rate": 0.001,
185
+ "loss": 0.0481,
186
+ "step": 46
187
+ },
188
+ {
189
+ "epoch": 3.34,
190
+ "learning_rate": 0.001,
191
+ "loss": 0.0535,
192
+ "step": 48
193
+ },
194
+ {
195
+ "epoch": 3.48,
196
+ "learning_rate": 0.001,
197
+ "loss": 0.0404,
198
+ "step": 50
199
+ },
200
+ {
201
+ "epoch": 3.62,
202
+ "learning_rate": 0.001,
203
+ "loss": 0.0468,
204
+ "step": 52
205
+ },
206
+ {
207
+ "epoch": 3.76,
208
+ "learning_rate": 0.001,
209
+ "loss": 0.0535,
210
+ "step": 54
211
+ },
212
+ {
213
+ "epoch": 3.9,
214
+ "learning_rate": 0.001,
215
+ "loss": 0.0697,
216
+ "step": 56
217
+ },
218
+ {
219
+ "epoch": 3.97,
220
+ "eval_gen_len": 365.3047337278106,
221
+ "eval_loss": 2.9297378063201904,
222
+ "eval_rouge1": 46.6892,
223
+ "eval_rouge2": 17.8922,
224
+ "eval_rougeL": 28.0724,
225
+ "eval_rougeLsum": 44.8821,
226
+ "eval_runtime": 770.7359,
227
+ "eval_samples_per_second": 0.439,
228
+ "eval_steps_per_second": 0.056,
229
+ "step": 57
230
+ },
231
+ {
232
+ "epoch": 4.03,
233
+ "learning_rate": 0.001,
234
+ "loss": 0.0371,
235
+ "step": 58
236
+ },
237
+ {
238
+ "epoch": 4.17,
239
+ "learning_rate": 0.001,
240
+ "loss": 0.0306,
241
+ "step": 60
242
+ },
243
+ {
244
+ "epoch": 4.31,
245
+ "learning_rate": 0.001,
246
+ "loss": 0.0285,
247
+ "step": 62
248
+ },
249
+ {
250
+ "epoch": 4.45,
251
+ "learning_rate": 0.001,
252
+ "loss": 0.033,
253
+ "step": 64
254
+ },
255
+ {
256
+ "epoch": 4.59,
257
+ "learning_rate": 0.001,
258
+ "loss": 0.0411,
259
+ "step": 66
260
+ },
261
+ {
262
+ "epoch": 4.73,
263
+ "learning_rate": 0.001,
264
+ "loss": 0.0326,
265
+ "step": 68
266
+ },
267
+ {
268
+ "epoch": 4.87,
269
+ "learning_rate": 0.001,
270
+ "loss": 0.0296,
271
+ "step": 70
272
+ },
273
+ {
274
+ "epoch": 4.94,
275
+ "eval_gen_len": 440.6390532544379,
276
+ "eval_loss": 2.901674270629883,
277
+ "eval_rouge1": 44.2702,
278
+ "eval_rouge2": 17.7874,
279
+ "eval_rougeL": 26.7598,
280
+ "eval_rougeLsum": 42.6857,
281
+ "eval_runtime": 764.3617,
282
+ "eval_samples_per_second": 0.442,
283
+ "eval_steps_per_second": 0.056,
284
+ "step": 71
285
+ },
286
+ {
287
+ "epoch": 5.01,
288
+ "learning_rate": 0.001,
289
+ "loss": 0.0384,
290
+ "step": 72
291
+ },
292
+ {
293
+ "epoch": 5.15,
294
+ "learning_rate": 0.001,
295
+ "loss": 0.0429,
296
+ "step": 74
297
+ },
298
+ {
299
+ "epoch": 5.29,
300
+ "learning_rate": 0.001,
301
+ "loss": 0.0459,
302
+ "step": 76
303
+ },
304
+ {
305
+ "epoch": 5.43,
306
+ "learning_rate": 0.001,
307
+ "loss": 0.0336,
308
+ "step": 78
309
+ },
310
+ {
311
+ "epoch": 5.57,
312
+ "learning_rate": 0.001,
313
+ "loss": 0.0406,
314
+ "step": 80
315
+ },
316
+ {
317
+ "epoch": 5.7,
318
+ "learning_rate": 0.001,
319
+ "loss": 0.0361,
320
+ "step": 82
321
+ },
322
+ {
323
+ "epoch": 5.84,
324
+ "learning_rate": 0.001,
325
+ "loss": 0.0394,
326
+ "step": 84
327
+ },
328
+ {
329
+ "epoch": 5.98,
330
+ "learning_rate": 0.001,
331
+ "loss": 0.0312,
332
+ "step": 86
333
+ },
334
+ {
335
+ "epoch": 5.98,
336
+ "eval_gen_len": 306.6715976331361,
337
+ "eval_loss": 3.04889178276062,
338
+ "eval_rouge1": 47.7884,
339
+ "eval_rouge2": 18.1788,
340
+ "eval_rougeL": 28.6688,
341
+ "eval_rougeLsum": 46.0744,
342
+ "eval_runtime": 746.8476,
343
+ "eval_samples_per_second": 0.453,
344
+ "eval_steps_per_second": 0.058,
345
+ "step": 86
346
+ },
347
+ {
348
+ "epoch": 6.12,
349
+ "learning_rate": 0.001,
350
+ "loss": 0.0276,
351
+ "step": 88
352
+ },
353
+ {
354
+ "epoch": 6.26,
355
+ "learning_rate": 0.001,
356
+ "loss": 0.0338,
357
+ "step": 90
358
+ },
359
+ {
360
+ "epoch": 6.4,
361
+ "learning_rate": 0.001,
362
+ "loss": 0.0487,
363
+ "step": 92
364
+ },
365
+ {
366
+ "epoch": 6.54,
367
+ "learning_rate": 0.001,
368
+ "loss": 0.0379,
369
+ "step": 94
370
+ },
371
+ {
372
+ "epoch": 6.68,
373
+ "learning_rate": 0.001,
374
+ "loss": 0.0309,
375
+ "step": 96
376
+ },
377
+ {
378
+ "epoch": 6.82,
379
+ "learning_rate": 0.001,
380
+ "loss": 0.0366,
381
+ "step": 98
382
+ },
383
+ {
384
+ "epoch": 6.96,
385
+ "learning_rate": 0.001,
386
+ "loss": 0.0383,
387
+ "step": 100
388
+ },
389
+ {
390
+ "epoch": 6.96,
391
+ "eval_gen_len": 308.9082840236686,
392
+ "eval_loss": 2.681736707687378,
393
+ "eval_rouge1": 47.1842,
394
+ "eval_rouge2": 18.22,
395
+ "eval_rougeL": 28.4626,
396
+ "eval_rougeLsum": 45.5778,
397
+ "eval_runtime": 764.2847,
398
+ "eval_samples_per_second": 0.442,
399
+ "eval_steps_per_second": 0.056,
400
+ "step": 100
401
+ },
402
+ {
403
+ "epoch": 7.1,
404
+ "learning_rate": 0.001,
405
+ "loss": 0.0474,
406
+ "step": 102
407
+ },
408
+ {
409
+ "epoch": 7.23,
410
+ "learning_rate": 0.001,
411
+ "loss": 0.0376,
412
+ "step": 104
413
+ },
414
+ {
415
+ "epoch": 7.37,
416
+ "learning_rate": 0.001,
417
+ "loss": 0.0535,
418
+ "step": 106
419
+ },
420
+ {
421
+ "epoch": 7.51,
422
+ "learning_rate": 0.001,
423
+ "loss": 0.036,
424
+ "step": 108
425
+ },
426
+ {
427
+ "epoch": 7.65,
428
+ "learning_rate": 0.001,
429
+ "loss": 0.0339,
430
+ "step": 110
431
+ },
432
+ {
433
+ "epoch": 7.79,
434
+ "learning_rate": 0.001,
435
+ "loss": 0.0557,
436
+ "step": 112
437
+ },
438
+ {
439
+ "epoch": 7.93,
440
+ "learning_rate": 0.001,
441
+ "loss": 0.0367,
442
+ "step": 114
443
+ },
444
+ {
445
+ "epoch": 8.0,
446
+ "eval_gen_len": 227.8550295857988,
447
+ "eval_loss": 3.0245203971862793,
448
+ "eval_rouge1": 45.5573,
449
+ "eval_rouge2": 17.2161,
450
+ "eval_rougeL": 28.0573,
451
+ "eval_rougeLsum": 43.7772,
452
+ "eval_runtime": 668.0023,
453
+ "eval_samples_per_second": 0.506,
454
+ "eval_steps_per_second": 0.064,
455
+ "step": 115
456
+ },
457
+ {
458
+ "epoch": 8.07,
459
+ "learning_rate": 0.001,
460
+ "loss": 0.033,
461
+ "step": 116
462
+ },
463
+ {
464
+ "epoch": 8.21,
465
+ "learning_rate": 0.001,
466
+ "loss": 0.0379,
467
+ "step": 118
468
+ },
469
+ {
470
+ "epoch": 8.35,
471
+ "learning_rate": 0.001,
472
+ "loss": 0.0274,
473
+ "step": 120
474
+ },
475
+ {
476
+ "epoch": 8.49,
477
+ "learning_rate": 0.001,
478
+ "loss": 0.026,
479
+ "step": 122
480
+ },
481
+ {
482
+ "epoch": 8.63,
483
+ "learning_rate": 0.001,
484
+ "loss": 0.0269,
485
+ "step": 124
486
+ },
487
+ {
488
+ "epoch": 8.77,
489
+ "learning_rate": 0.001,
490
+ "loss": 0.0314,
491
+ "step": 126
492
+ },
493
+ {
494
+ "epoch": 8.9,
495
+ "learning_rate": 0.001,
496
+ "loss": 0.04,
497
+ "step": 128
498
+ },
499
+ {
500
+ "epoch": 8.97,
501
+ "eval_gen_len": 429.8757396449704,
502
+ "eval_loss": 3.2872769832611084,
503
+ "eval_rouge1": 44.0164,
504
+ "eval_rouge2": 17.1682,
505
+ "eval_rougeL": 26.4769,
506
+ "eval_rougeLsum": 42.3752,
507
+ "eval_runtime": 779.6058,
508
+ "eval_samples_per_second": 0.434,
509
+ "eval_steps_per_second": 0.055,
510
+ "step": 129
511
+ },
512
+ {
513
+ "epoch": 9.04,
514
+ "learning_rate": 0.001,
515
+ "loss": 0.0288,
516
+ "step": 130
517
+ },
518
+ {
519
+ "epoch": 9.18,
520
+ "learning_rate": 0.001,
521
+ "loss": 0.0252,
522
+ "step": 132
523
+ },
524
+ {
525
+ "epoch": 9.32,
526
+ "learning_rate": 0.001,
527
+ "loss": 0.0262,
528
+ "step": 134
529
+ },
530
+ {
531
+ "epoch": 9.46,
532
+ "learning_rate": 0.001,
533
+ "loss": 0.0297,
534
+ "step": 136
535
+ },
536
+ {
537
+ "epoch": 9.6,
538
+ "learning_rate": 0.001,
539
+ "loss": 0.0286,
540
+ "step": 138
541
+ },
542
+ {
543
+ "epoch": 9.74,
544
+ "learning_rate": 0.001,
545
+ "loss": 0.028,
546
+ "step": 140
547
+ },
548
+ {
549
+ "epoch": 9.74,
550
+ "eval_gen_len": 337.4822485207101,
551
+ "eval_loss": 2.9815244674682617,
552
+ "eval_rouge1": 46.6542,
553
+ "eval_rouge2": 17.8515,
554
+ "eval_rougeL": 28.146,
555
+ "eval_rougeLsum": 45.0274,
556
+ "eval_runtime": 765.5763,
557
+ "eval_samples_per_second": 0.441,
558
+ "eval_steps_per_second": 0.056,
559
+ "step": 140
560
+ },
561
+ {
562
+ "epoch": 9.74,
563
+ "step": 140,
564
+ "total_flos": 2.7436549259892326e+17,
565
+ "train_loss": 0.045875947869249756,
566
+ "train_runtime": 15096.5898,
567
+ "train_samples_per_second": 2.433,
568
+ "train_steps_per_second": 0.009
569
+ }
570
+ ],
571
+ "logging_steps": 2,
572
+ "max_steps": 140,
573
+ "num_train_epochs": 10,
574
+ "save_steps": 500,
575
+ "total_flos": 2.7436549259892326e+17,
576
+ "trial_name": null,
577
+ "trial_params": null
578
+ }