cloudwalkerw commited on
Commit
c098c70
·
1 Parent(s): 808d890

End of training

Browse files
Files changed (5) hide show
  1. README.md +1 -0
  2. all_results.json +12 -0
  3. eval_results.json +8 -0
  4. train_results.json +7 -0
  5. trainer_state.json +613 -0
README.md CHANGED
@@ -1,6 +1,7 @@
1
  ---
2
  base_model: microsoft/wavlm-base
3
  tags:
 
4
  - generated_from_trainer
5
  metrics:
6
  - accuracy
 
1
  ---
2
  base_model: microsoft/wavlm-base
3
  tags:
4
+ - audio-classification
5
  - generated_from_trainer
6
  metrics:
7
  - accuracy
all_results.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 9.98,
3
+ "eval_accuracy": 0.8974400257607471,
4
+ "eval_loss": 0.4151269197463989,
5
+ "eval_runtime": 499.658,
6
+ "eval_samples_per_second": 49.722,
7
+ "eval_steps_per_second": 24.861,
8
+ "train_loss": 0.5747933341999246,
9
+ "train_runtime": 22880.3496,
10
+ "train_samples_per_second": 11.092,
11
+ "train_steps_per_second": 0.173
12
+ }
eval_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 9.98,
3
+ "eval_accuracy": 0.8974400257607471,
4
+ "eval_loss": 0.4151269197463989,
5
+ "eval_runtime": 499.658,
6
+ "eval_samples_per_second": 49.722,
7
+ "eval_steps_per_second": 24.861
8
+ }
train_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 9.98,
3
+ "train_loss": 0.5747933341999246,
4
+ "train_runtime": 22880.3496,
5
+ "train_samples_per_second": 11.092,
6
+ "train_steps_per_second": 0.173
7
+ }
trainer_state.json ADDED
@@ -0,0 +1,613 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.027737604454159737,
3
+ "best_model_checkpoint": "/home/cloudwalker/ASVmodel/wavlm-base_5/checkpoint-100",
4
+ "epoch": 9.98109640831758,
5
+ "eval_steps": 100,
6
+ "global_step": 3960,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.25,
13
+ "learning_rate": 7.575757575757576e-05,
14
+ "loss": 0.3764,
15
+ "step": 100
16
+ },
17
+ {
18
+ "epoch": 0.25,
19
+ "eval_accuracy": 0.9947673482530993,
20
+ "eval_loss": 0.027737604454159737,
21
+ "eval_runtime": 497.2467,
22
+ "eval_samples_per_second": 49.963,
23
+ "eval_steps_per_second": 24.982,
24
+ "step": 100
25
+ },
26
+ {
27
+ "epoch": 0.5,
28
+ "learning_rate": 0.00015151515151515152,
29
+ "loss": 0.1211,
30
+ "step": 200
31
+ },
32
+ {
33
+ "epoch": 0.5,
34
+ "eval_accuracy": 0.998108195137659,
35
+ "eval_loss": 0.02966056950390339,
36
+ "eval_runtime": 498.935,
37
+ "eval_samples_per_second": 49.794,
38
+ "eval_steps_per_second": 24.897,
39
+ "step": 200
40
+ },
41
+ {
42
+ "epoch": 0.76,
43
+ "learning_rate": 0.00022727272727272725,
44
+ "loss": 0.2525,
45
+ "step": 300
46
+ },
47
+ {
48
+ "epoch": 0.76,
49
+ "eval_accuracy": 0.9168008372242795,
50
+ "eval_loss": 1.283957839012146,
51
+ "eval_runtime": 499.0992,
52
+ "eval_samples_per_second": 49.778,
53
+ "eval_steps_per_second": 24.889,
54
+ "step": 300
55
+ },
56
+ {
57
+ "epoch": 1.01,
58
+ "learning_rate": 0.00029966329966329963,
59
+ "loss": 0.784,
60
+ "step": 400
61
+ },
62
+ {
63
+ "epoch": 1.01,
64
+ "eval_accuracy": 0.8974400257607471,
65
+ "eval_loss": 0.34426796436309814,
66
+ "eval_runtime": 499.2746,
67
+ "eval_samples_per_second": 49.76,
68
+ "eval_steps_per_second": 24.88,
69
+ "step": 400
70
+ },
71
+ {
72
+ "epoch": 1.26,
73
+ "learning_rate": 0.00029124579124579125,
74
+ "loss": 0.6053,
75
+ "step": 500
76
+ },
77
+ {
78
+ "epoch": 1.26,
79
+ "eval_accuracy": 0.8974400257607471,
80
+ "eval_loss": 0.3957619071006775,
81
+ "eval_runtime": 499.3187,
82
+ "eval_samples_per_second": 49.756,
83
+ "eval_steps_per_second": 24.878,
84
+ "step": 500
85
+ },
86
+ {
87
+ "epoch": 1.51,
88
+ "learning_rate": 0.0002828282828282828,
89
+ "loss": 0.6038,
90
+ "step": 600
91
+ },
92
+ {
93
+ "epoch": 1.51,
94
+ "eval_accuracy": 0.8974400257607471,
95
+ "eval_loss": 0.4848286509513855,
96
+ "eval_runtime": 499.175,
97
+ "eval_samples_per_second": 49.77,
98
+ "eval_steps_per_second": 24.885,
99
+ "step": 600
100
+ },
101
+ {
102
+ "epoch": 1.76,
103
+ "learning_rate": 0.0002744107744107744,
104
+ "loss": 0.5996,
105
+ "step": 700
106
+ },
107
+ {
108
+ "epoch": 1.76,
109
+ "eval_accuracy": 0.8974400257607471,
110
+ "eval_loss": 0.39541468024253845,
111
+ "eval_runtime": 499.7897,
112
+ "eval_samples_per_second": 49.709,
113
+ "eval_steps_per_second": 24.854,
114
+ "step": 700
115
+ },
116
+ {
117
+ "epoch": 2.02,
118
+ "learning_rate": 0.00026599326599326595,
119
+ "loss": 0.5914,
120
+ "step": 800
121
+ },
122
+ {
123
+ "epoch": 2.02,
124
+ "eval_accuracy": 0.8974400257607471,
125
+ "eval_loss": 0.3970233201980591,
126
+ "eval_runtime": 499.6888,
127
+ "eval_samples_per_second": 49.719,
128
+ "eval_steps_per_second": 24.859,
129
+ "step": 800
130
+ },
131
+ {
132
+ "epoch": 2.27,
133
+ "learning_rate": 0.00025757575757575756,
134
+ "loss": 0.6077,
135
+ "step": 900
136
+ },
137
+ {
138
+ "epoch": 2.27,
139
+ "eval_accuracy": 0.8974400257607471,
140
+ "eval_loss": 0.4722130298614502,
141
+ "eval_runtime": 499.722,
142
+ "eval_samples_per_second": 49.716,
143
+ "eval_steps_per_second": 24.858,
144
+ "step": 900
145
+ },
146
+ {
147
+ "epoch": 2.52,
148
+ "learning_rate": 0.00024915824915824913,
149
+ "loss": 0.5991,
150
+ "step": 1000
151
+ },
152
+ {
153
+ "epoch": 2.52,
154
+ "eval_accuracy": 0.8974400257607471,
155
+ "eval_loss": 0.43623748421669006,
156
+ "eval_runtime": 499.9532,
157
+ "eval_samples_per_second": 49.693,
158
+ "eval_steps_per_second": 24.846,
159
+ "step": 1000
160
+ },
161
+ {
162
+ "epoch": 2.77,
163
+ "learning_rate": 0.00024074074074074072,
164
+ "loss": 0.5813,
165
+ "step": 1100
166
+ },
167
+ {
168
+ "epoch": 2.77,
169
+ "eval_accuracy": 0.8974400257607471,
170
+ "eval_loss": 0.38707101345062256,
171
+ "eval_runtime": 499.9678,
172
+ "eval_samples_per_second": 49.691,
173
+ "eval_steps_per_second": 24.846,
174
+ "step": 1100
175
+ },
176
+ {
177
+ "epoch": 3.02,
178
+ "learning_rate": 0.0002323232323232323,
179
+ "loss": 0.5953,
180
+ "step": 1200
181
+ },
182
+ {
183
+ "epoch": 3.02,
184
+ "eval_accuracy": 0.8974400257607471,
185
+ "eval_loss": 0.40134623646736145,
186
+ "eval_runtime": 499.6897,
187
+ "eval_samples_per_second": 49.719,
188
+ "eval_steps_per_second": 24.859,
189
+ "step": 1200
190
+ },
191
+ {
192
+ "epoch": 3.28,
193
+ "learning_rate": 0.0002239057239057239,
194
+ "loss": 0.5957,
195
+ "step": 1300
196
+ },
197
+ {
198
+ "epoch": 3.28,
199
+ "eval_accuracy": 0.8974400257607471,
200
+ "eval_loss": 0.46927762031555176,
201
+ "eval_runtime": 499.7933,
202
+ "eval_samples_per_second": 49.709,
203
+ "eval_steps_per_second": 24.854,
204
+ "step": 1300
205
+ },
206
+ {
207
+ "epoch": 3.53,
208
+ "learning_rate": 0.00021548821548821544,
209
+ "loss": 0.5852,
210
+ "step": 1400
211
+ },
212
+ {
213
+ "epoch": 3.53,
214
+ "eval_accuracy": 0.8974400257607471,
215
+ "eval_loss": 0.387899786233902,
216
+ "eval_runtime": 499.9294,
217
+ "eval_samples_per_second": 49.695,
218
+ "eval_steps_per_second": 24.848,
219
+ "step": 1400
220
+ },
221
+ {
222
+ "epoch": 3.78,
223
+ "learning_rate": 0.00020707070707070703,
224
+ "loss": 0.6066,
225
+ "step": 1500
226
+ },
227
+ {
228
+ "epoch": 3.78,
229
+ "eval_accuracy": 0.8974400257607471,
230
+ "eval_loss": 0.42799168825149536,
231
+ "eval_runtime": 499.9481,
232
+ "eval_samples_per_second": 49.693,
233
+ "eval_steps_per_second": 24.847,
234
+ "step": 1500
235
+ },
236
+ {
237
+ "epoch": 4.03,
238
+ "learning_rate": 0.00019865319865319862,
239
+ "loss": 0.6085,
240
+ "step": 1600
241
+ },
242
+ {
243
+ "epoch": 4.03,
244
+ "eval_accuracy": 0.8974400257607471,
245
+ "eval_loss": 0.4358781576156616,
246
+ "eval_runtime": 499.7505,
247
+ "eval_samples_per_second": 49.713,
248
+ "eval_steps_per_second": 24.856,
249
+ "step": 1600
250
+ },
251
+ {
252
+ "epoch": 4.28,
253
+ "learning_rate": 0.00019023569023569022,
254
+ "loss": 0.5944,
255
+ "step": 1700
256
+ },
257
+ {
258
+ "epoch": 4.28,
259
+ "eval_accuracy": 0.8974400257607471,
260
+ "eval_loss": 0.41665858030319214,
261
+ "eval_runtime": 499.6545,
262
+ "eval_samples_per_second": 49.722,
263
+ "eval_steps_per_second": 24.861,
264
+ "step": 1700
265
+ },
266
+ {
267
+ "epoch": 4.54,
268
+ "learning_rate": 0.0001818181818181818,
269
+ "loss": 0.5994,
270
+ "step": 1800
271
+ },
272
+ {
273
+ "epoch": 4.54,
274
+ "eval_accuracy": 0.8974400257607471,
275
+ "eval_loss": 0.4139242172241211,
276
+ "eval_runtime": 499.4729,
277
+ "eval_samples_per_second": 49.74,
278
+ "eval_steps_per_second": 24.87,
279
+ "step": 1800
280
+ },
281
+ {
282
+ "epoch": 4.79,
283
+ "learning_rate": 0.0001734006734006734,
284
+ "loss": 0.5953,
285
+ "step": 1900
286
+ },
287
+ {
288
+ "epoch": 4.79,
289
+ "eval_accuracy": 0.8974400257607471,
290
+ "eval_loss": 0.42556148767471313,
291
+ "eval_runtime": 499.5722,
292
+ "eval_samples_per_second": 49.731,
293
+ "eval_steps_per_second": 24.865,
294
+ "step": 1900
295
+ },
296
+ {
297
+ "epoch": 5.04,
298
+ "learning_rate": 0.000164983164983165,
299
+ "loss": 0.5929,
300
+ "step": 2000
301
+ },
302
+ {
303
+ "epoch": 5.04,
304
+ "eval_accuracy": 0.8974400257607471,
305
+ "eval_loss": 0.4370674192905426,
306
+ "eval_runtime": 499.6245,
307
+ "eval_samples_per_second": 49.725,
308
+ "eval_steps_per_second": 24.863,
309
+ "step": 2000
310
+ },
311
+ {
312
+ "epoch": 5.29,
313
+ "learning_rate": 0.00015656565656565653,
314
+ "loss": 0.6067,
315
+ "step": 2100
316
+ },
317
+ {
318
+ "epoch": 5.29,
319
+ "eval_accuracy": 0.8974400257607471,
320
+ "eval_loss": 0.4254782199859619,
321
+ "eval_runtime": 499.8266,
322
+ "eval_samples_per_second": 49.705,
323
+ "eval_steps_per_second": 24.853,
324
+ "step": 2100
325
+ },
326
+ {
327
+ "epoch": 5.55,
328
+ "learning_rate": 0.00014814814814814812,
329
+ "loss": 0.5944,
330
+ "step": 2200
331
+ },
332
+ {
333
+ "epoch": 5.55,
334
+ "eval_accuracy": 0.8974400257607471,
335
+ "eval_loss": 0.41206374764442444,
336
+ "eval_runtime": 499.9874,
337
+ "eval_samples_per_second": 49.689,
338
+ "eval_steps_per_second": 24.845,
339
+ "step": 2200
340
+ },
341
+ {
342
+ "epoch": 5.8,
343
+ "learning_rate": 0.0001397306397306397,
344
+ "loss": 0.5926,
345
+ "step": 2300
346
+ },
347
+ {
348
+ "epoch": 5.8,
349
+ "eval_accuracy": 0.8974400257607471,
350
+ "eval_loss": 0.42098236083984375,
351
+ "eval_runtime": 499.6375,
352
+ "eval_samples_per_second": 49.724,
353
+ "eval_steps_per_second": 24.862,
354
+ "step": 2300
355
+ },
356
+ {
357
+ "epoch": 6.05,
358
+ "learning_rate": 0.0001313131313131313,
359
+ "loss": 0.594,
360
+ "step": 2400
361
+ },
362
+ {
363
+ "epoch": 6.05,
364
+ "eval_accuracy": 0.8974400257607471,
365
+ "eval_loss": 0.40572819113731384,
366
+ "eval_runtime": 499.6473,
367
+ "eval_samples_per_second": 49.723,
368
+ "eval_steps_per_second": 24.862,
369
+ "step": 2400
370
+ },
371
+ {
372
+ "epoch": 6.3,
373
+ "learning_rate": 0.0001228956228956229,
374
+ "loss": 0.6042,
375
+ "step": 2500
376
+ },
377
+ {
378
+ "epoch": 6.3,
379
+ "eval_accuracy": 0.8974400257607471,
380
+ "eval_loss": 0.42515799403190613,
381
+ "eval_runtime": 499.6019,
382
+ "eval_samples_per_second": 49.728,
383
+ "eval_steps_per_second": 24.864,
384
+ "step": 2500
385
+ },
386
+ {
387
+ "epoch": 6.55,
388
+ "learning_rate": 0.00011447811447811446,
389
+ "loss": 0.5971,
390
+ "step": 2600
391
+ },
392
+ {
393
+ "epoch": 6.55,
394
+ "eval_accuracy": 0.8974400257607471,
395
+ "eval_loss": 0.395828515291214,
396
+ "eval_runtime": 500.1612,
397
+ "eval_samples_per_second": 49.672,
398
+ "eval_steps_per_second": 24.836,
399
+ "step": 2600
400
+ },
401
+ {
402
+ "epoch": 6.81,
403
+ "learning_rate": 0.00010606060606060605,
404
+ "loss": 0.597,
405
+ "step": 2700
406
+ },
407
+ {
408
+ "epoch": 6.81,
409
+ "eval_accuracy": 0.8974400257607471,
410
+ "eval_loss": 0.41235631704330444,
411
+ "eval_runtime": 499.5804,
412
+ "eval_samples_per_second": 49.73,
413
+ "eval_steps_per_second": 24.865,
414
+ "step": 2700
415
+ },
416
+ {
417
+ "epoch": 7.06,
418
+ "learning_rate": 9.764309764309764e-05,
419
+ "loss": 0.5816,
420
+ "step": 2800
421
+ },
422
+ {
423
+ "epoch": 7.06,
424
+ "eval_accuracy": 0.8974400257607471,
425
+ "eval_loss": 0.41006505489349365,
426
+ "eval_runtime": 499.6536,
427
+ "eval_samples_per_second": 49.722,
428
+ "eval_steps_per_second": 24.861,
429
+ "step": 2800
430
+ },
431
+ {
432
+ "epoch": 7.31,
433
+ "learning_rate": 8.92255892255892e-05,
434
+ "loss": 0.5944,
435
+ "step": 2900
436
+ },
437
+ {
438
+ "epoch": 7.31,
439
+ "eval_accuracy": 0.8974400257607471,
440
+ "eval_loss": 0.42578616738319397,
441
+ "eval_runtime": 499.7205,
442
+ "eval_samples_per_second": 49.716,
443
+ "eval_steps_per_second": 24.858,
444
+ "step": 2900
445
+ },
446
+ {
447
+ "epoch": 7.56,
448
+ "learning_rate": 8.08080808080808e-05,
449
+ "loss": 0.6053,
450
+ "step": 3000
451
+ },
452
+ {
453
+ "epoch": 7.56,
454
+ "eval_accuracy": 0.8974400257607471,
455
+ "eval_loss": 0.4414595365524292,
456
+ "eval_runtime": 499.6011,
457
+ "eval_samples_per_second": 49.728,
458
+ "eval_steps_per_second": 24.864,
459
+ "step": 3000
460
+ },
461
+ {
462
+ "epoch": 7.81,
463
+ "learning_rate": 7.239057239057239e-05,
464
+ "loss": 0.5894,
465
+ "step": 3100
466
+ },
467
+ {
468
+ "epoch": 7.81,
469
+ "eval_accuracy": 0.8974400257607471,
470
+ "eval_loss": 0.40666893124580383,
471
+ "eval_runtime": 499.6727,
472
+ "eval_samples_per_second": 49.721,
473
+ "eval_steps_per_second": 24.86,
474
+ "step": 3100
475
+ },
476
+ {
477
+ "epoch": 8.07,
478
+ "learning_rate": 6.397306397306397e-05,
479
+ "loss": 0.5987,
480
+ "step": 3200
481
+ },
482
+ {
483
+ "epoch": 8.07,
484
+ "eval_accuracy": 0.8974400257607471,
485
+ "eval_loss": 0.41087645292282104,
486
+ "eval_runtime": 499.5755,
487
+ "eval_samples_per_second": 49.73,
488
+ "eval_steps_per_second": 24.865,
489
+ "step": 3200
490
+ },
491
+ {
492
+ "epoch": 8.32,
493
+ "learning_rate": 5.5555555555555545e-05,
494
+ "loss": 0.5846,
495
+ "step": 3300
496
+ },
497
+ {
498
+ "epoch": 8.32,
499
+ "eval_accuracy": 0.8974400257607471,
500
+ "eval_loss": 0.4095437824726105,
501
+ "eval_runtime": 499.7301,
502
+ "eval_samples_per_second": 49.715,
503
+ "eval_steps_per_second": 24.857,
504
+ "step": 3300
505
+ },
506
+ {
507
+ "epoch": 8.57,
508
+ "learning_rate": 4.7138047138047136e-05,
509
+ "loss": 0.5982,
510
+ "step": 3400
511
+ },
512
+ {
513
+ "epoch": 8.57,
514
+ "eval_accuracy": 0.8974400257607471,
515
+ "eval_loss": 0.41868552565574646,
516
+ "eval_runtime": 499.638,
517
+ "eval_samples_per_second": 49.724,
518
+ "eval_steps_per_second": 24.862,
519
+ "step": 3400
520
+ },
521
+ {
522
+ "epoch": 8.82,
523
+ "learning_rate": 3.8720538720538714e-05,
524
+ "loss": 0.5932,
525
+ "step": 3500
526
+ },
527
+ {
528
+ "epoch": 8.82,
529
+ "eval_accuracy": 0.8974400257607471,
530
+ "eval_loss": 0.4124181866645813,
531
+ "eval_runtime": 499.6303,
532
+ "eval_samples_per_second": 49.725,
533
+ "eval_steps_per_second": 24.862,
534
+ "step": 3500
535
+ },
536
+ {
537
+ "epoch": 9.07,
538
+ "learning_rate": 3.03030303030303e-05,
539
+ "loss": 0.6007,
540
+ "step": 3600
541
+ },
542
+ {
543
+ "epoch": 9.07,
544
+ "eval_accuracy": 0.8974400257607471,
545
+ "eval_loss": 0.42121654748916626,
546
+ "eval_runtime": 499.864,
547
+ "eval_samples_per_second": 49.702,
548
+ "eval_steps_per_second": 24.851,
549
+ "step": 3600
550
+ },
551
+ {
552
+ "epoch": 9.33,
553
+ "learning_rate": 2.1885521885521884e-05,
554
+ "loss": 0.6041,
555
+ "step": 3700
556
+ },
557
+ {
558
+ "epoch": 9.33,
559
+ "eval_accuracy": 0.8974400257607471,
560
+ "eval_loss": 0.4257338345050812,
561
+ "eval_runtime": 499.3339,
562
+ "eval_samples_per_second": 49.754,
563
+ "eval_steps_per_second": 24.877,
564
+ "step": 3700
565
+ },
566
+ {
567
+ "epoch": 9.58,
568
+ "learning_rate": 1.3468013468013465e-05,
569
+ "loss": 0.5859,
570
+ "step": 3800
571
+ },
572
+ {
573
+ "epoch": 9.58,
574
+ "eval_accuracy": 0.8974400257607471,
575
+ "eval_loss": 0.4176446497440338,
576
+ "eval_runtime": 499.7293,
577
+ "eval_samples_per_second": 49.715,
578
+ "eval_steps_per_second": 24.857,
579
+ "step": 3800
580
+ },
581
+ {
582
+ "epoch": 9.83,
583
+ "learning_rate": 5.05050505050505e-06,
584
+ "loss": 0.5842,
585
+ "step": 3900
586
+ },
587
+ {
588
+ "epoch": 9.83,
589
+ "eval_accuracy": 0.8974400257607471,
590
+ "eval_loss": 0.41514307260513306,
591
+ "eval_runtime": 499.5922,
592
+ "eval_samples_per_second": 49.729,
593
+ "eval_steps_per_second": 24.864,
594
+ "step": 3900
595
+ },
596
+ {
597
+ "epoch": 9.98,
598
+ "step": 3960,
599
+ "total_flos": 1.506420070651024e+19,
600
+ "train_loss": 0.5747933341999246,
601
+ "train_runtime": 22880.3496,
602
+ "train_samples_per_second": 11.092,
603
+ "train_steps_per_second": 0.173
604
+ }
605
+ ],
606
+ "logging_steps": 100,
607
+ "max_steps": 3960,
608
+ "num_train_epochs": 10,
609
+ "save_steps": 100,
610
+ "total_flos": 1.506420070651024e+19,
611
+ "trial_name": null,
612
+ "trial_params": null
613
+ }