csikasote committed on
Commit
c990fe5
·
verified ·
1 Parent(s): 9247f3e

End of training

Browse files
README.md CHANGED
@@ -3,6 +3,9 @@ library_name: transformers
3
  license: cc-by-nc-4.0
4
  base_model: facebook/mms-1b-all
5
  tags:
 
 
 
6
  - generated_from_trainer
7
  metrics:
8
  - wer
@@ -16,7 +19,7 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  # mms-1b-swagen-combined-15hrs-model
18
 
19
- This model is a fine-tuned version of [facebook/mms-1b-all](https://huggingface.co/facebook/mms-1b-all) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
  - Loss: 0.2307
22
  - Wer: 0.1929
 
3
  license: cc-by-nc-4.0
4
  base_model: facebook/mms-1b-all
5
  tags:
6
+ - automatic-speech-recognition
7
+ - swagen
8
+ - mms
9
  - generated_from_trainer
10
  metrics:
11
  - wer
 
19
 
20
  # mms-1b-swagen-combined-15hrs-model
21
 
22
+ This model is a fine-tuned version of [facebook/mms-1b-all](https://huggingface.co/facebook/mms-1b-all) on the SWAGEN - SWA dataset.
23
  It achieves the following results on the evaluation set:
24
  - Loss: 0.2307
25
  - Wer: 0.1929
adapter.swa.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4350638faa5551178cfcd7b463ba47ba2b135df3ab5cd07b67c8fcb18b3a1d9c
3
+ size 8865152
all_results.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.035870864886409,
3
+ "eval_loss": 0.23070356249809265,
4
+ "eval_runtime": 65.4406,
5
+ "eval_samples": 1132,
6
+ "eval_samples_per_second": 17.298,
7
+ "eval_steps_per_second": 4.325,
8
+ "eval_wer": 0.19288835915772745,
9
+ "total_flos": 5.341353862310001e+18,
10
+ "train_loss": 1.5951181411743165,
11
+ "train_runtime": 2475.2222,
12
+ "train_samples": 10036,
13
+ "train_samples_per_second": 121.638,
14
+ "train_steps_per_second": 15.199
15
+ }
eval_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.035870864886409,
3
+ "eval_loss": 0.23070356249809265,
4
+ "eval_runtime": 65.4406,
5
+ "eval_samples": 1132,
6
+ "eval_samples_per_second": 17.298,
7
+ "eval_steps_per_second": 4.325,
8
+ "eval_wer": 0.19288835915772745
9
+ }
runs/Jan03_09-57-35_srvrocgpu011.uct.ac.za/events.out.tfevents.1735893906.srvrocgpu011.uct.ac.za ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ee5f532fcc7dceb4e810e72cfbec17aebb0644734cf575ffd5bd464b6abfe3b
3
+ size 40
train_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.035870864886409,
3
+ "total_flos": 5.341353862310001e+18,
4
+ "train_loss": 1.5951181411743165,
5
+ "train_runtime": 2475.2222,
6
+ "train_samples": 10036,
7
+ "train_samples_per_second": 121.638,
8
+ "train_steps_per_second": 15.199
9
+ }
trainer_state.json ADDED
@@ -0,0 +1,259 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.2293323278427124,
3
+ "best_model_checkpoint": "/scratch/skscla001/speech/results/mms-1b-swagen-combined-15hrs-model/checkpoint-900",
4
+ "epoch": 1.035870864886409,
5
+ "eval_steps": 100,
6
+ "global_step": 1300,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.07971303308090873,
13
+ "grad_norm": 4.197810173034668,
14
+ "learning_rate": 0.000279,
15
+ "loss": 14.8801,
16
+ "step": 100
17
+ },
18
+ {
19
+ "epoch": 0.07971303308090873,
20
+ "eval_loss": 0.7376943230628967,
21
+ "eval_runtime": 64.6778,
22
+ "eval_samples_per_second": 17.502,
23
+ "eval_steps_per_second": 4.376,
24
+ "eval_wer": 0.44259038537941997,
25
+ "step": 100
26
+ },
27
+ {
28
+ "epoch": 0.15942606616181745,
29
+ "grad_norm": 4.988656997680664,
30
+ "learning_rate": 0.0002992563965884861,
31
+ "loss": 0.6766,
32
+ "step": 200
33
+ },
34
+ {
35
+ "epoch": 0.15942606616181745,
36
+ "eval_loss": 0.2687693238258362,
37
+ "eval_runtime": 64.5932,
38
+ "eval_samples_per_second": 17.525,
39
+ "eval_steps_per_second": 4.381,
40
+ "eval_wer": 0.20063567739372268,
41
+ "step": 200
42
+ },
43
+ {
44
+ "epoch": 0.2391390992427262,
45
+ "grad_norm": 3.365363836288452,
46
+ "learning_rate": 0.0002984568230277185,
47
+ "loss": 0.5153,
48
+ "step": 300
49
+ },
50
+ {
51
+ "epoch": 0.2391390992427262,
52
+ "eval_loss": 0.24840499460697174,
53
+ "eval_runtime": 64.6789,
54
+ "eval_samples_per_second": 17.502,
55
+ "eval_steps_per_second": 4.375,
56
+ "eval_wer": 0.19745729042510926,
57
+ "step": 300
58
+ },
59
+ {
60
+ "epoch": 0.3188521323236349,
61
+ "grad_norm": 2.4677116870880127,
62
+ "learning_rate": 0.00029765724946695095,
63
+ "loss": 0.526,
64
+ "step": 400
65
+ },
66
+ {
67
+ "epoch": 0.3188521323236349,
68
+ "eval_loss": 0.23976168036460876,
69
+ "eval_runtime": 64.8543,
70
+ "eval_samples_per_second": 17.455,
71
+ "eval_steps_per_second": 4.364,
72
+ "eval_wer": 0.19487485101311083,
73
+ "step": 400
74
+ },
75
+ {
76
+ "epoch": 0.3985651654045436,
77
+ "grad_norm": 3.4616479873657227,
78
+ "learning_rate": 0.0002968576759061834,
79
+ "loss": 0.4874,
80
+ "step": 500
81
+ },
82
+ {
83
+ "epoch": 0.3985651654045436,
84
+ "eval_loss": 0.23978756368160248,
85
+ "eval_runtime": 65.3676,
86
+ "eval_samples_per_second": 17.317,
87
+ "eval_steps_per_second": 4.329,
88
+ "eval_wer": 0.19576877234803336,
89
+ "step": 500
90
+ },
91
+ {
92
+ "epoch": 0.4782781984854524,
93
+ "grad_norm": 1.9864723682403564,
94
+ "learning_rate": 0.00029605810234541576,
95
+ "loss": 0.4666,
96
+ "step": 600
97
+ },
98
+ {
99
+ "epoch": 0.4782781984854524,
100
+ "eval_loss": 0.2357860654592514,
101
+ "eval_runtime": 64.7957,
102
+ "eval_samples_per_second": 17.47,
103
+ "eval_steps_per_second": 4.368,
104
+ "eval_wer": 0.19090186730234407,
105
+ "step": 600
106
+ },
107
+ {
108
+ "epoch": 0.5579912315663611,
109
+ "grad_norm": 2.5512609481811523,
110
+ "learning_rate": 0.00029525852878464813,
111
+ "loss": 0.4406,
112
+ "step": 700
113
+ },
114
+ {
115
+ "epoch": 0.5579912315663611,
116
+ "eval_loss": 0.2390868365764618,
117
+ "eval_runtime": 64.7802,
118
+ "eval_samples_per_second": 17.474,
119
+ "eval_steps_per_second": 4.369,
120
+ "eval_wer": 0.194378228049265,
121
+ "step": 700
122
+ },
123
+ {
124
+ "epoch": 0.6377042646472698,
125
+ "grad_norm": 6.3221330642700195,
126
+ "learning_rate": 0.00029445895522388056,
127
+ "loss": 0.4689,
128
+ "step": 800
129
+ },
130
+ {
131
+ "epoch": 0.6377042646472698,
132
+ "eval_loss": 0.23335325717926025,
133
+ "eval_runtime": 64.7754,
134
+ "eval_samples_per_second": 17.476,
135
+ "eval_steps_per_second": 4.369,
136
+ "eval_wer": 0.19259038537941994,
137
+ "step": 800
138
+ },
139
+ {
140
+ "epoch": 0.7174172977281785,
141
+ "grad_norm": 2.1293585300445557,
142
+ "learning_rate": 0.000293659381663113,
143
+ "loss": 0.462,
144
+ "step": 900
145
+ },
146
+ {
147
+ "epoch": 0.7174172977281785,
148
+ "eval_loss": 0.2293323278427124,
149
+ "eval_runtime": 65.6055,
150
+ "eval_samples_per_second": 17.255,
151
+ "eval_steps_per_second": 4.314,
152
+ "eval_wer": 0.19268970997218912,
153
+ "step": 900
154
+ },
155
+ {
156
+ "epoch": 0.7971303308090872,
157
+ "grad_norm": 6.672135353088379,
158
+ "learning_rate": 0.00029285980810234537,
159
+ "loss": 0.4407,
160
+ "step": 1000
161
+ },
162
+ {
163
+ "epoch": 0.7971303308090872,
164
+ "eval_loss": 0.22934316098690033,
165
+ "eval_runtime": 65.0636,
166
+ "eval_samples_per_second": 17.398,
167
+ "eval_steps_per_second": 4.35,
168
+ "eval_wer": 0.19308700834326578,
169
+ "step": 1000
170
+ },
171
+ {
172
+ "epoch": 0.8768433638899961,
173
+ "grad_norm": 7.573569297790527,
174
+ "learning_rate": 0.0002920602345415778,
175
+ "loss": 0.4567,
176
+ "step": 1100
177
+ },
178
+ {
179
+ "epoch": 0.8768433638899961,
180
+ "eval_loss": 0.22979336977005005,
181
+ "eval_runtime": 65.234,
182
+ "eval_samples_per_second": 17.353,
183
+ "eval_steps_per_second": 4.338,
184
+ "eval_wer": 0.19278903456495827,
185
+ "step": 1100
186
+ },
187
+ {
188
+ "epoch": 0.9565563969709048,
189
+ "grad_norm": 2.2756216526031494,
190
+ "learning_rate": 0.00029126066098081023,
191
+ "loss": 0.4711,
192
+ "step": 1200
193
+ },
194
+ {
195
+ "epoch": 0.9565563969709048,
196
+ "eval_loss": 0.23050223290920258,
197
+ "eval_runtime": 65.3786,
198
+ "eval_samples_per_second": 17.315,
199
+ "eval_steps_per_second": 4.329,
200
+ "eval_wer": 0.19715931664680175,
201
+ "step": 1200
202
+ },
203
+ {
204
+ "epoch": 1.035870864886409,
205
+ "grad_norm": 1.712958574295044,
206
+ "learning_rate": 0.0002904610874200426,
207
+ "loss": 0.4444,
208
+ "step": 1300
209
+ },
210
+ {
211
+ "epoch": 1.035870864886409,
212
+ "eval_loss": 0.23069703578948975,
213
+ "eval_runtime": 66.0724,
214
+ "eval_samples_per_second": 17.133,
215
+ "eval_steps_per_second": 4.283,
216
+ "eval_wer": 0.19288835915772745,
217
+ "step": 1300
218
+ },
219
+ {
220
+ "epoch": 1.035870864886409,
221
+ "step": 1300,
222
+ "total_flos": 5.341353862310001e+18,
223
+ "train_loss": 1.5951181411743165,
224
+ "train_runtime": 2475.2222,
225
+ "train_samples_per_second": 121.638,
226
+ "train_steps_per_second": 15.199
227
+ }
228
+ ],
229
+ "logging_steps": 100,
230
+ "max_steps": 37620,
231
+ "num_input_tokens_seen": 0,
232
+ "num_train_epochs": 30,
233
+ "save_steps": 400,
234
+ "stateful_callbacks": {
235
+ "EarlyStoppingCallback": {
236
+ "args": {
237
+ "early_stopping_patience": 4,
238
+ "early_stopping_threshold": 0.0
239
+ },
240
+ "attributes": {
241
+ "early_stopping_patience_counter": 3
242
+ }
243
+ },
244
+ "TrainerControl": {
245
+ "args": {
246
+ "should_epoch_stop": false,
247
+ "should_evaluate": false,
248
+ "should_log": false,
249
+ "should_save": true,
250
+ "should_training_stop": false
251
+ },
252
+ "attributes": {}
253
+ }
254
+ },
255
+ "total_flos": 5.341353862310001e+18,
256
+ "train_batch_size": 4,
257
+ "trial_name": null,
258
+ "trial_params": null
259
+ }