csikasote committed on
Commit cd9d645 · verified · 1 Parent(s): c8a06d2

End of training

Files changed (5)
  1. README.md +5 -3
  2. all_results.json +15 -0
  3. eval_results.json +9 -0
  4. train_results.json +9 -0
  5. trainer_state.json +281 -0
README.md CHANGED
@@ -3,6 +3,8 @@ library_name: transformers
 license: mit
 base_model: facebook/w2v-bert-2.0
 tags:
+- automatic-speech-recognition
+- genbed
 - generated_from_trainer
 metrics:
 - wer
@@ -16,10 +18,10 @@ should probably proofread and complete it, then remove this comment. -->
 
 # w2v-bert-bem-genbed-m-model
 
-This model is a fine-tuned version of [facebook/w2v-bert-2.0](https://huggingface.co/facebook/w2v-bert-2.0) on an unknown dataset.
+This model is a fine-tuned version of [facebook/w2v-bert-2.0](https://huggingface.co/facebook/w2v-bert-2.0) on the GENBED - BEM dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.4253
-- Wer: 0.5296
+- Loss: 0.4168
+- Wer: 0.5478
 
 ## Model description
 
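For reference, a minimal inference sketch against the updated model card. This is a hedged sketch, not part of the commit: the Hub repo id is assumed from the committer name and model directory (adjust it, or point at a local checkpoint directory, if the path differs), and the audio file is a placeholder.

```python
# Minimal greedy-CTC inference sketch for the fine-tuned w2v-bert-2.0 checkpoint.
# MODEL_ID is an assumption derived from the committer and model directory name.
import torch
import librosa
from transformers import AutoModelForCTC, AutoProcessor

MODEL_ID = "csikasote/w2v-bert-bem-genbed-m-model"  # assumed Hub repo id


def transcribe(audio_path: str, model_id: str = MODEL_ID) -> str:
    """Transcribe a single audio file with greedy CTC decoding."""
    processor = AutoProcessor.from_pretrained(model_id)
    model = AutoModelForCTC.from_pretrained(model_id)

    # w2v-bert-2.0 checkpoints expect 16 kHz mono audio.
    speech, _ = librosa.load(audio_path, sr=16_000)
    inputs = processor(speech, sampling_rate=16_000, return_tensors="pt")

    with torch.no_grad():
        logits = model(**inputs).logits

    # Pick the most likely token per frame; the tokenizer collapses repeats/blanks.
    pred_ids = torch.argmax(logits, dim=-1)
    return processor.batch_decode(pred_ids)[0]
```

Called as `print(transcribe("clip.wav"))` on a 16 kHz-compatible recording, this should print the model's transcription.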
all_results.json ADDED
@@ -0,0 +1,15 @@
+{
+    "epoch": 11.019283746556473,
+    "eval_loss": 0.4167534410953522,
+    "eval_runtime": 40.9232,
+    "eval_samples": 969,
+    "eval_samples_per_second": 23.679,
+    "eval_steps_per_second": 2.981,
+    "eval_wer": 0.5478185181155478,
+    "total_flos": 5.988611611546594e+18,
+    "train_loss": 0.38784495496749877,
+    "train_runtime": 3222.307,
+    "train_samples": 2900,
+    "train_samples_per_second": 26.999,
+    "train_steps_per_second": 1.685
+}
eval_results.json ADDED
@@ -0,0 +1,9 @@
+{
+    "epoch": 11.019283746556473,
+    "eval_loss": 0.4167534410953522,
+    "eval_runtime": 40.9232,
+    "eval_samples": 969,
+    "eval_samples_per_second": 23.679,
+    "eval_steps_per_second": 2.981,
+    "eval_wer": 0.5478185181155478
+}
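The `eval_wer` field above is a word error rate. As a hedged sketch, a figure like this can be reproduced offline with the `evaluate` library; the transcript pairs below are placeholders, not taken from the GENBED-BEM evaluation split.

```python
# Sketch: computing a word error rate like "eval_wer" with the evaluate library.
# The reference/prediction strings are hypothetical placeholders.
import evaluate

wer_metric = evaluate.load("wer")

references = ["this is a reference transcript", "another reference"]
predictions = ["this is a reference transcript", "an other reference"]

# WER = (substitutions + deletions + insertions) / number of reference words
print(wer_metric.compute(predictions=predictions, references=references))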
train_results.json ADDED
@@ -0,0 +1,9 @@
+{
+    "epoch": 11.019283746556473,
+    "total_flos": 5.988611611546594e+18,
+    "train_loss": 0.38784495496749877,
+    "train_runtime": 3222.307,
+    "train_samples": 2900,
+    "train_samples_per_second": 26.999,
+    "train_steps_per_second": 1.685
+}
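A note on the throughput fields: they appear to be normalized against the scheduled run length recorded in `trainer_state.json` (30 epochs / 5430 steps) rather than the 2000 steps actually completed before early stopping, so the effective throughput is lower than reported. A quick arithmetic check, assuming that convention:

```python
# Arithmetic check on the reported throughput, assuming the figures are
# normalized against the scheduled run length rather than the early-stopped one.
train_samples = 2900       # "train_samples"
num_train_epochs = 30      # from trainer_state.json
max_steps = 5430           # from trainer_state.json
global_step = 2000         # step at which early stopping halted training
train_runtime = 3222.307   # seconds

print(train_samples * num_train_epochs / train_runtime)  # ~26.999 (reported samples/s)
print(max_steps / train_runtime)                          # ~1.685  (reported steps/s)
print(global_step / train_runtime)                        # ~0.62   (effective steps/s)
```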
trainer_state.json ADDED
@@ -0,0 +1,281 @@
+{
+  "best_metric": 0.4167534410953522,
+  "best_model_checkpoint": "/scratch/skscla001/results/w2v-bert-bem-genbed-m-model/checkpoint-1400",
+  "epoch": 11.019283746556473,
+  "eval_steps": 200,
+  "global_step": 2000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.5509641873278237,
+      "grad_norm": 2.760388135910034,
+      "learning_rate": 0.000294,
+      "loss": 1.5066,
+      "step": 100
+    },
+    {
+      "epoch": 1.1019283746556474,
+      "grad_norm": 1.6717568635940552,
+      "learning_rate": 0.0002945403377110694,
+      "loss": 0.7215,
+      "step": 200
+    },
+    {
+      "epoch": 1.1019283746556474,
+      "eval_loss": 0.6149704456329346,
+      "eval_runtime": 41.354,
+      "eval_samples_per_second": 23.432,
+      "eval_steps_per_second": 2.95,
+      "eval_wer": 0.743009465781743,
+      "step": 200
+    },
+    {
+      "epoch": 1.6528925619834711,
+      "grad_norm": 1.9287854433059692,
+      "learning_rate": 0.00028891181988742964,
+      "loss": 0.597,
+      "step": 300
+    },
+    {
+      "epoch": 2.203856749311295,
+      "grad_norm": 1.757243275642395,
+      "learning_rate": 0.00028328330206378984,
+      "loss": 0.5519,
+      "step": 400
+    },
+    {
+      "epoch": 2.203856749311295,
+      "eval_loss": 0.5605354309082031,
+      "eval_runtime": 41.456,
+      "eval_samples_per_second": 23.374,
+      "eval_steps_per_second": 2.943,
+      "eval_wer": 0.7115656620607116,
+      "step": 400
+    },
+    {
+      "epoch": 2.7548209366391183,
+      "grad_norm": 1.5606329441070557,
+      "learning_rate": 0.0002776547842401501,
+      "loss": 0.485,
+      "step": 500
+    },
+    {
+      "epoch": 3.3057851239669422,
+      "grad_norm": 0.9466643333435059,
+      "learning_rate": 0.0002720262664165103,
+      "loss": 0.4346,
+      "step": 600
+    },
+    {
+      "epoch": 3.3057851239669422,
+      "eval_loss": 0.4709364175796509,
+      "eval_runtime": 41.6444,
+      "eval_samples_per_second": 23.268,
+      "eval_steps_per_second": 2.93,
+      "eval_wer": 0.6377978457186378,
+      "step": 600
+    },
+    {
+      "epoch": 3.8567493112947657,
+      "grad_norm": 0.8296416997909546,
+      "learning_rate": 0.0002663977485928705,
+      "loss": 0.3911,
+      "step": 700
+    },
+    {
+      "epoch": 4.40771349862259,
+      "grad_norm": 1.098000407218933,
+      "learning_rate": 0.00026076923076923076,
+      "loss": 0.3545,
+      "step": 800
+    },
+    {
+      "epoch": 4.40771349862259,
+      "eval_loss": 0.4685700237751007,
+      "eval_runtime": 41.6691,
+      "eval_samples_per_second": 23.255,
+      "eval_steps_per_second": 2.928,
+      "eval_wer": 0.5984114895005984,
+      "step": 800
+    },
+    {
+      "epoch": 4.958677685950414,
+      "grad_norm": 0.8255869746208191,
+      "learning_rate": 0.00025514071294559096,
+      "loss": 0.3439,
+      "step": 900
+    },
+    {
+      "epoch": 5.509641873278237,
+      "grad_norm": 0.8485454320907593,
+      "learning_rate": 0.00024951219512195117,
+      "loss": 0.3004,
+      "step": 1000
+    },
+    {
+      "epoch": 5.509641873278237,
+      "eval_loss": 0.4578340947628021,
+      "eval_runtime": 41.6261,
+      "eval_samples_per_second": 23.279,
+      "eval_steps_per_second": 2.931,
+      "eval_wer": 0.6202807093896203,
+      "step": 1000
+    },
+    {
+      "epoch": 6.0606060606060606,
+      "grad_norm": 0.5917987823486328,
+      "learning_rate": 0.00024388367729831143,
+      "loss": 0.2984,
+      "step": 1100
+    },
+    {
+      "epoch": 6.6115702479338845,
+      "grad_norm": 0.8203296065330505,
+      "learning_rate": 0.00023825515947467166,
+      "loss": 0.2498,
+      "step": 1200
+    },
+    {
+      "epoch": 6.6115702479338845,
+      "eval_loss": 0.4245435893535614,
+      "eval_runtime": 41.2956,
+      "eval_samples_per_second": 23.465,
+      "eval_steps_per_second": 2.954,
+      "eval_wer": 0.5246436731585247,
+      "step": 1200
+    },
+    {
+      "epoch": 7.162534435261708,
+      "grad_norm": 0.9434472322463989,
+      "learning_rate": 0.0002326266416510319,
+      "loss": 0.2456,
+      "step": 1300
+    },
+    {
+      "epoch": 7.7134986225895315,
+      "grad_norm": 1.4402457475662231,
+      "learning_rate": 0.0002269981238273921,
+      "loss": 0.23,
+      "step": 1400
+    },
+    {
+      "epoch": 7.7134986225895315,
+      "eval_loss": 0.4167534410953522,
+      "eval_runtime": 41.5017,
+      "eval_samples_per_second": 23.348,
+      "eval_steps_per_second": 2.94,
+      "eval_wer": 0.5478185181155478,
+      "step": 1400
+    },
+    {
+      "epoch": 8.264462809917354,
+      "grad_norm": 0.6520706415176392,
+      "learning_rate": 0.00022136960600375232,
+      "loss": 0.2146,
+      "step": 1500
+    },
+    {
+      "epoch": 8.81542699724518,
+      "grad_norm": 0.4538098871707916,
+      "learning_rate": 0.00021574108818011255,
+      "loss": 0.1959,
+      "step": 1600
+    },
+    {
+      "epoch": 8.81542699724518,
+      "eval_loss": 0.4212118089199066,
+      "eval_runtime": 41.8307,
+      "eval_samples_per_second": 23.165,
+      "eval_steps_per_second": 2.917,
+      "eval_wer": 0.523011641823523,
+      "step": 1600
+    },
+    {
+      "epoch": 9.366391184573002,
+      "grad_norm": 0.5852454900741577,
+      "learning_rate": 0.00021011257035647278,
+      "loss": 0.1855,
+      "step": 1700
+    },
+    {
+      "epoch": 9.917355371900827,
+      "grad_norm": 0.6804907321929932,
+      "learning_rate": 0.00020448405253283299,
+      "loss": 0.1682,
+      "step": 1800
+    },
+    {
+      "epoch": 9.917355371900827,
+      "eval_loss": 0.43570414185523987,
+      "eval_runtime": 41.3416,
+      "eval_samples_per_second": 23.439,
+      "eval_steps_per_second": 2.951,
+      "eval_wer": 0.5053857034055054,
+      "step": 1800
+    },
+    {
+      "epoch": 10.46831955922865,
+      "grad_norm": 0.7718915939331055,
+      "learning_rate": 0.00019885553470919322,
+      "loss": 0.1367,
+      "step": 1900
+    },
+    {
+      "epoch": 11.019283746556473,
+      "grad_norm": 0.7347743511199951,
+      "learning_rate": 0.00019322701688555345,
+      "loss": 0.1459,
+      "step": 2000
+    },
+    {
+      "epoch": 11.019283746556473,
+      "eval_loss": 0.4253489673137665,
+      "eval_runtime": 42.1972,
+      "eval_samples_per_second": 22.964,
+      "eval_steps_per_second": 2.891,
+      "eval_wer": 0.5296485692525297,
+      "step": 2000
+    },
+    {
+      "epoch": 11.019283746556473,
+      "step": 2000,
+      "total_flos": 5.988611611546594e+18,
+      "train_loss": 0.38784495496749877,
+      "train_runtime": 3222.307,
+      "train_samples_per_second": 26.999,
+      "train_steps_per_second": 1.685
+    }
+  ],
+  "logging_steps": 100,
+  "max_steps": 5430,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 30,
+  "save_steps": 200,
+  "stateful_callbacks": {
+    "EarlyStoppingCallback": {
+      "args": {
+        "early_stopping_patience": 3,
+        "early_stopping_threshold": 0.0
+      },
+      "attributes": {
+        "early_stopping_patience_counter": 3
+      }
+    },
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 5.988611611546594e+18,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}
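For completeness, a hedged sketch of a `Trainer` setup consistent with the state recorded above; it is not the training script from this repository. The model, datasets, data collator, and `compute_metrics` function are placeholders, and values not stored in the state (exact learning rate, warmup) are inferred from the logged schedule and approximate.

```python
# Hedged sketch of a Trainer configuration consistent with trainer_state.json.
# All function arguments are placeholders; inferred hyperparameters are marked.
from transformers import EarlyStoppingCallback, Trainer, TrainingArguments


def build_trainer(model, train_ds, eval_ds, data_collator, compute_metrics):
    """Assemble a Trainer whose settings mirror the recorded trainer state."""
    args = TrainingArguments(
        output_dir="w2v-bert-bem-genbed-m-model",
        per_device_train_batch_size=8,   # "train_batch_size": 8
        num_train_epochs=30,             # "num_train_epochs": 30
        learning_rate=3e-4,              # inferred from the logged schedule
        warmup_steps=100,                # inferred from the logged schedule
        logging_steps=100,               # "logging_steps": 100
        eval_strategy="steps",           # `evaluation_strategy` on older versions
        eval_steps=200,                  # "eval_steps": 200
        save_steps=200,                  # "save_steps": 200
        load_best_model_at_end=True,     # implied by "best_model_checkpoint"
        metric_for_best_model="eval_loss",
        greater_is_better=False,
    )
    return Trainer(
        model=model,
        args=args,
        train_dataset=train_ds,
        eval_dataset=eval_ds,
        data_collator=data_collator,
        compute_metrics=compute_metrics,
        # Matches "EarlyStoppingCallback": {"early_stopping_patience": 3}.
        callbacks=[EarlyStoppingCallback(early_stopping_patience=3)],
    )
```

The early-stopping settings line up with the recorded run: the best `eval_loss` (0.4168) was reached at step 1400, and training stopped at step 2000 after three further evaluations (steps 1600, 1800, 2000) without improvement, matching `early_stopping_patience_counter: 3` and `should_training_stop: true`.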