pyf98 committed on
Commit
75f5b72
·
1 Parent(s): f125431

Update model

Browse files
Files changed (20) hide show
  1. README.md +305 -0
  2. exp/asr_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds/RESULTS.md +31 -0
  3. exp/asr_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds/config.yaml +205 -0
  4. exp/asr_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds/images/acc.png +0 -0
  5. exp/asr_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds/images/backward_time.png +0 -0
  6. exp/asr_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds/images/cer.png +0 -0
  7. exp/asr_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds/images/cer_ctc.png +0 -0
  8. exp/asr_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds/images/forward_time.png +0 -0
  9. exp/asr_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds/images/gpu_max_cached_mem_GB.png +0 -0
  10. exp/asr_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds/images/iter_time.png +0 -0
  11. exp/asr_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds/images/loss.png +0 -0
  12. exp/asr_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds/images/loss_att.png +0 -0
  13. exp/asr_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds/images/loss_ctc.png +0 -0
  14. exp/asr_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds/images/optim0_lr0.png +0 -0
  15. exp/asr_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds/images/optim_step_time.png +0 -0
  16. exp/asr_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds/images/train_time.png +0 -0
  17. exp/asr_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds/images/wer.png +0 -0
  18. exp/asr_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds/valid.acc.ave_10best.pth +3 -0
  19. exp/asr_stats_fbank_pitch_word_sp/train/feats_stats.npz +0 -0
  20. meta.yaml +8 -0
README.md ADDED
@@ -0,0 +1,305 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - espnet
4
+ - audio
5
+ - automatic-speech-recognition
6
+ language: noinfo
7
+ datasets:
8
+ - speechcommands
9
+ license: cc-by-4.0
10
+ ---
11
+
12
+ ## ESPnet2 ASR model
13
+
14
+ ### `pyf98/speechcommands_12commands_conformer`
15
+
16
+ This model was trained by Yifan Peng using the speechcommands recipe in [espnet](https://github.com/espnet/espnet/).
17
+
18
+ ### Demo: How to use in ESPnet2
19
+
20
+ ```bash
21
+ cd espnet
22
+ git checkout bf523b70cae8300da004b41ec6a0d1b57c7ae8bb
23
+ pip install -e .
24
+ cd egs2/speechcommands/asr1
25
+ ./run.sh --skip_data_prep false --skip_train true --download_model pyf98/speechcommands_12commands_conformer
26
+ ```
27
+
28
+ <!-- Generated by scripts/utils/show_asr_result.sh -->
29
+ # RESULTS
30
+ ## Environments
31
+ - date: `Fri Dec 24 21:53:37 EST 2021`
32
+ - python version: `3.9.7 (default, Sep 16 2021, 13:09:58) [GCC 7.5.0]`
33
+ - espnet version: `espnet 0.10.5a1`
34
+ - pytorch version: `pytorch 1.9.0`
35
+ - Git hash: `3fd3dae71427d2ba5ecbc3fe0f2ae05db79acc29`
36
+ - Commit date: `Fri Dec 24 21:32:26 2021 -0500`
37
+
38
+ ## asr_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds
39
+ ### WER
40
+
41
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
42
+ |---|---|---|---|---|---|---|---|---|
43
+ |infer/dev|4605|4605|97.7|2.3|0.0|0.0|2.3|2.3|
44
+ |infer/test|4890|4890|97.9|2.1|0.0|0.0|2.1|2.1|
45
+ |infer/test_speechbrain|4886|4886|98.4|1.6|0.0|0.0|1.6|1.6|
46
+
47
+ ### CER
48
+
49
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
50
+ |---|---|---|---|---|---|---|---|---|
51
+ |infer/dev|4605|19541|98.6|0.9|0.5|1.0|2.5|2.3|
52
+ |infer/test|4890|19959|97.8|1.1|1.1|0.7|3.0|2.1|
53
+ |infer/test_speechbrain|4886|19923|98.7|0.7|0.6|0.6|1.9|1.6|
54
+
55
+ ### TER
56
+
57
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
58
+ |---|---|---|---|---|---|---|---|---|
59
+
60
+ ## ASR config
61
+
62
+ <details><summary>expand</summary>
63
+
64
+ ```
65
+ config: conf/train_asr_conformer_noBatchNorm.yaml
66
+ print_config: false
67
+ log_level: INFO
68
+ dry_run: false
69
+ iterator_type: sequence
70
+ output_dir: exp/asr_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds
71
+ ngpu: 1
72
+ seed: 0
73
+ num_workers: 1
74
+ num_att_plot: 3
75
+ dist_backend: nccl
76
+ dist_init_method: env://
77
+ dist_world_size: null
78
+ dist_rank: null
79
+ local_rank: 0
80
+ dist_master_addr: null
81
+ dist_master_port: null
82
+ dist_launcher: null
83
+ multiprocessing_distributed: false
84
+ unused_parameters: false
85
+ sharded_ddp: false
86
+ cudnn_enabled: true
87
+ cudnn_benchmark: false
88
+ cudnn_deterministic: true
89
+ collect_stats: false
90
+ write_collected_feats: false
91
+ max_epoch: 150
92
+ patience: null
93
+ val_scheduler_criterion:
94
+ - valid
95
+ - loss
96
+ early_stopping_criterion:
97
+ - valid
98
+ - loss
99
+ - min
100
+ best_model_criterion:
101
+ - - valid
102
+ - loss
103
+ - min
104
+ - - valid
105
+ - acc
106
+ - max
107
+ keep_nbest_models: 10
108
+ grad_clip: 5.0
109
+ grad_clip_type: 2.0
110
+ grad_noise: false
111
+ accum_grad: 3
112
+ no_forward_run: false
113
+ resume: true
114
+ train_dtype: float32
115
+ use_amp: false
116
+ log_interval: null
117
+ use_tensorboard: true
118
+ use_wandb: false
119
+ wandb_project: null
120
+ wandb_id: null
121
+ wandb_entity: null
122
+ wandb_name: null
123
+ wandb_model_log_interval: -1
124
+ detect_anomaly: false
125
+ pretrain_path: null
126
+ init_param: []
127
+ ignore_init_mismatch: false
128
+ freeze_param: []
129
+ num_iters_per_epoch: null
130
+ batch_size: 20
131
+ valid_batch_size: null
132
+ batch_bins: 4000000
133
+ valid_batch_bins: null
134
+ train_shape_file:
135
+ - exp/asr_stats_fbank_pitch_word_sp/train/speech_shape
136
+ - exp/asr_stats_fbank_pitch_word_sp/train/text_shape.word
137
+ valid_shape_file:
138
+ - exp/asr_stats_fbank_pitch_word_sp/valid/speech_shape
139
+ - exp/asr_stats_fbank_pitch_word_sp/valid/text_shape.word
140
+ batch_type: numel
141
+ valid_batch_type: null
142
+ fold_length:
143
+ - 800
144
+ - 150
145
+ sort_in_batch: descending
146
+ sort_batch: descending
147
+ multiple_iterator: false
148
+ chunk_length: 500
149
+ chunk_shift_ratio: 0.5
150
+ num_cache_chunks: 1024
151
+ train_data_path_and_name_and_type:
152
+ - - dump/fbank_pitch/train_sp/feats.scp
153
+ - speech
154
+ - kaldi_ark
155
+ - - dump/fbank_pitch/train_sp/text
156
+ - text
157
+ - text
158
+ valid_data_path_and_name_and_type:
159
+ - - dump/fbank_pitch/dev/feats.scp
160
+ - speech
161
+ - kaldi_ark
162
+ - - dump/fbank_pitch/dev/text
163
+ - text
164
+ - text
165
+ allow_variable_data_keys: false
166
+ max_cache_size: 0.0
167
+ max_cache_fd: 32
168
+ valid_max_cache_size: null
169
+ optim: adam
170
+ optim_conf:
171
+ lr: 0.0002
172
+ scheduler: warmuplr
173
+ scheduler_conf:
174
+ warmup_steps: 5000
175
+ token_list:
176
+ - <blank>
177
+ - <unk>
178
+ - 'yes'
179
+ - down
180
+ - 'no'
181
+ - stop
182
+ - go
183
+ - 'on'
184
+ - left
185
+ - right
186
+ - _unknown_
187
+ - _silence_
188
+ - 'off'
189
+ - up
190
+ - <sos/eos>
191
+ init: null
192
+ input_size: 83
193
+ ctc_conf:
194
+ dropout_rate: 0.0
195
+ ctc_type: builtin
196
+ reduce: true
197
+ ignore_nan_grad: true
198
+ model_conf:
199
+ ctc_weight: 0.0
200
+ lsm_weight: 0.1
201
+ length_normalized_loss: false
202
+ use_preprocessor: true
203
+ token_type: word
204
+ bpemodel: null
205
+ non_linguistic_symbols: null
206
+ cleaner: null
207
+ g2p: null
208
+ speech_volume_normalize: null
209
+ rir_scp: null
210
+ rir_apply_prob: 1.0
211
+ noise_scp: null
212
+ noise_apply_prob: 1.0
213
+ noise_db_range: '13_15'
214
+ frontend: null
215
+ frontend_conf: {}
216
+ specaug: specaug
217
+ specaug_conf:
218
+ apply_time_warp: true
219
+ time_warp_window: 5
220
+ time_warp_mode: bicubic
221
+ apply_freq_mask: true
222
+ freq_mask_width_range:
223
+ - 0
224
+ - 30
225
+ num_freq_mask: 2
226
+ apply_time_mask: true
227
+ time_mask_width_range:
228
+ - 0
229
+ - 40
230
+ num_time_mask: 2
231
+ normalize: global_mvn
232
+ normalize_conf:
233
+ stats_file: exp/asr_stats_fbank_pitch_word_sp/train/feats_stats.npz
234
+ preencoder: null
235
+ preencoder_conf: {}
236
+ encoder: conformer
237
+ encoder_conf:
238
+ output_size: 256
239
+ attention_heads: 4
240
+ linear_units: 2048
241
+ num_blocks: 12
242
+ dropout_rate: 0.1
243
+ positional_dropout_rate: 0.1
244
+ attention_dropout_rate: 0.1
245
+ input_layer: conv2d
246
+ normalize_before: true
247
+ macaron_style: true
248
+ rel_pos_type: legacy
249
+ pos_enc_layer_type: rel_pos
250
+ selfattention_layer_type: rel_selfattn
251
+ activation_type: swish
252
+ use_cnn_module: true
253
+ cnn_module_kernel: 15
254
+ postencoder: null
255
+ postencoder_conf: {}
256
+ decoder: transformer
257
+ decoder_conf:
258
+ attention_heads: 4
259
+ linear_units: 2048
260
+ num_blocks: 6
261
+ dropout_rate: 0.1
262
+ positional_dropout_rate: 0.1
263
+ self_attention_dropout_rate: 0.1
264
+ src_attention_dropout_rate: 0.1
265
+ required:
266
+ - output_dir
267
+ - token_list
268
+ version: 0.10.3a3
269
+ distributed: false
270
+ ```
271
+
272
+ </details>
273
+
274
+
275
+
276
+ ### Citing ESPnet
277
+
278
+ ```bibtex
279
+ @inproceedings{watanabe2018espnet,
280
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
281
+ title={{ESPnet}: End-to-End Speech Processing Toolkit},
282
+ year={2018},
283
+ booktitle={Proceedings of Interspeech},
284
+ pages={2207--2211},
285
+ doi={10.21437/Interspeech.2018-1456},
286
+ url={http://dx.doi.org/10.21437/Interspeech.2018-1456}
287
+ }
288
+
289
+
290
+
291
+
292
+ ```
293
+
294
+ or arXiv:
295
+
296
+ ```bibtex
297
+ @misc{watanabe2018espnet,
298
+ title={ESPnet: End-to-End Speech Processing Toolkit},
299
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
300
+ year={2018},
301
+ eprint={1804.00015},
302
+ archivePrefix={arXiv},
303
+ primaryClass={cs.CL}
304
+ }
305
+ ```
exp/asr_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds/RESULTS.md ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!-- Generated by scripts/utils/show_asr_result.sh -->
2
+ # RESULTS
3
+ ## Environments
4
+ - date: `Fri Dec 24 21:53:37 EST 2021`
5
+ - python version: `3.9.7 (default, Sep 16 2021, 13:09:58) [GCC 7.5.0]`
6
+ - espnet version: `espnet 0.10.5a1`
7
+ - pytorch version: `pytorch 1.9.0`
8
+ - Git hash: `3fd3dae71427d2ba5ecbc3fe0f2ae05db79acc29`
9
+ - Commit date: `Fri Dec 24 21:32:26 2021 -0500`
10
+
11
+ ## asr_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds
12
+ ### WER
13
+
14
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
15
+ |---|---|---|---|---|---|---|---|---|
16
+ |infer/dev|4605|4605|97.7|2.3|0.0|0.0|2.3|2.3|
17
+ |infer/test|4890|4890|97.9|2.1|0.0|0.0|2.1|2.1|
18
+ |infer/test_speechbrain|4886|4886|98.4|1.6|0.0|0.0|1.6|1.6|
19
+
20
+ ### CER
21
+
22
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
23
+ |---|---|---|---|---|---|---|---|---|
24
+ |infer/dev|4605|19541|98.6|0.9|0.5|1.0|2.5|2.3|
25
+ |infer/test|4890|19959|97.8|1.1|1.1|0.7|3.0|2.1|
26
+ |infer/test_speechbrain|4886|19923|98.7|0.7|0.6|0.6|1.9|1.6|
27
+
28
+ ### TER
29
+
30
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
31
+ |---|---|---|---|---|---|---|---|---|
exp/asr_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds/config.yaml ADDED
@@ -0,0 +1,205 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: conf/train_asr_conformer_noBatchNorm.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ dry_run: false
5
+ iterator_type: sequence
6
+ output_dir: exp/asr_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds
7
+ ngpu: 1
8
+ seed: 0
9
+ num_workers: 1
10
+ num_att_plot: 3
11
+ dist_backend: nccl
12
+ dist_init_method: env://
13
+ dist_world_size: null
14
+ dist_rank: null
15
+ local_rank: 0
16
+ dist_master_addr: null
17
+ dist_master_port: null
18
+ dist_launcher: null
19
+ multiprocessing_distributed: false
20
+ unused_parameters: false
21
+ sharded_ddp: false
22
+ cudnn_enabled: true
23
+ cudnn_benchmark: false
24
+ cudnn_deterministic: true
25
+ collect_stats: false
26
+ write_collected_feats: false
27
+ max_epoch: 150
28
+ patience: null
29
+ val_scheduler_criterion:
30
+ - valid
31
+ - loss
32
+ early_stopping_criterion:
33
+ - valid
34
+ - loss
35
+ - min
36
+ best_model_criterion:
37
+ - - valid
38
+ - loss
39
+ - min
40
+ - - valid
41
+ - acc
42
+ - max
43
+ keep_nbest_models: 10
44
+ grad_clip: 5.0
45
+ grad_clip_type: 2.0
46
+ grad_noise: false
47
+ accum_grad: 3
48
+ no_forward_run: false
49
+ resume: true
50
+ train_dtype: float32
51
+ use_amp: false
52
+ log_interval: null
53
+ use_tensorboard: true
54
+ use_wandb: false
55
+ wandb_project: null
56
+ wandb_id: null
57
+ wandb_entity: null
58
+ wandb_name: null
59
+ wandb_model_log_interval: -1
60
+ detect_anomaly: false
61
+ pretrain_path: null
62
+ init_param: []
63
+ ignore_init_mismatch: false
64
+ freeze_param: []
65
+ num_iters_per_epoch: null
66
+ batch_size: 20
67
+ valid_batch_size: null
68
+ batch_bins: 4000000
69
+ valid_batch_bins: null
70
+ train_shape_file:
71
+ - exp/asr_stats_fbank_pitch_word_sp/train/speech_shape
72
+ - exp/asr_stats_fbank_pitch_word_sp/train/text_shape.word
73
+ valid_shape_file:
74
+ - exp/asr_stats_fbank_pitch_word_sp/valid/speech_shape
75
+ - exp/asr_stats_fbank_pitch_word_sp/valid/text_shape.word
76
+ batch_type: numel
77
+ valid_batch_type: null
78
+ fold_length:
79
+ - 800
80
+ - 150
81
+ sort_in_batch: descending
82
+ sort_batch: descending
83
+ multiple_iterator: false
84
+ chunk_length: 500
85
+ chunk_shift_ratio: 0.5
86
+ num_cache_chunks: 1024
87
+ train_data_path_and_name_and_type:
88
+ - - dump/fbank_pitch/train_sp/feats.scp
89
+ - speech
90
+ - kaldi_ark
91
+ - - dump/fbank_pitch/train_sp/text
92
+ - text
93
+ - text
94
+ valid_data_path_and_name_and_type:
95
+ - - dump/fbank_pitch/dev/feats.scp
96
+ - speech
97
+ - kaldi_ark
98
+ - - dump/fbank_pitch/dev/text
99
+ - text
100
+ - text
101
+ allow_variable_data_keys: false
102
+ max_cache_size: 0.0
103
+ max_cache_fd: 32
104
+ valid_max_cache_size: null
105
+ optim: adam
106
+ optim_conf:
107
+ lr: 0.0002
108
+ scheduler: warmuplr
109
+ scheduler_conf:
110
+ warmup_steps: 5000
111
+ token_list:
112
+ - <blank>
113
+ - <unk>
114
+ - 'yes'
115
+ - down
116
+ - 'no'
117
+ - stop
118
+ - go
119
+ - 'on'
120
+ - left
121
+ - right
122
+ - _unknown_
123
+ - _silence_
124
+ - 'off'
125
+ - up
126
+ - <sos/eos>
127
+ init: null
128
+ input_size: 83
129
+ ctc_conf:
130
+ dropout_rate: 0.0
131
+ ctc_type: builtin
132
+ reduce: true
133
+ ignore_nan_grad: true
134
+ model_conf:
135
+ ctc_weight: 0.0
136
+ lsm_weight: 0.1
137
+ length_normalized_loss: false
138
+ use_preprocessor: true
139
+ token_type: word
140
+ bpemodel: null
141
+ non_linguistic_symbols: null
142
+ cleaner: null
143
+ g2p: null
144
+ speech_volume_normalize: null
145
+ rir_scp: null
146
+ rir_apply_prob: 1.0
147
+ noise_scp: null
148
+ noise_apply_prob: 1.0
149
+ noise_db_range: '13_15'
150
+ frontend: null
151
+ frontend_conf: {}
152
+ specaug: specaug
153
+ specaug_conf:
154
+ apply_time_warp: true
155
+ time_warp_window: 5
156
+ time_warp_mode: bicubic
157
+ apply_freq_mask: true
158
+ freq_mask_width_range:
159
+ - 0
160
+ - 30
161
+ num_freq_mask: 2
162
+ apply_time_mask: true
163
+ time_mask_width_range:
164
+ - 0
165
+ - 40
166
+ num_time_mask: 2
167
+ normalize: global_mvn
168
+ normalize_conf:
169
+ stats_file: exp/asr_stats_fbank_pitch_word_sp/train/feats_stats.npz
170
+ preencoder: null
171
+ preencoder_conf: {}
172
+ encoder: conformer
173
+ encoder_conf:
174
+ output_size: 256
175
+ attention_heads: 4
176
+ linear_units: 2048
177
+ num_blocks: 12
178
+ dropout_rate: 0.1
179
+ positional_dropout_rate: 0.1
180
+ attention_dropout_rate: 0.1
181
+ input_layer: conv2d
182
+ normalize_before: true
183
+ macaron_style: true
184
+ rel_pos_type: legacy
185
+ pos_enc_layer_type: rel_pos
186
+ selfattention_layer_type: rel_selfattn
187
+ activation_type: swish
188
+ use_cnn_module: true
189
+ cnn_module_kernel: 15
190
+ postencoder: null
191
+ postencoder_conf: {}
192
+ decoder: transformer
193
+ decoder_conf:
194
+ attention_heads: 4
195
+ linear_units: 2048
196
+ num_blocks: 6
197
+ dropout_rate: 0.1
198
+ positional_dropout_rate: 0.1
199
+ self_attention_dropout_rate: 0.1
200
+ src_attention_dropout_rate: 0.1
201
+ required:
202
+ - output_dir
203
+ - token_list
204
+ version: 0.10.3a3
205
+ distributed: false
exp/asr_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds/images/acc.png ADDED
exp/asr_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds/images/backward_time.png ADDED
exp/asr_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds/images/cer.png ADDED
exp/asr_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds/images/cer_ctc.png ADDED
exp/asr_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds/images/forward_time.png ADDED
exp/asr_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds/images/gpu_max_cached_mem_GB.png ADDED
exp/asr_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds/images/iter_time.png ADDED
exp/asr_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds/images/loss.png ADDED
exp/asr_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds/images/loss_att.png ADDED
exp/asr_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds/images/loss_ctc.png ADDED
exp/asr_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds/images/optim0_lr0.png ADDED
exp/asr_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds/images/optim_step_time.png ADDED
exp/asr_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds/images/train_time.png ADDED
exp/asr_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds/images/wer.png ADDED
exp/asr_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds/valid.acc.ave_10best.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9eaa1636971d2d6f6deccb195e63c9718140dca2e8b1f61507dafe6a0de6da22
3
+ size 172237411
exp/asr_stats_fbank_pitch_word_sp/train/feats_stats.npz ADDED
Binary file (1.43 kB). View file
 
meta.yaml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ espnet: 0.10.5a1
2
+ files:
3
+ asr_model_file: exp/asr_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds/valid.acc.ave_10best.pth
4
+ python: "3.9.7 (default, Sep 16 2021, 13:09:58) \n[GCC 7.5.0]"
5
+ timestamp: 1640400817.795981
6
+ torch: 1.9.0
7
+ yaml_files:
8
+ asr_train_config: exp/asr_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds/config.yaml