speechbrain
/

asr-conformer-largescaleasr

Automatic Speech Recognition

hf-asr-leaderboard

Model card Files Files and versions Community

Titouan committed on Feb 7

Commit

234d8a4

·

1 Parent(s): 82b04aa

update doc and yaml

Files changed (2) hide show

README.md +6 -5
hyperparams.yaml +2 -2

README.md CHANGED Viewed

@@ -62,16 +62,17 @@ Please notice that we encourage you to read our tutorials and learn more about
 ```python
 from speechbrain.inference.ASR import EncoderDecoderASR
 asr_model = EncoderDecoderASR.from_hparams(source="speechbrain/asr-conformer-largescaleasr", savedir="pretrained_models/asr-conformer-largescaleasr")
-# For a full decoding with a large beam size (can be slow):
-asr_model.transcribe_file("speechbrain/asr-conformer-largescaleasr/example.wav")
-# For smaller beam size:
-asr_model.transcribe_file("speechbrain/asr-conformer-largescaleasr/example.wav", overrides={"test_beam_size":"10"})
 # For even faster decoding
-asr_model.transcribe_file("speechbrain/asr-conformer-largescaleasr/example.wav", overrides={"test_beam_size":"10", "ctc_weight_decode":0.0})
 ```
 ### Inference on GPU

 ```python
 from speechbrain.inference.ASR import EncoderDecoderASR
+# For a full decoding with a large beam size (can be slow):
 asr_model = EncoderDecoderASR.from_hparams(source="speechbrain/asr-conformer-largescaleasr", savedir="pretrained_models/asr-conformer-largescaleasr")
+# For greedy decoding:
+asr_model = EncoderDecoderASR.from_hparams(source="speechbrain/asr-conformer-largescaleasr", savedir="pretrained_models/asr-conformer-largescaleasr", overrides={"test_beam_size":"1"})
+# For attention-only decoding (faster):
+asr_model = EncoderDecoderASR.from_hparams(source="speechbrain/asr-conformer-largescaleasr", savedir="pretrained_models/asr-conformer-largescaleasr", overrides={"scorer":None, "ctc_weight_decode":0.0})
 # For even faster decoding
+asr_model.transcribe_file("speechbrain/asr-conformer-largescaleasr/example.wav")
 ```
 ### Inference on GPU

hyperparams.yaml CHANGED Viewed

@@ -34,7 +34,7 @@ eos_index: 2
 # Decoding parameters
 min_decode_ratio: 0.0
 max_decode_ratio: 1.0
-test_beam_size: 80
 ctc_weight_decode: 0.3
 scorer_beam_scale: 0.3
 transformer_beam_search: True
@@ -112,7 +112,7 @@ decoder: !new:speechbrain.decoders.S2STransformerBeamSearcher
     eos_index: !ref <eos_index>
     min_decode_ratio: !ref <min_decode_ratio>
     max_decode_ratio: !ref <max_decode_ratio>
-    beam_size: 3
     temperature: 1.15
     using_eos_threshold: True
     scorer: !ref <scorer>

 # Decoding parameters
 min_decode_ratio: 0.0
 max_decode_ratio: 1.0
+test_beam_size: 10
 ctc_weight_decode: 0.3
 scorer_beam_scale: 0.3
 transformer_beam_search: True
     eos_index: !ref <eos_index>
     min_decode_ratio: !ref <min_decode_ratio>
     max_decode_ratio: !ref <max_decode_ratio>
+    beam_size: !ref <test_beam_size>
     temperature: 1.15
     using_eos_threshold: True
     scorer: !ref <scorer>