Spaces:
Sleeping
Sleeping
update
Browse files
toolbox/k2_sherpa/nn_models.py
CHANGED
@@ -393,6 +393,34 @@ model_map = {
|
|
393 |
"loader": "load_sherpa_onnx_offline_recognizer_from_transducer",
|
394 |
},
|
395 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
396 |
"Tibetan": [
|
397 |
{
|
398 |
"repo_id": "syzym/icefall-asr-xbmu-amdo31-pruned-transducer-stateless7-2022-12-02",
|
@@ -555,6 +583,44 @@ def load_sherpa_onnx_offline_recognizer_from_whisper(encoder_model_file: str,
|
|
555 |
return recognizer
|
556 |
|
557 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
558 |
def load_sherpa_onnx_online_recognizer_from_transducer(encoder_model_file: str,
|
559 |
decoder_model_file: str,
|
560 |
joiner_model_file: str,
|
@@ -653,6 +719,12 @@ def load_recognizer(local_model_dir: Path,
|
|
653 |
recognizer = load_sherpa_onnx_offline_recognizer_from_whisper(
|
654 |
**kwargs_
|
655 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
656 |
elif loader == "load_sherpa_onnx_online_recognizer_from_transducer":
|
657 |
recognizer = load_sherpa_onnx_online_recognizer_from_transducer(
|
658 |
**kwargs_
|
|
|
393 |
"loader": "load_sherpa_onnx_offline_recognizer_from_transducer",
|
394 |
},
|
395 |
],
|
396 |
+
"Japanese": [
|
397 |
+
{
|
398 |
+
"repo_id": "TeoWenShen/icefall-asr-csj-pruned-transducer-stateless7-streaming-230208-fluent",
|
399 |
+
"encoder_model_file": "encoder_jit_trace.pt",
|
400 |
+
"encoder_model_file_sub_folder": "exp_fluent",
|
401 |
+
"decoder_model_file": "decoder_jit_trace.pt",
|
402 |
+
"decoder_model_file_sub_folder": "exp_fluent",
|
403 |
+
"joiner_model_file": "joiner_jit_trace.pt",
|
404 |
+
"joiner_model_file_sub_folder": "exp_fluent",
|
405 |
+
"tokens_file": "tokens.txt",
|
406 |
+
"tokens_file_sub_folder": "data/lang_char",
|
407 |
+
"normalize_samples": True,
|
408 |
+
"loader": "load_sherpa_online_recognizer",
|
409 |
+
},
|
410 |
+
{
|
411 |
+
"repo_id": "TeoWenShen/icefall-asr-csj-pruned-transducer-stateless7-streaming-230208-disfluent",
|
412 |
+
"encoder_model_file": "encoder_jit_trace.pt",
|
413 |
+
"encoder_model_file_sub_folder": "exp_disfluent",
|
414 |
+
"decoder_model_file": "decoder_jit_trace.pt",
|
415 |
+
"decoder_model_file_sub_folder": "exp_disfluent",
|
416 |
+
"joiner_model_file": "joiner_jit_trace.pt",
|
417 |
+
"joiner_model_file_sub_folder": "exp_disfluent",
|
418 |
+
"tokens_file": "tokens.txt",
|
419 |
+
"tokens_file_sub_folder": "data/lang_char",
|
420 |
+
"normalize_samples": True,
|
421 |
+
"loader": "load_sherpa_online_recognizer",
|
422 |
+
},
|
423 |
+
],
|
424 |
"Tibetan": [
|
425 |
{
|
426 |
"repo_id": "syzym/icefall-asr-xbmu-amdo31-pruned-transducer-stateless7-2022-12-02",
|
|
|
583 |
return recognizer
|
584 |
|
585 |
|
586 |
+
def load_sherpa_online_recognizer(nn_model_file: str,
                                  encoder_model_file: str,
                                  decoder_model_file: str,
                                  joiner_model_file: str,
                                  tokens_file: str,
                                  sample_rate: int = 16000,
                                  num_active_paths: int = 2,
                                  decoding_method: str = "greedy_search",
                                  num_mel_bins: int = 80,
                                  frame_dither: int = 0,
                                  normalize_samples: bool = False,
                                  ):
    """
    Build a streaming (online) sherpa recognizer that runs on CPU.

    :param nn_model_file: path passed to the config as ``nn_model``; it must exist on disk.
    :param encoder_model_file: path passed to the config as ``encoder_model``.
    :param decoder_model_file: path passed to the config as ``decoder_model``.
    :param joiner_model_file: path passed to the config as ``joiner_model``.
    :param tokens_file: path passed to the config as ``tokens``.
    :param sample_rate: sampling frequency written into the fbank frame options.
    :param num_active_paths: forwarded to the recognizer config unchanged.
    :param decoding_method: forwarded to the recognizer config unchanged.
    :param num_mel_bins: number of mel bins written into the fbank mel options.
    :param frame_dither: dither value written into the fbank frame options.
    :param normalize_samples: forwarded to ``sherpa.FeatureConfig``.
    :return: a ``sherpa.OnlineRecognizer`` built from the assembled config.
    :raises AssertionError: if ``nn_model_file`` does not exist.
    """
    feat_config = sherpa.FeatureConfig(normalize_samples=normalize_samples)
    feat_config.fbank_opts.frame_opts.samp_freq = sample_rate
    feat_config.fbank_opts.mel_opts.num_bins = num_mel_bins
    feat_config.fbank_opts.frame_opts.dither = frame_dither

    # NOTE(review): only nn_model_file is checked for existence; the
    # encoder/decoder/joiner paths are passed through unchecked. Confirm that
    # callers always supply a real nn_model_file for split transducer models.
    if not os.path.exists(nn_model_file):
        raise AssertionError("nn_model_file not found. nn_model_file: {}".format(nn_model_file))

    # An OnlineRecognizer must be configured with an OnlineRecognizerConfig;
    # the previous code built an OfflineRecognizerConfig here, which is the
    # wrong config type for the streaming recognizer constructed below.
    config = sherpa.OnlineRecognizerConfig(
        nn_model=nn_model_file,
        encoder_model=encoder_model_file,
        decoder_model=decoder_model_file,
        joiner_model=joiner_model_file,
        tokens=tokens_file,
        use_gpu=False,
        feat_config=feat_config,
        decoding_method=decoding_method,
        num_active_paths=num_active_paths,
        chunk_size=32,
    )

    recognizer = sherpa.OnlineRecognizer(config)

    return recognizer
|
622 |
+
|
623 |
+
|
624 |
def load_sherpa_onnx_online_recognizer_from_transducer(encoder_model_file: str,
|
625 |
decoder_model_file: str,
|
626 |
joiner_model_file: str,
|
|
|
719 |
recognizer = load_sherpa_onnx_offline_recognizer_from_whisper(
|
720 |
**kwargs_
|
721 |
)
|
722 |
+
elif loader == "load_sherpa_online_recognizer":
|
723 |
+
recognizer = load_sherpa_online_recognizer(
|
724 |
+
decoding_method=decoding_method,
|
725 |
+
num_active_paths=num_active_paths,
|
726 |
+
**kwargs_
|
727 |
+
)
|
728 |
elif loader == "load_sherpa_onnx_online_recognizer_from_transducer":
|
729 |
recognizer = load_sherpa_onnx_online_recognizer_from_transducer(
|
730 |
**kwargs_
|