HoneyTian committed on
Commit
4a879cc
·
1 Parent(s): 961a8f0
toolbox/k2_sherpa/examples.py CHANGED
@@ -19,4 +19,36 @@ examples = [
19
  "Yes",
20
  "./data/test_wavs/librispeech/1089-134686-0001.wav",
21
  ],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  ]
 
19
  "Yes",
20
  "./data/test_wavs/librispeech/1089-134686-0001.wav",
21
  ],
22
+ [
23
+ "Chinese+English",
24
+ "csukuangfj/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20",
25
+ "greedy_search",
26
+ 4,
27
+ "Yes",
28
+ "./data/test_wavs/tal_csasr/0.wav",
29
+ ],
30
+ [
31
+ "Chinese+English+Cantonese",
32
+ "csukuangfj/sherpa-onnx-paraformer-trilingual-zh-cantonese-en",
33
+ "greedy_search",
34
+ 4,
35
+ "Yes",
36
+ "./data/test_wavs/cantonese/2.wav",
37
+ ],
38
+ [
39
+ "Cantonese",
40
+ "zrjin/icefall-asr-mdcc-zipformer-2024-03-11",
41
+ "greedy_search",
42
+ 4,
43
+ "Yes",
44
+ "./data/test_wavs/cantonese/1.wav",
45
+ ],
46
+ [
47
+ "Tibetan",
48
+ "syzym/icefall-asr-xbmu-amdo31-pruned-transducer-stateless7-2022-12-02",
49
+ "greedy_search",
50
+ 4,
51
+ "No",
52
+ "./data/test_wavs/tibetan/a_0_cacm-A70_31117.wav",
53
+ ],
54
  ]
toolbox/k2_sherpa/nn_models.py CHANGED
@@ -65,7 +65,7 @@ model_map = {
65
  "joiner_model_file_sub_folder": ".",
66
  "tokens_file": "tokens.txt",
67
  "tokens_file_sub_folder": ".",
68
- "loader": "load_sherpa_offline_recognizer_from_transducer",
69
  },
70
  {
71
  "repo_id": "zrjin/icefall-asr-aishell-zipformer-large-2023-10-24",
@@ -77,7 +77,7 @@ model_map = {
77
  "joiner_model_file_sub_folder": "exp",
78
  "tokens_file": "tokens.txt",
79
  "tokens_file_sub_folder": "data/lang_char",
80
- "loader": "load_sherpa_offline_recognizer_from_transducer",
81
  },
82
  {
83
  "repo_id": "zrjin/icefall-asr-aishell-zipformer-small-2023-10-24",
@@ -89,7 +89,7 @@ model_map = {
89
  "joiner_model_file_sub_folder": "exp",
90
  "tokens_file": "tokens.txt",
91
  "tokens_file_sub_folder": "data/lang_char",
92
- "loader": "load_sherpa_offline_recognizer_from_transducer",
93
  },
94
  {
95
  "repo_id": "zrjin/icefall-asr-aishell-zipformer-2023-10-24",
@@ -101,7 +101,7 @@ model_map = {
101
  "joiner_model_file_sub_folder": "exp",
102
  "tokens_file": "tokens.txt",
103
  "tokens_file_sub_folder": "data/lang_char",
104
- "loader": "load_sherpa_offline_recognizer_from_transducer",
105
  },
106
  {
107
  "repo_id": "desh2608/icefall-asr-alimeeting-pruned-transducer-stateless7",
@@ -158,7 +158,7 @@ model_map = {
158
  "decoder_model_file_sub_folder": ".",
159
  "tokens_file": "tiny.en-tokens.txt",
160
  "tokens_file_sub_folder": ".",
161
- "loader": "load_sherpa_offline_recognizer_from_whisper",
162
  },
163
  {
164
  "repo_id": "csukuangfj/sherpa-onnx-whisper-base.en",
@@ -168,7 +168,7 @@ model_map = {
168
  "decoder_model_file_sub_folder": ".",
169
  "tokens_file": "base.en-tokens.txt",
170
  "tokens_file_sub_folder": ".",
171
- "loader": "load_sherpa_offline_recognizer_from_whisper",
172
  },
173
  {
174
  "repo_id": "csukuangfj/sherpa-onnx-whisper-small.en",
@@ -178,7 +178,7 @@ model_map = {
178
  "decoder_model_file_sub_folder": ".",
179
  "tokens_file": "small.en-tokens.txt",
180
  "tokens_file_sub_folder": ".",
181
- "loader": "load_sherpa_offline_recognizer_from_whisper",
182
  },
183
  {
184
  "repo_id": "csukuangfj/sherpa-onnx-paraformer-en-2024-03-09",
@@ -198,7 +198,7 @@ model_map = {
198
  "joiner_model_file_sub_folder": "exp",
199
  "tokens_file": "tokens.txt",
200
  "tokens_file_sub_folder": "data/lang_bpe_500",
201
- "loader": "load_sherpa_offline_recognizer_from_transducer",
202
  },
203
  {
204
  "repo_id": "wgb14/icefall-asr-gigaspeech-pruned-transducer-stateless2",
@@ -332,7 +332,7 @@ model_map = {
332
  "joiner_model_file_sub_folder": ".",
333
  "tokens_file": "tokens.txt",
334
  "tokens_file_sub_folder": ".",
335
- "loader": "load_sherpa_online_recognizer_from_transducer",
336
  },
337
  {
338
  "repo_id": "csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28",
@@ -359,16 +359,58 @@ model_map = {
359
  "loader": "load_sherpa_offline_recognizer",
360
  },
361
  ],
362
- "Chinese+Cantonese+English": [
363
  {
364
  "repo_id": "csukuangfj/sherpa-onnx-paraformer-trilingual-zh-cantonese-en",
365
  "nn_model_file": "model.int8.onnx",
366
  "nn_model_file_sub_folder": ".",
367
  "tokens_file": "tokens.txt",
368
  "tokens_file_sub_folder": ".",
369
- "loader": "load_sherpa_offline_recognizer_from_paraformer",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
370
  },
371
- ]
 
 
 
 
 
 
 
 
372
  }
373
 
374
 
@@ -456,13 +498,13 @@ def load_sherpa_offline_recognizer(nn_model_file: str,
456
  return recognizer
457
 
458
 
459
- def load_sherpa_offline_recognizer_from_paraformer(nn_model_file: str,
460
- tokens_file: str,
461
- sample_rate: int = 16000,
462
- decoding_method: str = "greedy_search",
463
- feature_dim: int = 80,
464
- num_threads: int = 2,
465
- ):
466
  recognizer = sherpa_onnx.OfflineRecognizer.from_paraformer(
467
  paraformer=nn_model_file,
468
  tokens=tokens_file,
@@ -475,16 +517,16 @@ def load_sherpa_offline_recognizer_from_paraformer(nn_model_file: str,
475
  return recognizer
476
 
477
 
478
- def load_sherpa_offline_recognizer_from_transducer(encoder_model_file: str,
479
- decoder_model_file: str,
480
- joiner_model_file: str,
481
- tokens_file: str,
482
- sample_rate: int = 16000,
483
- decoding_method: str = "greedy_search",
484
- feature_dim: int = 80,
485
- num_threads: int = 2,
486
- num_active_paths: int = 2,
487
- ):
488
  recognizer = sherpa_onnx.OfflineRecognizer.from_transducer(
489
  encoder=encoder_model_file,
490
  decoder=decoder_model_file,
@@ -499,11 +541,11 @@ def load_sherpa_offline_recognizer_from_transducer(encoder_model_file: str,
499
  return recognizer
500
 
501
 
502
- def load_sherpa_offline_recognizer_from_whisper(encoder_model_file: str,
503
- decoder_model_file: str,
504
- tokens_file: str,
505
- num_threads: int = 2,
506
- ):
507
  recognizer = sherpa_onnx.OfflineRecognizer.from_whisper(
508
  encoder=encoder_model_file,
509
  decoder=decoder_model_file,
@@ -513,17 +555,17 @@ def load_sherpa_offline_recognizer_from_whisper(encoder_model_file: str,
513
  return recognizer
514
 
515
 
516
- def load_sherpa_online_recognizer_from_transducer(encoder_model_file: str,
517
- decoder_model_file: str,
518
- joiner_model_file: str,
519
- tokens_file: str,
520
- sample_rate: int = 16000,
521
- decoding_method: str = "greedy_search",
522
- feature_dim: int = 80,
523
- num_threads: int = 2,
524
- num_active_paths: int = 2,
525
- ):
526
- recognizer = sherpa_onnx.OfflineRecognizer.from_transducer(
527
  encoder=encoder_model_file,
528
  decoder=decoder_model_file,
529
  joiner=joiner_model_file,
@@ -537,6 +579,26 @@ def load_sherpa_online_recognizer_from_transducer(encoder_model_file: str,
537
  return recognizer
538
 
539
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
540
  def load_recognizer(local_model_dir: Path,
541
  decoding_method: str = "greedy_search",
542
  num_active_paths: int = 4,
@@ -577,22 +639,26 @@ def load_recognizer(local_model_dir: Path,
577
  num_active_paths=num_active_paths,
578
  **kwargs_
579
  )
580
- elif loader == "load_sherpa_offline_recognizer_from_paraformer":
581
- recognizer = load_sherpa_offline_recognizer_from_paraformer(
582
  decoding_method=decoding_method,
583
  **kwargs_
584
  )
585
- elif loader == "load_sherpa_offline_recognizer_from_transducer":
586
- recognizer = load_sherpa_offline_recognizer_from_transducer(
587
  decoding_method=decoding_method,
588
  **kwargs_
589
  )
590
- elif loader == "load_sherpa_offline_recognizer_from_whisper":
591
- recognizer = load_sherpa_offline_recognizer_from_whisper(
 
 
 
 
592
  **kwargs_
593
  )
594
- elif loader == "load_sherpa_online_recognizer_from_transducer":
595
- recognizer = load_sherpa_online_recognizer_from_transducer(
596
  **kwargs_
597
  )
598
  else:
 
65
  "joiner_model_file_sub_folder": ".",
66
  "tokens_file": "tokens.txt",
67
  "tokens_file_sub_folder": ".",
68
+ "loader": "load_sherpa_onnx_offline_recognizer_from_transducer",
69
  },
70
  {
71
  "repo_id": "zrjin/icefall-asr-aishell-zipformer-large-2023-10-24",
 
77
  "joiner_model_file_sub_folder": "exp",
78
  "tokens_file": "tokens.txt",
79
  "tokens_file_sub_folder": "data/lang_char",
80
+ "loader": "load_sherpa_onnx_offline_recognizer_from_transducer",
81
  },
82
  {
83
  "repo_id": "zrjin/icefall-asr-aishell-zipformer-small-2023-10-24",
 
89
  "joiner_model_file_sub_folder": "exp",
90
  "tokens_file": "tokens.txt",
91
  "tokens_file_sub_folder": "data/lang_char",
92
+ "loader": "load_sherpa_onnx_offline_recognizer_from_transducer",
93
  },
94
  {
95
  "repo_id": "zrjin/icefall-asr-aishell-zipformer-2023-10-24",
 
101
  "joiner_model_file_sub_folder": "exp",
102
  "tokens_file": "tokens.txt",
103
  "tokens_file_sub_folder": "data/lang_char",
104
+ "loader": "load_sherpa_onnx_offline_recognizer_from_transducer",
105
  },
106
  {
107
  "repo_id": "desh2608/icefall-asr-alimeeting-pruned-transducer-stateless7",
 
158
  "decoder_model_file_sub_folder": ".",
159
  "tokens_file": "tiny.en-tokens.txt",
160
  "tokens_file_sub_folder": ".",
161
+ "loader": "load_sherpa_onnx_offline_recognizer_from_whisper",
162
  },
163
  {
164
  "repo_id": "csukuangfj/sherpa-onnx-whisper-base.en",
 
168
  "decoder_model_file_sub_folder": ".",
169
  "tokens_file": "base.en-tokens.txt",
170
  "tokens_file_sub_folder": ".",
171
+ "loader": "load_sherpa_onnx_offline_recognizer_from_whisper",
172
  },
173
  {
174
  "repo_id": "csukuangfj/sherpa-onnx-whisper-small.en",
 
178
  "decoder_model_file_sub_folder": ".",
179
  "tokens_file": "small.en-tokens.txt",
180
  "tokens_file_sub_folder": ".",
181
+ "loader": "load_sherpa_onnx_offline_recognizer_from_whisper",
182
  },
183
  {
184
  "repo_id": "csukuangfj/sherpa-onnx-paraformer-en-2024-03-09",
 
198
  "joiner_model_file_sub_folder": "exp",
199
  "tokens_file": "tokens.txt",
200
  "tokens_file_sub_folder": "data/lang_bpe_500",
201
+ "loader": "load_sherpa_onnx_offline_recognizer_from_transducer",
202
  },
203
  {
204
  "repo_id": "wgb14/icefall-asr-gigaspeech-pruned-transducer-stateless2",
 
332
  "joiner_model_file_sub_folder": ".",
333
  "tokens_file": "tokens.txt",
334
  "tokens_file_sub_folder": ".",
335
+ "loader": "load_sherpa_onnx_online_recognizer_from_transducer",
336
  },
337
  {
338
  "repo_id": "csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28",
 
359
  "loader": "load_sherpa_offline_recognizer",
360
  },
361
  ],
362
+ "Chinese+English+Cantonese": [
363
  {
364
  "repo_id": "csukuangfj/sherpa-onnx-paraformer-trilingual-zh-cantonese-en",
365
  "nn_model_file": "model.int8.onnx",
366
  "nn_model_file_sub_folder": ".",
367
  "tokens_file": "tokens.txt",
368
  "tokens_file_sub_folder": ".",
369
+ "loader": "load_sherpa_onnx_offline_recognizer_from_paraformer",
370
+ },
371
+ {
372
+ "repo_id": "csukuangfj/sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en",
373
+ "encoder_model_file": "encoder.int8.onnx",
374
+ "encoder_model_file_sub_folder": ".",
375
+ "decoder_model_file": "decoder.int8.onnx",
376
+ "decoder_model_file_sub_folder": ".",
377
+ "tokens_file": "tokens.txt",
378
+ "tokens_file_sub_folder": ".",
379
+ "loader": "load_sherpa_onnx_online_recognizer_from_paraformer",
380
+ },
381
+ ],
382
+ "Cantonese": [
383
+ {
384
+ "repo_id": "zrjin/icefall-asr-mdcc-zipformer-2024-03-11",
385
+ "encoder_model_file": "encoder-epoch-45-avg-35.int8.onnx",
386
+ "encoder_model_file_sub_folder": "exp",
387
+ "decoder_model_file": "decoder-epoch-45-avg-35.onnx",
388
+ "decoder_model_file_sub_folder": "exp",
389
+ "joiner_model_file": "joiner-epoch-45-avg-35.int8.onnx",
390
+ "joiner_model_file_sub_folder": "exp",
391
+ "tokens_file": "tokens.txt",
392
+ "tokens_file_sub_folder": "data/lang_char",
393
+ "loader": "load_sherpa_onnx_offline_recognizer_from_transducer",
394
+ },
395
+ ],
396
+ "Tibetan": [
397
+ {
398
+ "repo_id": "syzym/icefall-asr-xbmu-amdo31-pruned-transducer-stateless7-2022-12-02",
399
+ "nn_model_file": "cpu_jit.pt",
400
+ "nn_model_file_sub_folder": "exp",
401
+ "tokens_file": "tokens.txt",
402
+ "tokens_file_sub_folder": "data/lang_bpe_500",
403
+ "loader": "load_sherpa_offline_recognizer",
404
  },
405
+ {
406
+ "repo_id": "syzym/icefall-asr-xbmu-amdo31-pruned-transducer-stateless5-2022-11-29",
407
+ "nn_model_file": "cpu_jit-epoch-28-avg-23-torch-1.10.0.pt",
408
+ "nn_model_file_sub_folder": "exp",
409
+ "tokens_file": "tokens.txt",
410
+ "tokens_file_sub_folder": "data/lang_bpe_500",
411
+ "loader": "load_sherpa_offline_recognizer",
412
+ },
413
+ ],
414
  }
415
 
416
 
 
498
  return recognizer
499
 
500
 
501
+ def load_sherpa_onnx_offline_recognizer_from_paraformer(nn_model_file: str,
502
+ tokens_file: str,
503
+ sample_rate: int = 16000,
504
+ decoding_method: str = "greedy_search",
505
+ feature_dim: int = 80,
506
+ num_threads: int = 2,
507
+ ):
508
  recognizer = sherpa_onnx.OfflineRecognizer.from_paraformer(
509
  paraformer=nn_model_file,
510
  tokens=tokens_file,
 
517
  return recognizer
518
 
519
 
520
+ def load_sherpa_onnx_offline_recognizer_from_transducer(encoder_model_file: str,
521
+ decoder_model_file: str,
522
+ joiner_model_file: str,
523
+ tokens_file: str,
524
+ sample_rate: int = 16000,
525
+ decoding_method: str = "greedy_search",
526
+ feature_dim: int = 80,
527
+ num_threads: int = 2,
528
+ num_active_paths: int = 2,
529
+ ):
530
  recognizer = sherpa_onnx.OfflineRecognizer.from_transducer(
531
  encoder=encoder_model_file,
532
  decoder=decoder_model_file,
 
541
  return recognizer
542
 
543
 
544
+ def load_sherpa_onnx_offline_recognizer_from_whisper(encoder_model_file: str,
545
+ decoder_model_file: str,
546
+ tokens_file: str,
547
+ num_threads: int = 2,
548
+ ):
549
  recognizer = sherpa_onnx.OfflineRecognizer.from_whisper(
550
  encoder=encoder_model_file,
551
  decoder=decoder_model_file,
 
555
  return recognizer
556
 
557
 
558
+ def load_sherpa_onnx_online_recognizer_from_transducer(encoder_model_file: str,
559
+ decoder_model_file: str,
560
+ joiner_model_file: str,
561
+ tokens_file: str,
562
+ sample_rate: int = 16000,
563
+ decoding_method: str = "greedy_search",
564
+ feature_dim: int = 80,
565
+ num_threads: int = 2,
566
+ num_active_paths: int = 2,
567
+ ):
568
+ recognizer = sherpa_onnx.OnlineRecognizer.from_transducer(
569
  encoder=encoder_model_file,
570
  decoder=decoder_model_file,
571
  joiner=joiner_model_file,
 
579
  return recognizer
580
 
581
 
582
+ def load_sherpa_onnx_online_recognizer_from_paraformer(encoder_model_file: str,
583
+ decoder_model_file: str,
584
+ tokens_file: str,
585
+ sample_rate: int = 16000,
586
+ decoding_method: str = "greedy_search",
587
+ feature_dim: int = 80,
588
+ num_threads: int = 2,
589
+ ):
590
+ recognizer = sherpa_onnx.OnlineRecognizer.from_paraformer(
591
+ encoder=encoder_model_file,
592
+ decoder=decoder_model_file,
593
+ tokens=tokens_file,
594
+ num_threads=num_threads,
595
+ sample_rate=sample_rate,
596
+ feature_dim=feature_dim,
597
+ decoding_method=decoding_method,
598
+ )
599
+ return recognizer
600
+
601
+
602
  def load_recognizer(local_model_dir: Path,
603
  decoding_method: str = "greedy_search",
604
  num_active_paths: int = 4,
 
639
  num_active_paths=num_active_paths,
640
  **kwargs_
641
  )
642
+ elif loader == "load_sherpa_onnx_offline_recognizer_from_paraformer":
643
+ recognizer = load_sherpa_onnx_offline_recognizer_from_paraformer(
644
  decoding_method=decoding_method,
645
  **kwargs_
646
  )
647
+ elif loader == "load_sherpa_onnx_offline_recognizer_from_transducer":
648
+ recognizer = load_sherpa_onnx_offline_recognizer_from_transducer(
649
  decoding_method=decoding_method,
650
  **kwargs_
651
  )
652
+ elif loader == "load_sherpa_onnx_offline_recognizer_from_whisper":
653
+ recognizer = load_sherpa_onnx_offline_recognizer_from_whisper(
654
+ **kwargs_
655
+ )
656
+ elif loader == "load_sherpa_onnx_online_recognizer_from_transducer":
657
+ recognizer = load_sherpa_onnx_online_recognizer_from_transducer(
658
  **kwargs_
659
  )
660
+ elif loader == "load_sherpa_onnx_online_recognizer_from_paraformer":
661
+ recognizer = load_sherpa_onnx_online_recognizer_from_paraformer(
662
  **kwargs_
663
  )
664
  else: