HoneyTian committed
Commit 03aae6c
Parent: f392af0
Files changed (2):
  1. main.py (+2, -1)
  2. toolbox/k2_sherpa/nn_models.py (+22, -12)
main.py CHANGED
@@ -105,8 +105,9 @@ def process(
     recognizer = nn_models.load_recognizer(
         repo_id=m_dict["repo_id"],
         nn_model_file=m_dict["nn_model_file"],
+        nn_model_file_sub_folder=m_dict["nn_model_file_sub_folder"],
         tokens_file=m_dict["tokens_file"],
-        sub_folder=m_dict["sub_folder"],
+        tokens_file_sub_folder=m_dict["tokens_file_sub_folder"],
         local_model_dir=local_model_dir,
         loader=m_dict["loader"],
         decoding_method=decoding_method,
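
Note that the call above reads both new keys with plain indexing, so every model_map entry in nn_models.py has to define them (the commit adds them to each entry below). A hypothetical entry showing the minimum set of keys this call consumes; the values are illustrative:

# Hypothetical model_map entry; key names match the call above, values are made up.
{
    "repo_id": "some-user/some-asr-model",
    "nn_model_file": "model.int8.onnx",
    "nn_model_file_sub_folder": ".",   # "." keeps the file at the repo root, as in most entries
    "tokens_file": "tokens.txt",
    "tokens_file_sub_folder": ".",
    "loader": "load_sherpa_offline_recognizer_from_paraformer",
}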
toolbox/k2_sherpa/nn_models.py CHANGED
@@ -20,30 +20,34 @@ model_map = {
         {
             "repo_id": "csukuangfj/wenet-chinese-model",
             "nn_model_file": "final.zip",
+            "nn_model_file_sub_folder": ".",
             "tokens_file": "units.txt",
-            "sub_folder": ".",
+            "tokens_file_sub_folder": ".",
             "loader": "load_sherpa_offline_recognizer",
             "normalize_samples": False,
         },
         {
             "repo_id": "csukuangfj/sherpa-onnx-paraformer-zh-2024-03-09",
             "nn_model_file": "model.int8.onnx",
+            "nn_model_file_sub_folder": ".",
             "tokens_file": "tokens.txt",
-            "sub_folder": ".",
+            "tokens_file_sub_folder": ".",
             "loader": "load_sherpa_offline_recognizer_from_paraformer",
         },
         {
             "repo_id": "csukuangfj/sherpa-onnx-paraformer-zh-small-2024-03-09",
             "nn_model_file": "model.int8.onnx",
+            "nn_model_file_sub_folder": ".",
             "tokens_file": "tokens.txt",
-            "sub_folder": ".",
+            "tokens_file_sub_folder": ".",
             "loader": "load_sherpa_offline_recognizer_from_paraformer",
         },
         {
             "repo_id": "luomingshuang/icefall_asr_wenetspeech_pruned_transducer_stateless2",
             "nn_model_file": "cpu_jit_epoch_10_avg_2_torch_1.7.1.pt",
+            "nn_model_file_sub_folder": "exp",
             "tokens_file": "tokens.txt",
-            "sub_folder": ".",
+            "tokens_file_sub_folder": "data/lang_char",
             "loader": "load_sherpa_offline_recognizer",
             "normalize_samples": True,
         }
@@ -52,8 +56,9 @@ model_map = {
         {
             "repo_id": "csukuangfj/sherpa-onnx-paraformer-en-2024-03-09",
             "nn_model_file": "model.int8.onnx",
+            "nn_model_file_sub_folder": ".",
             "tokens_file": "tokens.txt",
-            "sub_folder": ".",
+            "tokens_file_sub_folder": ".",
             "loader": "load_sherpa_offline_recognizer_from_paraformer",
         },
     ],
@@ -61,8 +66,9 @@ model_map = {
         {
             "repo_id": "csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28",
             "nn_model_file": "model.int8.onnx",
+            "nn_model_file_sub_folder": ".",
             "tokens_file": "tokens.txt",
-            "sub_folder": ".",
+            "tokens_file_sub_folder": ".",
             "loader": "load_sherpa_offline_recognizer_from_paraformer",
         },
     ],
@@ -70,8 +76,9 @@ model_map = {
         {
             "repo_id": "csukuangfj/sherpa-onnx-paraformer-trilingual-zh-cantonese-en",
             "nn_model_file": "model.int8.onnx",
+            "nn_model_file_sub_folder": ".",
             "tokens_file": "tokens.txt",
-            "sub_folder": ".",
+            "tokens_file_sub_folder": ".",
             "loader": "load_sherpa_offline_recognizer_from_paraformer",
         },
     ]
@@ -80,22 +87,23 @@ model_map = {
 
 def download_model(repo_id: str,
                    nn_model_file: str,
+                   nn_model_file_sub_folder: str,
                    tokens_file: str,
-                   sub_folder: str,
+                   tokens_file_sub_folder: str,
                    local_model_dir: str,
                    ):
 
     nn_model_file = huggingface_hub.hf_hub_download(
         repo_id=repo_id,
         filename=nn_model_file,
-        subfolder=sub_folder,
+        subfolder=nn_model_file_sub_folder,
         local_dir=local_model_dir,
     )
 
     tokens_file = huggingface_hub.hf_hub_download(
         repo_id=repo_id,
         filename=tokens_file,
-        subfolder=sub_folder,
+        subfolder=tokens_file_sub_folder,
         local_dir=local_model_dir,
     )
     return nn_model_file, tokens_file
@@ -150,8 +158,9 @@ def load_sherpa_offline_recognizer_from_paraformer(nn_model_file: str,
 
 def load_recognizer(repo_id: str,
                     nn_model_file: str,
+                    nn_model_file_sub_folder: str,
                     tokens_file: str,
-                    sub_folder: str,
+                    tokens_file_sub_folder: str,
                     local_model_dir: Path,
                     loader: str,
                     decoding_method: str = "greedy_search",
@@ -161,8 +170,9 @@ def load_recognizer(repo_id: str,
     download_model(
         repo_id=repo_id,
         nn_model_file=nn_model_file,
+        nn_model_file_sub_folder=nn_model_file_sub_folder,
         tokens_file=tokens_file,
+        tokens_file_sub_folder=tokens_file_sub_folder,
         local_model_dir=local_model_dir.as_posix(),
     )
 
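
The split subfolder arguments matter for the icefall checkpoint above, whose model and tokens files sit in different repo directories ("exp" and "data/lang_char"), which a single shared sub_folder could not express. A minimal sketch of the two downloads download_model() now performs for that entry; the local directory is hypothetical, and huggingface_hub typically mirrors the repo's folder layout under local_dir:

import huggingface_hub

local_model_dir = "pretrained_models/icefall_asr_wenetspeech"  # hypothetical path

# Model weights live under "exp/" in the repo.
nn_model_file = huggingface_hub.hf_hub_download(
    repo_id="luomingshuang/icefall_asr_wenetspeech_pruned_transducer_stateless2",
    filename="cpu_jit_epoch_10_avg_2_torch_1.7.1.pt",
    subfolder="exp",
    local_dir=local_model_dir,
)

# The token table lives under "data/lang_char/" in the same repo.
tokens_file = huggingface_hub.hf_hub_download(
    repo_id="luomingshuang/icefall_asr_wenetspeech_pruned_transducer_stateless2",
    filename="tokens.txt",
    subfolder="data/lang_char",
    local_dir=local_model_dir,
)

print(nn_model_file)  # e.g. .../exp/cpu_jit_epoch_10_avg_2_torch_1.7.1.pt
print(tokens_file)    # e.g. .../data/lang_char/tokens.txt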