HoneyTian committed
Commit a92b815 · 1 Parent(s): c5c31aa

add split tab
Dockerfile CHANGED
@@ -1,4 +1,4 @@
1
- FROM python:3.8
2
 
3
  WORKDIR /code
4
 
 
1
+ FROM python:3.12
2
 
3
  WORKDIR /code
4
 
README.md CHANGED
@@ -9,3 +9,86 @@ license: apache-2.0
9
  ---
10
 
11
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
9
  ---
10
 
11
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
12
+ ## CC Audio 8
13
+
14
+
15
+ ### Create the training container
16
+
17
+ ```text
18
+ To train the model inside a container, the container needs access to the GPU. Reference:
19
+ https://hub.docker.com/r/ollama/ollama
20
+
21
+ docker run -itd \
22
+ --name cc_audio_8 \
23
+ --network host \
24
+ --gpus all \
25
+ --privileged \
26
+ --ipc=host \
27
+ -v /data/tianxing/PycharmProjects/datasets:/data/tianxing/PycharmProjects/datasets \
28
+ -v /data/tianxing/PycharmProjects/cc_audio_8:/data/tianxing/PycharmProjects/cc_audio_8 \
29
+ python:3.12 /bin/bash
30
+
31
+
32
+ Check the GPU:
33
+ nvidia-smi
34
+ watch -n 1 -d nvidia-smi
35
+
36
+
37
+ ```
38
+
39
+ ```text
40
+ Accessing the GPU from inside a container
41
+
42
+ Reference:
43
+ https://blog.csdn.net/footless_bird/article/details/136291344
44
+ Steps:
45
+ # Install the NVIDIA container toolkit
46
+ yum install -y nvidia-container-toolkit
47
+
48
+ # Edit /etc/docker/daemon.json
49
+ cat /etc/docker/daemon.json
50
+ {
51
+ "data-root": "/data/lib/docker",
52
+ "default-runtime": "nvidia",
53
+ "runtimes": {
54
+ "nvidia": {
55
+ "path": "/usr/bin/nvidia-container-runtime",
56
+ "runtimeArgs": []
57
+ }
58
+ },
59
+ "registry-mirrors": [
60
+ "https://docker.m.daocloud.io",
61
+ "https://dockerproxy.com",
62
+ "https://docker.mirrors.ustc.edu.cn",
63
+ "https://docker.nju.edu.cn"
64
+ ]
65
+ }
66
+
67
+ # Reload systemd and restart docker
68
+ systemctl daemon-reload
69
+ systemctl restart docker
70
+
71
+ # Test whether the GPU is accessible from inside a container.
72
+ docker run --gpus all python:3.12-slim nvidia-smi
73
+
74
+ # Starting the container this way, the GPU device is visible inside it, but the container has no GPU driver, so nvidia-smi does not work.
75
+ docker run -it --privileged python:3.12-slim /bin/bash
76
+ apt update
77
+ apt install -y pciutils
78
+ lspci | grep -i nvidia
79
+ #00:08.0 3D controller: NVIDIA Corporation TU104GL [Tesla T4] (rev a1)
80
+
81
+ # This is the way of starting the container usually suggested online, but nvidia-smi still does not work inside it.
82
+ docker run \
83
+ --device /dev/nvidia0:/dev/nvidia0 \
84
+ --device /dev/nvidiactl:/dev/nvidiactl \
85
+ --device /dev/nvidia-uvm:/dev/nvidia-uvm \
86
+ -v /usr/local/nvidia:/usr/local/nvidia \
87
+ -it --privileged python:3.12-slim /bin/bash
88
+
89
+
90
+ # Started this way, nvidia-smi works inside the container; the key seems to be the --gpus all parameter.
91
+ docker run -itd --gpus all --name open_unsloth python:3.12-slim /bin/bash
92
+ docker run -itd --gpus all --name Qwen2-7B-Instruct python:3.12-slim /bin/bash
93
+
94
+ ```
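
The README block above sets up a GPU-enabled training container. A minimal sanity check that PyTorch inside the container can actually see the GPU might look like the sketch below (not part of this commit; it assumes torch is already installed in the container):

```python
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# Minimal GPU sanity check to run inside the cc_audio_8 container (illustrative sketch).
import torch


def main():
    available = torch.cuda.is_available()
    print(f"cuda available: {available}")
    if available:
        # One line per visible device, to confirm that `--gpus all` took effect.
        for i in range(torch.cuda.device_count()):
            print(f"device {i}: {torch.cuda.get_device_name(i)}")
    return


if __name__ == "__main__":
    main()
```
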
examples/sample_filter/bad_case_find.py CHANGED
@@ -12,20 +12,17 @@ def get_args():
12
  parser = argparse.ArgumentParser()
13
  parser.add_argument(
14
  "--data_dir",
15
- # default=r"E:\Users\tianx\HuggingDatasets\cc_audio_8\data\data",
16
- # default=r"E:\Users\tianx\HuggingDatasets\cc_audio_8\data\us-3",
17
- default=r"E:\Users\tianx\HuggingDatasets\cc_audio_8\data\transfer",
18
- # default=r"E:\Users\tianx\HuggingDatasets\cc_audio_8\data\id",
19
  type=str
20
  )
21
  parser.add_argument(
22
  "--keep_dir",
23
- default=r"E:\Users\tianx\HuggingDatasets\cc_audio_8\data\keep",
24
  type=str
25
  )
26
  parser.add_argument(
27
  "--trash_dir",
28
- default=r"E:\Users\tianx\HuggingDatasets\cc_audio_8\data\trash",
29
  type=str
30
  )
31
  args = parser.parse_args()
@@ -51,7 +48,8 @@ def main():
51
  label1, prob1 = client.predict(
52
  audio=handle_file(filename),
53
  # model_name="cc_audio_8-ch32",
54
- model_name="voicemail-en-ph-2-ch4",
 
55
  ground_true="Hello!!",
56
  api_name="/click_button"
57
  )
 
12
  parser = argparse.ArgumentParser()
13
  parser.add_argument(
14
  "--data_dir",
15
+ default=r"D:\Users\tianx\HuggingDatasets\international_voice\data\calling\62\wav_segmented",
 
 
 
16
  type=str
17
  )
18
  parser.add_argument(
19
  "--keep_dir",
20
+ default=r"D:\Users\tianx\HuggingDatasets\international_voice\data\calling\62\keep",
21
  type=str
22
  )
23
  parser.add_argument(
24
  "--trash_dir",
25
+ default=r"D:\Users\tianx\HuggingDatasets\international_voice\data\calling\62\trash",
26
  type=str
27
  )
28
  args = parser.parse_args()
 
48
  label1, prob1 = client.predict(
49
  audio=handle_file(filename),
50
  # model_name="cc_audio_8-ch32",
51
+ # model_name="voicemail-en-ph-2-ch4",
52
+ model_name="voicemail-id-id-2-ch4",
53
  ground_true="Hello!!",
54
  api_name="/click_button"
55
  )
install.sh CHANGED
@@ -54,7 +54,7 @@ fi
54
  if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
55
  $verbose && echo "stage 2: create virtualenv"
56
 
57
- # /usr/local/python-3.6.5/bin/virtualenv cc_audio_8
58
  # source /data/local/bin/cc_audio_8/bin/activate
59
  /usr/local/python-${python_version}/bin/pip3 install virtualenv
60
  mkdir -p /data/local/bin
 
54
  if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
55
  $verbose && echo "stage 2: create virtualenv"
56
 
57
+ # /usr/local/python-3.9.9/bin/virtualenv cc_audio_8
58
  # source /data/local/bin/cc_audio_8/bin/activate
59
  /usr/local/python-${python_version}/bin/pip3 install virtualenv
60
  mkdir -p /data/local/bin
main.py CHANGED
@@ -16,6 +16,10 @@ import torch
16
 
17
  from project_settings import environment, project_path
18
  from toolbox.torch.utils.data.vocabulary import Vocabulary
 
 
 
 
19
 
20
 
21
  def get_args():
@@ -127,71 +131,28 @@ def main():
127
  token=args.hf_token,
128
  )
129
 
130
- # examples
131
- example_zip_file = trained_model_dir / "examples.zip"
132
- with zipfile.ZipFile(example_zip_file.as_posix(), "r") as f_zip:
133
- out_root = examples_dir
134
- if out_root.exists():
135
- shutil.rmtree(out_root.as_posix())
136
- out_root.mkdir(parents=True, exist_ok=True)
137
- f_zip.extractall(path=out_root)
138
-
139
- # models
140
- model_choices = list()
141
- for filename in trained_model_dir.glob("*.zip"):
142
- model_name = filename.stem
143
- if model_name == "examples":
144
- continue
145
- model_choices.append(model_name)
146
- model_choices = list(sorted(model_choices))
147
-
148
- # examples
149
- examples = list()
150
- for filename in examples_dir.glob("**/*/*.wav"):
151
- label = filename.parts[-2]
152
-
153
- examples.append([
154
- filename.as_posix(),
155
- model_choices[0],
156
- label
157
- ])
158
-
159
- # ui
160
- brief_description = """
161
- International voice outbound-call system; telephone audio classification, 8000 Hz, int16.
162
- """
163
 
164
  # ui
165
  with gr.Blocks() as blocks:
166
- gr.Markdown(value=brief_description)
167
-
168
- with gr.Row():
169
- with gr.Column(scale=3):
170
- c_audio = gr.Audio(label="audio")
171
- with gr.Row():
172
- with gr.Column(scale=3):
173
- c_model_name = gr.Dropdown(choices=model_choices, value=model_choices[0], label="model_name")
174
- with gr.Column(scale=3):
175
- c_ground_true = gr.Textbox(label="ground_true")
176
-
177
- c_button = gr.Button("run", variant="primary")
178
- with gr.Column(scale=3):
179
- c_label = gr.Textbox(label="label")
180
- c_probability = gr.Number(label="probability")
181
-
182
- gr.Examples(
183
- examples,
184
- inputs=[c_audio, c_model_name, c_ground_true],
185
- outputs=[c_label, c_probability],
186
- fn=click_button,
187
- examples_per_page=5,
188
- )
189
-
190
- c_button.click(
191
- click_button,
192
- inputs=[c_audio, c_model_name, c_ground_true],
193
- outputs=[c_label, c_probability],
194
- )
195
 
196
  # http://127.0.0.1:7864/
197
  blocks.queue().launch(
 
16
 
17
  from project_settings import environment, project_path
18
  from toolbox.torch.utils.data.vocabulary import Vocabulary
19
+ from tabs.cls_tab import get_cls_tab
20
+ from tabs.split_tabs import get_split_tab
21
+ from tabs.shell_tab import get_shell_tab
22
+
23
 
24
 
25
  def get_args():
 
131
  token=args.hf_token,
132
  )
133
 
134
+ # examples zip
135
+ if not examples_dir.exists():
136
+ example_zip_file = trained_model_dir / "examples.zip"
137
+ with zipfile.ZipFile(example_zip_file.as_posix(), "r") as f_zip:
138
+ out_root = examples_dir
139
+ if out_root.exists():
140
+ shutil.rmtree(out_root.as_posix())
141
+ out_root.mkdir(parents=True, exist_ok=True)
142
+ f_zip.extractall(path=out_root)
143
 
144
  # ui
145
  with gr.Blocks() as blocks:
146
+ with gr.Tabs():
147
+ _ = get_cls_tab(
148
+ examples_dir=args.examples_dir,
149
+ trained_model_dir=args.trained_model_dir,
150
+ )
151
+ _ = get_split_tab(
152
+ examples_dir=args.examples_dir,
153
+ trained_model_dir=args.trained_model_dir,
154
+ )
155
+ _ = get_shell_tab()
156
 
157
  # http://127.0.0.1:7864/
158
  blocks.queue().launch(
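
The main.py change above replaces the inline classification UI with per-tab builder functions (get_cls_tab, get_split_tab, get_shell_tab), each of which opens its own gr.TabItem, wires its own events, and returns locals(). A minimal sketch of that pattern with a made-up tab (the tab name and components here are illustrative, not from this repo):

```python
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# Sketch of the tab-builder pattern that main.py switches to in this commit.
import gradio as gr


def get_echo_tab():
    # Each builder creates its own TabItem and event wiring,
    # and returns locals() so the caller can reach its components if needed.
    with gr.TabItem("echo"):
        echo_text = gr.Textbox(label="text")
        echo_button = gr.Button("run", variant="primary")
        echo_output = gr.Textbox(label="output")
        echo_button.click(lambda x: x, inputs=[echo_text], outputs=[echo_output])
    return locals()


if __name__ == "__main__":
    with gr.Blocks() as blocks:
        with gr.Tabs():
            _ = get_echo_tab()
    blocks.queue().launch()
```
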
requirements.txt CHANGED
@@ -1,13 +1,14 @@
1
- torch==2.3.0
2
- torchaudio==2.3.0
3
- fsspec==2024.5.0
4
- librosa==0.10.2
5
- pandas==2.0.3
6
- openpyxl==3.0.9
7
- xlrd==1.2.0
8
- tqdm==4.66.4
9
- overrides==1.9.0
10
- pyyaml==6.0.1
11
- evaluate==0.4.2
12
  gradio
13
- python-dotenv==1.0.1
 
 
1
+ torch
2
+ torchaudio
3
+ fsspec
4
+ librosa
5
+ pandas
6
+ openpyxl
7
+ xlrd
8
+ tqdm
9
+ overrides
10
+ pyyaml
11
+ evaluate
12
  gradio
13
+ python-dotenv
14
+ numpy
tabs/__init__.py ADDED
@@ -0,0 +1,6 @@
1
+ #!/usr/bin/python3
2
+ # -*- coding: utf-8 -*-
3
+
4
+
5
+ if __name__ == "__main__":
6
+ pass
tabs/cls_tab.py ADDED
@@ -0,0 +1,145 @@
1
+ #!/usr/bin/python3
2
+ # -*- coding: utf-8 -*-
3
+ import argparse
4
+ from functools import lru_cache
5
+ from pathlib import Path
6
+ import platform
7
+ import shutil
8
+ import tempfile
9
+ import zipfile
10
+ from typing import Tuple
11
+
12
+ import gradio as gr
13
+ import numpy as np
14
+ import torch
15
+
16
+ from project_settings import project_path
17
+ from toolbox.torch.utils.data.vocabulary import Vocabulary
18
+
19
+
20
+ @lru_cache(maxsize=100)
21
+ def load_model(model_file: Path):
22
+ with zipfile.ZipFile(model_file, "r") as f_zip:
23
+ out_root = Path(tempfile.gettempdir()) / "cc_audio_8"
24
+ if out_root.exists():
25
+ shutil.rmtree(out_root.as_posix())
26
+ out_root.mkdir(parents=True, exist_ok=True)
27
+ f_zip.extractall(path=out_root)
28
+
29
+ tgt_path = out_root / model_file.stem
30
+ jit_model_file = tgt_path / "trace_model.zip"
31
+ vocab_path = tgt_path / "vocabulary"
32
+
33
+ vocabulary = Vocabulary.from_files(vocab_path.as_posix())
34
+
35
+ with open(jit_model_file.as_posix(), "rb") as f:
36
+ model = torch.jit.load(f)
37
+ model.eval()
38
+
39
+ shutil.rmtree(tgt_path)
40
+
41
+ d = {
42
+ "model": model,
43
+ "vocabulary": vocabulary
44
+ }
45
+ return d
46
+
47
+
48
+ def when_click_cls_button(audio_t,
49
+ model_name: str,
50
+ ground_true: str) -> Tuple[str, float]:
51
+
52
+ sample_rate, signal = audio_t
53
+
54
+ model_file = project_path / f"trained_models/{model_name}.zip"
55
+ d = load_model(model_file)
56
+
57
+ model = d["model"]
58
+ vocabulary = d["vocabulary"]
59
+
60
+ inputs = signal / (1 << 15)
61
+ inputs = torch.tensor(inputs, dtype=torch.float32)
62
+ inputs = torch.unsqueeze(inputs, dim=0)
63
+
64
+ with torch.no_grad():
65
+ logits = model.forward(inputs)
66
+ probs = torch.nn.functional.softmax(logits, dim=-1)
67
+ label_idx = torch.argmax(probs, dim=-1)
68
+
69
+ label_idx = label_idx.cpu()
70
+ probs = probs.cpu()
71
+
72
+ label_idx = label_idx.numpy()[0]
73
+ prob = probs.numpy()[0][label_idx]
74
+
75
+ label_str = vocabulary.get_token_from_index(label_idx, namespace="labels")
76
+
77
+ return label_str, round(prob, 4)
78
+
79
+
80
+ def get_cls_tab(examples_dir: str, trained_model_dir: str):
81
+ cls_examples_dir = Path(examples_dir)
82
+ cls_trained_model_dir = Path(trained_model_dir)
83
+
84
+ # models
85
+ cls_model_choices = list()
86
+ for filename in cls_trained_model_dir.glob("*.zip"):
87
+ model_name = filename.stem
88
+ if model_name == "examples":
89
+ continue
90
+ cls_model_choices.append(model_name)
91
+ model_choices = list(sorted(cls_model_choices))
92
+
93
+ # examples zip
94
+ cls_example_zip_file = cls_trained_model_dir / "examples.zip"
95
+ with zipfile.ZipFile(cls_example_zip_file.as_posix(), "r") as f_zip:
96
+ out_root = cls_examples_dir
97
+ if out_root.exists():
98
+ shutil.rmtree(out_root.as_posix())
99
+ out_root.mkdir(parents=True, exist_ok=True)
100
+ f_zip.extractall(path=out_root)
101
+
102
+ # examples
103
+ cls_examples = list()
104
+ for filename in cls_examples_dir.glob("**/*/*.wav"):
105
+ label = filename.parts[-2]
106
+ cls_examples.append([
107
+ filename.as_posix(),
108
+ model_choices[0],
109
+ label
110
+ ])
111
+
112
+ with gr.TabItem("cls"):
113
+ with gr.Row():
114
+ with gr.Column(scale=3):
115
+ cls_audio = gr.Audio(label="audio")
116
+ with gr.Row():
117
+ with gr.Column(scale=3):
118
+ cls_model_name = gr.Dropdown(choices=model_choices, value=model_choices[0], label="model_name")
119
+ with gr.Column(scale=3):
120
+ cls_ground_true = gr.Textbox(label="ground_true")
121
+
122
+ cls_button = gr.Button("run", variant="primary")
123
+ with gr.Column(scale=3):
124
+ cls_label = gr.Textbox(label="label")
125
+ cls_probability = gr.Number(label="probability")
126
+
127
+ gr.Examples(
128
+ cls_examples,
129
+ inputs=[cls_audio, cls_model_name, cls_ground_true],
130
+ outputs=[cls_label, cls_probability],
131
+ fn=when_click_cls_button,
132
+ examples_per_page=5,
133
+ )
134
+
135
+ cls_button.click(
136
+ when_click_cls_button,
137
+ inputs=[cls_audio, cls_model_name, cls_ground_true],
138
+ outputs=[cls_label, cls_probability],
139
+ )
140
+
141
+ return locals()
142
+
143
+
144
+ if __name__ == "__main__":
145
+ pass
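
when_click_cls_button receives audio as the (sample_rate, int16 ndarray) tuple that gr.Audio yields with its default numpy type, which is why the signal is scaled by 1 << 15 before inference. A hedged sketch of calling it directly on an 8000 Hz PCM16 wav file (the wav path and model name below are placeholders; the matching trained_models/<model_name>.zip must exist):

```python
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# Sketch: driving when_click_cls_button without the Gradio UI.
# The wav path and model name are placeholders; adjust them to files that exist locally.
import wave

import numpy as np

from tabs.cls_tab import when_click_cls_button


def read_pcm16_wav(filename: str):
    # Return (sample_rate, int16 signal), the same form of input that gr.Audio provides.
    with wave.open(filename, "rb") as f:
        sample_rate = f.getframerate()
        signal = np.frombuffer(f.readframes(f.getnframes()), dtype=np.int16)
    return sample_rate, signal


if __name__ == "__main__":
    audio_t = read_pcm16_wav("data/examples/voicemail/sample.wav")
    label, prob = when_click_cls_button(
        audio_t,
        model_name="voicemail-id-id-2-ch4",
        ground_true="Hello!!",
    )
    print(label, prob)
```
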
tabs/fs_tab.py ADDED
@@ -0,0 +1,62 @@
1
+ #!/usr/bin/python3
2
+ # -*- coding: utf-8 -*-
3
+ import gradio as gr
4
+
5
+ from project_settings import project_path
6
+
7
+
8
+ def get_fs_tab():
9
+ with gr.TabItem("fs"):
10
+ with gr.Row():
11
+ with gr.Column(scale=5):
12
+ fs_filename = gr.Textbox(label="filename", max_lines=10)
13
+ fs_file = gr.File(label="file")
14
+ fs_file_dir = gr.Textbox(value="data", label="file_dir")
15
+ fs_query = gr.Button("query", variant="primary")
16
+ with gr.Column(scale=5):
17
+ fs_filelist_dataset_state = gr.State(value=[])
18
+ fs_filelist_dataset = gr.Dataset(
19
+ components=[fs_filename, fs_file],
20
+ samples=fs_filelist_dataset_state.value,
21
+ )
22
+
23
+ def when_click_query_files(file_dir: str = "data"):
24
+ file_dir = project_path / file_dir
25
+ dataset_state = list()
26
+ for filename in file_dir.glob("**/*.*"):
27
+ if filename.is_dir():
28
+ continue
29
+ if filename.stem.startswith("."):
30
+ continue
31
+ if filename.name.endswith(".py"):
32
+ continue
33
+ dataset_state.append((
34
+ filename.relative_to(file_dir).as_posix(),
35
+ filename.as_posix(),
36
+ ))
37
+
38
+ dataset = gr.Dataset(
39
+ components=[fs_filename, fs_file],
40
+ samples=dataset_state,
41
+ )
42
+ return dataset_state, dataset
43
+
44
+ fs_filelist_dataset.click(
45
+ fn=lambda x: (
46
+ x[1], x[1]
47
+ ),
48
+ inputs=[fs_filelist_dataset],
49
+ outputs=[fs_filename, fs_file]
50
+ )
51
+ fs_query.click(
52
+ fn=when_click_query_files,
53
+ inputs=[fs_file_dir],
54
+ outputs=[fs_filelist_dataset_state, fs_filelist_dataset]
55
+ )
56
+ return locals()
57
+
58
+
59
+ if __name__ == "__main__":
60
+ with gr.Blocks() as block:
61
+ fs_components = get_fs_tab()
62
+ block.launch()
tabs/shell_tab.py ADDED
@@ -0,0 +1,28 @@
1
+ #!/usr/bin/python3
2
+ # -*- coding: utf-8 -*-
3
+ import gradio as gr
4
+
5
+ from toolbox.os.command import Command
6
+
7
+
8
+ def shell(cmd: str):
9
+ return Command.popen(cmd)
10
+
11
+
12
+ def get_shell_tab():
13
+ with gr.TabItem("shell"):
14
+ shell_text = gr.Textbox(label="cmd")
15
+ shell_button = gr.Button("run")
16
+ shell_output = gr.Textbox(label="output", max_lines=100)
17
+
18
+ shell_button.click(
19
+ shell,
20
+ inputs=[shell_text, ],
21
+ outputs=[shell_output],
22
+ )
23
+
24
+ return locals()
25
+
26
+
27
+ if __name__ == "__main__":
28
+ pass
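
toolbox.os.command.Command is not included in this diff; the shell tab only relies on Command.popen(cmd) returning the command's output as text. A hypothetical stand-in under that assumption (a sketch of what the helper might look like, not its actual implementation):

```python
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# Hypothetical stand-in for toolbox.os.command.Command (the real module is not in this diff).
# Assumption: Command.popen(cmd) runs a shell command and returns its output as a string.
import subprocess


class Command(object):
    @staticmethod
    def popen(cmd: str) -> str:
        process = subprocess.run(
            cmd,
            shell=True,
            capture_output=True,
            text=True,
        )
        # Combine stdout and stderr so failures are visible in the shell tab's output box.
        return process.stdout + process.stderr


if __name__ == "__main__":
    print(Command.popen("echo hello"))
```
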
tabs/split_tabs.py ADDED
@@ -0,0 +1,305 @@
1
+ #!/usr/bin/python3
2
+ # -*- coding: utf-8 -*-
3
+ import argparse
4
+ import json
5
+ from functools import lru_cache, partial
6
+ from pathlib import Path
7
+ import shutil
8
+ import tempfile
9
+ import zipfile
10
+
11
+ import gradio as gr
12
+ import numpy as np
13
+ import torch
14
+ import torch.nn as nn
15
+ from typing import List
16
+
17
+ from project_settings import project_path
18
+ from toolbox.cv2.misc import erode, dilate
19
+ from toolbox.torch.utils.data.vocabulary import Vocabulary
20
+
21
+
22
+ @lru_cache(maxsize=100)
23
+ def load_model(model_file: Path):
24
+ with zipfile.ZipFile(model_file, "r") as f_zip:
25
+ out_root = Path(tempfile.gettempdir()) / "cc_audio_8"
26
+ if out_root.exists():
27
+ shutil.rmtree(out_root.as_posix())
28
+ out_root.mkdir(parents=True, exist_ok=True)
29
+ f_zip.extractall(path=out_root)
30
+
31
+ tgt_path = out_root / model_file.stem
32
+ jit_model_file = tgt_path / "trace_model.zip"
33
+ vocab_path = tgt_path / "vocabulary"
34
+
35
+ vocabulary = Vocabulary.from_files(vocab_path.as_posix())
36
+
37
+ with open(jit_model_file.as_posix(), "rb") as f:
38
+ model = torch.jit.load(f)
39
+ model.eval()
40
+
41
+ shutil.rmtree(tgt_path)
42
+
43
+ d = {
44
+ "model": model,
45
+ "vocabulary": vocabulary
46
+ }
47
+ return d
48
+
49
+
50
+ class Tagger(object):
51
+ def __init__(self,
52
+ model_file: str,
53
+ win_size: float,
54
+ win_step: float,
55
+ sample_rate: int = 8000,
56
+ ):
57
+ self.model_file = Path(model_file)
58
+ self.win_size = win_size
59
+ self.win_step = win_step
60
+ self.sample_rate = sample_rate
61
+
62
+ self.model: nn.Module = None
63
+ self.vocabulary: Vocabulary = None
64
+ self.load_models()
65
+
66
+ def load_models(self):
67
+ m = load_model(self.model_file)
68
+
69
+ model = m["model"]
70
+ vocabulary = m["vocabulary"]
71
+
72
+ self.model = model
73
+ self.vocabulary = vocabulary
74
+ return model, vocabulary
75
+
76
+ def tag(self, signal: np.ndarray):
77
+ signal_length = len(signal)
78
+ win_size = int(self.win_size * self.sample_rate)
79
+ win_step = int(self.win_step * self.sample_rate)
80
+
81
+ signal = np.concatenate([
82
+ np.zeros(shape=(win_size // 2,), dtype=np.int16),
83
+ signal,
84
+ np.zeros(shape=(win_size // 2,), dtype=np.int16),
85
+ ])
86
+
87
+ result = list()
88
+ for i in range(0, signal_length, win_step):
89
+ sub_signal = signal[i: i+win_size]
90
+ if len(sub_signal) < win_size:
91
+ break
92
+
93
+ inputs = torch.tensor(sub_signal, dtype=torch.float32)
94
+ inputs = torch.unsqueeze(inputs, dim=0)
95
+
96
+ probs = self.model(inputs)
97
+
98
+ probs = probs.tolist()[0]
99
+ argidx = np.argmax(probs)
100
+ label_str = self.vocabulary.get_token_from_index(argidx, namespace="labels")
101
+ prob = probs[argidx]
102
+ result.append(label_str)
103
+
104
+ return result
105
+
106
+
107
+ def correct_labels(labels: List[str], target_label: str = "noise", n_erode: int = 2, n_dilate: int = 2):
108
+ labels = erode(labels, erode_label=target_label, n=n_erode)
109
+ labels = dilate(labels, dilate_label=target_label, n=n_dilate)
110
+ return labels
111
+
112
+
113
+ def split_signal_by_labels(signal: np.ndarray, labels: List[str], target_label: str):
114
+ l = len(labels)
115
+
116
+ noise_list = list()
117
+ begin = None
118
+ for idx, label in enumerate(labels):
119
+ if label == target_label:
120
+ if begin is None:
121
+ begin = idx
122
+ elif label != target_label:
123
+ if begin is not None:
124
+ noise_list.append((begin, idx))
125
+ begin = None
126
+ else:
127
+ pass
128
+ else:
129
+ if begin is not None:
130
+ noise_list.append((begin, l))
131
+
132
+ result = list()
133
+
134
+ win_step = signal.shape[0] / l
135
+ for begin, end in noise_list:
136
+ begin = int(begin * win_step)
137
+ end = int(end * win_step)
138
+
139
+ sub_signal = signal[begin: end + 1]
140
+ result.append({
141
+ "begin": begin,
142
+ "end": end + 1,
143
+ "sub_signal": sub_signal,
144
+ })
145
+
146
+ return result
147
+
148
+
149
+ @lru_cache(maxsize=100)
150
+ def get_tagger(model_file: str,
151
+ win_size: float = 2.0,
152
+ win_step: float = 0.25,
153
+ ):
154
+ tagger = Tagger(
155
+ model_file=model_file,
156
+ win_size=win_size,
157
+ win_step=win_step,
158
+ )
159
+ return tagger
160
+
161
+
162
+ def when_model_name_change(model_name: str, split_trained_model_dir: Path):
163
+ m = load_model(
164
+ model_file=(split_trained_model_dir / f"{model_name}.zip")
165
+ )
166
+ token_to_index: dict = m["vocabulary"].get_token_to_index_vocabulary(namespace="labels")
167
+ label_choices = list(token_to_index.keys())
168
+
169
+ split_label = gr.Dropdown(choices=label_choices, value=label_choices[0], label="label")
170
+
171
+ return split_label
172
+
173
+
174
+ def get_split_tab(examples_dir: str, trained_model_dir: str):
175
+ split_examples_dir = Path(examples_dir)
176
+ split_trained_model_dir = Path(trained_model_dir)
177
+
178
+ # models
179
+ split_model_choices = list()
180
+ for filename in split_trained_model_dir.glob("*.zip"):
181
+ model_name = filename.stem
182
+ if model_name == "examples":
183
+ continue
184
+ split_model_choices.append(model_name)
185
+ model_choices = list(sorted(split_model_choices))
186
+
187
+ # model_labels_choices
188
+ m = load_model(
189
+ model_file=(split_trained_model_dir / f"{model_choices[0]}.zip")
190
+ )
191
+ token_to_index = m["vocabulary"].get_token_to_index_vocabulary(namespace="labels")
192
+ model_labels_choices = list(token_to_index.keys())
193
+
194
+ # examples
195
+ split_examples = list()
196
+ for filename in split_examples_dir.glob("**/*/*.wav"):
197
+ label = filename.parts[-2]
198
+ target_label = m["vocabulary"].get_token_from_index(index=0, namespace="labels")
199
+ split_examples.append([
200
+ filename.as_posix(),
201
+ model_choices[0],
202
+ model_labels_choices[0]
203
+ ])
204
+
205
+ with gr.TabItem("split"):
206
+ with gr.Row():
207
+ with gr.Column(scale=3):
208
+ split_audio = gr.Audio(label="audio")
209
+ with gr.Row():
210
+ split_model_name = gr.Dropdown(choices=model_choices, value=model_choices[0], label="model_name")
211
+ split_label = gr.Dropdown(choices=model_labels_choices, value=model_labels_choices[0], label="label")
212
+ split_win_size = gr.Number(value=2.0, minimum=0, maximum=5, step=0.05, label="win_size")
213
+ split_win_step = gr.Number(value=0.25, minimum=0, maximum=5, step=0.05, label="win_step")
214
+ split_n_erode = gr.Number(value=2, minimum=0, maximum=5, step=1, label="n_erode")
215
+ split_n_dilate = gr.Number(value=2, minimum=0, maximum=5, step=1, label="n_dilate")
216
+
217
+ split_button = gr.Button("run", variant="primary")
218
+ with gr.Column(scale=3):
219
+ split_sub_audio = gr.Audio(label="sub_audio")
220
+ split_sub_audio_message = gr.Textbox(max_lines=10, label="sub_audio_message")
221
+
222
+ split_sub_audio_dataset_state = gr.State(value=[])
223
+ split_sub_audio_dataset = gr.Dataset(
224
+ components=[split_sub_audio, split_sub_audio_message],
225
+ samples=split_sub_audio_dataset_state.value,
226
+ )
227
+ split_sub_audio_dataset.click(
228
+ fn=lambda x: (
229
+ x[0], x[1]
230
+ ),
231
+ inputs=[split_sub_audio_dataset],
232
+ outputs=[split_sub_audio, split_sub_audio_message]
233
+ )
234
+
235
+ def when_click_split_button(audio_t,
236
+ model_name: str,
237
+ label: str,
238
+ win_size: float,
239
+ win_step: float,
240
+ n_erode: int = 2,
241
+ n_dilate: int = 2
242
+ ):
243
+ max_wave_value = 32768.0
244
+
245
+ sample_rate, signal = audio_t
246
+
247
+ model_file = project_path / f"trained_models/{model_name}.zip"
248
+ tagger = get_tagger(model_file.as_posix(), win_size, win_step)
249
+
250
+ signal_ = signal / max_wave_value
251
+
252
+ labels = tagger.tag(signal_)
253
+ labels = correct_labels(labels, target_label=label, n_erode=n_erode, n_dilate=n_dilate)
254
+
255
+ sub_signal_list = split_signal_by_labels(signal, labels, target_label=label)
256
+
257
+ _split_sub_audio_dataset_state = [
258
+ [
259
+ (sample_rate, item["sub_signal"]),
260
+ json.dumps({"begin": item["begin"], "end": item["end"]}, ensure_ascii=False, indent=2),
261
+ ]
262
+ for item in sub_signal_list
263
+ ]
264
+ _split_sub_audio_dataset = gr.Dataset(
265
+ components=[split_sub_audio, split_sub_audio_message],
266
+ samples=_split_sub_audio_dataset_state,
267
+ visible=True
268
+ )
269
+ return _split_sub_audio_dataset_state, _split_sub_audio_dataset
270
+
271
+ gr.Examples(
272
+ split_examples,
273
+ inputs=[
274
+ split_audio,
275
+ split_model_name, split_label,
276
+ split_win_size, split_win_step,
277
+ split_n_erode, split_n_dilate,
278
+ ],
279
+ outputs=[split_sub_audio_dataset_state, split_sub_audio_dataset],
280
+ fn=when_click_split_button,
281
+ examples_per_page=5,
282
+ )
283
+
284
+ split_model_name.change(
285
+ partial(when_model_name_change, split_trained_model_dir=split_trained_model_dir),
286
+ inputs=[split_model_name],
287
+ outputs=[split_label],
288
+ )
289
+
290
+ split_button.click(
291
+ when_click_split_button,
292
+ inputs=[
293
+ split_audio,
294
+ split_model_name, split_label,
295
+ split_win_size, split_win_step,
296
+ split_n_erode, split_n_dilate,
297
+ ],
298
+ outputs=[split_sub_audio_dataset_state, split_sub_audio_dataset],
299
+ )
300
+
301
+ return locals()
302
+
303
+
304
+ if __name__ == "__main__":
305
+ pass
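
The split tab's pipeline is: Tagger.tag emits one label per analysis window, correct_labels smooths the sequence with erode and dilate, and split_signal_by_labels maps each surviving run of the target label back to a sample range using win_step = signal.shape[0] / len(labels). A small sketch of the last two steps on synthetic labels (no model involved; the label names and window counts are arbitrary):

```python
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# Sketch: label smoothing and splitting from the split tab, on synthetic data (no model).
import numpy as np

from tabs.split_tabs import correct_labels, split_signal_by_labels


if __name__ == "__main__":
    # 10 windows over a 1 second, 8000 Hz signal -> each label covers roughly 800 samples.
    sample_rate = 8000
    signal = np.zeros(shape=(sample_rate,), dtype=np.int16)
    labels = [
        "voice", "voice", "noise", "voice", "voice",
        "noise", "noise", "noise", "voice", "voice",
    ]

    # Erode then dilate: very short runs of "noise" disappear, longer runs survive.
    labels = correct_labels(labels, target_label="noise", n_erode=1, n_dilate=1)

    # Each surviving "noise" run is mapped back to a begin/end sample range.
    for item in split_signal_by_labels(signal, labels, target_label="noise"):
        print(item["begin"], item["end"], item["sub_signal"].shape)
```
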
toolbox/cv2/__init__.py ADDED
@@ -0,0 +1,6 @@
1
+ #!/usr/bin/python3
2
+ # -*- coding: utf-8 -*-
3
+
4
+
5
+ if __name__ == '__main__':
6
+ pass
toolbox/cv2/misc.py ADDED
@@ -0,0 +1,137 @@
1
+ #!/usr/bin/python3
2
+ # -*- coding: utf-8 -*-
3
+ from typing import List, Union
4
+
5
+
6
+ def erode(labels: List[Union[str, int]], erode_label: Union[str, int], n: int = 1):
7
+ """
8
+ Iterate over the labels list and erode each contiguous run of erode_label by n elements.
9
+ """
10
+ result = list()
11
+ in_span = False
12
+ count = 0
13
+ for idx, label in enumerate(labels):
14
+ if label == erode_label:
15
+ if not in_span:
16
+ in_span = True
17
+ count = 0
18
+ if count < n:
19
+ if len(result) == 0:
20
+ result.append(label)
21
+ else:
22
+ result.append(result[-1])
23
+ count += 1
24
+ continue
25
+ else:
26
+ result.append(label)
27
+ continue
28
+ elif label != erode_label:
29
+ if in_span:
30
+ in_span = False
31
+
32
+ for i in range(min(len(result), n)):
33
+ result[-i-1] = label
34
+ result.append(label)
35
+ continue
36
+ else:
37
+ result.append(label)
38
+ continue
39
+
40
+ result.append(label)
41
+ return result
42
+
43
+
44
+ def dilate(labels: List[Union[str, int]], dilate_label: Union[str, int], n: int = 1):
45
+ """
46
+ Iterate over the labels list and dilate each contiguous run of dilate_label by n elements.
47
+ """
48
+ result = list()
49
+ in_span = False
50
+ count = float('inf')
51
+ for idx, label in enumerate(labels):
52
+ if count < n:
53
+ result.append(dilate_label)
54
+ count += 1
55
+ continue
56
+ if label == dilate_label:
57
+ if not in_span:
58
+ in_span = True
59
+
60
+ for i in range(min(len(result), n)):
61
+ result[-i-1] = label
62
+ result.append(label)
63
+ continue
64
+ else:
65
+ result.append(label)
66
+ continue
67
+ else:
68
+ if in_span:
69
+ in_span = False
70
+ result.append(dilate_label)
71
+ count = 1
72
+ continue
73
+ else:
74
+ result.append(label)
75
+ continue
76
+
77
+ return result
78
+
79
+
80
+ def demo1():
81
+ labels = [
82
+ 'voice', 'mute', 'mute', 'voice', 'voice', 'voice', 'voice', 'bell', 'bell', 'bell', 'mute', 'mute', 'mute', 'voice',
83
+ ]
84
+
85
+ result = erode(
86
+ labels=labels,
87
+ erode_label='voice',
88
+ n=1,
89
+
90
+ )
91
+ print(len(labels))
92
+ print(len(result))
93
+ print(result)
94
+ return
95
+
96
+
97
+ def demo2():
98
+ labels = [
99
+ 'voice', 'mute', 'mute', 'voice', 'voice', 'voice', 'voice', 'bell', 'bell', 'bell', 'mute', 'mute', 'mute', 'voice',
100
+ ]
101
+
102
+ result = dilate(
103
+ labels=labels,
104
+ dilate_label='voice',
105
+ n=2,
106
+
107
+ )
108
+ print(len(labels))
109
+ print(len(result))
110
+ print(result)
111
+
112
+ return
113
+
114
+
115
+ def demo3():
116
+ import time
117
+ labels = ['mute', 'mute', 'mute', 'mute', 'mute', 'mute', 'mute', 'mute', 'mute', 'mute', 'mute', 'mute', 'mute', 'mute', 'mute', 'mute', 'mute', 'mute', 'mute', 'voice', 'bell', 'bell', 'bell', 'bell', 'bell', 'mute', 'mute', 'mute', 'mute', 'mute', 'mute', 'mute', 'mute', 'mute', 'mute', 'mute', 'mute', 'mute', 'mute', 'mute', 'bell', 'bell', 'bell', 'bell', 'bell', 'mute', 'mute', 'mute', 'mute', 'mute', 'mute', 'mute', 'mute', 'mute', 'mute', 'mute', 'mute', 'mute', 'mute', 'bell', 'bell', 'bell', 'bell', 'bell', 'bell', 'mute', 'mute', 'mute', 'mute', 'mute', 'mute', 'mute', 'mute']
118
+
119
+ begin = time.time()
120
+ labels = erode(labels, erode_label='music', n=1)
121
+ labels = dilate(labels, dilate_label='music', n=1)
122
+
123
+ labels = dilate(labels, dilate_label='voice', n=2)
124
+ labels = erode(labels, erode_label='voice', n=2)
125
+ labels = erode(labels, erode_label='voice', n=1)
126
+ labels = dilate(labels, dilate_label='voice', n=3)
127
+
128
+ cost = time.time() - begin
129
+ print(cost)
130
+ print(labels)
131
+ return
132
+
133
+
134
+ if __name__ == '__main__':
135
+ # demo1()
136
+ # demo2()
137
+ demo3()