HoneyTian committed
Commit a92b815 · 1 Parent(s): c5c31aa

add split tab
Dockerfile CHANGED
@@ -1,4 +1,4 @@
1
- FROM python:3.8
2
 
3
  WORKDIR /code
4
 
 
1
+ FROM python:3.12
2
 
3
  WORKDIR /code
4
 
README.md CHANGED
@@ -9,3 +9,86 @@ license: apache-2.0
9
  ---
10
 
11
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
9
  ---
10
 
11
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
12
+ ## CC Audio 8
13
+
14
+
15
+ ### Create the training container
16
+
17
+ ```text
18
+ To train the model inside a container, the container needs access to the GPU. Reference:
19
+ https://hub.docker.com/r/ollama/ollama
20
+
21
+ docker run -itd \
22
+ --name cc_audio_8 \
23
+ --network host \
24
+ --gpus all \
25
+ --privileged \
26
+ --ipc=host \
27
+ -v /data/tianxing/PycharmProjects/datasets:/data/tianxing/PycharmProjects/datasets \
28
+ -v /data/tianxing/PycharmProjects/cc_audio_8:/data/tianxing/PycharmProjects/cc_audio_8 \
29
+ python:3.12 /bin/bash
30
+
31
+
32
+ Check the GPU:
33
+ nvidia-smi
34
+ watch -n 1 -d nvidia-smi
35
+
36
+
37
+ ```
38
+
39
+ ```text
40
+ Accessing the GPU from inside a container
41
+
42
+ Reference:
43
+ https://blog.csdn.net/footless_bird/article/details/136291344
44
+ Steps:
45
+ # Install the NVIDIA container toolkit
46
+ yum install -y nvidia-container-toolkit
47
+
48
+ # Edit /etc/docker/daemon.json
49
+ cat /etc/docker/daemon.json
50
+ {
51
+ "data-root": "/data/lib/docker",
52
+ "default-runtime": "nvidia",
53
+ "runtimes": {
54
+ "nvidia": {
55
+ "path": "/usr/bin/nvidia-container-runtime",
56
+ "runtimeArgs": []
57
+ }
58
+ },
59
+ "registry-mirrors": [
60
+ "https://docker.m.daocloud.io",
61
+ "https://dockerproxy.com",
62
+ "https://docker.mirrors.ustc.edu.cn",
63
+ "https://docker.nju.edu.cn"
64
+ ]
65
+ }
66
+
67
+ # Reload systemd and restart docker
68
+ systemctl daemon-reload
69
+ systemctl restart docker
70
+
71
+ # Test whether the GPU is accessible from inside a container.
72
+ docker run --gpus all python:3.12-slim nvidia-smi
73
+
74
+ # Starting the container this way, the GPU device is visible inside it, but the container has no GPU driver, so nvidia-smi does not work.
75
+ docker run -it --privileged python:3.12-slim /bin/bash
76
+ apt update
77
+ apt install -y pciutils
78
+ lspci | grep -i nvidia
79
+ #00:08.0 3D controller: NVIDIA Corporation TU104GL [Tesla T4] (rev a1)
80
+
81
+ # This is the way of starting the container usually suggested online, but nvidia-smi still does not work inside it.
82
+ docker run \
83
+ --device /dev/nvidia0:/dev/nvidia0 \
84
+ --device /dev/nvidiactl:/dev/nvidiactl \
85
+ --device /dev/nvidia-uvm:/dev/nvidia-uvm \
86
+ -v /usr/local/nvidia:/usr/local/nvidia \
87
+ -it --privileged python:3.12-slim /bin/bash
88
+
89
+
90
+ # Started this way, nvidia-smi works inside the container; the key seems to be the --gpus all parameter.
91
+ docker run -itd --gpus all --name open_unsloth python:3.12-slim /bin/bash
92
+ docker run -itd --gpus all --name Qwen2-7B-Instruct python:3.12-slim /bin/bash
93
+
94
+ ```
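
The README block above sets up a GPU-enabled training container. A minimal sanity check that PyTorch inside the container can actually see the GPU might look like the sketch below (not part of this commit; it assumes torch is already installed in the container):

```python
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# Minimal GPU sanity check to run inside the cc_audio_8 container (illustrative sketch).
import torch


def main():
    available = torch.cuda.is_available()
    print(f"cuda available: {available}")
    if available:
        # One line per visible device, to confirm that `--gpus all` took effect.
        for i in range(torch.cuda.device_count()):
            print(f"device {i}: {torch.cuda.get_device_name(i)}")
    return


if __name__ == "__main__":
    main()
```
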
examples/sample_filter/bad_case_find.py CHANGED
@@ -12,20 +12,17 @@ def get_args():
12
  parser = argparse.ArgumentParser()
13
  parser.add_argument(
14
  "--data_dir",
15
- # default=r"E:\Users\tianx\HuggingDatasets\cc_audio_8\data\data",
16
- # default=r"E:\Users\tianx\HuggingDatasets\cc_audio_8\data\us-3",
17
- default=r"E:\Users\tianx\HuggingDatasets\cc_audio_8\data\transfer",
18
- # default=r"E:\Users\tianx\HuggingDatasets\cc_audio_8\data\id",
19
  type=str
20
  )
21
  parser.add_argument(
22
  "--keep_dir",
23
- default=r"E:\Users\tianx\HuggingDatasets\cc_audio_8\data\keep",
24
  type=str
25
  )
26
  parser.add_argument(
27
  "--trash_dir",
28
- default=r"E:\Users\tianx\HuggingDatasets\cc_audio_8\data\trash",
29
  type=str
30
  )
31
  args = parser.parse_args()
@@ -51,7 +48,8 @@ def main():
51
  label1, prob1 = client.predict(
52
  audio=handle_file(filename),
53
  # model_name="cc_audio_8-ch32",
54
- model_name="voicemail-en-ph-2-ch4",
 
55
  ground_true="Hello!!",
56
  api_name="/click_button"
57
  )
 
12
  parser = argparse.ArgumentParser()
13
  parser.add_argument(
14
  "--data_dir",
15
+ default=r"D:\Users\tianx\HuggingDatasets\international_voice\data\calling\62\wav_segmented",
 
 
 
16
  type=str
17
  )
18
  parser.add_argument(
19
  "--keep_dir",
20
+ default=r"D:\Users\tianx\HuggingDatasets\international_voice\data\calling\62\keep",
21
  type=str
22
  )
23
  parser.add_argument(
24
  "--trash_dir",
25
+ default=r"D:\Users\tianx\HuggingDatasets\international_voice\data\calling\62\trash",
26
  type=str
27
  )
28
  args = parser.parse_args()
 
48
  label1, prob1 = client.predict(
49
  audio=handle_file(filename),
50
  # model_name="cc_audio_8-ch32",
51
+ # model_name="voicemail-en-ph-2-ch4",
52
+ model_name="voicemail-id-id-2-ch4",
53
  ground_true="Hello!!",
54
  api_name="/click_button"
55
  )
install.sh CHANGED
@@ -54,7 +54,7 @@ fi
54
  if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
55
  $verbose && echo "stage 2: create virtualenv"
56
 
57
- # /usr/local/python-3.6.5/bin/virtualenv cc_audio_8
58
  # source /data/local/bin/cc_audio_8/bin/activate
59
  /usr/local/python-${python_version}/bin/pip3 install virtualenv
60
  mkdir -p /data/local/bin
 
54
  if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
55
  $verbose && echo "stage 2: create virtualenv"
56
 
57
+ # /usr/local/python-3.9.9/bin/virtualenv cc_audio_8
58
  # source /data/local/bin/cc_audio_8/bin/activate
59
  /usr/local/python-${python_version}/bin/pip3 install virtualenv
60
  mkdir -p /data/local/bin
main.py CHANGED
@@ -16,6 +16,10 @@ import torch
16
 
17
  from project_settings import environment, project_path
18
  from toolbox.torch.utils.data.vocabulary import Vocabulary
 
 
 
 
19
 
20
 
21
  def get_args():
@@ -127,71 +131,28 @@ def main():
127
  token=args.hf_token,
128
  )
129
 
130
- # examples
131
- example_zip_file = trained_model_dir / "examples.zip"
132
- with zipfile.ZipFile(example_zip_file.as_posix(), "r") as f_zip:
133
- out_root = examples_dir
134
- if out_root.exists():
135
- shutil.rmtree(out_root.as_posix())
136
- out_root.mkdir(parents=True, exist_ok=True)
137
- f_zip.extractall(path=out_root)
138
-
139
- # models
140
- model_choices = list()
141
- for filename in trained_model_dir.glob("*.zip"):
142
- model_name = filename.stem
143
- if model_name == "examples":
144
- continue
145
- model_choices.append(model_name)
146
- model_choices = list(sorted(model_choices))
147
-
148
- # examples
149
- examples = list()
150
- for filename in examples_dir.glob("**/*/*.wav"):
151
- label = filename.parts[-2]
152
-
153
- examples.append([
154
- filename.as_posix(),
155
- model_choices[0],
156
- label
157
- ])
158
-
159
- # ui
160
- brief_description = """
161
- International voice outbound-call system; telephone audio classification, 8000 Hz, int16.
162
- """
163
 
164
  # ui
165
  with gr.Blocks() as blocks:
166
- gr.Markdown(value=brief_description)
167
-
168
- with gr.Row():
169
- with gr.Column(scale=3):
170
- c_audio = gr.Audio(label="audio")
171
- with gr.Row():
172
- with gr.Column(scale=3):
173
- c_model_name = gr.Dropdown(choices=model_choices, value=model_choices[0], label="model_name")
174
- with gr.Column(scale=3):
175
- c_ground_true = gr.Textbox(label="ground_true")
176
-
177
- c_button = gr.Button("run", variant="primary")
178
- with gr.Column(scale=3):
179
- c_label = gr.Textbox(label="label")
180
- c_probability = gr.Number(label="probability")
181
-
182
- gr.Examples(
183
- examples,
184
- inputs=[c_audio, c_model_name, c_ground_true],
185
- outputs=[c_label, c_probability],
186
- fn=click_button,
187
- examples_per_page=5,
188
- )
189
-
190
- c_button.click(
191
- click_button,
192
- inputs=[c_audio, c_model_name, c_ground_true],
193
- outputs=[c_label, c_probability],
194
- )
195
 
196
  # http://127.0.0.1:7864/
197
  blocks.queue().launch(
 
16
 
17
  from project_settings import environment, project_path
18
  from toolbox.torch.utils.data.vocabulary import Vocabulary
19
+ from tabs.cls_tab import get_cls_tab
20
+ from tabs.split_tabs import get_split_tab
21
+ from tabs.shell_tab import get_shell_tab
22
+
23
 
24
 
25
  def get_args():
 
131
  token=args.hf_token,
132
  )
133
 
134
+ # examples zip
135
+ if not examples_dir.exists():
136
+ example_zip_file = trained_model_dir / "examples.zip"
137
+ with zipfile.ZipFile(example_zip_file.as_posix(), "r") as f_zip:
138
+ out_root = examples_dir
139
+ if out_root.exists():
140
+ shutil.rmtree(out_root.as_posix())
141
+ out_root.mkdir(parents=True, exist_ok=True)
142
+ f_zip.extractall(path=out_root)
143
 
144
  # ui
145
  with gr.Blocks() as blocks:
146
+ with gr.Tabs():
147
+ _ = get_cls_tab(
148
+ examples_dir=args.examples_dir,
149
+ trained_model_dir=args.trained_model_dir,
150
+ )
151
+ _ = get_split_tab(
152
+ examples_dir=args.examples_dir,
153
+ trained_model_dir=args.trained_model_dir,
154
+ )
155
+ _ = get_shell_tab()
156
 
157
  # http://127.0.0.1:7864/
158
  blocks.queue().launch(
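
The main.py change above replaces the inline classification UI with per-tab builder functions (get_cls_tab, get_split_tab, get_shell_tab), each of which opens its own gr.TabItem, wires its own events, and returns locals(). A minimal sketch of that pattern with a made-up tab (the tab name and components here are illustrative, not from this repo):

```python
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# Sketch of the tab-builder pattern that main.py switches to in this commit.
import gradio as gr


def get_echo_tab():
    # Each builder creates its own TabItem and event wiring,
    # and returns locals() so the caller can reach its components if needed.
    with gr.TabItem("echo"):
        echo_text = gr.Textbox(label="text")
        echo_button = gr.Button("run", variant="primary")
        echo_output = gr.Textbox(label="output")
        echo_button.click(lambda x: x, inputs=[echo_text], outputs=[echo_output])
    return locals()


if __name__ == "__main__":
    with gr.Blocks() as blocks:
        with gr.Tabs():
            _ = get_echo_tab()
    blocks.queue().launch()
```
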
requirements.txt CHANGED
@@ -1,13 +1,14 @@
1
- torch==2.3.0
2
- torchaudio==2.3.0
3
- fsspec==2024.5.0
4
- librosa==0.10.2
5
- pandas==2.0.3
6
- openpyxl==3.0.9
7
- xlrd==1.2.0
8
- tqdm==4.66.4
9
- overrides==1.9.0
10
- pyyaml==6.0.1
11
- evaluate==0.4.2
12
  gradio
13
- python-dotenv==1.0.1
 
 
1
+ torch
2
+ torchaudio
3
+ fsspec
4
+ librosa
5
+ pandas
6
+ openpyxl
7
+ xlrd
8
+ tqdm
9
+ overrides
10
+ pyyaml
11
+ evaluate
12
  gradio
13
+ python-dotenv
14
+ numpy
tabs/__init__.py ADDED
@@ -0,0 +1,6 @@
1
+ #!/usr/bin/python3
2
+ # -*- coding: utf-8 -*-
3
+
4
+
5
+ if __name__ == "__main__":
6
+ pass
tabs/cls_tab.py ADDED
@@ -0,0 +1,145 @@
1
+ #!/usr/bin/python3
2
+ # -*- coding: utf-8 -*-
3
+ import argparse
4
+ from functools import lru_cache
5
+ from pathlib import Path
6
+ import platform
7
+ import shutil
8
+ import tempfile
9
+ import zipfile
10
+ from typing import Tuple
11
+
12
+ import gradio as gr
13
+ import numpy as np
14
+ import torch
15
+
16
+ from project_settings import project_path
17
+ from toolbox.torch.utils.data.vocabulary import Vocabulary
18
+
19
+
20
+ @lru_cache(maxsize=100)
21
+ def load_model(model_file: Path):
22
+ with zipfile.ZipFile(model_file, "r") as f_zip:
23
+ out_root = Path(tempfile.gettempdir()) / "cc_audio_8"
24
+ if out_root.exists():
25
+ shutil.rmtree(out_root.as_posix())
26
+ out_root.mkdir(parents=True, exist_ok=True)
27
+ f_zip.extractall(path=out_root)
28
+
29
+ tgt_path = out_root / model_file.stem
30
+ jit_model_file = tgt_path / "trace_model.zip"
31
+ vocab_path = tgt_path / "vocabulary"
32
+
33
+ vocabulary = Vocabulary.from_files(vocab_path.as_posix())
34
+
35
+ with open(jit_model_file.as_posix(), "rb") as f:
36
+ model = torch.jit.load(f)
37
+ model.eval()
38
+
39
+ shutil.rmtree(tgt_path)
40
+
41
+ d = {
42
+ "model": model,
43
+ "vocabulary": vocabulary
44
+ }
45
+ return d
46
+
47
+
48
+ def when_click_cls_button(audio_t,
49
+ model_name: str,
50
+ ground_true: str) -> Tuple[str, float]:
51
+
52
+ sample_rate, signal = audio_t
53
+
54
+ model_file = project_path / f"trained_models/{model_name}.zip"
55
+ d = load_model(model_file)
56
+
57
+ model = d["model"]
58
+ vocabulary = d["vocabulary"]
59
+
60
+ inputs = signal / (1 << 15)
61
+ inputs = torch.tensor(inputs, dtype=torch.float32)
62
+ inputs = torch.unsqueeze(inputs, dim=0)
63
+
64
+ with torch.no_grad():
65
+ logits = model.forward(inputs)
66
+ probs = torch.nn.functional.softmax(logits, dim=-1)
67
+ label_idx = torch.argmax(probs, dim=-1)
68
+
69
+ label_idx = label_idx.cpu()
70
+ probs = probs.cpu()
71
+
72
+ label_idx = label_idx.numpy()[0]
73
+ prob = probs.numpy()[0][label_idx]
74
+
75
+ label_str = vocabulary.get_token_from_index(label_idx, namespace="labels")
76
+
77
+ return label_str, round(prob, 4)
78
+
79
+
80
+ def get_cls_tab(examples_dir: str, trained_model_dir: str):
81
+ cls_examples_dir = Path(examples_dir)
82
+ cls_trained_model_dir = Path(trained_model_dir)
83
+
84
+ # models
85
+ cls_model_choices = list()
86
+ for filename in cls_trained_model_dir.glob("*.zip"):
87
+ model_name = filename.stem
88
+ if model_name == "examples":
89
+ continue
90
+ cls_model_choices.append(model_name)
91
+ model_choices = list(sorted(cls_model_choices))
92
+
93
+ # examples zip
94
+ cls_example_zip_file = cls_trained_model_dir / "examples.zip"
95
+ with zipfile.ZipFile(cls_example_zip_file.as_posix(), "r") as f_zip:
96
+ out_root = cls_examples_dir
97
+ if out_root.exists():
98
+ shutil.rmtree(out_root.as_posix())
99
+ out_root.mkdir(parents=True, exist_ok=True)
100
+ f_zip.extractall(path=out_root)
101
+
102
+ # examples
103
+ cls_examples = list()
104
+ for filename in cls_examples_dir.glob("**/*/*.wav"):
105
+ label = filename.parts[-2]
106
+ cls_examples.append([
107
+ filename.as_posix(),
108
+ model_choices[0],
109
+ label
110
+ ])
111
+
112
+ with gr.TabItem("cls"):
113
+ with gr.Row():
114
+ with gr.Column(scale=3):
115
+ cls_audio = gr.Audio(label="audio")
116
+ with gr.Row():
117
+ with gr.Column(scale=3):
118
+ cls_model_name = gr.Dropdown(choices=model_choices, value=model_choices[0], label="model_name")
119
+ with gr.Column(scale=3):
120
+ cls_ground_true = gr.Textbox(label="ground_true")
121
+
122
+ cls_button = gr.Button("run", variant="primary")
123
+ with gr.Column(scale=3):
124
+ cls_label = gr.Textbox(label="label")
125
+ cls_probability = gr.Number(label="probability")
126
+
127
+ gr.Examples(
128
+ cls_examples,
129
+ inputs=[cls_audio, cls_model_name, cls_ground_true],
130
+ outputs=[cls_label, cls_probability],
131
+ fn=when_click_cls_button,
132
+ examples_per_page=5,
133
+ )
134
+
135
+ cls_button.click(
136
+ when_click_cls_button,
137
+ inputs=[cls_audio, cls_model_name, cls_ground_true],
138
+ outputs=[cls_label, cls_probability],
139
+ )
140
+
141
+ return locals()
142
+
143
+
144
+ if __name__ == "__main__":
145
+ pass
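
when_click_cls_button receives audio as the (sample_rate, int16 ndarray) tuple that gr.Audio yields with its default numpy type, which is why the signal is scaled by 1 << 15 before inference. A hedged sketch of calling it directly on an 8000 Hz PCM16 wav file (the wav path and model name below are placeholders; the matching trained_models/<model_name>.zip must exist):

```python
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# Sketch: driving when_click_cls_button without the Gradio UI.
# The wav path and model name are placeholders; adjust them to files that exist locally.
import wave

import numpy as np

from tabs.cls_tab import when_click_cls_button


def read_pcm16_wav(filename: str):
    # Return (sample_rate, int16 signal), the same form of input that gr.Audio provides.
    with wave.open(filename, "rb") as f:
        sample_rate = f.getframerate()
        signal = np.frombuffer(f.readframes(f.getnframes()), dtype=np.int16)
    return sample_rate, signal


if __name__ == "__main__":
    audio_t = read_pcm16_wav("data/examples/voicemail/sample.wav")
    label, prob = when_click_cls_button(
        audio_t,
        model_name="voicemail-id-id-2-ch4",
        ground_true="Hello!!",
    )
    print(label, prob)
```
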
tabs/fs_tab.py ADDED
@@ -0,0 +1,62 @@
1
+ #!/usr/bin/python3
2
+ # -*- coding: utf-8 -*-
3
+ import gradio as gr
4
+
5
+ from project_settings import project_path
6
+
7
+
8
+ def get_fs_tab():
9
+ with gr.TabItem("fs"):
10
+ with gr.Row():
11
+ with gr.Column(scale=5):
12
+ fs_filename = gr.Textbox(label="filename", max_lines=10)
13
+ fs_file = gr.File(label="file")
14
+ fs_file_dir = gr.Textbox(value="data", label="file_dir")
15
+ fs_query = gr.Button("query", variant="primary")
16
+ with gr.Column(scale=5):
17
+ fs_filelist_dataset_state = gr.State(value=[])
18
+ fs_filelist_dataset = gr.Dataset(
19
+ components=[fs_filename, fs_file],
20
+ samples=fs_filelist_dataset_state.value,
21
+ )
22
+
23
+ def when_click_query_files(file_dir: str = "data"):
24
+ file_dir = project_path / file_dir
25
+ dataset_state = list()
26
+ for filename in file_dir.glob("**/*.*"):
27
+ if filename.is_dir():
28
+ continue
29
+ if filename.stem.startswith("."):
30
+ continue
31
+ if filename.name.endswith(".py"):
32
+ continue
33
+ dataset_state.append((
34
+ filename.relative_to(file_dir).as_posix(),
35
+ filename.as_posix(),
36
+ ))
37
+
38
+ dataset = gr.Dataset(
39
+ components=[fs_filename, fs_file],
40
+ samples=dataset_state,
41
+ )
42
+ return dataset_state, dataset
43
+
44
+ fs_filelist_dataset.click(
45
+ fn=lambda x: (
46
+ x[1], x[1]
47
+ ),
48
+ inputs=[fs_filelist_dataset],
49
+ outputs=[fs_filename, fs_file]
50
+ )
51
+ fs_query.click(
52
+ fn=when_click_query_files,
53
+ inputs=[fs_file_dir],
54
+ outputs=[fs_filelist_dataset_state, fs_filelist_dataset]
55
+ )
56
+ return locals()
57
+
58
+
59
+ if __name__ == "__main__":
60
+ with gr.Blocks() as block:
61
+ fs_components = get_fs_tab()
62
+ block.launch()
tabs/shell_tab.py ADDED
@@ -0,0 +1,28 @@
1
+ #!/usr/bin/python3
2
+ # -*- coding: utf-8 -*-
3
+ import gradio as gr
4
+
5
+ from toolbox.os.command import Command
6
+
7
+
8
+ def shell(cmd: str):
9
+ return Command.popen(cmd)
10
+
11
+
12
+ def get_shell_tab():
13
+ with gr.TabItem("shell"):
14
+ shell_text = gr.Textbox(label="cmd")
15
+ shell_button = gr.Button("run")
16
+ shell_output = gr.Textbox(label="output", max_lines=100)
17
+
18
+ shell_button.click(
19
+ shell,
20
+ inputs=[shell_text, ],
21
+ outputs=[shell_output],
22
+ )
23
+
24
+ return locals()
25
+
26
+
27
+ if __name__ == "__main__":
28
+ pass
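
toolbox.os.command.Command is not included in this diff; the shell tab only relies on Command.popen(cmd) returning the command's output as text. A hypothetical stand-in under that assumption (a sketch of what the helper might look like, not its actual implementation):

```python
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# Hypothetical stand-in for toolbox.os.command.Command (the real module is not in this diff).
# Assumption: Command.popen(cmd) runs a shell command and returns its output as a string.
import subprocess


class Command(object):
    @staticmethod
    def popen(cmd: str) -> str:
        process = subprocess.run(
            cmd,
            shell=True,
            capture_output=True,
            text=True,
        )
        # Combine stdout and stderr so failures are visible in the shell tab's output box.
        return process.stdout + process.stderr


if __name__ == "__main__":
    print(Command.popen("echo hello"))
```
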
tabs/split_tabs.py ADDED
@@ -0,0 +1,305 @@
1
+ #!/usr/bin/python3
2
+ # -*- coding: utf-8 -*-
3
+ import argparse
4
+ import json
5
+ from functools import lru_cache, partial
6
+ from pathlib import Path
7
+ import shutil
8
+ import tempfile
9
+ import zipfile
10
+
11
+ import gradio as gr
12
+ import numpy as np
13
+ import torch
14
+ import torch.nn as nn
15
+ from typing import List
16
+
17
+ from project_settings import project_path
18
+ from toolbox.cv2.misc import erode, dilate
19
+ from toolbox.torch.utils.data.vocabulary import Vocabulary
20
+
21
+
22
+ @lru_cache(maxsize=100)
23
+ def load_model(model_file: Path):
24
+ with zipfile.ZipFile(model_file, "r") as f_zip:
25
+ out_root = Path(tempfile.gettempdir()) / "cc_audio_8"
26
+ if out_root.exists():
27
+ shutil.rmtree(out_root.as_posix())
28
+ out_root.mkdir(parents=True, exist_ok=True)
29
+ f_zip.extractall(path=out_root)
30
+
31
+ tgt_path = out_root / model_file.stem
32
+ jit_model_file = tgt_path / "trace_model.zip"
33
+ vocab_path = tgt_path / "vocabulary"
34
+
35
+ vocabulary = Vocabulary.from_files(vocab_path.as_posix())
36
+
37
+ with open(jit_model_file.as_posix(), "rb") as f:
38
+ model = torch.jit.load(f)
39
+ model.eval()
40
+
41
+ shutil.rmtree(tgt_path)
42
+
43
+ d = {
44
+ "model": model,
45
+ "vocabulary": vocabulary
46
+ }
47
+ return d
48
+
49
+
50
+ class Tagger(object):
51
+ def __init__(self,
52
+ model_file: str,
53
+ win_size: float,
54
+ win_step: float,
55
+ sample_rate: int = 8000,
56
+ ):
57
+ self.model_file = Path(model_file)
58
+ self.win_size = win_size
59
+ self.win_step = win_step
60
+ self.sample_rate = sample_rate
61
+
62
+ self.model: nn.Module = None
63
+ self.vocabulary: Vocabulary = None
64
+ self.load_models()
65
+
66
+ def load_models(self):
67
+ m = load_model(self.model_file)
68
+
69
+ model = m["model"]
70
+ vocabulary = m["vocabulary"]
71
+
72
+ self.model = model
73
+ self.vocabulary = vocabulary
74
+ return model, vocabulary
75
+
76
+ def tag(self, signal: np.ndarray):
77
+ signal_length = len(signal)
78
+ win_size = int(self.win_size * self.sample_rate)
79
+ win_step = int(self.win_step * self.sample_rate)
80
+
81
+ signal = np.concatenate([
82
+ np.zeros(shape=(win_size // 2,), dtype=np.int16),
83
+ signal,
84
+ np.zeros(shape=(win_size // 2,), dtype=np.int16),
85
+ ])
86
+
87
+ result = list()
88
+ for i in range(0, signal_length, win_step):
89
+ sub_signal = signal[i: i+win_size]
90
+ if len(sub_signal) < win_size:
91
+ break
92
+
93
+ inputs = torch.tensor(sub_signal, dtype=torch.float32)
94
+ inputs = torch.unsqueeze(inputs, dim=0)
95
+
96
+ probs = self.model(inputs)
97
+
98
+ probs = probs.tolist()[0]
99
+ argidx = np.argmax(probs)
100
+ label_str = self.vocabulary.get_token_from_index(argidx, namespace="labels")
101
+ prob = probs[argidx]
102
+ result.append(label_str)
103
+
104
+ return result
105
+
106
+
107
+ def correct_labels(labels: List[str], target_label: str = "noise", n_erode: int = 2, n_dilate: int = 2):
108
+ labels = erode(labels, erode_label=target_label, n=n_erode)
109
+ labels = dilate(labels, dilate_label=target_label, n=n_dilate)
110
+ return labels
111
+
112
+
113
+ def split_signal_by_labels(signal: np.ndarray, labels: List[str], target_label: str):
114
+ l = len(labels)
115
+
116
+ noise_list = list()
117
+ begin = None
118
+ for idx, label in enumerate(labels):
119
+ if label == target_label:
120
+ if begin is None:
121
+ begin = idx
122
+ elif label != target_label:
123
+ if begin is not None:
124
+ noise_list.append((begin, idx))
125
+ begin = None
126
+ else:
127
+ pass
128
+ else:
129
+ if begin is not None:
130
+ noise_list.append((begin, l))
131
+
132
+ result = list()
133
+
134
+ win_step = signal.shape[0] / l
135
+ for begin, end in noise_list:
136
+ begin = int(begin * win_step)
137
+ end = int(end * win_step)
138
+
139
+ sub_signal = signal[begin: end + 1]
140
+ result.append({
141
+ "begin": begin,
142
+ "end": end + 1,
143
+ "sub_signal": sub_signal,
144
+ })
145
+
146
+ return result
147
+
148
+
149
+ @lru_cache(maxsize=100)
150
+ def get_tagger(model_file: str,
151
+ win_size: float = 2.0,
152
+ win_step: float = 0.25,
153
+ ):
154
+ tagger = Tagger(
155
+ model_file=model_file,
156
+ win_size=win_size,
157
+ win_step=win_step,
158
+ )
159
+ return tagger
160
+
161
+
162
+ def when_model_name_change(model_name: str, split_trained_model_dir: Path):
163
+ m = load_model(
164
+ model_file=(split_trained_model_dir / f"{model_name}.zip")
165
+ )
166
+ token_to_index: dict = m["vocabulary"].get_token_to_index_vocabulary(namespace="labels")
167
+ label_choices = list(token_to_index.keys())
168
+
169
+ split_label = gr.Dropdown(choices=label_choices, value=label_choices[0], label="label")
170
+
171
+ return split_label
172
+
173
+
174
+ def get_split_tab(examples_dir: str, trained_model_dir: str):
175
+ split_examples_dir = Path(examples_dir)
176
+ split_trained_model_dir = Path(trained_model_dir)
177
+
178
+ # models
179
+ split_model_choices = list()
180
+ for filename in split_trained_model_dir.glob("*.zip"):
181
+ model_name = filename.stem
182
+ if model_name == "examples":
183
+ continue
184
+ split_model_choices.append(model_name)
185
+ model_choices = list(sorted(split_model_choices))
186
+
187
+ # model_labels_choices
188
+ m = load_model(
189
+ model_file=(split_trained_model_dir / f"{model_choices[0]}.zip")
190
+ )
191
+ token_to_index = m["vocabulary"].get_token_to_index_vocabulary(namespace="labels")
192
+ model_labels_choices = list(token_to_index.keys())
193
+
194
+ # examples
195
+ split_examples = list()
196
+ for filename in split_examples_dir.glob("**/*/*.wav"):
197
+ label = filename.parts[-2]
198
+ target_label = m["vocabulary"].get_token_from_index(index=0, namespace="labels")
199
+ split_examples.append([
200
+ filename.as_posix(),
201
+ model_choices[0],
202
+ model_labels_choices[0]
203
+ ])
204
+
205
+ with gr.TabItem("split"):
206
+ with gr.Row():
207
+ with gr.Column(scale=3):
208
+ split_audio = gr.Audio(label="audio")
209
+ with gr.Row():
210
+ split_model_name = gr.Dropdown(choices=model_choices, value=model_choices[0], label="model_name")
211
+ split_label = gr.Dropdown(choices=model_labels_choices, value=model_labels_choices[0], label="label")
212
+ split_win_size = gr.Number(value=2.0, minimum=0, maximum=5, step=0.05, label="win_size")
213
+ split_win_step = gr.Number(value=0.25, minimum=0, maximum=5, step=0.05, label="win_step")
214
+ split_n_erode = gr.Number(value=2, minimum=0, maximum=5, step=1, label="n_erode")
215
+ split_n_dilate = gr.Number(value=2, minimum=0, maximum=5, step=1, label="n_dilate")
216
+
217
+ split_button = gr.Button("run", variant="primary")
218
+ with gr.Column(scale=3):
219
+ split_sub_audio = gr.Audio(label="sub_audio")
220
+ split_sub_audio_message = gr.Textbox(max_lines=10, label="sub_audio_message")
221
+
222
+ split_sub_audio_dataset_state = gr.State(value=[])
223
+ split_sub_audio_dataset = gr.Dataset(
224
+ components=[split_sub_audio, split_sub_audio_message],
225
+ samples=split_sub_audio_dataset_state.value,
226
+ )
227
+ split_sub_audio_dataset.click(
228
+ fn=lambda x: (
229
+ x[0], x[1]
230
+ ),
231
+ inputs=[split_sub_audio_dataset],
232
+ outputs=[split_sub_audio, split_sub_audio_message]
233
+ )
234
+
235
+ def when_click_split_button(audio_t,
236
+ model_name: str,
237
+ label: str,
238
+ win_size: float,
239
+ win_step: float,
240
+ n_erode: int = 2,
241
+ n_dilate: int = 2
242
+ ):
243
+ max_wave_value = 32768.0
244
+
245
+ sample_rate, signal = audio_t
246
+
247
+ model_file = project_path / f"trained_models/{model_name}.zip"
248
+ tagger = get_tagger(model_file.as_posix(), win_size, win_step)
249
+
250
+ signal_ = signal / max_wave_value
251
+
252
+ labels = tagger.tag(signal_)
253
+ labels = correct_labels(labels, target_label=label, n_erode=n_erode, n_dilate=n_dilate)
254
+
255
+ sub_signal_list = split_signal_by_labels(signal, labels, target_label=label)
256
+
257
+ _split_sub_audio_dataset_state = [
258
+ [
259
+ (sample_rate, item["sub_signal"]),
260
+ json.dumps({"begin": item["begin"], "end": item["end"]}, ensure_ascii=False, indent=2),
261
+ ]
262
+ for item in sub_signal_list
263
+ ]
264
+ _split_sub_audio_dataset = gr.Dataset(
265
+ components=[split_sub_audio, split_sub_audio_message],
266
+ samples=_split_sub_audio_dataset_state,
267
+ visible=True
268
+ )
269
+ return _split_sub_audio_dataset_state, _split_sub_audio_dataset
270
+
271
+ gr.Examples(
272
+ split_examples,
273
+ inputs=[
274
+ split_audio,
275
+ split_model_name, split_label,
276
+ split_win_size, split_win_step,
277
+ split_n_erode, split_n_dilate,
278
+ ],
279
+ outputs=[split_sub_audio_dataset_state, split_sub_audio_dataset],
280
+ fn=when_click_split_button,
281
+ examples_per_page=5,
282
+ )
283
+
284
+ split_model_name.change(
285
+ partial(when_model_name_change, split_trained_model_dir=split_trained_model_dir),
286
+ inputs=[split_model_name],
287
+ outputs=[split_label],
288
+ )
289
+
290
+ split_button.click(
291
+ when_click_split_button,
292
+ inputs=[
293
+ split_audio,
294
+ split_model_name, split_label,
295
+ split_win_size, split_win_step,
296
+ split_n_erode, split_n_dilate,
297
+ ],
298
+ outputs=[split_sub_audio_dataset_state, split_sub_audio_dataset],
299
+ )
300
+
301
+ return locals()
302
+
303
+
304
+ if __name__ == "__main__":
305
+ pass
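
The split tab's pipeline is: Tagger.tag emits one label per analysis window, correct_labels smooths the sequence with erode and dilate, and split_signal_by_labels maps each surviving run of the target label back to a sample range using win_step = signal.shape[0] / len(labels). A small sketch of the last two steps on synthetic labels (no model involved; the label names and window counts are arbitrary):

```python
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# Sketch: label smoothing and splitting from the split tab, on synthetic data (no model).
import numpy as np

from tabs.split_tabs import correct_labels, split_signal_by_labels


if __name__ == "__main__":
    # 10 windows over a 1 second, 8000 Hz signal -> each label covers roughly 800 samples.
    sample_rate = 8000
    signal = np.zeros(shape=(sample_rate,), dtype=np.int16)
    labels = [
        "voice", "voice", "noise", "voice", "voice",
        "noise", "noise", "noise", "voice", "voice",
    ]

    # Erode then dilate: very short runs of "noise" disappear, longer runs survive.
    labels = correct_labels(labels, target_label="noise", n_erode=1, n_dilate=1)

    # Each surviving "noise" run is mapped back to a begin/end sample range.
    for item in split_signal_by_labels(signal, labels, target_label="noise"):
        print(item["begin"], item["end"], item["sub_signal"].shape)
```
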
toolbox/cv2/__init__.py ADDED
@@ -0,0 +1,6 @@
1
+ #!/usr/bin/python3
2
+ # -*- coding: utf-8 -*-
3
+
4
+
5
+ if __name__ == '__main__':
6
+ pass
toolbox/cv2/misc.py ADDED
@@ -0,0 +1,137 @@
1
+ #!/usr/bin/python3
2
+ # -*- coding: utf-8 -*-
3
+ from typing import List, Union
4
+
5
+
6
+ def erode(labels: List[Union[str, int]], erode_label: Union[str, int], n: int = 1):
7
+ """
8
+ Iterate over the labels list and erode each contiguous run of erode_label by n elements.
9
+ """
10
+ result = list()
11
+ in_span = False
12
+ count = 0
13
+ for idx, label in enumerate(labels):
14
+ if label == erode_label:
15
+ if not in_span:
16
+ in_span = True
17
+ count = 0
18
+ if count < n:
19
+ if len(result) == 0:
20
+ result.append(label)
21
+ else:
22
+ result.append(result[-1])
23
+ count += 1
24
+ continue
25
+ else:
26
+ result.append(label)
27
+ continue
28
+ elif label != erode_label:
29
+ if in_span:
30
+ in_span = False
31
+
32
+ for i in range(min(len(result), n)):
33
+ result[-i-1] = label
34
+ result.append(label)
35
+ continue
36
+ else:
37
+ result.append(label)
38
+ continue
39
+
40
+ result.append(label)
41
+ return result
42
+
43
+
44
+ def dilate(labels: List[Union[str, int]], dilate_label: Union[str, int], n: int = 1):
45
+ """
46
+ Iterate over the labels list and dilate each contiguous run of dilate_label by n elements.
47
+ """
48
+ result = list()
49
+ in_span = False
50
+ count = float('inf')
51
+ for idx, label in enumerate(labels):
52
+ if count < n:
53
+ result.append(dilate_label)
54
+ count += 1
55
+ continue
56
+ if label == dilate_label:
57
+ if not in_span:
58
+ in_span = True
59
+
60
+ for i in range(min(len(result), n)):
61
+ result[-i-1] = label
62
+ result.append(label)
63
+ continue
64
+ else:
65
+ result.append(label)
66
+ continue
67
+ else:
68
+ if in_span:
69
+ in_span = False
70
+ result.append(dilate_label)
71
+ count = 1
72
+ continue
73
+ else:
74
+ result.append(label)
75
+ continue
76
+
77
+ return result
78
+
79
+
80
+ def demo1():
81
+ labels = [
82
+ 'voice', 'mute', 'mute', 'voice', 'voice', 'voice', 'voice', 'bell', 'bell', 'bell', 'mute', 'mute', 'mute', 'voice',
83
+ ]
84
+
85
+ result = erode(
86
+ labels=labels,
87
+ erode_label='voice',
88
+ n=1,
89
+
90
+ )
91
+ print(len(labels))
92
+ print(len(result))
93
+ print(result)
94
+ return
95
+
96
+
97
+ def demo2():
98
+ labels = [
99
+ 'voice', 'mute', 'mute', 'voice', 'voice', 'voice', 'voice', 'bell', 'bell', 'bell', 'mute', 'mute', 'mute', 'voice',
100
+ ]
101
+
102
+ result = dilate(
103
+ labels=labels,
104
+ dilate_label='voice',
105
+ n=2,
106
+
107
+ )
108
+ print(len(labels))
109
+ print(len(result))
110
+ print(result)
111
+
112
+ return
113
+
114
+
115
+ def demo3():
116
+ import time
117
+ labels = ['mute', 'mute', 'mute', 'mute', 'mute', 'mute', 'mute', 'mute', 'mute', 'mute', 'mute', 'mute', 'mute', 'mute', 'mute', 'mute', 'mute', 'mute', 'mute', 'voice', 'bell', 'bell', 'bell', 'bell', 'bell', 'mute', 'mute', 'mute', 'mute', 'mute', 'mute', 'mute', 'mute', 'mute', 'mute', 'mute', 'mute', 'mute', 'mute', 'mute', 'bell', 'bell', 'bell', 'bell', 'bell', 'mute', 'mute', 'mute', 'mute', 'mute', 'mute', 'mute', 'mute', 'mute', 'mute', 'mute', 'mute', 'mute', 'mute', 'bell', 'bell', 'bell', 'bell', 'bell', 'bell', 'mute', 'mute', 'mute', 'mute', 'mute', 'mute', 'mute', 'mute']
118
+
119
+ begin = time.time()
120
+ labels = erode(labels, erode_label='music', n=1)
121
+ labels = dilate(labels, dilate_label='music', n=1)
122
+
123
+ labels = dilate(labels, dilate_label='voice', n=2)
124
+ labels = erode(labels, erode_label='voice', n=2)
125
+ labels = erode(labels, erode_label='voice', n=1)
126
+ labels = dilate(labels, dilate_label='voice', n=3)
127
+
128
+ cost = time.time() - begin
129
+ print(cost)
130
+ print(labels)
131
+ return
132
+
133
+
134
+ if __name__ == '__main__':
135
+ # demo1()
136
+ # demo2()
137
+ demo3()