File size: 3,426 Bytes
13a1636
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
import json
from pathlib import Path
from typing import Union

import numpy as np

from style_bert_vits2.constants import DEFAULT_STYLE
from style_bert_vits2.logging import logger


def save_neutral_vector(
    wav_dir: Union[Path, str],
    output_dir: Union[Path, str],
    config_path: Union[Path, str],
    config_output_path: Union[Path, str],
) -> None:
    """Save the mean of all embeddings under ``wav_dir`` as the single style vector.

    Every ``*.npy`` file found recursively under ``wav_dir`` is loaded and
    averaged element-wise. The mean is written, with one leading row axis, to
    ``output_dir / "style_vectors.npy"``. The JSON config read from
    ``config_path`` is then written to ``config_output_path`` with
    ``data.num_styles`` set to 1 and ``data.style2id`` set to
    ``{DEFAULT_STYLE: 0}``.

    Args:
        wav_dir: Directory searched recursively for ``*.npy`` embedding files.
        output_dir: Directory that receives ``style_vectors.npy``; created if
            it does not exist.
        config_path: Path of the JSON config to read.
        config_output_path: Path the updated JSON config is written to.

    Raises:
        ValueError: If no ``*.npy`` file exists under ``wav_dir``, or if the
            loaded arrays do not all share the same shape.
    """
    wav_dir = Path(wav_dir)
    output_dir = Path(output_dir)

    # Sorted so the averaging order (and therefore the floating-point result)
    # does not depend on the filesystem's directory listing order.
    npy_files = sorted(wav_dir.rglob("*.npy"))
    if not npy_files:
        # Fail with an actionable message instead of NumPy's
        # "need at least one array to concatenate".
        raise ValueError(f"No .npy style embedding files found under {wav_dir}")

    # This function can be called directly, so do not rely on the caller
    # having created the output directory.
    output_dir.mkdir(parents=True, exist_ok=True)

    x = np.stack([np.load(file) for file in npy_files], axis=0)  # (N, 256)
    mean = np.mean(x, axis=0)  # (256,)
    only_mean = np.stack([mean])  # (1, 256)
    np.save(output_dir / "style_vectors.npy", only_mean)
    logger.info(f"Saved mean style vector to {output_dir}")

    with open(config_path, encoding="utf-8") as f:
        json_dict = json.load(f)
    json_dict["data"]["num_styles"] = 1
    json_dict["data"]["style2id"] = {DEFAULT_STYLE: 0}
    with open(config_output_path, "w", encoding="utf-8") as f:
        json.dump(json_dict, f, indent=2, ensure_ascii=False)
    logger.info(f"Saved style config to {config_output_path}")


def _mean_style_vector(npy_files) -> np.ndarray:
    """Return the element-wise mean of the arrays stored in ``npy_files``.

    ``npy_files`` must be a non-empty sequence of paths to ``.npy`` files whose
    arrays all share one shape.
    """
    stacked = np.stack([np.load(file) for file in npy_files], axis=0)  # (N, 256)
    return np.mean(stacked, axis=0)  # (256,)


def save_styles_by_dirs(
    wav_dir: Union[Path, str],
    output_dir: Union[Path, str],
    config_path: Union[Path, str],
    config_output_path: Union[Path, str],
) -> None:
    """Save one style vector per subdirectory of ``wav_dir`` plus a global mean.

    Row 0 of the saved array is the mean over every ``*.npy`` file found
    recursively under ``wav_dir`` and is named ``DEFAULT_STYLE``. Each direct
    subdirectory (in sorted order) that contains at least one ``*.npy`` file
    contributes one further row, named after the subdirectory; subdirectories
    without such files are skipped. The array is written to
    ``output_dir / "style_vectors.npy"`` and the JSON config read from
    ``config_path`` is written to ``config_output_path`` with
    ``data.num_styles`` and ``data.style2id`` updated.

    If ``wav_dir`` has fewer than two subdirectories, this delegates to
    ``save_neutral_vector`` and only the neutral vector is produced.

    Args:
        wav_dir: Directory whose direct subdirectories define the styles.
        output_dir: Directory that receives ``style_vectors.npy``; created if
            it does not exist.
        config_path: Path of the JSON config to read.
        config_output_path: Path the updated JSON config is written to.

    Raises:
        ValueError: If no ``*.npy`` file exists under ``wav_dir``, or if the
            loaded arrays do not all share the same shape.
    """
    wav_dir = Path(wav_dir)
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    config_path = Path(config_path)
    config_output_path = Path(config_output_path)

    subdirs = sorted(d for d in wav_dir.iterdir() if d.is_dir())
    if len(subdirs) < 2:
        logger.info(
            f"At least 2 subdirectories are required for generating style vectors with respect to them, found {len(subdirs)}."
        )
        logger.info("Generating only neutral style vector instead.")
        save_neutral_vector(wav_dir, output_dir, config_path, config_output_path)
        return

    # Sorted so the averaging order (and therefore the floating-point result)
    # does not depend on the filesystem's directory listing order.
    all_files = sorted(wav_dir.rglob("*.npy"))
    if not all_files:
        # Fail with an actionable message instead of NumPy's
        # "need at least one array to concatenate".
        raise ValueError(f"No .npy style embedding files found under {wav_dir}")

    # The mean over everything comes first and serves as the neutral style.
    names = [DEFAULT_STYLE]
    style_vectors = [_mean_style_vector(all_files)]

    for style_dir in subdirs:
        npy_files = sorted(style_dir.rglob("*.npy"))
        if not npy_files:
            # A subdirectory without embeddings cannot define a style.
            continue
        style_vectors.append(_mean_style_vector(npy_files))
        names.append(style_dir.name)

    # Stack them to make (num_styles, 256)
    style_vectors_npy = np.stack(style_vectors, axis=0)
    np.save(output_dir / "style_vectors.npy", style_vectors_npy)
    logger.info(f"Saved style vectors to {output_dir / 'style_vectors.npy'}")

    # Save style2id config to json
    # NOTE(review): a subdirectory whose name equals DEFAULT_STYLE replaces the
    # index-0 entry in style2id while num_styles still counts both rows.
    style2id = {name: i for i, name in enumerate(names)}
    with open(config_path, encoding="utf-8") as f:
        json_dict = json.load(f)
    json_dict["data"]["num_styles"] = len(names)
    json_dict["data"]["style2id"] = style2id
    with open(config_output_path, "w", encoding="utf-8") as f:
        json.dump(json_dict, f, indent=2, ensure_ascii=False)
    logger.info(f"Saved style config to {config_output_path}")