File size: 4,772 Bytes
9e538da
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
# Save musdb-XL-train dataset from numpy
import os
import glob
import argparse
import csv

import numpy as np
import librosa
import soundfile as sf
import tqdm


# Sample rate (Hz) used both when loading stems and when writing output files.
_SAMPLE_RATE = 44100
# Length, in seconds, of each randomly mixed training segment.
_SEGMENT_SECONDS = 4.0
# Stem names, in the same order as the per-stem column groups of the
# ozone_train_random_*.csv files (vocals, bass, drums, other).
_SOURCES = ("vocals", "bass", "drums", "other")


def _read_rows(path_csv):
    """Return the data rows of the CSV at *path_csv*, without its header row."""
    # newline="" lets the csv module do its own newline handling, as its
    # documentation recommends.
    with open(path_csv, "r", encoding="utf-8", newline="") as f:
        rdr = csv.reader(f)
        next(rdr, None)  # drop the header; tolerate a completely empty file
        # Blank lines come back as empty lists; skip them so that later
        # indexing such as row[0] cannot fail on them.
        return [row for row in rdr if row]


def _load_random_stem(root, row, k, source):
    """Load the segment of stem *source* described by *row* (k-th stem).

    Each stem owns four consecutive columns of *row*, starting at index 3:
    '<stem>_name', '<stem>_start_sec', '<stem>_gain', '<stem>_channelswap'.
    """
    base = 3 + k * 4
    audio, _ = librosa.load(
        f"{root}/train/{row[base]}/{source}.wav",
        sr=_SAMPLE_RATE,
        mono=False,
        offset=float(row[base + 1]),  # 'inst_start_sec'
        duration=_SEGMENT_SECONDS,
    )
    audio = audio * float(row[base + 2])  # 'inst_gain'
    if row[base + 3].lower() == "true":  # 'inst_channelswap'
        # Reverses axis 0; assumes the stem was loaded as (channels, samples),
        # so this swaps the channel order.
        audio = np.flip(audio, axis=0)
    return audio


def _save_limited_mixture(stems, path_ratio, path_wav):
    """Sum *stems*, scale by the stored ratio array and write a 16-bit WAV.

    The array loaded from *path_ratio* is multiplied element-wise with the
    mixture, so it must be broadcastable against the mixture's shape.
    """
    mixture = np.stack(stems, axis=0).sum(0)
    ratio = np.load(path_ratio)
    output = mixture * ratio
    # Transposed before writing: presumably (channels, samples) ->
    # (frames, channels), the layout soundfile expects.
    sf.write(path_wav, output.T, _SAMPLE_RATE, subtype="PCM_16")


def main():
    """Render the musdb-XL-train wave files from the downloaded ratio arrays.

    Command-line arguments:
        --root: directory that contains ``train/<song>/<stem>.wav``.
        --musdb_XL_train_npy_root: directory holding ``ozone_train_fixed.csv``,
            the ``ozone_train_random_*.csv`` files and the ``np_ratio`` tree.
        --output: directory that receives ``ozone_train_fixed/*.wav`` and
            ``ozone_train_random/*.wav``.

    For every CSV row the four stems are loaded and summed into a mixture,
    the mixture is multiplied by the matching ``.npy`` ratio array, and the
    result is written as a 16-bit PCM wave file.
    """
    parser = argparse.ArgumentParser(
        description="Save musdb-XL-train wave files from the downloaded sample-wise gain parameters"
    )
    parser.add_argument(
        "--root",
        type=str,
        default="/path/to/musdb18hq",
        help="Root directory",
    )
    parser.add_argument(
        "--musdb_XL_train_npy_root",
        type=str,
        default="/path/to/musdb-XL-train",
        help="Directory of numpy arrays of musdb-XL-train's sample-wise ratio ",
    )
    parser.add_argument(
        "--output",
        type=str,
        default="/path/to/musdb-XL-train",
        help="Directory to save musdb-XL-train wave data",
    )

    args = parser.parse_args()
    npy_root = args.musdb_XL_train_npy_root

    path_csv_fixed = f"{npy_root}/ozone_train_fixed.csv"
    list_path_csv_random = sorted(glob.glob(f"{npy_root}/ozone_train_random_*.csv"))

    # read ozone_train_fixed list (columns: song_name, max_threshold, max_character)
    os.makedirs(f"{args.output}/ozone_train_fixed", exist_ok=True)
    fixed_list = _read_rows(path_csv_fixed)

    # save wave files of ozone_train_fixed,
    # which is the limiter-applied version of 100 songs from musdb-HQ train set
    for fixed_song in tqdm.tqdm(fixed_list):
        song_name = fixed_song[0]
        stems = [
            librosa.load(
                f"{args.root}/train/{song_name}/{source}.wav",
                sr=_SAMPLE_RATE,
                mono=False,
            )[0]
            for source in _SOURCES
        ]
        _save_limited_mixture(
            stems,
            f"{npy_root}/np_ratio/ozone_train_fixed/{song_name}.npy",
            f"{args.output}/ozone_train_fixed/{song_name}.wav",
        )

    # read ozone_train_random list; row layout:
    # song_name, max_threshold, max_character, then for each of
    # vocals / bass / drums / other: name, start_sec, gain, channelswap
    os.makedirs(f"{args.output}/ozone_train_random", exist_ok=True)
    random_list = []
    for path_csv_random in list_path_csv_random:
        random_list.extend(_read_rows(path_csv_random))

    # save wave files of ozone_train_random,
    # which is the limiter-applied version of 4-sec 300,000 segments randomly created from musdb-HQ train subset
    for random_song in tqdm.tqdm(random_list):
        stems = [
            _load_random_stem(args.root, random_song, k, source)
            for k, source in enumerate(_SOURCES)
        ]
        _save_limited_mixture(
            stems,
            f"{npy_root}/np_ratio/ozone_train_random/{random_song[0]}.npy",
            f"{args.output}/ozone_train_random/{random_song[0]}.wav",
        )


# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()