Spaces:
Running
Running
File size: 4,772 Bytes
9e538da |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 |
# Save the musdb-XL-train dataset as wave files, rebuilt from the downloaded numpy arrays of sample-wise gain ratios
import os
import glob
import argparse
import csv
import numpy as np
import librosa
import soundfile as sf
import tqdm
# Sampling rate used both when loading the stems and when writing the results.
_SAMPLE_RATE = 44100
# Duration (in seconds) loaded per stem for every ozone_train_random segment.
_SEGMENT_SECONDS = 4.0
# Stem file names (without ".wav"), in the order the random CSV lists them.
_SOURCES = ("vocals", "bass", "drums", "other")
# ozone_train_random CSV rows are laid out as:
#   song_name, max_threshold, max_character,
#   then for each stem: <stem>_name, <stem>_start_sec, <stem>_gain, <stem>_channelswap
_STEM_FIRST_COLUMN = 3
_STEM_COLUMN_COUNT = 4


def _read_rows(csv_path: str) -> list:
    """Return the data rows of ``csv_path``, dropping the header and blank rows."""
    # newline="" is what the csv module asks for so that newlines embedded in
    # quoted fields are parsed correctly.
    with open(csv_path, "r", encoding="utf-8", newline="") as f:
        reader = csv.reader(f)
        next(reader, None)  # first row is the column header
        # csv.reader yields [] for blank lines; indexing such a row later
        # would raise IndexError, so they are skipped here.
        return [row for row in reader if row]


def _load_stem(wav_path: str, offset: float = 0.0, duration=None):
    """Load one stem at ``_SAMPLE_RATE`` without downmixing to mono.

    ``offset`` and ``duration`` are in seconds and are forwarded to
    ``librosa.load``; the defaults load the whole file.
    """
    audio, _ = librosa.load(
        wav_path,
        sr=_SAMPLE_RATE,
        mono=False,
        offset=offset,
        duration=duration,
    )
    return audio


def _limit_and_save(stems, ratio_path: str, wav_path: str) -> None:
    """Sum ``stems`` into a mixture, scale it by the stored ratio, and write it.

    The ratio array loaded from ``ratio_path`` is multiplied element-wise with
    the mixture, and the result is written to ``wav_path`` as 16-bit PCM.
    """
    mixture = np.stack(stems, axis=0).sum(0)
    # NOTE(review): the ratio is assumed to broadcast against the mixture
    # (same shape as the summed stems) - confirm against the dataset files.
    ratio = np.load(ratio_path)
    limited = mixture * ratio
    # Transposed before writing, as in the original script; presumably because
    # librosa returns channel-first audio while soundfile wants frames first.
    sf.write(wav_path, limited.T, _SAMPLE_RATE, subtype="PCM_16")


def main():
    """Render the musdb-XL-train wave files from the sample-wise ratio arrays.

    Command-line arguments:
        --root: musdb18hq root; stems are read from ``<root>/train/<song>/``.
        --musdb_XL_train_npy_root: directory holding ``ozone_train_fixed.csv``,
            the ``ozone_train_random_*.csv`` files and the ``np_ratio`` arrays.
        --output: directory that receives the ``ozone_train_fixed`` and
            ``ozone_train_random`` wave files.

    Raises:
        FileNotFoundError: if ``ozone_train_fixed.csv``, a stem, or a ratio
            array is missing (propagated from the underlying I/O calls).
    """
    parser = argparse.ArgumentParser(
        description="Save musdb-XL-train wave files from the downloaded sample-wise gain parameters"
    )
    parser.add_argument(
        "--root",
        type=str,
        default="/path/to/musdb18hq",
        help="Root directory",
    )
    parser.add_argument(
        "--musdb_XL_train_npy_root",
        type=str,
        default="/path/to/musdb-XL-train",
        help="Directory of numpy arrays of musdb-XL-train's sample-wise ratio ",
    )
    parser.add_argument(
        "--output",
        type=str,
        default="/path/to/musdb-XL-train",
        help="Directory to save musdb-XL-train wave data",
    )
    args = parser.parse_args()

    npy_root = args.musdb_XL_train_npy_root
    random_csv_paths = sorted(glob.glob(f"{npy_root}/ozone_train_random_*.csv"))

    # ozone_train_fixed: per the original notes, the limiter-applied version
    # of 100 songs from the musdb-HQ train set. Each row starts with the song
    # name; the stems are loaded in full and mixed as-is.
    os.makedirs(f"{args.output}/ozone_train_fixed", exist_ok=True)
    fixed_rows = _read_rows(f"{npy_root}/ozone_train_fixed.csv")
    for row in tqdm.tqdm(fixed_rows):
        song_name = row[0]
        stems = [
            _load_stem(f"{args.root}/train/{song_name}/{source}.wav")
            for source in _SOURCES
        ]
        _limit_and_save(
            stems,
            f"{npy_root}/np_ratio/ozone_train_fixed/{song_name}.npy",
            f"{args.output}/ozone_train_fixed/{song_name}.wav",
        )

    # ozone_train_random: per the original notes, the limiter-applied version
    # of 4-sec segments randomly created from the musdb-HQ train subset. Every
    # stem may come from a different song, with its own start offset, gain and
    # optional channel swap.
    os.makedirs(f"{args.output}/ozone_train_random", exist_ok=True)
    random_rows = []
    for csv_path in random_csv_paths:
        random_rows.extend(_read_rows(csv_path))

    for row in tqdm.tqdm(random_rows):
        segment_name = row[0]
        stems = []
        for k, source in enumerate(_SOURCES):
            first = _STEM_FIRST_COLUMN + k * _STEM_COLUMN_COUNT
            stem_song, start_sec, gain, channelswap = row[
                first : first + _STEM_COLUMN_COUNT
            ]
            audio = _load_stem(
                f"{args.root}/train/{stem_song}/{source}.wav",
                offset=float(start_sec),
                duration=_SEGMENT_SECONDS,
            )
            audio = audio * float(gain)
            if channelswap.lower() == "true":
                # Reverse the first axis (the channel axis of the loaded stem).
                audio = np.flip(audio, axis=0)
            stems.append(audio)
        _limit_and_save(
            stems,
            f"{npy_root}/np_ratio/ozone_train_random/{segment_name}.npy",
            f"{args.output}/ozone_train_random/{segment_name}.wav",
        )
# Script entry point: run the conversion only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
|