Spaces:
Running
Running
import os | |
import glob | |
import argparse | |
import csv | |
import numpy as np | |
import librosa | |
import soundfile as sf | |
import tqdm | |
# Stem names, in the column order used by the ozone_train_random_*.csv files.
_SOURCES = ("vocals", "bass", "drums", "other")
# Every audio file is loaded (and resampled if necessary) at this rate.
_SAMPLE_RATE = 44100
# Length in seconds of each randomly created training segment.
_SEGMENT_SECONDS = 4.0


def _read_rows(path_csv):
    """Return the data rows of a CSV file, skipping the header and blank rows."""
    # newline="" is what the csv module asks for so that quoted fields
    # containing line breaks are parsed correctly.
    with open(path_csv, "r", encoding="utf-8", newline="") as f:
        rows = csv.reader(f)
        next(rows, None)  # first row holds the column names
        # csv.reader yields [] for blank lines; indexing those would fail.
        return [row for row in rows if row]


def _load_audio(path, **kwargs):
    """Load an audio file at the project sample rate without downmixing."""
    audio, _ = librosa.load(path, sr=_SAMPLE_RATE, mono=False, **kwargs)
    return audio


def _save_gain_ratio(path_npy, limited, mixture):
    """Save the sample-wise ratio ``limited / mixture`` as a float16 array."""
    # Replace exact zeros with the float32 epsilon to avoid 'divided by zero'.
    # np.where builds a new array, so the caller's mixture is left untouched.
    safe_mixture = np.where(mixture == 0.0, np.finfo(np.float32).eps, mixture)
    ratio = limited / safe_mixture
    # 16bit is enough...
    # NOTE(review): ratios above the float16 maximum (65504) are stored as inf.
    np.save(path_npy, ratio.astype(np.float16))


def main():
    """Save sample-wise gain ratios between limited and original mixtures.

    For every item listed in the musdb-XL-train CSV files, the four stems are
    loaded from the musdb18hq train set and summed into a mixture, the
    limiter-applied counterpart is loaded from musdb-XL-train, and the
    element-wise ratio ``limited / mixture`` is written as a float16 ``.npy``
    file under ``--output``.

    Command-line arguments:
        --root: root directory of musdb18hq (stems are read from ``train/``).
        --musdb_XL_train_root: directory of the musdb-XL-train dataset.
        --output: directory that receives the gain-ratio ``.npy`` files.
    """
    parser = argparse.ArgumentParser(
        description="Save sample-wise gain parameters for dataset distribution"
    )
    parser.add_argument(
        "--root",
        type=str,
        default="/path/to/musdb18hq",
        help="Root directory",
    )
    parser.add_argument(
        "--musdb_XL_train_root",
        type=str,
        default="/path/to/musdb-XL-train",
        help="Directory of musdb-XL-train dataset",
    )
    parser.add_argument(
        "--output",
        type=str,
        default="/path/to/musdb-XL-train/np_ratio",
        help="Directory to save sample-wise gain ratio",
    )
    args = parser.parse_args()

    xl_root = args.musdb_XL_train_root
    train_dir = os.path.join(args.root, "train")
    list_path_csv_random = sorted(
        glob.glob(os.path.join(xl_root, "ozone_train_random_*.csv"))
    )

    # ozone_train_fixed: the limiter-applied version of 100 songs from the
    # musdb-HQ train set. One numpy file of gain ratios is saved per song.
    fixed_out = os.path.join(args.output, "ozone_train_fixed")
    os.makedirs(fixed_out, exist_ok=True)
    # columns: song_name, max_threshold, max_character
    fixed_list = _read_rows(os.path.join(xl_root, "ozone_train_fixed.csv"))
    for fixed_song in tqdm.tqdm(fixed_list):
        song_name = fixed_song[0]
        audio_sources = [
            _load_audio(os.path.join(train_dir, song_name, f"{source}.wav"))
            for source in _SOURCES
        ]
        mixture = np.stack(audio_sources, axis=0).sum(0)
        ozone_mixture = _load_audio(
            os.path.join(xl_root, "ozone_train_fixed", f"{song_name}.wav")
        )
        _save_gain_ratio(
            os.path.join(fixed_out, f"{song_name}.npy"), ozone_mixture, mixture
        )

    # ozone_train_random: the limiter-applied version of 4-sec 300,000
    # segments randomly created from the musdb-HQ train subset.
    # columns: song_name, max_threshold, max_character, followed by
    # (name, start_sec, gain, channelswap) for each of vocals, bass, drums
    # and other.
    random_out = os.path.join(args.output, "ozone_train_random")
    os.makedirs(random_out, exist_ok=True)
    random_list = []
    for path_csv_random in list_path_csv_random:
        random_list.extend(_read_rows(path_csv_random))

    for random_song in tqdm.tqdm(random_list):
        segment_name = random_song[0]
        audio_sources = []
        for k, source in enumerate(_SOURCES):
            # Each stem owns four consecutive columns starting at index 3.
            name, start_sec, gain, channelswap = random_song[3 + k * 4 : 7 + k * 4]
            audio = _load_audio(
                os.path.join(train_dir, name, f"{source}.wav"),
                offset=float(start_sec),
                duration=_SEGMENT_SECONDS,
            )
            audio = audio * float(gain)
            if channelswap.lower() == "true":
                # Reverse the first axis, i.e. swap the channels.
                audio = np.flip(audio, axis=0)
            audio_sources.append(audio)
        mixture = np.stack(audio_sources, axis=0).sum(0)
        ozone_mixture = _load_audio(
            os.path.join(xl_root, "ozone_train_random", f"{segment_name}.wav")
        )
        _save_gain_ratio(
            os.path.join(random_out, f"{segment_name}.npy"), ozone_mixture, mixture
        )
if __name__ == "__main__":
    # Run the conversion only when executed as a script, not when imported.
    main()