Spaces:

jeonchangbin49
/

De-limiter

Running

App Files Files Community

De-limiter / prepro /save_musdb_XL_train_numpy.py

jeonchangbin49

tenth commit

9e538da over 1 year ago

raw

history blame

5.01 kB

	import os
	import glob
	import argparse
	import csv

	import numpy as np
	import librosa
	import soundfile as sf
	import tqdm


	_SAMPLE_RATE = 44100  # sampling rate passed to every librosa.load call
	_SEGMENT_SECONDS = 4.0  # duration loaded for each ozone_train_random stem


	def _read_data_rows(path_csv):
	    """Return the rows of the CSV at *path_csv* without its header row.

	    Blank lines are dropped: csv.reader yields ``[]`` for them, and the
	    callers index ``row[0]``, which would otherwise raise IndexError.
	    """
	    # newline="" is what the csv module documentation asks for, so the reader
	    # (not the text layer) decides how line endings are interpreted.
	    with open(path_csv, "r", encoding="utf-8", newline="") as f:
	        rdr = csv.reader(f)
	        next(rdr, None)  # first row is the column-name header
	        return [row for row in rdr if row]


	def _load_random_stem(root, row, k, source):
	    """Load stem number *k* of an ozone_train_random row, gain/swap applied.

	    After the three leading columns (song_name, max_threshold, max_character)
	    each source owns four consecutive columns:
	    ``<source>_name, <source>_start_sec, <source>_gain, <source>_channelswap``.
	    """
	    base = 3 + k * 4
	    audio, _ = librosa.load(
	        f"{root}/train/{row[base]}/{source}.wav",
	        sr=_SAMPLE_RATE,
	        mono=False,
	        offset=float(row[base + 1]),  # '<source>_start_sec'
	        duration=_SEGMENT_SECONDS,
	    )
	    audio = audio * float(row[base + 2])  # '<source>_gain'
	    if row[base + 3].lower() == "true":  # '<source>_channelswap'
	        # assumes axis 0 is the channel axis (multi-channel file loaded with
	        # mono=False) — TODO confirm; a mono file would be time-reversed here.
	        audio = np.flip(audio, axis=0)
	    return audio


	def _save_gain_ratio(mixture, path_ozone_wav, path_npy):
	    """Save ``ozone_mixture / mixture`` as a float16 array at *path_npy*.

	    *mixture* is modified in place: exact zeros are replaced by float32 eps
	    so the division never divides by zero.
	    """
	    ozone_mixture, _ = librosa.load(path_ozone_wav, sr=_SAMPLE_RATE, mono=False)
	    mixture[mixture == 0.0] = np.finfo(np.float32).eps  # to avoid 'divided by zero'
	    # NOTE(review): the two arrays must have broadcast-compatible shapes; a
	    # length mismatch between the files raises ValueError here.
	    ratio = ozone_mixture / mixture
	    # NOTE(review): where the denominator was replaced by eps (~1.19e-7) and the
	    # limited signal is non-zero, the ratio can exceed the float16 maximum
	    # (65504) and is then stored as inf — confirm the consumer tolerates this.
	    np.save(path_npy, ratio.astype(np.float16))  # 16bit is enough...


	def main():
	    """Save sample-wise gain ratios between musdb-XL-train and musdb18hq mixtures.

	    For every entry listed in ``ozone_train_fixed.csv`` and in each
	    ``ozone_train_random_*.csv`` under ``--musdb_XL_train_root``, the four
	    stems are loaded from ``--root``, summed into a mixture, and the
	    corresponding limiter-applied wav is divided by that mixture. The result
	    is written as ``<song_name>.npy`` below ``--output``.

	    Command-line options: ``--root``, ``--musdb_XL_train_root``, ``--output``.
	    """
	    parser = argparse.ArgumentParser(
	        description="Save sample-wise gain parameters for dataset distribution"
	    )
	    parser.add_argument(
	        "--root",
	        type=str,
	        default="/path/to/musdb18hq",
	        help="Root directory",
	    )
	    parser.add_argument(
	        "--musdb_XL_train_root",
	        type=str,
	        default="/path/to/musdb-XL-train",
	        help="Directory of musdb-XL-train dataset",
	    )
	    parser.add_argument(
	        "--output",
	        type=str,
	        default="/path/to/musdb-XL-train/np_ratio",
	        help="Directory to save sample-wise gain ratio",
	    )

	    args = parser.parse_args()

	    # Stem order matters: it fixes the column offsets used for the random set.
	    sources = ["vocals", "bass", "drums", "other"]

	    path_csv_fixed = f"{args.musdb_XL_train_root}/ozone_train_fixed.csv"
	    list_path_csv_random = sorted(
	        glob.glob(f"{args.musdb_XL_train_root}/ozone_train_random_*.csv")
	    )

	    # read ozone_train_fixed list (columns: song_name, max_threshold, max_character)
	    os.makedirs(f"{args.output}/ozone_train_fixed", exist_ok=True)
	    fixed_list = _read_data_rows(path_csv_fixed)

	    # save numpy files of ozone_train_fixed,
	    # which is the limiter-applied version of 100 songs from musdb-HQ train set
	    # each numpy file contains sample-wise gain ratio parameters
	    for fixed_song in tqdm.tqdm(fixed_list):
	        song_name = fixed_song[0]
	        stems = np.stack(
	            [
	                librosa.load(
	                    f"{args.root}/train/{song_name}/{source}.wav",
	                    sr=_SAMPLE_RATE,
	                    mono=False,
	                )[0]
	                for source in sources
	            ],
	            axis=0,
	        )
	        _save_gain_ratio(
	            stems.sum(0),
	            f"{args.musdb_XL_train_root}/ozone_train_fixed/{song_name}.wav",
	            f"{args.output}/ozone_train_fixed/{song_name}.npy",
	        )

	    # read ozone_train_random list (all ozone_train_random_*.csv, in sorted order)
	    os.makedirs(f"{args.output}/ozone_train_random", exist_ok=True)
	    random_list = []
	    for path_csv_random in list_path_csv_random:
	        random_list.extend(_read_data_rows(path_csv_random))

	    # save numpy files of ozone_train_random,
	    # which is the limiter-applied version of 4-sec 300,000 segments randomly
	    # created from musdb-HQ train subset
	    for random_song in tqdm.tqdm(random_list):
	        segment_name = random_song[0]
	        stems = np.stack(
	            [
	                _load_random_stem(args.root, random_song, k, source)
	                for k, source in enumerate(sources)
	            ],
	            axis=0,
	        )
	        _save_gain_ratio(
	            stems.sum(0),
	            f"{args.musdb_XL_train_root}/ozone_train_random/{segment_name}.wav",
	            f"{args.output}/ozone_train_random/{segment_name}.npy",
	        )


	# Run the conversion only when this file is executed as a script, not on import.
	if __name__ == "__main__":
	    main()