Spaces:

jeonchangbin49
/

De-limiter

Running

App Files Files Community

De-limiter / prepro /save_musdb_XL_train_numpy.py

jeonchangbin49

tenth commit

9e538da over 1 year ago

raw

history blame contribute delete

5.01 kB

	import os
	import glob
	import argparse
	import csv

	import numpy as np
	import librosa
	import soundfile as sf
	import tqdm


	def _read_rows(csv_path):
	    """Return the data rows of a CSV file, skipping its header line.

	    Blank lines, which ``csv.reader`` yields as empty lists, are dropped so
	    that every returned row can be indexed safely by the caller.
	    """
	    # newline="" is what the csv module asks for so that quoted fields
	    # containing line breaks are parsed correctly.
	    with open(csv_path, "r", encoding="utf-8", newline="") as f:
	        reader = csv.reader(f)
	        next(reader, None)  # the first line holds the column names
	        return [row for row in reader if row]


	def _save_gain_ratio(mixture, limited_path, out_path):
	    """Save the sample-wise ratio ``limited / mixture`` as a float16 .npy file.

	    Args:
	        mixture: array holding the sum of the original stems.
	        limited_path: path of the limiter-applied wav file of the same audio.
	        out_path: destination path of the ``.npy`` file.
	    """
	    limited, _ = librosa.load(limited_path, sr=44100, mono=False)

	    # Replace exact zeros to avoid 'divided by zero'; the caller's array is
	    # left untouched.
	    eps = np.finfo(np.float32).eps
	    safe_mixture = np.where(mixture == 0.0, eps, mixture)
	    ratio = limited / safe_mixture

	    # 16bit is enough...
	    # NOTE: ratios beyond the float16 range (max 65504) become inf here.
	    np.save(out_path, ratio.astype(np.float16))


	def main():
	    """Compute and save sample-wise gain ratios for musdb-XL-train.

	    For every entry listed in ``ozone_train_fixed.csv`` and in each
	    ``ozone_train_random_*.csv`` under ``--musdb_XL_train_root``, the stems
	    from ``--root`` are summed into a mixture, the limiter-applied wav of
	    that entry is divided by the mixture sample by sample, and the ratio is
	    written as a float16 ``.npy`` file under ``--output``.
	    """
	    parser = argparse.ArgumentParser(
	        description="Save sample-wise gain parameters for dataset distribution"
	    )
	    parser.add_argument(
	        "--root",
	        type=str,
	        default="/path/to/musdb18hq",
	        help="Root directory",
	    )
	    parser.add_argument(
	        "--musdb_XL_train_root",
	        type=str,
	        default="/path/to/musdb-XL-train",
	        help="Directory of musdb-XL-train dataset",
	    )
	    parser.add_argument(
	        "--output",
	        type=str,
	        default="/path/to/musdb-XL-train/np_ratio",
	        help="Directory to save sample-wise gain ratio",
	    )

	    args = parser.parse_args()

	    sources = ["vocals", "bass", "drums", "other"]

	    path_csv_fixed = f"{args.musdb_XL_train_root}/ozone_train_fixed.csv"
	    list_path_csv_random = sorted(
	        glob.glob(f"{args.musdb_XL_train_root}/ozone_train_random_*.csv")
	    )

	    # read ozone_train_fixed list
	    # columns: song_name, max_threshold, max_character
	    os.makedirs(f"{args.output}/ozone_train_fixed", exist_ok=True)
	    fixed_list = _read_rows(path_csv_fixed)

	    # save numpy files of ozone_train_fixed,
	    # which is the limiter-applied version of 100 songs from musdb-HQ train set
	    # each numpy file contains sample-wise gain ratio parameters
	    for fixed_song in tqdm.tqdm(fixed_list):
	        song_name = fixed_song[0]
	        audio_sources = []
	        for source in sources:
	            audio, _ = librosa.load(
	                f"{args.root}/train/{song_name}/{source}.wav", sr=44100, mono=False
	            )
	            audio_sources.append(audio)
	        mixture = np.stack(audio_sources, axis=0).sum(0)

	        _save_gain_ratio(
	            mixture,
	            f"{args.musdb_XL_train_root}/ozone_train_fixed/{song_name}.wav",
	            f"{args.output}/ozone_train_fixed/{song_name}.npy",
	        )

	    # read ozone_train_random list
	    # columns: song_name, max_threshold, max_character, followed by
	    # <stem>_name, <stem>_start_sec, <stem>_gain, <stem>_channelswap
	    # for each of vocals, bass, drums, other (in that order)
	    os.makedirs(f"{args.output}/ozone_train_random", exist_ok=True)
	    random_list = []
	    for path_csv_random in list_path_csv_random:
	        random_list.extend(_read_rows(path_csv_random))

	    # save numpy files of ozone_train_random,
	    # which is the limiter-applied version of 4-sec 300,000 segments randomly created from musdb-HQ train subset
	    for random_song in tqdm.tqdm(random_list):
	        segment_name = random_song[0]
	        audio_sources = []
	        for k, source in enumerate(sources):
	            # the four per-stem columns start at index 3 and repeat every 4
	            stem_song, start_sec, gain, channel_swap = random_song[
	                3 + k * 4 : 7 + k * 4
	            ]
	            audio, _ = librosa.load(
	                f"{args.root}/train/{stem_song}/{source}.wav",
	                sr=44100,
	                mono=False,
	                offset=float(start_sec),  # 'inst_start_sec'
	                duration=4.0,
	            )
	            audio = audio * float(gain)  # 'inst_gain'
	            if channel_swap.lower() == "true":  # 'inst_channelswap'
	                audio = np.flip(audio, axis=0)

	            audio_sources.append(audio)
	        mixture = np.stack(audio_sources, axis=0).sum(0)

	        _save_gain_ratio(
	            mixture,
	            f"{args.musdb_XL_train_root}/ozone_train_random/{segment_name}.wav",
	            f"{args.output}/ozone_train_random/{segment_name}.npy",
	        )


	# Script entry point: run the conversion only when this file is executed
	# directly, not when it is imported as a module.
	if __name__ == "__main__":
	    main()