Spaces:
Running
Running
File size: 3,915 Bytes
a00b67a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 |
# Save loudness-normalized (-14 LUFS) musdb-XL audio files for evaluating the de-limiter.
import os
import argparse
import tqdm
import musdb
import soundfile as sf
import librosa
import pyloudnorm as pyln
from utils import db2linear, str2bool
# `monitor_interval` is a class attribute of `tqdm.tqdm`. Because this file does
# `import tqdm`, the bare name `tqdm` is the module, so the setting has to be
# applied to the class to have any effect. 0 disables tqdm's monitor thread.
tqdm.tqdm.monitor_interval = 0
def main():
    """Export loudness-normalized musdb test tracks as WAV files.

    Command-line options are parsed with ``parse_known_args`` (unknown options
    are ignored). For every track of the musdb "test" subset found under
    ``--data_root`` the integrated loudness of the mixture is measured, the
    gain in dB needed to reach ``--loudnorm_input_lufs`` is derived from it,
    and the gained ``--target`` signal is written to
    ``<output_directory>/<track name>/<target>.wav`` with a sample rate of
    44100. When ``--save_16k_mono`` is true, a mono copy resampled to 16000 Hz
    is additionally written under ``<output_directory>_16k_mono``.

    How the gain is applied depends on the last component of ``--data_root``:

    * ``musdb18hq``: the mixture (or the requested stem) is scaled by the gain.
    * ``musdb_XL``: the mixture is scaled by the gain; for a stem target, a
      sample-wise ratio between the scaled mixture and the ``--data_root_hq``
      mixture is applied to the ``--data_root_hq`` stem.
    * anything else: a warning is printed and the audio is written unscaled.
    """
    sample_rate = 44100
    mono_sample_rate = 16000

    parser = argparse.ArgumentParser(description="model test.py")
    parser.add_argument(
        "--target",
        type=str,
        default="mixture",
        help="target source. mixture, vocals, drums, bass, other",
    )
    parser.add_argument("--data_root", type=str, default="/path/to/musdb_XL")
    parser.add_argument(
        "--data_root_hq",
        type=str,
        default="/path/to/musdb18hq",
        help="this is used when saving loud-norm stem of musdb-XL",
    )
    parser.add_argument(
        "--output_directory",
        type=str,
        default="/path/to/musdb_XL_loudnorm",
    )
    parser.add_argument(
        "--loudnorm_input_lufs",
        type=float,
        default=-14.0,
        help="If you want to use loudnorm, input target lufs",
    )
    parser.add_argument(
        "--save_16k_mono",
        type=str2bool,
        default=True,
        help="Save 16k mono wav files for FAD evaluation.",
    )

    args, _ = parser.parse_known_args()

    # normpath drops a trailing separator first; without it basename() of
    # ".../musdb_XL/" is "" and no dataset branch below would ever match.
    dataset_name = os.path.basename(os.path.normpath(args.data_root))
    is_hq = dataset_name == "musdb18hq"
    is_xl = dataset_name == "musdb_XL"
    if not (is_hq or is_xl):
        print(
            f"Warning: data_root folder name '{dataset_name}' is neither "
            "'musdb18hq' nor 'musdb_XL'; audio will be saved WITHOUT loudness "
            "normalization."
        )
    wants_stem = args.target != "mixture"

    os.makedirs(args.output_directory, exist_ok=True)

    meter = pyln.Meter(sample_rate)

    test_tracks = musdb.DB(root=args.data_root, subsets="test", is_wav=True)
    # The HQ database is only read when rebuilding a stem for musdb_XL.
    hq_tracks = None
    if is_xl and wants_stem:
        hq_tracks = musdb.DB(root=args.data_root_hq, subsets="test", is_wav=True)

    # Wrap the database itself (not the enumerate object) so tqdm can try to
    # read its length and show a total.
    for idx, track in enumerate(tqdm.tqdm(test_tracks)):
        track_name = track.name
        if is_hq and track_name == "PR - Oh No":
            # 'PR - Oh No' mixture.wav is left-panned, so the linear mixture
            # (sum of the four stems) is used instead.
            # Please refer to
            # https://github.com/jeonchangbin49/musdb-XL/blob/main/make_L_and_XL.py
            track_audio = (
                track.targets["vocals"].audio
                + track.targets["drums"].audio
                + track.targets["bass"].audio
                + track.targets["other"].audio
            )
        else:
            track_audio = track.audio

        print(track_name)

        # The gain is always derived from the mixture loudness, even when a
        # single stem is exported.
        track_lufs = meter.integrated_loudness(track_audio)
        gain_db = args.loudnorm_input_lufs - track_lufs

        # db2linear comes from the project's utils module; presumably it turns
        # a dB value into a linear amplitude factor — confirm against utils.
        if is_hq:
            if wants_stem:
                track_audio = track.targets[args.target].audio
            track_audio = track_audio * db2linear(gain_db, eps=0.0)
        elif is_xl:
            track_audio = track_audio * db2linear(gain_db, eps=0.0)
            if wants_stem:
                # Tracks are paired by position; assumes both databases list
                # the test tracks in the same order — TODO confirm.
                hq_track = hq_tracks[idx]
                hq_audio = hq_track.audio
                hq_stem = hq_track.targets[args.target].audio
                # NOTE(review): hq_track.audio is the stock HQ mixture, which
                # the comment above describes as left-panned for 'PR - Oh No';
                # confirm whether the linear mixture should be the reference.
                # NOTE(review): the 1e-8 offset only guards exact zeros; a
                # sample equal to -1e-8 would still divide by zero.
                samplewise_gain = track_audio / (hq_audio + 1e-8)
                track_audio = samplewise_gain * hq_stem

        track_dir = f"{args.output_directory}/{track_name}"
        os.makedirs(track_dir, exist_ok=True)
        sf.write(f"{track_dir}/{args.target}.wav", track_audio, sample_rate)

        if args.save_16k_mono:
            # The transpose presumably converts (samples, channels) into the
            # channel-first layout librosa.to_mono expects — confirm.
            mono_audio = librosa.to_mono(track_audio.T)
            mono_audio = librosa.resample(
                mono_audio,
                orig_sr=sample_rate,
                target_sr=mono_sample_rate,
            )
            mono_dir = f"{args.output_directory}_16k_mono/{track_name}"
            os.makedirs(mono_dir, exist_ok=True)
            sf.write(
                f"{mono_dir}/{args.target}.wav",
                mono_audio,
                samplerate=mono_sample_rate,
            )
# Script entry point: run the export only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|