Spaces:
Running
Running
Commit
·
9e538da
1
Parent(s):
83d6d79
tenth commit
Browse files- app.py +10 -6
- prepro/save_musdb_XL_train_numpy.py +148 -0
- prepro/save_musdb_XL_train_wave.py +145 -0
app.py
CHANGED
@@ -207,7 +207,7 @@ with gr.Blocks() as demo:
|
|
207 |
</div>
|
208 |
<p style="margin-bottom: 10px; font-size: 94%">
|
209 |
A demo for "Music De-limiter via Sample-wise Gain Inversion" to appear in WASPAA 2023.
|
210 |
-
You can first upload a music (.wav or .mp3) file and then press "De-limit" button to apply the De-limiter. Since we use a CPU instead of a GPU, it may require a few minute.
|
211 |
Then, you can apply a Parallel Mix technique, which is a simple linear mixing technique of "loudness normalized input" and the "de-limiter output".
|
212 |
You can modify the mixing coefficient by yourself.
|
213 |
If the coefficient is 0.3 then the output will be "loudness_normalized_input * 0.3 + de-limiter_output * 0.7"
|
@@ -221,11 +221,15 @@ with gr.Blocks() as demo:
|
|
221 |
btn = gr.Button("De-limit")
|
222 |
with gr.Column():
|
223 |
with gr.Box():
|
224 |
-
loud_norm_input = gr.Audio(
|
225 |
-
|
|
|
|
|
226 |
with gr.Box():
|
227 |
-
output_audio = gr.Audio(
|
228 |
-
|
|
|
|
|
229 |
with gr.Box():
|
230 |
output_audio_parallel = gr.Audio(
|
231 |
label="Parallel Mix of the Input and its De-limiter Output",
|
@@ -278,6 +282,6 @@ with gr.Blocks() as demo:
|
|
278 |
],
|
279 |
outputs=plot,
|
280 |
)
|
281 |
-
|
282 |
if __name__ == "__main__":
|
283 |
demo.launch(debug=True)
|
|
|
207 |
</div>
|
208 |
<p style="margin-bottom: 10px; font-size: 94%">
|
209 |
A demo for "Music De-limiter via Sample-wise Gain Inversion" to appear in WASPAA 2023.
|
210 |
+
You can first upload a music (.wav or .mp3, 44.1kHz) file and then press "De-limit" button to apply the De-limiter. Since we use a CPU instead of a GPU, it may require a few minute.
|
211 |
Then, you can apply a Parallel Mix technique, which is a simple linear mixing technique of "loudness normalized input" and the "de-limiter output".
|
212 |
You can modify the mixing coefficient by yourself.
|
213 |
If the coefficient is 0.3 then the output will be "loudness_normalized_input * 0.3 + de-limiter_output * 0.7"
|
|
|
221 |
btn = gr.Button("De-limit")
|
222 |
with gr.Column():
|
223 |
with gr.Box():
|
224 |
+
loud_norm_input = gr.Audio(
|
225 |
+
label="Loudness Normalized Input (-14LUFS)",
|
226 |
+
show_download_button=True,
|
227 |
+
)
|
228 |
with gr.Box():
|
229 |
+
output_audio = gr.Audio(
|
230 |
+
label="De-limiter Output",
|
231 |
+
show_download_button=True,
|
232 |
+
)
|
233 |
with gr.Box():
|
234 |
output_audio_parallel = gr.Audio(
|
235 |
label="Parallel Mix of the Input and its De-limiter Output",
|
|
|
282 |
],
|
283 |
outputs=plot,
|
284 |
)
|
285 |
+
|
286 |
if __name__ == "__main__":
|
287 |
demo.launch(debug=True)
|
prepro/save_musdb_XL_train_numpy.py
ADDED
@@ -0,0 +1,148 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import glob
|
3 |
+
import argparse
|
4 |
+
import csv
|
5 |
+
|
6 |
+
import numpy as np
|
7 |
+
import librosa
|
8 |
+
import soundfile as sf
|
9 |
+
import tqdm
|
10 |
+
|
11 |
+
|
12 |
+
def _read_csv_rows(path_csv):
    """Return the data rows of a musdb-XL-train csv file.

    Args:
        path_csv: Path of the csv file; its first row is the column header.

    Returns:
        A list with one list of strings per non-empty row after the header.
    """
    # newline="" is what the csv module documents for files handed to csv.reader.
    with open(path_csv, "r", encoding="utf-8", newline="") as f:
        rdr = csv.reader(f)
        next(rdr, None)  # skip the header row (no-op when the file is empty)
        # A blank line is parsed as [], which has no song name to index later.
        return [line for line in rdr if line]


def _save_gain_ratio(mixture, path_ozone_wav, path_npy):
    """Save the sample-wise ratio of a limiter-applied mixture to its source mixture.

    Args:
        mixture: Sum of the stems; modified in place (exact zeros are replaced
            by the float32 machine epsilon before dividing).
        path_ozone_wav: Path of the limiter-applied wave file to load.
        path_npy: Path of the numpy file to write.
    """
    ozone_mixture, _ = librosa.load(path_ozone_wav, sr=44100, mono=False)
    mixture[mixture == 0.0] = np.finfo(np.float32).eps  # to avoid 'divided by zero'
    ratio = ozone_mixture / mixture
    np.save(path_npy, ratio.astype(np.float16))  # 16bit is enough...


def main():
    """Compute and save sample-wise gain ratios of musdb-XL-train as numpy files.

    Reads ``ozone_train_fixed.csv`` and every ``ozone_train_random_*.csv`` under
    ``--musdb_XL_train_root``, rebuilds each mixture from the stems under
    ``--root``/train, divides the matching limiter-applied wave file by that
    mixture and stores the result as float16 ``.npy`` under ``--output``.
    """
    parser = argparse.ArgumentParser(
        description="Save sample-wise gain parameters for dataset distribution"
    )
    parser.add_argument(
        "--root",
        type=str,
        default="/path/to/musdb18hq",
        help="Root directory",
    )
    parser.add_argument(
        "--musdb_XL_train_root",
        type=str,
        default="/path/to/musdb-XL-train",
        help="Directory of musdb-XL-train dataset",
    )
    parser.add_argument(
        "--output",
        type=str,
        default="/path/to/musdb-XL-train/np_ratio",
        help="Directory to save sample-wise gain ratio",
    )

    args = parser.parse_args()

    sources = ["vocals", "bass", "drums", "other"]

    path_csv_fixed = f"{args.musdb_XL_train_root}/ozone_train_fixed.csv"
    list_path_csv_random = sorted(
        glob.glob(f"{args.musdb_XL_train_root}/ozone_train_random_*.csv")
    )

    # read ozone_train_fixed list
    # header: song_name, max_threshold, max_character
    os.makedirs(f"{args.output}/ozone_train_fixed", exist_ok=True)
    fixed_list = _read_csv_rows(path_csv_fixed)

    # save numpy files of ozone_train_fixed
    # which is the limiter-applied version of 100 songs from musdb-HQ train set
    # each numpy file contain sample-wise gain ratio parameters
    for fixed_song in tqdm.tqdm(fixed_list):
        audio_sources = []
        for source in sources:
            audio, _ = librosa.load(
                f"{args.root}/train/{fixed_song[0]}/{source}.wav", sr=44100, mono=False
            )
            audio_sources.append(audio)
        mixture = np.stack(audio_sources, axis=0).sum(0)

        _save_gain_ratio(
            mixture,
            f"{args.musdb_XL_train_root}/ozone_train_fixed/{fixed_song[0]}.wav",
            f"{args.output}/ozone_train_fixed/{fixed_song[0]}.npy",
        )

    # read ozone_train_random list
    # header: song_name, max_threshold, max_character, then for each of
    # vocals / bass / drums / other (in that order):
    # '<inst>_name', '<inst>_start_sec', '<inst>_gain', '<inst>_channelswap'
    os.makedirs(f"{args.output}/ozone_train_random", exist_ok=True)
    random_list = []
    for path_csv_random in list_path_csv_random:
        random_list.extend(_read_csv_rows(path_csv_random))

    # save numpy files of ozone_train_random,
    # which is the limiter-applied version of 4-sec 300,000 segments randomly created from musdb-HQ train subset
    for random_song in tqdm.tqdm(random_list):
        audio_sources = []
        for k, source in enumerate(sources):
            col = 3 + k * 4  # first of the four columns describing this source
            audio, _ = librosa.load(
                f"{args.root}/train/{random_song[col]}/{source}.wav",
                sr=44100,
                mono=False,
                offset=float(random_song[col + 1]),  # 'inst_start_sec'
                duration=4.0,
            )
            audio = audio * float(random_song[col + 2])  # 'inst_gain'
            if random_song[col + 3].lower() == "true":  # 'inst_channelswap'
                # presumably axis 0 is the channel axis (librosa, mono=False)
                audio = np.flip(audio, axis=0)

            audio_sources.append(audio)
        mixture = np.stack(audio_sources, axis=0).sum(0)

        _save_gain_ratio(
            mixture,
            f"{args.musdb_XL_train_root}/ozone_train_random/{random_song[0]}.wav",
            f"{args.output}/ozone_train_random/{random_song[0]}.npy",
        )


if __name__ == "__main__":
    main()
|
prepro/save_musdb_XL_train_wave.py
ADDED
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Save musdb-XL-train dataset from numpy
|
2 |
+
import os
|
3 |
+
import glob
|
4 |
+
import argparse
|
5 |
+
import csv
|
6 |
+
|
7 |
+
import numpy as np
|
8 |
+
import librosa
|
9 |
+
import soundfile as sf
|
10 |
+
import tqdm
|
11 |
+
|
12 |
+
|
13 |
+
def _read_csv_rows(path_csv):
    """Return the data rows of a musdb-XL-train csv file.

    Args:
        path_csv: Path of the csv file; its first row is the column header.

    Returns:
        A list with one list of strings per non-empty row after the header.
    """
    # newline="" is what the csv module documents for files handed to csv.reader.
    with open(path_csv, "r", encoding="utf-8", newline="") as f:
        rdr = csv.reader(f)
        next(rdr, None)  # skip the header row (no-op when the file is empty)
        # A blank line is parsed as [], which has no song name to index later.
        return [line for line in rdr if line]


def _write_limited_wave(mixture, path_npy, path_wav):
    """Apply a saved sample-wise ratio to a mixture and write it as 16-bit wave.

    Args:
        mixture: Sum of the stems, as loaded by librosa with ``mono=False``.
        path_npy: Path of the numpy file holding the sample-wise ratio.
        path_wav: Path of the wave file to write.
    """
    ratio = np.load(path_npy)
    output = mixture * ratio

    # Transposed before writing; presumably mixture is (channels, samples)
    # while soundfile expects (frames, channels) - confirm against both libraries.
    sf.write(path_wav, output.T, 44100, subtype="PCM_16")


def main():
    """Rebuild the musdb-XL-train wave files from sample-wise ratio arrays.

    Reads ``ozone_train_fixed.csv`` and every ``ozone_train_random_*.csv`` under
    ``--musdb_XL_train_npy_root``, rebuilds each mixture from the stems under
    ``--root``/train, multiplies it by the matching array under
    ``np_ratio/`` and writes the result as a 44.1 kHz PCM_16 wave file under
    ``--output``.
    """
    parser = argparse.ArgumentParser(
        description="Save musdb-XL-train wave files from the downloaded sample-wise gain parameters"
    )
    parser.add_argument(
        "--root",
        type=str,
        default="/path/to/musdb18hq",
        help="Root directory",
    )
    parser.add_argument(
        "--musdb_XL_train_npy_root",
        type=str,
        default="/path/to/musdb-XL-train",
        help="Directory of numpy arrays of musdb-XL-train's sample-wise ratio ",
    )
    parser.add_argument(
        "--output",
        type=str,
        default="/path/to/musdb-XL-train",
        help="Directory to save musdb-XL-train wave data",
    )

    args = parser.parse_args()

    sources = ["vocals", "bass", "drums", "other"]

    path_csv_fixed = f"{args.musdb_XL_train_npy_root}/ozone_train_fixed.csv"
    list_path_csv_random = sorted(
        glob.glob(f"{args.musdb_XL_train_npy_root}/ozone_train_random_*.csv")
    )

    # read ozone_train_fixed list
    # header: song_name, max_threshold, max_character
    os.makedirs(f"{args.output}/ozone_train_fixed", exist_ok=True)
    fixed_list = _read_csv_rows(path_csv_fixed)

    # save wave files of ozone_train_fixed,
    # which is the limiter-applied version of 100 songs from musdb-HQ train set
    for fixed_song in tqdm.tqdm(fixed_list):
        audio_sources = []
        for source in sources:
            audio, _ = librosa.load(
                f"{args.root}/train/{fixed_song[0]}/{source}.wav", sr=44100, mono=False
            )
            audio_sources.append(audio)
        mixture = np.stack(audio_sources, axis=0).sum(0)

        _write_limited_wave(
            mixture,
            f"{args.musdb_XL_train_npy_root}/np_ratio/ozone_train_fixed/{fixed_song[0]}.npy",
            f"{args.output}/ozone_train_fixed/{fixed_song[0]}.wav",
        )

    # read ozone_train_random list
    # header: song_name, max_threshold, max_character, then for each of
    # vocals / bass / drums / other (in that order):
    # '<inst>_name', '<inst>_start_sec', '<inst>_gain', '<inst>_channelswap'
    os.makedirs(f"{args.output}/ozone_train_random", exist_ok=True)
    random_list = []
    for path_csv_random in list_path_csv_random:
        random_list.extend(_read_csv_rows(path_csv_random))

    # save wave files of ozone_train_random,
    # which is the limiter-applied version of 4-sec 300,000 segments randomly created from musdb-HQ train subset
    for random_song in tqdm.tqdm(random_list):
        audio_sources = []
        for k, source in enumerate(sources):
            col = 3 + k * 4  # first of the four columns describing this source
            audio, _ = librosa.load(
                f"{args.root}/train/{random_song[col]}/{source}.wav",
                sr=44100,
                mono=False,
                offset=float(random_song[col + 1]),  # 'inst_start_sec'
                duration=4.0,
            )
            audio = audio * float(random_song[col + 2])  # 'inst_gain'
            if random_song[col + 3].lower() == "true":  # 'inst_channelswap'
                # presumably axis 0 is the channel axis (librosa, mono=False)
                audio = np.flip(audio, axis=0)

            audio_sources.append(audio)
        mixture = np.stack(audio_sources, axis=0).sum(0)

        _write_limited_wave(
            mixture,
            f"{args.musdb_XL_train_npy_root}/np_ratio/ozone_train_random/{random_song[0]}.npy",
            f"{args.output}/ozone_train_random/{random_song[0]}.wav",
        )


if __name__ == "__main__":
    main()
|