jeonchangbin49 committed
Commit 9e538da · 1 Parent(s): 83d6d79

tenth commit

app.py CHANGED
@@ -207,7 +207,7 @@ with gr.Blocks() as demo:
         </div>
         <p style="margin-bottom: 10px; font-size: 94%">
         A demo for "Music De-limiter via Sample-wise Gain Inversion" to appear in WASPAA 2023.
-        You can first upload a music (.wav or .mp3) file and then press the "De-limit" button to apply the De-limiter. Since we use a CPU instead of a GPU, it may require a few minutes.
+        You can first upload a music (.wav or .mp3, 44.1kHz) file and then press the "De-limit" button to apply the De-limiter. Since we use a CPU instead of a GPU, it may require a few minutes.
         Then, you can apply a Parallel Mix technique, which is a simple linear mixing of the "loudness normalized input" and the "de-limiter output".
         You can modify the mixing coefficient yourself.
         If the coefficient is 0.3, the output will be "loudness_normalized_input * 0.3 + de-limiter_output * 0.7".
@@ -221,11 +221,15 @@ with gr.Blocks() as demo:
             btn = gr.Button("De-limit")
         with gr.Column():
             with gr.Box():
-                loud_norm_input = gr.Audio(label="Loudness Normalized Input (-14LUFS)",
-                                           show_download_button=True)
+                loud_norm_input = gr.Audio(
+                    label="Loudness Normalized Input (-14LUFS)",
+                    show_download_button=True,
+                )
             with gr.Box():
-                output_audio = gr.Audio(label="De-limiter Output",
-                                        show_download_button=True,)
+                output_audio = gr.Audio(
+                    label="De-limiter Output",
+                    show_download_button=True,
+                )
             with gr.Box():
                 output_audio_parallel = gr.Audio(
                     label="Parallel Mix of the Input and its De-limiter Output",
@@ -278,6 +282,6 @@ with gr.Blocks() as demo:
         ],
         outputs=plot,
     )
-
+
 if __name__ == "__main__":
     demo.launch(debug=True)
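The Parallel Mix described in the demo text is just a linear blend of the loudness-normalized input and the de-limiter output. Below is a minimal illustrative sketch of that formula, not the app's actual code; the function name, argument names, and the assumption that both signals are same-shaped float arrays are mine.

import numpy as np

def parallel_mix(loud_norm_input: np.ndarray,
                 delimiter_output: np.ndarray,
                 coef: float = 0.3) -> np.ndarray:
    # coef * loudness-normalized input + (1 - coef) * de-limiter output,
    # e.g. coef=0.3 gives "loudness_normalized_input * 0.3 + de-limiter_output * 0.7"
    return coef * loud_norm_input + (1.0 - coef) * delimiter_output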
prepro/save_musdb_XL_train_numpy.py ADDED
@@ -0,0 +1,148 @@
+import os
+import glob
+import argparse
+import csv
+
+import numpy as np
+import librosa
+import soundfile as sf
+import tqdm
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Save sample-wise gain parameters for dataset distribution"
+    )
+    parser.add_argument(
+        "--root",
+        type=str,
+        default="/path/to/musdb18hq",
+        help="Root directory",
+    )
+    parser.add_argument(
+        "--musdb_XL_train_root",
+        type=str,
+        default="/path/to/musdb-XL-train",
+        help="Directory of musdb-XL-train dataset",
+    )
+    parser.add_argument(
+        "--output",
+        type=str,
+        default="/path/to/musdb-XL-train/np_ratio",
+        help="Directory to save sample-wise gain ratio",
+    )
+
+    args = parser.parse_args()
+
+    sources = ["vocals", "bass", "drums", "other"]
+
+    path_csv_fixed = f"{args.musdb_XL_train_root}/ozone_train_fixed.csv"
+    list_path_csv_random = sorted(
+        glob.glob(f"{args.musdb_XL_train_root}/ozone_train_random_*.csv")
+    )
+
+    # read ozone_train_fixed list
+    fixed_list = []
+    os.makedirs(f"{args.output}/ozone_train_fixed", exist_ok=True)
+    with open(path_csv_fixed, "r", encoding="utf-8") as f:
+        rdr = csv.reader(f)
+        for k, line in enumerate(rdr):
+            if k == 0:  # song_name, max_threshold, max_character
+                pass
+            else:
+                fixed_list.append(line)
+
+    # save numpy files of ozone_train_fixed,
+    # which is the limiter-applied version of the 100 songs from the musdb-HQ train set.
+    # each numpy file contains sample-wise gain ratio parameters
+    for fixed_song in tqdm.tqdm(fixed_list):
+        audio_sources = []
+        for source in sources:
+            audio, sr = librosa.load(
+                f"{args.root}/train/{fixed_song[0]}/{source}.wav", sr=44100, mono=False
+            )
+            audio_sources.append(audio)
+        stems = np.stack(audio_sources, axis=0)
+        mixture = stems.sum(0)
+
+        ozone_mixture, sr = librosa.load(
+            f"{args.musdb_XL_train_root}/ozone_train_fixed/{fixed_song[0]}.wav",
+            sr=44100,
+            mono=False,
+        )
+        mixture[mixture == 0.0] = np.finfo(np.float32).eps  # to avoid division by zero
+        ratio = ozone_mixture / mixture
+
+        np.save(
+            f"{args.output}/ozone_train_fixed/{fixed_song[0]}.npy",
+            ratio.astype(np.float16),  # 16-bit is enough...
+        )
+
+    # read ozone_train_random list
+    random_list = []
+    os.makedirs(f"{args.output}/ozone_train_random", exist_ok=True)
+    for path_csv_random in list_path_csv_random:
+        with open(path_csv_random, "r", encoding="utf-8") as f:
+            rdr = csv.reader(f)
+            for k, line in enumerate(rdr):
+                if k == 0:
+                    # ['song_name',
+                    #  'max_threshold',
+                    #  'max_character',
+                    #  'vocals_name',
+                    #  'vocals_start_sec',
+                    #  'vocals_gain',
+                    #  'vocals_channelswap',
+                    #  'bass_name',
+                    #  'bass_start_sec',
+                    #  'bass_gain',
+                    #  'bass_channelswap',
+                    #  'drums_name',
+                    #  'drums_start_sec',
+                    #  'drums_gain',
+                    #  'drums_channelswap',
+                    #  'other_name',
+                    #  'other_start_sec',
+                    #  'other_gain',
+                    #  'other_channelswap']
+                    pass
+                else:
+                    random_list.append(line)
+
+    # save numpy files of ozone_train_random,
+    # which is the limiter-applied version of 300,000 4-sec segments randomly created from the musdb-HQ train subset
+    for random_song in tqdm.tqdm(random_list):
+        audio_sources = []
+        for k, source in enumerate(sources):
+            audio, sr = librosa.load(
+                f"{args.root}/train/{random_song[3 + k * 4]}/{source}.wav",
+                sr=44100,
+                mono=False,
+                offset=float(random_song[4 + k * 4]),  # 'inst_start_sec'
+                duration=4.0,
+            )
+            audio = audio * float(random_song[5 + k * 4])  # 'inst_gain'
+            if random_song[6 + k * 4].lower() == "true":  # 'inst_channelswap'
+                audio = np.flip(audio, axis=0)
+
+            audio_sources.append(audio)
+        stems = np.stack(audio_sources, axis=0)
+        mixture = stems.sum(0)
+
+        ozone_mixture, sr = librosa.load(
+            f"{args.musdb_XL_train_root}/ozone_train_random/{random_song[0]}.wav",
+            sr=44100,
+            mono=False,
+        )
+
+        mixture[mixture == 0.0] = np.finfo(np.float32).eps  # to avoid division by zero
+        ratio = ozone_mixture / mixture
+
+        np.save(
+            f"{args.output}/ozone_train_random/{random_song[0]}.npy",
+            ratio.astype(np.float16),  # 16-bit is enough...
+        )
+
+
+if __name__ == "__main__":
+    main()
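Each saved .npy holds the sample-wise gain ratio between the limiter-applied (Ozone) mixture and the plain stem sum, so multiplying a re-created mixture by its ratio reproduces the limited audio. Below is a minimal sketch of consuming one fixed-song ratio file; the paths follow the placeholder defaults above and "some_song" is a hypothetical song-folder name. The full, official procedure is the save_musdb_XL_train_wave.py script that follows.

import numpy as np
import librosa
import soundfile as sf

song = "some_song"  # placeholder song-folder name from the musdb-HQ train set
stems = [
    librosa.load(f"/path/to/musdb18hq/train/{song}/{s}.wav", sr=44100, mono=False)[0]
    for s in ["vocals", "bass", "drums", "other"]
]
mixture = np.stack(stems, axis=0).sum(0)  # (channels, samples)
ratio = np.load(f"/path/to/musdb-XL-train/np_ratio/ozone_train_fixed/{song}.npy")
limited = mixture * ratio  # limiter-applied mixture
sf.write(f"{song}_limited.wav", limited.T, 44100, subtype="PCM_16")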
prepro/save_musdb_XL_train_wave.py ADDED
@@ -0,0 +1,145 @@
+# Save the musdb-XL-train dataset (wave files) from the numpy gain ratios
+import os
+import glob
+import argparse
+import csv
+
+import numpy as np
+import librosa
+import soundfile as sf
+import tqdm
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Save musdb-XL-train wave files from the downloaded sample-wise gain parameters"
+    )
+    parser.add_argument(
+        "--root",
+        type=str,
+        default="/path/to/musdb18hq",
+        help="Root directory",
+    )
+    parser.add_argument(
+        "--musdb_XL_train_npy_root",
+        type=str,
+        default="/path/to/musdb-XL-train",
+        help="Directory of the numpy arrays of musdb-XL-train's sample-wise gain ratios",
+    )
+    parser.add_argument(
+        "--output",
+        type=str,
+        default="/path/to/musdb-XL-train",
+        help="Directory to save musdb-XL-train wave data",
+    )
+
+    args = parser.parse_args()
+
+    sources = ["vocals", "bass", "drums", "other"]
+
+    path_csv_fixed = f"{args.musdb_XL_train_npy_root}/ozone_train_fixed.csv"
+    list_path_csv_random = sorted(
+        glob.glob(f"{args.musdb_XL_train_npy_root}/ozone_train_random_*.csv")
+    )
+
+    # read ozone_train_fixed list
+    fixed_list = []
+    os.makedirs(f"{args.output}/ozone_train_fixed", exist_ok=True)
+    with open(path_csv_fixed, "r", encoding="utf-8") as f:
+        rdr = csv.reader(f)
+        for k, line in enumerate(rdr):
+            if k == 0:  # song_name, max_threshold, max_character
+                pass
+            else:
+                fixed_list.append(line)
+
+    # save wave files of ozone_train_fixed,
+    # which is the limiter-applied version of the 100 songs from the musdb-HQ train set
+    for fixed_song in tqdm.tqdm(fixed_list):
+        audio_sources = []
+        for source in sources:
+            audio, sr = librosa.load(
+                f"{args.root}/train/{fixed_song[0]}/{source}.wav", sr=44100, mono=False
+            )
+            audio_sources.append(audio)
+        stems = np.stack(audio_sources, axis=0)
+        mixture = stems.sum(0)
+
+        ratio = np.load(
+            f"{args.musdb_XL_train_npy_root}/np_ratio/ozone_train_fixed/{fixed_song[0]}.npy"
+        )
+        output = mixture * ratio
+
+        sf.write(
+            f"{args.output}/ozone_train_fixed/{fixed_song[0]}.wav",
+            output.T,
+            44100,
+            subtype="PCM_16",
+        )
+
+    # read ozone_train_random list
+    random_list = []
+    os.makedirs(f"{args.output}/ozone_train_random", exist_ok=True)
+    for path_csv_random in list_path_csv_random:
+        with open(path_csv_random, "r", encoding="utf-8") as f:
+            rdr = csv.reader(f)
+            for k, line in enumerate(rdr):
+                if k == 0:
+                    # ['song_name',
+                    #  'max_threshold',
+                    #  'max_character',
+                    #  'vocals_name',
+                    #  'vocals_start_sec',
+                    #  'vocals_gain',
+                    #  'vocals_channelswap',
+                    #  'bass_name',
+                    #  'bass_start_sec',
+                    #  'bass_gain',
+                    #  'bass_channelswap',
+                    #  'drums_name',
+                    #  'drums_start_sec',
+                    #  'drums_gain',
+                    #  'drums_channelswap',
+                    #  'other_name',
+                    #  'other_start_sec',
+                    #  'other_gain',
+                    #  'other_channelswap']
+                    pass
+                else:
+                    random_list.append(line)
+
+    # save wave files of ozone_train_random,
+    # which is the limiter-applied version of 300,000 4-sec segments randomly created from the musdb-HQ train subset
+    for random_song in tqdm.tqdm(random_list):
+        audio_sources = []
+        for k, source in enumerate(sources):
+            audio, sr = librosa.load(
+                f"{args.root}/train/{random_song[3 + k * 4]}/{source}.wav",
+                sr=44100,
+                mono=False,
+                offset=float(random_song[4 + k * 4]),  # 'inst_start_sec'
+                duration=4.0,
+            )
+            audio = audio * float(random_song[5 + k * 4])  # 'inst_gain'
+            if random_song[6 + k * 4].lower() == "true":  # 'inst_channelswap'
+                audio = np.flip(audio, axis=0)
+
+            audio_sources.append(audio)
+        stems = np.stack(audio_sources, axis=0)
+        mixture = stems.sum(0)
+
+        ratio = np.load(
+            f"{args.musdb_XL_train_npy_root}/np_ratio/ozone_train_random/{random_song[0]}.npy"
+        )
+        output = mixture * ratio
+
+        sf.write(
+            f"{args.output}/ozone_train_random/{random_song[0]}.wav",
+            output.T,
+            44100,
+            subtype="PCM_16",
+        )
+
+
+if __name__ == "__main__":
+    main()