r3gm committed on
Commit
f5db5c5
·
verified ·
1 Parent(s): a83ce13

Upload 10 files

Browse files
Files changed (10) hide show
  1. README.md +12 -12
  2. app.py +4 -7
  3. pre-requirements.txt +2 -1
  4. requirements.txt +7 -5
  5. src/download_models.py +12 -5
  6. src/infer_pack/models.py +4 -4
  7. src/main.py +112 -27
  8. src/mdx.py +7 -3
  9. src/rvc.py +15 -4
  10. src/webui.py +82 -27
README.md CHANGED
@@ -1,13 +1,13 @@
1
- ---
2
- title: AICoverGen
3
- emoji: 🚀
4
- colorFrom: red
5
- colorTo: pink
6
- sdk: gradio
7
- sdk_version: 5.35.0
8
- app_file: app.py
9
- pinned: false
10
- license: mit
11
- ---
12
-
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+ ---
2
+ title: AICoverGen
3
+ emoji: 🚀
4
+ colorFrom: red
5
+ colorTo: pink
6
+ sdk: gradio
7
+ sdk_version: 5.44.0
8
+ app_file: app.py
9
+ pinned: false
10
+ license: mit
11
+ ---
12
+
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED
@@ -1,12 +1,9 @@
1
  import os
 
2
 
3
- cmd = """
4
 
5
- pip install onnxruntime-gpu[cuda,cudnn]==1.22.0
6
- find / -name 'libcudnn.so*' 2>/dev/null
7
-
8
- python src/download_models.py
9
- python src/webui.py
10
- """
11
 
12
  os.system(cmd)
 
1
  import os
2
import subprocess
import sys

# Fetch any missing model weights before the UI starts. The exit status is
# deliberately not checked, matching the previous os.system() behaviour.
subprocess.run([sys.executable, "src/download_models.py"], check=False)

# Forward the command-line flags as separate argv entries instead of joining
# them into a shell string: arguments containing spaces or shell
# metacharacters reach webui.py unchanged, and the same interpreter that runs
# this file is reused for the child process.
cmd = [sys.executable, "src/webui.py", *sys.argv[1:]]
subprocess.run(cmd, check=False)
pre-requirements.txt CHANGED
@@ -1 +1,2 @@
1
- pip<=23.1.2
 
 
1
+ pip==23.0.1
2
+ Setuptools<=80.6.0
requirements.txt CHANGED
@@ -1,4 +1,3 @@
1
- --extra-index-url=https://download.pytorch.org/whl/cu121
2
  torch==2.5.1
3
  torchvision==0.20.1
4
  torchaudio==2.5.1
@@ -6,11 +5,11 @@ deemix
6
  fairseq==0.12.2
7
  faiss-cpu==1.7.3
8
  ffmpeg-python>=0.2.0
9
- # gradio==3.39.0
10
  lib==4.0.0
11
  librosa==0.9.1
12
  numpy==1.23.5
13
- # onnxruntime #onnxruntime_gpu
14
  praat-parselmouth>=0.4.2
15
  pedalboard==0.7.7
16
  pydub==0.25.1
@@ -20,5 +19,8 @@ scipy==1.11.1
20
  soundfile==0.12.1
21
  torchcrepe==0.0.20
22
  tqdm==4.65.0
23
- yt_dlp==2023.7.6
24
- sox==1.4.1
 
 
 
 
 
1
  torch==2.5.1
2
  torchvision==0.20.1
3
  torchaudio==2.5.1
 
5
  fairseq==0.12.2
6
  faiss-cpu==1.7.3
7
  ffmpeg-python>=0.2.0
8
+ gradio==5.44.0
9
  lib==4.0.0
10
  librosa==0.9.1
11
  numpy==1.23.5
12
+ onnxruntime-gpu==1.22.0 # onnxruntime #onnxruntime_gpu
13
  praat-parselmouth>=0.4.2
14
  pedalboard==0.7.7
15
  pydub==0.25.1
 
19
  soundfile==0.12.1
20
  torchcrepe==0.0.20
21
  tqdm==4.65.0
22
+ yt_dlp
23
+ sox==1.4.1
24
+ noisereduce
25
+ spaces
26
+ matplotlib-inline
src/download_models.py CHANGED
@@ -8,11 +8,20 @@ BASE_DIR = Path(__file__).resolve().parent.parent
8
  mdxnet_models_dir = BASE_DIR / 'mdxnet_models'
9
  rvc_models_dir = BASE_DIR / 'rvc_models'
10
 
 
 
 
11
 
12
  def dl_model(link, model_name, dir_name):
13
- with requests.get(f'{link}{model_name}') as r:
 
 
 
 
 
 
14
  r.raise_for_status()
15
- with open(dir_name / model_name, 'wb') as f:
16
  for chunk in r.iter_content(chunk_size=8192):
17
  f.write(chunk)
18
 
@@ -20,12 +29,10 @@ def dl_model(link, model_name, dir_name):
20
  if __name__ == '__main__':
21
  mdx_model_names = ['UVR-MDX-NET-Inst_HQ_4.onnx', 'UVR-MDX-NET-Voc_FT.onnx', 'UVR_MDXNET_KARA_2.onnx', 'Reverb_HQ_By_FoxJoy.onnx']
22
  for model in mdx_model_names:
23
- print(f'Downloading {model}...')
24
  dl_model(MDX_DOWNLOAD_LINK, model, mdxnet_models_dir)
25
 
26
  rvc_model_names = ['hubert_base.pt', 'rmvpe.pt']
27
  for model in rvc_model_names:
28
- print(f'Downloading {model}...')
29
  dl_model(RVC_DOWNLOAD_LINK, model, rvc_models_dir)
30
 
31
- print('All models downloaded!')
 
8
  mdxnet_models_dir = BASE_DIR / 'mdxnet_models'
9
  rvc_models_dir = BASE_DIR / 'rvc_models'
10
 
11
+ mdxnet_models_dir.mkdir(parents=True, exist_ok=True)
12
+ rvc_models_dir.mkdir(parents=True, exist_ok=True)
13
+
14
 
15
def dl_model(link, model_name, dir_name):
    """Download ``model_name`` from ``link`` into ``dir_name`` if it is missing.

    Args:
        link: Base URL; ``model_name`` is appended to it verbatim.
        model_name: File name of the model, used in the URL and on disk.
        dir_name: Destination directory as a ``pathlib.Path``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    model_path = dir_name / model_name
    if model_path.exists():
        # print(f"{model_name} already exists, skipping download.")
        return

    print(f"Downloading {model_name}...")
    # Stream into a sibling ".part" file and rename only after the last chunk
    # was written. Otherwise an interrupted download leaves a truncated file
    # that the exists() check above would accept as complete on the next run.
    tmp_path = model_path.with_name(model_path.name + '.part')
    try:
        # The timeout keeps a stalled connection from hanging start-up forever.
        with requests.get(f'{link}{model_name}', stream=True, timeout=60) as r:
            r.raise_for_status()
            with open(tmp_path, 'wb') as f:
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)
        tmp_path.replace(model_path)
    finally:
        # Only present here when the download failed part-way.
        if tmp_path.exists():
            tmp_path.unlink()
27
 
 
29
  if __name__ == '__main__':
30
  mdx_model_names = ['UVR-MDX-NET-Inst_HQ_4.onnx', 'UVR-MDX-NET-Voc_FT.onnx', 'UVR_MDXNET_KARA_2.onnx', 'Reverb_HQ_By_FoxJoy.onnx']
31
  for model in mdx_model_names:
 
32
  dl_model(MDX_DOWNLOAD_LINK, model, mdxnet_models_dir)
33
 
34
  rvc_model_names = ['hubert_base.pt', 'rmvpe.pt']
35
  for model in rvc_model_names:
 
36
  dl_model(RVC_DOWNLOAD_LINK, model, rvc_models_dir)
37
 
38
+ print('All models ready!')
src/infer_pack/models.py CHANGED
@@ -607,7 +607,7 @@ class SynthesizerTrnMs256NSFsid(nn.Module):
607
  inter_channels, hidden_channels, 5, 1, 3, gin_channels=gin_channels
608
  )
609
  self.emb_g = nn.Embedding(self.spk_embed_dim, gin_channels)
610
- print("gin_channels:", gin_channels, "self.spk_embed_dim:", self.spk_embed_dim)
611
 
612
  def remove_weight_norm(self):
613
  self.dec.remove_weight_norm()
@@ -718,7 +718,7 @@ class SynthesizerTrnMs768NSFsid(nn.Module):
718
  inter_channels, hidden_channels, 5, 1, 3, gin_channels=gin_channels
719
  )
720
  self.emb_g = nn.Embedding(self.spk_embed_dim, gin_channels)
721
- print("gin_channels:", gin_channels, "self.spk_embed_dim:", self.spk_embed_dim)
722
 
723
  def remove_weight_norm(self):
724
  self.dec.remove_weight_norm()
@@ -826,7 +826,7 @@ class SynthesizerTrnMs256NSFsid_nono(nn.Module):
826
  inter_channels, hidden_channels, 5, 1, 3, gin_channels=gin_channels
827
  )
828
  self.emb_g = nn.Embedding(self.spk_embed_dim, gin_channels)
829
- print("gin_channels:", gin_channels, "self.spk_embed_dim:", self.spk_embed_dim)
830
 
831
  def remove_weight_norm(self):
832
  self.dec.remove_weight_norm()
@@ -928,7 +928,7 @@ class SynthesizerTrnMs768NSFsid_nono(nn.Module):
928
  inter_channels, hidden_channels, 5, 1, 3, gin_channels=gin_channels
929
  )
930
  self.emb_g = nn.Embedding(self.spk_embed_dim, gin_channels)
931
- print("gin_channels:", gin_channels, "self.spk_embed_dim:", self.spk_embed_dim)
932
 
933
  def remove_weight_norm(self):
934
  self.dec.remove_weight_norm()
 
607
  inter_channels, hidden_channels, 5, 1, 3, gin_channels=gin_channels
608
  )
609
  self.emb_g = nn.Embedding(self.spk_embed_dim, gin_channels)
610
+ # print("gin_channels:", gin_channels, "self.spk_embed_dim:", self.spk_embed_dim)
611
 
612
  def remove_weight_norm(self):
613
  self.dec.remove_weight_norm()
 
718
  inter_channels, hidden_channels, 5, 1, 3, gin_channels=gin_channels
719
  )
720
  self.emb_g = nn.Embedding(self.spk_embed_dim, gin_channels)
721
+ # print("gin_channels:", gin_channels, "self.spk_embed_dim:", self.spk_embed_dim)
722
 
723
  def remove_weight_norm(self):
724
  self.dec.remove_weight_norm()
 
826
  inter_channels, hidden_channels, 5, 1, 3, gin_channels=gin_channels
827
  )
828
  self.emb_g = nn.Embedding(self.spk_embed_dim, gin_channels)
829
+ # print("gin_channels:", gin_channels, "self.spk_embed_dim:", self.spk_embed_dim)
830
 
831
  def remove_weight_norm(self):
832
  self.dec.remove_weight_norm()
 
928
  inter_channels, hidden_channels, 5, 1, 3, gin_channels=gin_channels
929
  )
930
  self.emb_g = nn.Embedding(self.spk_embed_dim, gin_channels)
931
+ # print("gin_channels:", gin_channels, "self.spk_embed_dim:", self.spk_embed_dim)
932
 
933
  def remove_weight_norm(self):
934
  self.dec.remove_weight_norm()
src/main.py CHANGED
@@ -9,6 +9,8 @@ import shlex
9
  import subprocess
10
  from contextlib import suppress
11
  from urllib.parse import urlparse, parse_qs
 
 
12
 
13
  import gradio as gr
14
  import librosa
@@ -19,6 +21,7 @@ import yt_dlp
19
  from pedalboard import Pedalboard, Reverb, Compressor, HighpassFilter
20
  from pedalboard.io import AudioFile
21
  from pydub import AudioSegment
 
22
 
23
  from mdx import run_mdx
24
  from rvc import Config, load_hubert, get_vc, rvc_infer
@@ -27,12 +30,29 @@ import logging
27
  logging.getLogger("httpx").setLevel(logging.WARNING)
28
 
29
  BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 
30
 
31
  mdxnet_models_dir = os.path.join(BASE_DIR, 'mdxnet_models')
32
  rvc_models_dir = os.path.join(BASE_DIR, 'rvc_models')
33
  output_dir = os.path.join(BASE_DIR, 'song_output')
34
 
35
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  def get_youtube_video_id(url, ignore_playlist=True):
37
  """
38
  Examples:
@@ -68,6 +88,9 @@ def get_youtube_video_id(url, ignore_playlist=True):
68
 
69
 
70
  def yt_download(link):
 
 
 
71
  ydl_opts = {
72
  'format': 'bestaudio',
73
  'outtmpl': '%(title)s',
@@ -95,12 +118,12 @@ def raise_exception(error_msg, is_webui):
95
  def get_rvc_model(voice_model, is_webui):
96
  rvc_model_filename, rvc_index_filename = None, None
97
  model_dir = os.path.join(rvc_models_dir, voice_model)
98
- print(model_dir)
99
  for file in os.listdir(model_dir):
100
- print(file)
101
  if os.path.isdir(file):
102
  for ff in os.listdir(file):
103
- print("subfile", ff)
104
  ext = os.path.splitext(ff)[1]
105
  if ext == '.pth':
106
  rvc_model_filename = ff
@@ -136,9 +159,21 @@ def get_audio_paths(song_dir):
136
  elif file.endswith('_Vocals_Backup.wav'):
137
  backup_vocals_path = os.path.join(song_dir, file)
138
 
 
139
  return orig_song_path, instrumentals_path, main_vocals_dereverb_path, backup_vocals_path
140
 
141
 
 
 
 
 
 
 
 
 
 
 
 
142
  def convert_to_stereo(audio_path):
143
  wave, sr = librosa.load(audio_path, mono=False, sr=44100)
144
 
@@ -216,7 +251,7 @@ hubert_model = load_hubert("cuda", config.is_half, os.path.join(rvc_models_dir,
216
  print(device, "half>>", config.is_half)
217
 
218
  # @spaces.GPU(enable_queue=True)
219
- def voice_change(voice_model, vocals_path, output_path, pitch_change, f0_method, index_rate, filter_radius, rms_mix_rate, protect, crepe_hop_length, is_webui):
220
  rvc_model_path, rvc_index_path = get_rvc_model(voice_model, is_webui)
221
 
222
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
@@ -227,8 +262,8 @@ def voice_change(voice_model, vocals_path, output_path, pitch_change, f0_method,
227
 
228
  # convert main vocals
229
  global hubert_model
230
- rvc_infer(rvc_index_path, index_rate, vocals_path, output_path, pitch_change, f0_method, cpt, version, net_g, filter_radius, tgt_sr, rms_mix_rate, protect, crepe_hop_length, vc, hubert_model)
231
- del hubert_model, cpt
232
  gc.collect()
233
 
234
 
@@ -267,9 +302,9 @@ def combine_audio(audio_paths, output_path, main_gain, backup_gain, inst_gain, o
267
  def process_song(
268
  song_dir, song_input, mdx_model_params, song_id, is_webui, input_type, progress,
269
  keep_files, pitch_change, pitch_change_all, voice_model, index_rate, filter_radius,
270
- rms_mix_rate, protect, f0_method, crepe_hop_length, output_format, keep_orig, orig_song_path
271
  ):
272
-
273
  if not os.path.exists(song_dir):
274
  os.makedirs(song_dir)
275
  orig_song_path, vocals_path, instrumentals_path, main_vocals_path, backup_vocals_path, main_vocals_dereverb_path = preprocess_song(song_input, mdx_model_params, song_id, is_webui, input_type, progress, keep_orig, orig_song_path)
@@ -278,29 +313,72 @@ def process_song(
278
  paths = get_audio_paths(song_dir)
279
 
280
  # if any of the audio files aren't available or keep intermediate files, rerun preprocess
281
- if any(path is None for path in paths) or keep_files:
282
  orig_song_path, vocals_path, instrumentals_path, main_vocals_path, backup_vocals_path, main_vocals_dereverb_path = preprocess_song(song_input, mdx_model_params, song_id, is_webui, input_type, progress, keep_orig, orig_song_path)
283
  else:
284
  orig_song_path, instrumentals_path, main_vocals_dereverb_path, backup_vocals_path = paths
285
 
286
  pitch_change = pitch_change * 12 + pitch_change_all
287
- ai_vocals_path = os.path.join(song_dir, f'{os.path.splitext(os.path.basename(orig_song_path))[0]}_{voice_model}_p{pitch_change}_i{index_rate}_fr{filter_radius}_rms{rms_mix_rate}_pro{protect}_{f0_method}{"" if f0_method != "mangio-crepe" else f"_{crepe_hop_length}"}.wav')
288
  ai_cover_path = os.path.join(song_dir, f'{os.path.splitext(os.path.basename(orig_song_path))[0]} ({voice_model} Ver).{output_format}')
289
 
290
  if not os.path.exists(ai_vocals_path):
291
  display_progress('[~] Converting voice using RVC...', 0.5, is_webui, progress)
292
- voice_change(voice_model, main_vocals_dereverb_path, ai_vocals_path, pitch_change, f0_method, index_rate, filter_radius, rms_mix_rate, protect, crepe_hop_length, is_webui)
293
 
294
  return ai_vocals_path, ai_cover_path, instrumentals_path, backup_vocals_path, vocals_path, main_vocals_path
295
 
296
- # process_song.zerogpu = True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
297
 
298
  # @spaces.GPU(duration=140)
299
  def song_cover_pipeline(song_input, voice_model, pitch_change, keep_files,
300
  is_webui=0, main_gain=0, backup_gain=0, inst_gain=0, index_rate=0.5, filter_radius=3,
301
  rms_mix_rate=0.25, f0_method='rmvpe', crepe_hop_length=128, protect=0.33, pitch_change_all=0,
302
  reverb_rm_size=0.15, reverb_wet=0.2, reverb_dry=0.8, reverb_damping=0.7, output_format='mp3',
 
303
  progress=gr.Progress()):
 
 
 
 
 
 
304
  try:
305
  if not song_input or not voice_model:
306
  raise_exception('Ensure that the song input field and voice model field is filled.', is_webui)
@@ -334,9 +412,8 @@ def song_cover_pipeline(song_input, voice_model, pitch_change, keep_files,
334
  keep_orig, orig_song_path = get_audio_file(song_input, is_webui, input_type, progress)
335
  orig_song_path = convert_to_stereo(orig_song_path)
336
 
337
- import time
338
  start = time.time()
339
-
340
  (
341
  ai_vocals_path,
342
  ai_cover_path,
@@ -365,6 +442,7 @@ def song_cover_pipeline(song_input, voice_model, pitch_change, keep_files,
365
  output_format,
366
  keep_orig,
367
  orig_song_path,
 
368
  )
369
 
370
  end = time.time()
@@ -374,20 +452,27 @@ def song_cover_pipeline(song_input, voice_model, pitch_change, keep_files,
374
  print(f"Audio duration: {duration__:.2f} seconds")
375
 
376
  display_progress('[~] Applying audio effects to Vocals...', 0.8, is_webui, progress)
 
 
 
 
 
377
  ai_vocals_mixed_path = add_audio_effects(ai_vocals_path, reverb_rm_size, reverb_wet, reverb_dry, reverb_damping)
378
 
379
- instrumentals_path, _ = run_mdx(
380
- mdx_model_params,
381
- os.path.join(output_dir, song_id),
382
- os.path.join(mdxnet_models_dir, "UVR-MDX-NET-Inst_HQ_4.onnx"),
383
- instrumentals_path,
384
- # exclude_main=False,
385
- exclude_inversion=True,
386
- suffix="Voiceless",
387
- denoise=False,
388
- keep_orig=True,
389
- base_device=""
390
- )
 
 
391
 
392
  if pitch_change_all != 0:
393
  display_progress('[~] Applying overall pitch change', 0.85, is_webui, progress)
@@ -399,7 +484,7 @@ def song_cover_pipeline(song_input, voice_model, pitch_change, keep_files,
399
 
400
  if not keep_files:
401
  display_progress('[~] Removing intermediate audio files...', 0.95, is_webui, progress)
402
- intermediate_files = [vocals_path, main_vocals_path, ai_vocals_mixed_path]
403
  if pitch_change_all != 0:
404
  intermediate_files += [instrumentals_path, backup_vocals_path]
405
  for file in intermediate_files:
 
9
  import subprocess
10
  from contextlib import suppress
11
  from urllib.parse import urlparse, parse_qs
12
+ import time
13
+ import shutil
14
 
15
  import gradio as gr
16
  import librosa
 
21
  from pedalboard import Pedalboard, Reverb, Compressor, HighpassFilter
22
  from pedalboard.io import AudioFile
23
  from pydub import AudioSegment
24
+ import noisereduce as nr
25
 
26
  from mdx import run_mdx
27
  from rvc import Config, load_hubert, get_vc, rvc_infer
 
30
  logging.getLogger("httpx").setLevel(logging.WARNING)
31
 
32
  BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
33
+ IS_ZERO_GPU = os.getenv("SPACES_ZERO_GPU")
34
 
35
  mdxnet_models_dir = os.path.join(BASE_DIR, 'mdxnet_models')
36
  rvc_models_dir = os.path.join(BASE_DIR, 'rvc_models')
37
  output_dir = os.path.join(BASE_DIR, 'song_output')
38
 
39
 
40
def clean_old_folders(base_path: str, max_age_seconds: int = 10800):
    """Delete sub-folders of ``base_path`` not modified for ``max_age_seconds``.

    Only direct child directories are considered; plain files inside
    ``base_path`` are left alone. A folder's age is taken from its own
    modification time.

    Args:
        base_path: Directory whose child folders are inspected.
        max_age_seconds: Folders older than this many seconds are removed.

    Returns:
        None. A message is printed when ``base_path`` is not a directory or
        when a folder cannot be removed.
    """
    if not os.path.isdir(base_path):
        print(f"Error: {base_path} is not a valid directory.")
        return

    now = time.time()

    for folder_name in os.listdir(base_path):
        folder_path = os.path.join(base_path, folder_name)
        if not os.path.isdir(folder_path):
            continue
        try:
            if now - os.path.getmtime(folder_path) > max_age_seconds:
                # print(f"Deleting folder: {folder_path}")
                shutil.rmtree(folder_path)
        except OSError as e:
            # The folder may vanish or be locked between listing and removal
            # (e.g. another request cleaning up at the same time). Clean-up is
            # best effort and must not abort the caller.
            print(f"Could not remove {folder_path}: {e}")
54
+
55
+
56
  def get_youtube_video_id(url, ignore_playlist=True):
57
  """
58
  Examples:
 
88
 
89
 
90
  def yt_download(link):
91
+ if not link.strip():
92
+ gr.Info("You need to provide a download link.")
93
+ return None
94
  ydl_opts = {
95
  'format': 'bestaudio',
96
  'outtmpl': '%(title)s',
 
118
  def get_rvc_model(voice_model, is_webui):
119
  rvc_model_filename, rvc_index_filename = None, None
120
  model_dir = os.path.join(rvc_models_dir, voice_model)
121
+ # print(model_dir)
122
  for file in os.listdir(model_dir):
123
+ # print(file)
124
  if os.path.isdir(file):
125
  for ff in os.listdir(file):
126
+ # print("subfile", ff)
127
  ext = os.path.splitext(ff)[1]
128
  if ext == '.pth':
129
  rvc_model_filename = ff
 
159
  elif file.endswith('_Vocals_Backup.wav'):
160
  backup_vocals_path = os.path.join(song_dir, file)
161
 
162
+ # print(orig_song_path, instrumentals_path, main_vocals_dereverb_path, backup_vocals_path)
163
  return orig_song_path, instrumentals_path, main_vocals_dereverb_path, backup_vocals_path
164
 
165
 
166
def get_audio_with_suffix(song_dir, suffix="_mysuffix.wav"):
    """Return the path of a file in ``song_dir`` whose name ends with ``suffix``.

    Args:
        song_dir: Directory to search (not recursive).
        suffix: Trailing part of the file name to look for.

    Returns:
        The joined path of the alphabetically first match, or ``None`` when
        nothing matches or ``song_dir`` is not a directory.
    """
    if not os.path.isdir(song_dir):
        return None

    # os.listdir() yields entries in arbitrary order; sorting makes the choice
    # reproducible when several files share the suffix.
    for file in sorted(os.listdir(song_dir)):
        if file.endswith(suffix):
            return os.path.join(song_dir, file)

    return None
175
+
176
+
177
  def convert_to_stereo(audio_path):
178
  wave, sr = librosa.load(audio_path, mono=False, sr=44100)
179
 
 
251
  print(device, "half>>", config.is_half)
252
 
253
  # @spaces.GPU(enable_queue=True)
254
+ def voice_change(voice_model, vocals_path, output_path, pitch_change, f0_method, index_rate, filter_radius, rms_mix_rate, protect, crepe_hop_length, is_webui, steps):
255
  rvc_model_path, rvc_index_path = get_rvc_model(voice_model, is_webui)
256
 
257
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
 
262
 
263
  # convert main vocals
264
  global hubert_model
265
+ rvc_infer(rvc_index_path, index_rate, vocals_path, output_path, pitch_change, f0_method, cpt, version, net_g, filter_radius, tgt_sr, rms_mix_rate, protect, crepe_hop_length, vc, hubert_model, steps)
266
+ del cpt
267
  gc.collect()
268
 
269
 
 
302
  def process_song(
303
  song_dir, song_input, mdx_model_params, song_id, is_webui, input_type, progress,
304
  keep_files, pitch_change, pitch_change_all, voice_model, index_rate, filter_radius,
305
+ rms_mix_rate, protect, f0_method, crepe_hop_length, output_format, keep_orig, orig_song_path, steps
306
  ):
307
+
308
  if not os.path.exists(song_dir):
309
  os.makedirs(song_dir)
310
  orig_song_path, vocals_path, instrumentals_path, main_vocals_path, backup_vocals_path, main_vocals_dereverb_path = preprocess_song(song_input, mdx_model_params, song_id, is_webui, input_type, progress, keep_orig, orig_song_path)
 
313
  paths = get_audio_paths(song_dir)
314
 
315
  # if any of the audio files aren't available or keep intermediate files, rerun preprocess
316
+ if any(path is None for path in paths):
317
  orig_song_path, vocals_path, instrumentals_path, main_vocals_path, backup_vocals_path, main_vocals_dereverb_path = preprocess_song(song_input, mdx_model_params, song_id, is_webui, input_type, progress, keep_orig, orig_song_path)
318
  else:
319
  orig_song_path, instrumentals_path, main_vocals_dereverb_path, backup_vocals_path = paths
320
 
321
  pitch_change = pitch_change * 12 + pitch_change_all
322
+ ai_vocals_path = os.path.join(song_dir, f'{os.path.splitext(os.path.basename(orig_song_path))[0]}_{voice_model}_p{pitch_change}_i{index_rate}_fr{filter_radius}_rms{rms_mix_rate}_pro{protect}_{f0_method}{"" if f0_method != "mangio-crepe" else f"_{crepe_hop_length}"}_s{steps}.wav')
323
  ai_cover_path = os.path.join(song_dir, f'{os.path.splitext(os.path.basename(orig_song_path))[0]} ({voice_model} Ver).{output_format}')
324
 
325
  if not os.path.exists(ai_vocals_path):
326
  display_progress('[~] Converting voice using RVC...', 0.5, is_webui, progress)
327
+ voice_change(voice_model, main_vocals_dereverb_path, ai_vocals_path, pitch_change, f0_method, index_rate, filter_radius, rms_mix_rate, protect, crepe_hop_length, is_webui, steps)
328
 
329
  return ai_vocals_path, ai_cover_path, instrumentals_path, backup_vocals_path, vocals_path, main_vocals_path
330
 
331
+
332
def apply_noisereduce(audio_list, type_output="wav"):
    """Run noise reduction over each file and return the resulting paths.

    For every input a sibling file ``<name>_nr.<type_output>`` is written.
    If anything fails for a file, the error is printed and the original path
    is returned in its place, so the result always has one entry per input,
    in the same order.

    Args:
        audio_list: Iterable of audio file paths.
        type_output: Export format / extension of the denoised files.

    Returns:
        List of paths: the denoised file where processing succeeded, the
        untouched input path otherwise.
    """
    # https://github.com/sa-if/Audio-Denoiser
    print("Noice reduce")

    result = []
    for audio_path in audio_list:
        out_path = f"{os.path.splitext(audio_path)[0]}_nr.{type_output}"

        try:
            # Load audio file
            audio = AudioSegment.from_file(audio_path)

            # Convert audio to numpy array
            samples = np.array(audio.get_array_of_samples())

            # pydub interleaves the channels (L, R, L, R, ...). Split them
            # into (channels, frames) so every channel is denoised as its own
            # signal instead of being treated as one mono stream.
            if audio.channels > 1:
                samples = samples.reshape((-1, audio.channels)).T

            # Reduce noise
            reduced_noise = nr.reduce_noise(samples, sr=audio.frame_rate, prop_decrease=0.6)

            # Back to interleaved frames, in the original integer sample type,
            # so the raw bytes match sample_width/channels below.
            if audio.channels > 1:
                reduced_noise = reduced_noise.T
            pcm = np.ascontiguousarray(reduced_noise, dtype=samples.dtype)

            # Convert reduced noise signal back to audio
            reduced_audio = AudioSegment(
                pcm.tobytes(),
                frame_rate=audio.frame_rate,
                sample_width=audio.sample_width,
                channels=audio.channels
            )

            # Save reduced audio to file
            reduced_audio.export(out_path, format=type_output)
            result.append(out_path)

        except Exception as e:
            # Denoising is an optional extra: fall back to the unprocessed
            # file rather than failing the whole pipeline.
            print(f"Error noisereduce: {str(e)}")
            result.append(audio_path)

    return result
367
+
368
 
369
  # @spaces.GPU(duration=140)
370
  def song_cover_pipeline(song_input, voice_model, pitch_change, keep_files,
371
  is_webui=0, main_gain=0, backup_gain=0, inst_gain=0, index_rate=0.5, filter_radius=3,
372
  rms_mix_rate=0.25, f0_method='rmvpe', crepe_hop_length=128, protect=0.33, pitch_change_all=0,
373
  reverb_rm_size=0.15, reverb_wet=0.2, reverb_dry=0.8, reverb_damping=0.7, output_format='mp3',
374
+ extra_denoise=False, steps=1,
375
  progress=gr.Progress()):
376
+ if not keep_files or IS_ZERO_GPU:
377
+ clean_old_folders("./song_output", 14400)
378
+
379
+ if IS_ZERO_GPU:
380
+ clean_old_folders("./rvc_models", 10800)
381
+
382
  try:
383
  if not song_input or not voice_model:
384
  raise_exception('Ensure that the song input field and voice model field is filled.', is_webui)
 
412
  keep_orig, orig_song_path = get_audio_file(song_input, is_webui, input_type, progress)
413
  orig_song_path = convert_to_stereo(orig_song_path)
414
 
 
415
  start = time.time()
416
+
417
  (
418
  ai_vocals_path,
419
  ai_cover_path,
 
442
  output_format,
443
  keep_orig,
444
  orig_song_path,
445
+ steps,
446
  )
447
 
448
  end = time.time()
 
452
  print(f"Audio duration: {duration__:.2f} seconds")
453
 
454
  display_progress('[~] Applying audio effects to Vocals...', 0.8, is_webui, progress)
455
+
456
+ nr_path = ai_vocals_path # get_audio_with_suffix(song_dir, "_nr.wav")
457
+ if extra_denoise:
458
+ ai_vocals_path = apply_noisereduce([ai_vocals_path])[0]
459
+
460
  ai_vocals_mixed_path = add_audio_effects(ai_vocals_path, reverb_rm_size, reverb_wet, reverb_dry, reverb_damping)
461
 
462
+ ins_path = get_audio_with_suffix(song_dir, "_Voiceless.wav")
463
+ if not ins_path:
464
+ instrumentals_path, _ = run_mdx(
465
+ mdx_model_params,
466
+ os.path.join(output_dir, song_id),
467
+ os.path.join(mdxnet_models_dir, "UVR-MDX-NET-Inst_HQ_4.onnx"),
468
+ instrumentals_path,
469
+ # exclude_main=False,
470
+ exclude_inversion=True,
471
+ suffix="Voiceless",
472
+ denoise=False,
473
+ keep_orig=True,
474
+ base_device=("" if IS_ZERO_GPU else "cuda")
475
+ )
476
 
477
  if pitch_change_all != 0:
478
  display_progress('[~] Applying overall pitch change', 0.85, is_webui, progress)
 
484
 
485
  if not keep_files:
486
  display_progress('[~] Removing intermediate audio files...', 0.95, is_webui, progress)
487
+ intermediate_files = [vocals_path, main_vocals_path, ai_vocals_mixed_path, ins_path, nr_path]
488
  if pitch_change_all != 0:
489
  intermediate_files += [instrumentals_path, backup_vocals_path]
490
  for file in intermediate_files:
src/mdx.py CHANGED
@@ -246,20 +246,19 @@ class MDX:
246
 
247
 
248
  def run_mdx(model_params, output_dir, model_path, filename, exclude_main=False, exclude_inversion=False, suffix=None, invert_suffix=None, denoise=False, keep_orig=True, m_threads=2, base_device="cuda"):
249
-
 
250
  if base_device == "cuda" and torch.cuda.is_available():
251
  device = torch.device("cuda:0")
252
  device_properties = torch.cuda.get_device_properties(device)
253
  vram_gb = device_properties.total_memory / 1024**3
254
  m_threads = 1 if vram_gb < 8 else (8 if vram_gb > 32 else 2)
255
- print(f"threads: {m_threads} vram: {vram_gb}")
256
  processor_num = 0
257
  else:
258
  device = torch.device("cpu")
259
  m_threads = 2
260
  if torch.cuda.is_available():
261
  m_threads = 8
262
- print(f"threads: {m_threads}")
263
  processor_num = -1
264
 
265
  model_hash = MDX.get_hash(model_path)
@@ -275,6 +274,11 @@ def run_mdx(model_params, output_dir, model_path, filename, exclude_main=False,
275
 
276
  mdx_sess = MDX(model_path, model, processor=processor_num)
277
  wave, sr = librosa.load(filename, mono=False, sr=44100)
 
 
 
 
 
278
  # normalizing input wave gives better output
279
  peak = max(np.max(wave), abs(np.min(wave)))
280
  wave /= peak
 
246
 
247
 
248
  def run_mdx(model_params, output_dir, model_path, filename, exclude_main=False, exclude_inversion=False, suffix=None, invert_suffix=None, denoise=False, keep_orig=True, m_threads=2, base_device="cuda"):
249
+ vram_gb = 0
250
+
251
  if base_device == "cuda" and torch.cuda.is_available():
252
  device = torch.device("cuda:0")
253
  device_properties = torch.cuda.get_device_properties(device)
254
  vram_gb = device_properties.total_memory / 1024**3
255
  m_threads = 1 if vram_gb < 8 else (8 if vram_gb > 32 else 2)
 
256
  processor_num = 0
257
  else:
258
  device = torch.device("cpu")
259
  m_threads = 2
260
  if torch.cuda.is_available():
261
  m_threads = 8
 
262
  processor_num = -1
263
 
264
  model_hash = MDX.get_hash(model_path)
 
274
 
275
  mdx_sess = MDX(model_path, model, processor=processor_num)
276
  wave, sr = librosa.load(filename, mono=False, sr=44100)
277
+ duration = librosa.get_duration(y=wave, sr=sr)
278
+ if duration < 60:
279
+ m_threads = 1
280
+ print(f"threads: {m_threads} vram: {vram_gb}")
281
+
282
  # normalizing input wave gives better output
283
  peak = max(np.max(wave), abs(np.min(wave)))
284
  wave /= peak
src/rvc.py CHANGED
@@ -157,9 +157,20 @@ def get_vc(device, is_half, config, model_path):
157
  return cpt, version, net_g, tgt_sr, vc
158
 
159
 
160
- def rvc_infer(index_path, index_rate, input_path, output_path, pitch_change, f0_method, cpt, version, net_g, filter_radius, tgt_sr, rms_mix_rate, protect, crepe_hop_length, vc, hubert_model):
161
- audio = load_audio(input_path, 16000)
162
  times = [0, 0, 0]
163
  if_f0 = cpt.get('f0', 1)
164
- audio_opt = vc.pipeline(hubert_model, net_g, 0, audio, input_path, times, pitch_change, f0_method, index_path, index_rate, if_f0, filter_radius, tgt_sr, 0, rms_mix_rate, version, protect, crepe_hop_length)
165
- wavfile.write(output_path, tgt_sr, audio_opt)
 
 
 
 
 
 
 
 
 
 
 
 
 
157
  return cpt, version, net_g, tgt_sr, vc
158
 
159
 
160
def rvc_infer(index_path, index_rate, input_path, output_path, pitch_change, f0_method, cpt, version, net_g, filter_radius, tgt_sr, rms_mix_rate, protect, crepe_hop_length, vc, hubert_model, steps=1):
    """Convert ``input_path`` with the RVC pipeline and write ``output_path``.

    The conversion is run ``steps`` times. The first pass reads
    ``input_path``; every later pass reads the file written by the previous
    pass, and each pass overwrites ``output_path``.

    Args:
        steps: Number of conversion passes. Defaults to 1 so callers using
            the earlier 16-argument form keep working. Values below 1 are
            treated as 1.
        (remaining arguments are forwarded to ``vc.pipeline`` unchanged)
    """
    times = [0, 0, 0]
    if_f0 = cpt.get('f0', 1)

    # UI widgets can hand over floats, which range() rejects, and with zero
    # passes output_path would never be written.
    steps = max(1, int(steps))

    working_path = input_path

    for step in range(steps):
        audio = load_audio(working_path, 16000)

        # NOTE(review): before the multi-pass change the third positional
        # argument was the constant 0; confirm vc.pipeline really expects the
        # pass index in that slot.
        # NOTE(review): pitch_change is applied again on every pass; confirm
        # the cumulative shift for steps > 1 is intended.
        audio_opt = vc.pipeline(
            hubert_model, net_g, step, audio, working_path, times, pitch_change,
            f0_method, index_path, index_rate, if_f0, filter_radius, tgt_sr,
            0, rms_mix_rate, version, protect, crepe_hop_length
        )

        wavfile.write(output_path, tgt_sr, audio_opt)
        working_path = output_path
src/webui.py CHANGED
@@ -6,10 +6,28 @@ import zipfile
6
  from argparse import ArgumentParser
7
  import spaces
8
  import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
- from main import song_cover_pipeline
11
 
12
  BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 
13
 
14
  mdxnet_models_dir = os.path.join(BASE_DIR, 'mdxnet_models')
15
  rvc_models_dir = os.path.join(BASE_DIR, 'rvc_models')
@@ -78,10 +96,21 @@ def download_online_model(url, dir_name, progress=gr.Progress()):
78
  if 'pixeldrain.com' in url:
79
  url = f'https://pixeldrain.com/api/file/{zip_name}'
80
 
81
- urllib.request.urlretrieve(url, zip_name)
82
-
83
- progress(0.5, desc='[~] Extracting zip...')
84
- extract_zip(extraction_folder, zip_name)
 
 
 
 
 
 
 
 
 
 
 
85
  return f'[+] {dir_name} Model successfully downloaded!'
86
 
87
  except Exception as e:
@@ -157,33 +186,37 @@ def show_hop_slider(pitch_detection_algo):
157
  if __name__ == '__main__':
158
  parser = ArgumentParser(description='Generate a AI cover song in the song_output/id directory.', add_help=True)
159
  parser.add_argument("--share", action="store_true", dest="share_enabled", default=False, help="Enable sharing")
 
160
  parser.add_argument("--listen", action="store_true", default=False, help="Make the WebUI reachable from your local network.")
161
  parser.add_argument('--listen-host', type=str, help='The hostname that the server will use.')
162
  parser.add_argument('--listen-port', type=int, help='The listening port that the server will use.')
 
 
163
  args = parser.parse_args()
164
 
165
  voice_models = get_current_models(rvc_models_dir)
166
  with open(os.path.join(rvc_models_dir, 'public_models.json'), encoding='utf8') as infile:
167
  public_models = json.load(infile)
168
 
169
- with gr.Blocks(title='AICoverGenWebUI') as app:
170
-
171
- gr.Label('AICoverGen WebUI ZeroGPU mode created with ❤️', show_label=False)
172
- gr.Markdown(
173
- """
174
- <details>
175
- <summary style="font-size: 1.5em;">⚠️ Important (click to expand)</summary>
176
- <ul>
177
- <li>🚀 This demo use a Zero GPU, which is available only for a limited time. It's recommended to use audio files that are no longer than 5 minutes. If you want to use it without time restrictions, you can duplicate the 'old CPU space'. ⏳</li>
178
- </ul>
179
- </details>
180
- """
181
- )
182
- gr.Markdown("Duplicate the old CPU space for use in private: [![Duplicate this Space](https://huggingface.co/datasets/huggingface/badges/raw/main/duplicate-this-space-sm-dark.svg)](https://huggingface.co/spaces/r3gm/AICoverGen_old_stable_cpu?duplicate=true)\n\n")
 
183
 
184
  # main tab
185
  with gr.Tab("Generate"):
186
-
187
  with gr.Accordion('Main Options'):
188
  with gr.Row():
189
  with gr.Column():
@@ -191,12 +224,19 @@ if __name__ == '__main__':
191
  ref_btn = gr.Button('Refresh Models 🔁', variant='primary')
192
 
193
  with gr.Column(visible=False) as yt_link_col:
194
- song_input = gr.Text(label='Song input', info='Link to a song on YouTube or full path to a local file. For file upload, click the button below. Example: https://www.youtube.com/watch?v=M-mtdN6R3bQ')
195
  show_file_upload_button = gr.Button('Upload file instead')
196
 
197
  with gr.Column(visible=True) as file_upload_col:
198
  audio_extensions = ['.mp3', '.m4a', '.flac', '.wav', '.aac', '.ogg', '.wma', '.alac', '.aiff', '.opus', 'amr']
199
- local_file = gr.File(label='Audio file', interactive=True, type="filepath", file_types=audio_extensions)
 
 
 
 
 
 
 
200
  song_input_file = gr.UploadButton('Upload 📂', file_types=['audio'], variant='primary', visible=False)
201
  show_yt_link_button = gr.Button('Paste YouTube link/Path to local file instead', visible=False)
202
  song_input_file.upload(process_file_upload, inputs=[song_input_file], outputs=[local_file, song_input])
@@ -217,7 +257,12 @@ if __name__ == '__main__':
217
  f0_method = gr.Dropdown(['rmvpe+', 'rmvpe', 'mangio-crepe'], value='rmvpe+', label='Pitch detection algorithm', info='Best option is rmvpe (clarity in vocals), then mangio-crepe (smoother vocals), rmvpe+ use a minimum and maximum allowed pitch values.')
218
  crepe_hop_length = gr.Slider(32, 320, value=128, step=1, visible=False, label='Crepe hop length', info='Lower values leads to longer conversions and higher risk of voice cracks, but better pitch accuracy.')
219
  f0_method.change(show_hop_slider, inputs=f0_method, outputs=crepe_hop_length)
220
- keep_files = gr.Checkbox(True, label='Keep intermediate files', info='Keep all audio files generated in the song_output/id directory, e.g. Isolated Vocals/Instrumentals. Leave unchecked to save space')
 
 
 
 
 
221
 
222
  with gr.Accordion('Audio mixing options', open=False):
223
  gr.Markdown('### Volume Change (decibels)')
@@ -239,7 +284,12 @@ if __name__ == '__main__':
239
  with gr.Row():
240
  clear_btn = gr.ClearButton(value='Clear', components=[song_input, rvc_model, keep_files, local_file])
241
  generate_btn = gr.Button("Generate", variant='primary')
242
- ai_cover = gr.Audio(label='AI Cover', show_share_button=False)
 
 
 
 
 
243
 
244
  ref_btn.click(update_models_list, None, outputs=rvc_model)
245
  is_webui = gr.Number(value=1, visible=False)
@@ -247,12 +297,12 @@ if __name__ == '__main__':
247
  inputs=[local_file, rvc_model, pitch, keep_files, is_webui, main_gain, backup_gain,
248
  inst_gain, index_rate, filter_radius, rms_mix_rate, f0_method, crepe_hop_length,
249
  protect, pitch_all, reverb_rm_size, reverb_wet, reverb_dry, reverb_damping,
250
- output_format],
251
  outputs=[ai_cover])
252
- clear_btn.click(lambda: [0, 0, 0, 0, 0.5, 3, 0.25, 0.33, 'rmvpe+', 128, 0, 0.15, 0.2, 0.8, 0.7, 'mp3', None],
253
  outputs=[pitch, main_gain, backup_gain, inst_gain, index_rate, filter_radius, rms_mix_rate,
254
  protect, f0_method, crepe_hop_length, pitch_all, reverb_rm_size, reverb_wet,
255
- reverb_dry, reverb_damping, output_format, ai_cover])
256
 
257
  # Download tab
258
  with gr.Tab('Download model'):
@@ -271,6 +321,8 @@ if __name__ == '__main__':
271
  gr.Markdown('## Input Examples')
272
  gr.Examples(
273
  [
 
 
274
  ['https://huggingface.co/phant0m4r/LiSA/resolve/main/LiSA.zip', 'Lisa'],
275
  ['https://pixeldrain.com/u/3tJmABXA', 'Gura'],
276
  ['https://huggingface.co/Kit-Lemonfoot/kitlemonfoot_rvc_models/resolve/main/AZKi%20(Hybrid).zip', 'Azki']
@@ -329,7 +381,10 @@ if __name__ == '__main__':
329
 
330
  app.launch(
331
  share=args.share_enabled,
 
 
332
  # enable_queue=True,
333
  server_name=None if not args.listen else (args.listen_host or '0.0.0.0'),
334
  server_port=args.listen_port,
 
335
  )
 
6
  from argparse import ArgumentParser
7
  import spaces
8
  import gradio as gr
9
+ import logging
10
def configure_logging_libs(debug=False):
    """Quiet down chatty third-party loggers before the heavy imports run.

    Sets the ``numba``, ``httpx``, ``markdown_it``, ``fairseq`` and ``faiss``
    loggers to ``logging.WARNING`` and exports ``TF_CPP_MIN_LOG_LEVEL``.

    Args:
        debug: When true, ``TF_CPP_MIN_LOG_LEVEL`` is set to ``"1"``;
            otherwise it is set to ``"3"``. The Python logger levels are
            the same in both cases.

    Returns:
        None. The function is best-effort and never raises: a failure is
        reported on this module's logger at debug level instead of being
        silently discarded.
    """
    noisy_loggers = ("numba", "httpx", "markdown_it", "fairseq", "faiss")
    try:
        for logger_name in noisy_loggers:
            logging.getLogger(logger_name).setLevel(logging.WARNING)
        # Presumably consumed by TensorFlow's native logging when it is
        # imported later - set once, it does not depend on the loop above.
        os.environ['TF_CPP_MIN_LOG_LEVEL'] = "1" if debug else "3"
    except Exception as error:
        # Log tuning must never stop the app from starting, but leave a
        # trace so that a misconfiguration can still be diagnosed.
        logging.getLogger(__name__).debug(
            "Could not configure library logging: %s", error
        )
25
+ configure_logging_libs()
26
 
27
+ from main import song_cover_pipeline, yt_download
28
 
29
  BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
30
+ IS_ZERO_GPU = os.getenv("SPACES_ZERO_GPU")
31
 
32
  mdxnet_models_dir = os.path.join(BASE_DIR, 'mdxnet_models')
33
  rvc_models_dir = os.path.join(BASE_DIR, 'rvc_models')
 
96
  if 'pixeldrain.com' in url:
97
  url = f'https://pixeldrain.com/api/file/{zip_name}'
98
 
99
+
100
+ if "," in url:
101
+ urls = [u.strip() for u in url.split(",") if u.strip()]
102
+ os.makedirs(extraction_folder, exist_ok=True)
103
+ for u in urls:
104
+ u = u.replace("?download=true", "")
105
+ file_name = u.split('/')[-1]
106
+ file_path = os.path.join(extraction_folder, file_name)
107
+ if not os.path.exists(file_path): # avoid re-downloading
108
+ urllib.request.urlretrieve(u, file_path)
109
+ else:
110
+ urllib.request.urlretrieve(url, zip_name)
111
+
112
+ progress(0.5, desc='[~] Extracting zip...')
113
+ extract_zip(extraction_folder, zip_name)
114
  return f'[+] {dir_name} Model successfully downloaded!'
115
 
116
  except Exception as e:
 
186
  if __name__ == '__main__':
187
  parser = ArgumentParser(description='Generate a AI cover song in the song_output/id directory.', add_help=True)
188
  parser.add_argument("--share", action="store_true", dest="share_enabled", default=False, help="Enable sharing")
189
+ parser.add_argument("--builtin-player", action="store_true", default=False, help="Use the builtin audio player")
190
  parser.add_argument("--listen", action="store_true", default=False, help="Make the WebUI reachable from your local network.")
191
  parser.add_argument('--listen-host', type=str, help='The hostname that the server will use.')
192
  parser.add_argument('--listen-port', type=int, help='The listening port that the server will use.')
193
+ parser.add_argument('--theme', type=str, default="NoCrypt/miku", help='Set the theme (default: NoCrypt/miku)')
194
+ parser.add_argument("--ssr", action="store_true", help="Enable SSR (Server-Side Rendering)")
195
  args = parser.parse_args()
196
 
197
  voice_models = get_current_models(rvc_models_dir)
198
  with open(os.path.join(rvc_models_dir, 'public_models.json'), encoding='utf8') as infile:
199
  public_models = json.load(infile)
200
 
201
+ with gr.Blocks(title='AICoverGenWebUI', theme=args.theme, fill_width=True, fill_height=False) as app:
202
+
203
+ gr.Label(f'AICoverGen WebUI {"ZeroGPU mode" if IS_ZERO_GPU else ""} created with ❤️', show_label=False)
204
+ if IS_ZERO_GPU:
205
+ gr.Markdown(
206
+ """
207
+ <details>
208
+ <summary style="font-size: 1.5em;">⚠️ Important (click to expand)</summary>
209
+ <ul>
210
+ <li>🚀 This demo use a Zero GPU, which is available only for a limited time. It's recommended to use audio files that are no longer than 5 minutes. If you want to use it without time restrictions, you can duplicate the 'old CPU space'. ⏳</li>
211
+ </ul>
212
+ </details>
213
+ """
214
+ )
215
+ gr.Markdown("Duplicate the old CPU space for use in private: [![Duplicate this Space](https://huggingface.co/datasets/huggingface/badges/raw/main/duplicate-this-space-sm-dark.svg)](https://huggingface.co/spaces/r3gm/AICoverGen_old_stable_cpu?duplicate=true)\n\n")
216
 
217
  # main tab
218
  with gr.Tab("Generate"):
219
+
220
  with gr.Accordion('Main Options'):
221
  with gr.Row():
222
  with gr.Column():
 
224
  ref_btn = gr.Button('Refresh Models 🔁', variant='primary')
225
 
226
  with gr.Column(visible=False) as yt_link_col:
227
+ song_input = gr.Text(label='Song input', info='Link to a song on YouTube or full path to a local file. For file upload, click the button below.')
228
  show_file_upload_button = gr.Button('Upload file instead')
229
 
230
  with gr.Column(visible=True) as file_upload_col:
231
  audio_extensions = ['.mp3', '.m4a', '.flac', '.wav', '.aac', '.ogg', '.wma', '.alac', '.aiff', '.opus', 'amr']
232
+ local_file = gr.File(label='Audio file', interactive=True, type="filepath", file_types=audio_extensions, height=150)
233
+ if not IS_ZERO_GPU:
234
+ with gr.Row():
235
+ with gr.Row(scale=2):
236
+ url_media_gui = gr.Textbox(value="", label="Enter URL", placeholder="www.youtube.com/watch?v=g_9rPvbENUw", lines=1)
237
+ with gr.Row(scale=1):
238
+ url_button_gui = gr.Button("Process URL", variant="secondary")
239
+ url_button_gui.click(yt_download, [url_media_gui], [local_file])
240
  song_input_file = gr.UploadButton('Upload 📂', file_types=['audio'], variant='primary', visible=False)
241
  show_yt_link_button = gr.Button('Paste YouTube link/Path to local file instead', visible=False)
242
  song_input_file.upload(process_file_upload, inputs=[song_input_file], outputs=[local_file, song_input])
 
257
  f0_method = gr.Dropdown(['rmvpe+', 'rmvpe', 'mangio-crepe'], value='rmvpe+', label='Pitch detection algorithm', info='Best option is rmvpe (clarity in vocals), then mangio-crepe (smoother vocals), rmvpe+ use a minimum and maximum allowed pitch values.')
258
  crepe_hop_length = gr.Slider(32, 320, value=128, step=1, visible=False, label='Crepe hop length', info='Lower values leads to longer conversions and higher risk of voice cracks, but better pitch accuracy.')
259
  f0_method.change(show_hop_slider, inputs=f0_method, outputs=crepe_hop_length)
260
+ with gr.Row():
261
+ with gr.Row():
262
+ steps = gr.Slider(minimum=1, maximum=3, label="Steps", value=1, step=1, interactive=True)
263
+ with gr.Row():
264
+ extra_denoise = gr.Checkbox(True, label='Denoise', info='Apply an additional noise reduction step to clean up the audio further.')
265
+ keep_files = gr.Checkbox((False if IS_ZERO_GPU else True), label='Keep intermediate files', info='Keep all audio files generated in the song_output/id directory, e.g. Isolated Vocals/Instrumentals. Leave unchecked to save space', interactive=(False if IS_ZERO_GPU else True))
266
 
267
  with gr.Accordion('Audio mixing options', open=False):
268
  gr.Markdown('### Volume Change (decibels)')
 
284
  with gr.Row():
285
  clear_btn = gr.ClearButton(value='Clear', components=[song_input, rvc_model, keep_files, local_file])
286
  generate_btn = gr.Button("Generate", variant='primary')
287
+ ai_cover = (
288
+ gr.Audio(label='AI Cover', show_share_button=True)
289
+ if args.builtin_player else
290
+ gr.File(label="AI Cover", interactive=False)
291
+ )
292
+ gr.Markdown("- You can also try `AICoverGen❤️` in Colab’s free tier, which provides free GPU [link](https://github.com/R3gm/AICoverGen?tab=readme-ov-file#aicovergen).")
293
 
294
  ref_btn.click(update_models_list, None, outputs=rvc_model)
295
  is_webui = gr.Number(value=1, visible=False)
 
297
  inputs=[local_file, rvc_model, pitch, keep_files, is_webui, main_gain, backup_gain,
298
  inst_gain, index_rate, filter_radius, rms_mix_rate, f0_method, crepe_hop_length,
299
  protect, pitch_all, reverb_rm_size, reverb_wet, reverb_dry, reverb_damping,
300
+ output_format, extra_denoise, steps],
301
  outputs=[ai_cover])
302
+ clear_btn.click(lambda: [0, 0, 0, 0, 0.5, 3, 0.25, 0.33, 'rmvpe+', 128, 0, 0.15, 0.2, 0.8, 0.7, 'mp3', None, True, 1],
303
  outputs=[pitch, main_gain, backup_gain, inst_gain, index_rate, filter_radius, rms_mix_rate,
304
  protect, f0_method, crepe_hop_length, pitch_all, reverb_rm_size, reverb_wet,
305
+ reverb_dry, reverb_damping, output_format, ai_cover, extra_denoise, steps])
306
 
307
  # Download tab
308
  with gr.Tab('Download model'):
 
321
  gr.Markdown('## Input Examples')
322
  gr.Examples(
323
  [
324
+ ['https://huggingface.co/MrDawg/ToothBrushing/resolve/main/ToothBrushing.zip?download=true', 'ToothBrushing'],
325
+ ['https://huggingface.co/sail-rvc/Aldeano_Minecraft__RVC_V2_-_500_Epochs_/resolve/main/model.pth?download=true, https://huggingface.co/sail-rvc/Aldeano_Minecraft__RVC_V2_-_500_Epochs_/resolve/main/model.index?download=true', 'Minecraft_Villager'],
326
  ['https://huggingface.co/phant0m4r/LiSA/resolve/main/LiSA.zip', 'Lisa'],
327
  ['https://pixeldrain.com/u/3tJmABXA', 'Gura'],
328
  ['https://huggingface.co/Kit-Lemonfoot/kitlemonfoot_rvc_models/resolve/main/AZKi%20(Hybrid).zip', 'Azki']
 
381
 
382
  app.launch(
383
  share=args.share_enabled,
384
+ debug=args.share_enabled,
385
+ show_error=True,
386
  # enable_queue=True,
387
  server_name=None if not args.listen else (args.listen_host or '0.0.0.0'),
388
  server_port=args.listen_port,
389
+ ssr_mode=args.ssr
390
  )