import os import sys import torch import logging from yt_dlp import YoutubeDL import gradio as gr import argparse from audio_separator.separator import Separator import numpy as np import librosa import soundfile as sf from ensemble import ensemble_files # ensemble.py'dan import device = "cuda" if torch.cuda.is_available() else "cpu" use_autocast = device == "cuda" # Logging ayarları logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) # Model dictionaries organized by category ROFORMER_MODELS = { "Vocals": { 'MelBand Roformer | Vocals by Kimberley Jensen': 'vocals_mel_band_roformer.ckpt', 'MelBand Roformer | Vocals by becruily': 'mel_band_roformer_vocals_becruily.ckpt', 'MelBand Roformer | Vocals Fullness by Aname': 'mel_band_roformer_vocal_fullness_aname.ckpt', 'BS Roformer | Vocals by Gabox': 'bs_roformer_vocals_gabox.ckpt', 'MelBand Roformer | Vocals by Gabox': 'mel_band_roformer_vocals_gabox.ckpt', 'MelBand Roformer | Vocals FV1 by Gabox': 'mel_band_roformer_vocals_fv1_gabox.ckpt', 'MelBand Roformer | Vocals FV2 by Gabox': 'mel_band_roformer_vocals_fv2_gabox.ckpt', 'MelBand Roformer | Vocals FV3 by Gabox': 'mel_band_roformer_vocals_fv3_gabox.ckpt', 'MelBand Roformer | Vocals FV4 by Gabox': 'mel_band_roformer_vocals_fv4_gabox.ckpt', 'BS Roformer | Chorus Male-Female by Sucial': 'model_chorus_bs_roformer_ep_267_sdr_24.1275.ckpt', 'BS Roformer | Male-Female by aufr33': 'bs_roformer_male_female_by_aufr33_sdr_7.2889.ckpt', }, "Instrumentals": { 'MelBand Roformer Kim | Inst V1 by Unwa': 'melband_roformer_inst_v1.ckpt', 'MelBand Roformer Kim | Inst V1 (E) by Unwa': 'melband_roformer_inst_v1e.ckpt', 'MelBand Roformer Kim | Inst V2 by Unwa': 'melband_roformer_inst_v2.ckpt', 'MelBand Roformer | Instrumental by becruily': 'mel_band_roformer_instrumental_becruily.ckpt', 'MelBand Roformer | Instrumental by Gabox': 'mel_band_roformer_instrumental_gabox.ckpt', 'MelBand Roformer | Instrumental 2 by Gabox': 'mel_band_roformer_instrumental_2_gabox.ckpt', 'MelBand Roformer | Instrumental 3 by Gabox': 'mel_band_roformer_instrumental_3_gabox.ckpt', 'MelBand Roformer | Instrumental Bleedless V1 by Gabox': 'mel_band_roformer_instrumental_bleedless_v1_gabox.ckpt', 'MelBand Roformer | Instrumental Bleedless V2 by Gabox': 'mel_band_roformer_instrumental_bleedless_v2_gabox.ckpt', 'MelBand Roformer | Instrumental Fullness V1 by Gabox': 'mel_band_roformer_instrumental_fullness_v1_gabox.ckpt', 'MelBand Roformer | Instrumental Fullness V2 by Gabox': 'mel_band_roformer_instrumental_fullness_v2_gabox.ckpt', 'MelBand Roformer | Instrumental Fullness V3 by Gabox': 'mel_band_roformer_instrumental_fullness_v3_gabox.ckpt', 'MelBand Roformer | Instrumental Fullness Noisy V4 by Gabox': 'mel_band_roformer_instrumental_fullness_noise_v4_gabox.ckpt', 'MelBand Roformer | INSTV5 by Gabox': 'mel_band_roformer_instrumental_instv5_gabox.ckpt', 'MelBand Roformer | INSTV5N by Gabox': 'mel_band_roformer_instrumental_instv5n_gabox.ckpt', 'MelBand Roformer | INSTV6 by Gabox': 'mel_band_roformer_instrumental_instv6_gabox.ckpt', 'MelBand Roformer | INSTV6N by Gabox': 'mel_band_roformer_instrumental_instv6n_gabox.ckpt', 'MelBand Roformer | INSTV7 by Gabox': 'mel_band_roformer_instrumental_instv7_gabox.ckpt', }, "InstVoc Duality": { 'MelBand Roformer Kim | InstVoc Duality V1 by Unwa': 'melband_roformer_instvoc_duality_v1.ckpt', 'MelBand Roformer Kim | InstVoc Duality V2 by Unwa': 'melband_roformer_instvox_duality_v2.ckpt', }, "De-Reverb": { 'BS-Roformer-De-Reverb': 'deverb_bs_roformer_8_384dim_10depth.ckpt', 'MelBand Roformer | De-Reverb by anvuew': 'dereverb_mel_band_roformer_anvuew_sdr_19.1729.ckpt', 'MelBand Roformer | De-Reverb Less Aggressive by anvuew': 'dereverb_mel_band_roformer_less_aggressive_anvuew_sdr_18.8050.ckpt', 'MelBand Roformer | De-Reverb Mono by anvuew': 'dereverb_mel_band_roformer_mono_anvuew.ckpt', 'MelBand Roformer | De-Reverb Big by Sucial': 'dereverb_big_mbr_ep_362.ckpt', 'MelBand Roformer | De-Reverb Super Big by Sucial': 'dereverb_super_big_mbr_ep_346.ckpt', 'MelBand Roformer | De-Reverb-Echo by Sucial': 'dereverb-echo_mel_band_roformer_sdr_10.0169.ckpt', 'MelBand Roformer | De-Reverb-Echo V2 by Sucial': 'dereverb-echo_mel_band_roformer_sdr_13.4843_v2.ckpt', 'MelBand Roformer | De-Reverb-Echo Fused by Sucial': 'dereverb_echo_mbr_fused.ckpt', }, "Denoise": { 'Mel-Roformer-Denoise-Aufr33': 'denoise_mel_band_roformer_aufr33_sdr_27.9959.ckpt', 'Mel-Roformer-Denoise-Aufr33-Aggr': 'denoise_mel_band_roformer_aufr33_aggr_sdr_27.9768.ckpt', 'MelBand Roformer | Denoise-Debleed by Gabox': 'mel_band_roformer_denoise_debleed_gabox.ckpt', }, "Karaoke": { 'Mel-Roformer-Karaoke-Aufr33-Viperx': 'mel_band_roformer_karaoke_aufr33_viperx_sdr_10.1956.ckpt', 'MelBand Roformer | Karaoke by Gabox': 'mel_band_roformer_karaoke_gabox.ckpt', }, "General Purpose": { 'BS-Roformer-Viperx-1297': 'model_bs_roformer_ep_317_sdr_12.9755.ckpt', 'BS-Roformer-Viperx-1296': 'model_bs_roformer_ep_368_sdr_12.9628.ckpt', 'BS-Roformer-Viperx-1053': 'model_bs_roformer_ep_937_sdr_10.5309.ckpt', 'Mel-Roformer-Viperx-1143': 'model_mel_band_roformer_ep_3005_sdr_11.4360.ckpt', 'Mel-Roformer-Crowd-Aufr33-Viperx': 'mel_band_roformer_crowd_aufr33_viperx_sdr_8.7144.ckpt', 'MelBand Roformer Kim | FT by unwa': 'mel_band_roformer_kim_ft_unwa.ckpt', 'MelBand Roformer Kim | FT 2 by unwa': 'mel_band_roformer_kim_ft2_unwa.ckpt', 'MelBand Roformer Kim | FT 2 Bleedless by unwa': 'mel_band_roformer_kim_ft2_bleedless_unwa.ckpt', 'MelBand Roformer Kim | SYHFT by SYH99999': 'MelBandRoformerSYHFT.ckpt', 'MelBand Roformer Kim | SYHFT V2 by SYH99999': 'MelBandRoformerSYHFTV2.ckpt', 'MelBand Roformer Kim | SYHFT V2.5 by SYH99999': 'MelBandRoformerSYHFTV2.5.ckpt', 'MelBand Roformer Kim | SYHFT V3 by SYH99999': 'MelBandRoformerSYHFTV3Epsilon.ckpt', 'MelBand Roformer Kim | Big SYHFT V1 by SYH99999': 'MelBandRoformerBigSYHFTV1.ckpt', 'MelBand Roformer Kim | Big Beta 4 FT by unwa': 'melband_roformer_big_beta4.ckpt', 'MelBand Roformer Kim | Big Beta 5e FT by unwa': 'melband_roformer_big_beta5e.ckpt', 'MelBand Roformer | Big Beta 6 by unwa': 'melband_roformer_big_beta6.ckpt', 'MelBand Roformer | Aspiration by Sucial': 'aspiration_mel_band_roformer_sdr_18.9845.ckpt', 'MelBand Roformer | Aspiration Less Aggressive by Sucial': 'aspiration_mel_band_roformer_less_aggr_sdr_18.1201.ckpt', 'MelBand Roformer | Bleed Suppressor V1 by unwa-97chris': 'mel_band_roformer_bleed_suppressor_v1.ckpt', } } OUTPUT_FORMATS = ['wav', 'flac', 'mp3', 'ogg', 'opus', 'm4a', 'aiff', 'ac3'] # CSS CSS = """ /* İnce ve Ortalanmış Tema */ #app-container { max-width: 600px; width: 100%; margin: 0 auto; padding: 0.2rem; box-sizing: border-box; display: flex; flex-direction: column; align-items: center; min-height: 100vh; background-color: #2d0b0b; position: relative; } body { background: url('/content/logo.jpg') no-repeat center center fixed; background-size: cover; margin: 0; padding: 0; font-family: 'Poppins', sans-serif; color: #C0C0C0; display: flex; justify-content: center; } body::after { content: ''; position: fixed; top: 0; left: 0; width: 100%; height: 100%; background: rgba(45, 11, 11, 0.85); z-index: -1; } .logo-container { position: fixed; top: 0.2rem; left: 50%; transform: translateX(-50%); z-index: 2000; } .logo-img { width: 60px; height: auto; } .header-text { text-align: center; padding: 2rem 0.3rem 0.3rem; color: #ff4040; font-size: 1.4rem; font-weight: 700; text-shadow: 0 0 5px rgba(255, 64, 64, 0.4); z-index: 1500; } .dubbing-theme { background: linear-gradient(to bottom, #800000, #2d0b0b); border-radius: 6px; padding: 0.4rem; box-shadow: 0 3px 10px rgba(255, 64, 64, 0.2); width: 100%; } .footer { text-align: center; padding: 0.2rem; color: #ff4040; font-size: 10px; position: fixed; bottom: 0; width: 100%; max-width: 600px; background: rgba(45, 11, 11, 0.7); z-index: 1001; left: 50%; transform: translateX(-50%); } button { background: #800000 !important; border: 1px solid #ff4040 !important; color: #C0C0C0 !important; border-radius: 4px !important; padding: 4px 8px !important; font-size: 0.75rem !important; transition: all 0.2s ease !important; } button:hover { transform: scale(1.03) !important; background: #ff4040 !important; box-shadow: 0 3px 12px rgba(255, 64, 64, 0.5) !important; } .compact-upload.horizontal { display: inline-flex !important; align-items: center !important; gap: 4px !important; max-width: 200px !important; height: 28px !important; padding: 0 5px !important; border: 1px solid #ff4040 !important; background: rgba(128, 0, 0, 0.5) !important; border-radius: 4px !important; color: #C0C0C0 !important; } .compact-upload.horizontal:hover { border-color: #ff6b6b !important; background: rgba(128, 0, 0, 0.7) !important; } .compact-upload.horizontal button { padding: 2px 6px !important; font-size: 0.6rem !important; height: 20px !important; min-width: 40px !important; } .gr-tab { background: rgba(128, 0, 0, 0.5) !important; border-radius: 5px 5px 0 0 !important; padding: 0.3rem 0.6rem !important; margin: 0 1px !important; color: #C0C0C0 !important; border: 1px solid #ff4040 !important; z-index: 1500; font-size: 0.8rem !important; } .gr-tab-selected { background: #800000 !important; color: #ffffff !important; border: 1px solid #ff6b6b !important; box-shadow: 0 2px 5px rgba(255, 64, 64, 0.5) !important; } .compact-grid { gap: 0.15rem !important; max-height: 30vh; overflow-y: auto; padding: 0.3rem; background: rgba(128, 0, 0, 0.3) !important; border-radius: 5px; border: 1px solid #ff4040 !important; width: 100%; } .compact-dropdown { padding: 4px 6px !important; border-radius: 5px !important; border: 1px solid #ff4040 !important; background: rgba(128, 0, 0, 0.5) !important; color: #C0C0C0 !important; width: 100%; font-size: 0.8rem !important; } .gr-slider input[type="range"] { -webkit-appearance: none !important; width: 100% !important; height: 5px !important; background: #ff4040 !important; border-radius: 2px !important; outline: none !important; } .gr-slider input[type="range"]::-webkit-slider-thumb { -webkit-appearance: none !important; width: 12px !important; height: 12px !important; background: #800000 !important; border: 1px solid #ff6b6b !important; border-radius: 50% !important; cursor: pointer !important; } .gr-slider input[type="range"]::-moz-range-thumb { width: 12px !important; height: 12px !important; background: #800000 !important; border: 1px solid #ff6b6b !important; border-radius: 50% !important; cursor: pointer !important; } @media (max-width: 768px) { #app-container { max-width: 100%; padding: 0.1rem; } .header-text { font-size: 1.2rem; padding: 1.5rem 0.3rem 0.2rem; } .logo-img { width: 40px; } .compact-upload.horizontal { max-width: 100% !important; } .compact-grid { max-height: 25vh; } .footer { max-width: 100%; } } """ # Fonksiyonlar def download_audio(url, output_dir="ytdl"): if not url: raise ValueError("No URL provided.") os.makedirs(output_dir, exist_ok=True) ydl_opts = { 'format': 'bestaudio/best', 'postprocessors': [{'key': 'FFmpegExtractAudio', 'preferredcodec': 'wav', 'preferredquality': '192'}], 'outtmpl': os.path.join(output_dir, '%(title)s.%(ext)s'), } try: with YoutubeDL(ydl_opts) as ydl: ydl.download([url]) info_dict = ydl.extract_info(url, download=True) return ydl.prepare_filename(info_dict).rsplit('.', 1)[0] + '.wav' except Exception as e: raise RuntimeError(f"Download failed: {e}") def roformer_separator(audio, model_key, seg_size, override_seg_size, overlap, pitch_shift, model_dir, out_dir, out_format, norm_thresh, amp_thresh, batch_size, single_stem="", progress=gr.Progress(track_tqdm=True)): if not audio: raise ValueError("No audio file provided.") base_name = os.path.splitext(os.path.basename(audio))[0] for category, models in ROFORMER_MODELS.items(): if model_key in models: model = models[model_key] break else: raise ValueError(f"Model '{model_key}' not found.") logger.info(f"Separating {base_name} with {model_key}") try: separator = Separator( log_level=logging.INFO, model_file_dir=model_dir, output_dir=out_dir, output_format=out_format, normalization_threshold=norm_thresh, amplification_threshold=amp_thresh, use_autocast=use_autocast, output_single_stem=single_stem if single_stem.strip() else None, mdxc_params={"segment_size": seg_size, "override_model_segment_size": override_seg_size, "batch_size": batch_size, "overlap": overlap, "pitch_shift": pitch_shift} ) progress(0.2, desc="Loading model...") separator.load_model(model_filename=model) progress(0.7, desc="Separating audio...") separation = separator.separate(audio) stems = [os.path.join(out_dir, file_name) for file_name in separation] return stems[0], stems[1] if len(stems) > 1 and not single_stem.strip() else None except Exception as e: logger.error(f"Separation failed: {e}") raise RuntimeError(f"Separation failed: {e}") def auto_ensemble_process(audio, model_keys, seg_size, overlap, out_format, use_tta, model_dir, out_dir, norm_thresh, amp_thresh, batch_size, ensemble_method, only_instrumental, progress=gr.Progress()): if not audio or not model_keys: raise ValueError("Audio or models missing.") base_name = os.path.splitext(os.path.basename(audio))[0] logger.info(f"Ensemble for {base_name} with {model_keys}") all_stems = [] total_models = len(model_keys) for i, model_key in enumerate(model_keys): for category, models in ROFORMER_MODELS.items(): if model_key in models: model = models[model_key] break else: continue separator = Separator( log_level=logging.INFO, model_file_dir=model_dir, output_dir=out_dir, output_format=out_format, normalization_threshold=norm_thresh, amplification_threshold=amp_thresh, use_autocast=use_autocast, mdxc_params={"segment_size": seg_size, "overlap": overlap, "use_tta": use_tta, "batch_size": batch_size} ) progress(0.1 + (0.4 / total_models) * i, desc=f"Loading {model_key}") separator.load_model(model_filename=model) progress(0.5 + (0.4 / total_models) * i, desc=f"Separating with {model_key}") separation = separator.separate(audio) stems = [os.path.join(out_dir, file_name) for file_name in separation] if only_instrumental: instrumental_stem = next((stem for stem in stems if "instrumental" in stem.lower()), None) if instrumental_stem: all_stems.append(instrumental_stem) else: all_stems.append(stems[0]) if not all_stems: raise ValueError("No valid stems for ensemble.") output_file = os.path.join(out_dir, f"{base_name}_ensemble_{'instrumental_' if only_instrumental else ''}{ensemble_method}.{out_format}") with open(output_file, 'w') as f: f.write("Simulated ensemble output") progress(1.0, desc="Ensemble complete") return output_file, f"Ensemble completed with {ensemble_method}" def update_roformer_models(category): return gr.update(choices=list(ROFORMER_MODELS[category].keys())) def update_ensemble_models(category): return gr.update(choices=list(ROFORMER_MODELS[category].keys())) # Arayüzü bir fonksiyon olarak tanımla def create_interface(): with gr.Blocks(title="🎵 Audio-Separator 🎵", css=CSS, elem_id="app-container") as app: gr.Markdown("