Upload gui.py with huggingface_hub
Browse files
gui.py
ADDED
@@ -0,0 +1,479 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import sys
|
3 |
+
import torch
|
4 |
+
import logging
|
5 |
+
from yt_dlp import YoutubeDL as yt_dlp
|
6 |
+
import gradio as gr
|
7 |
+
import argparse
|
8 |
+
from audio_separator.separator import Separator
|
9 |
+
import numpy as np
|
10 |
+
import librosa
|
11 |
+
import soundfile as sf
|
12 |
+
from ensemble import ensemble_files # ensemble.py'dan import
|
13 |
+
|
14 |
+
# Autocast is enabled exactly when CUDA is available; the same check
# selects the compute device name.
use_autocast = torch.cuda.is_available()
device = "cuda" if use_autocast else "cpu"

# Logging settings
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
|
20 |
+
|
21 |
+
# Roformer checkpoints grouped by category.
# Outer key: category name shown in the UI.
# Inner mapping: display name -> checkpoint filename passed to the separator.
ROFORMER_MODELS = {
    "Vocals": {
        "MelBand Roformer | Vocals by Kimberley Jensen": "vocals_mel_band_roformer.ckpt",
        "MelBand Roformer | Vocals by becruily": "mel_band_roformer_vocals_becruily.ckpt",
        "MelBand Roformer | Vocals Fullness by Aname": "mel_band_roformer_vocal_fullness_aname.ckpt",
        "BS Roformer | Vocals by Gabox": "bs_roformer_vocals_gabox.ckpt",
        "MelBand Roformer | Vocals by Gabox": "mel_band_roformer_vocals_gabox.ckpt",
        "MelBand Roformer | Vocals FV1 by Gabox": "mel_band_roformer_vocals_fv1_gabox.ckpt",
        "MelBand Roformer | Vocals FV2 by Gabox": "mel_band_roformer_vocals_fv2_gabox.ckpt",
        "MelBand Roformer | Vocals FV3 by Gabox": "mel_band_roformer_vocals_fv3_gabox.ckpt",
        "MelBand Roformer | Vocals FV4 by Gabox": "mel_band_roformer_vocals_fv4_gabox.ckpt",
        "BS Roformer | Chorus Male-Female by Sucial": "model_chorus_bs_roformer_ep_267_sdr_24.1275.ckpt",
        "BS Roformer | Male-Female by aufr33": "bs_roformer_male_female_by_aufr33_sdr_7.2889.ckpt",
    },
    "Instrumentals": {
        "MelBand Roformer Kim | Inst V1 by Unwa": "melband_roformer_inst_v1.ckpt",
        "MelBand Roformer Kim | Inst V1 (E) by Unwa": "melband_roformer_inst_v1e.ckpt",
        "MelBand Roformer Kim | Inst V2 by Unwa": "melband_roformer_inst_v2.ckpt",
        "MelBand Roformer | Instrumental by becruily": "mel_band_roformer_instrumental_becruily.ckpt",
        "MelBand Roformer | Instrumental by Gabox": "mel_band_roformer_instrumental_gabox.ckpt",
        "MelBand Roformer | Instrumental 2 by Gabox": "mel_band_roformer_instrumental_2_gabox.ckpt",
        "MelBand Roformer | Instrumental 3 by Gabox": "mel_band_roformer_instrumental_3_gabox.ckpt",
        "MelBand Roformer | Instrumental Bleedless V1 by Gabox": "mel_band_roformer_instrumental_bleedless_v1_gabox.ckpt",
        "MelBand Roformer | Instrumental Bleedless V2 by Gabox": "mel_band_roformer_instrumental_bleedless_v2_gabox.ckpt",
        "MelBand Roformer | Instrumental Fullness V1 by Gabox": "mel_band_roformer_instrumental_fullness_v1_gabox.ckpt",
        "MelBand Roformer | Instrumental Fullness V2 by Gabox": "mel_band_roformer_instrumental_fullness_v2_gabox.ckpt",
        "MelBand Roformer | Instrumental Fullness V3 by Gabox": "mel_band_roformer_instrumental_fullness_v3_gabox.ckpt",
        "MelBand Roformer | Instrumental Fullness Noisy V4 by Gabox": "mel_band_roformer_instrumental_fullness_noise_v4_gabox.ckpt",
        "MelBand Roformer | INSTV5 by Gabox": "mel_band_roformer_instrumental_instv5_gabox.ckpt",
        "MelBand Roformer | INSTV5N by Gabox": "mel_band_roformer_instrumental_instv5n_gabox.ckpt",
        "MelBand Roformer | INSTV6 by Gabox": "mel_band_roformer_instrumental_instv6_gabox.ckpt",
        "MelBand Roformer | INSTV6N by Gabox": "mel_band_roformer_instrumental_instv6n_gabox.ckpt",
        "MelBand Roformer | INSTV7 by Gabox": "mel_band_roformer_instrumental_instv7_gabox.ckpt",
    },
    "InstVoc Duality": {
        "MelBand Roformer Kim | InstVoc Duality V1 by Unwa": "melband_roformer_instvoc_duality_v1.ckpt",
        "MelBand Roformer Kim | InstVoc Duality V2 by Unwa": "melband_roformer_instvox_duality_v2.ckpt",
    },
    "De-Reverb": {
        "BS-Roformer-De-Reverb": "deverb_bs_roformer_8_384dim_10depth.ckpt",
        "MelBand Roformer | De-Reverb by anvuew": "dereverb_mel_band_roformer_anvuew_sdr_19.1729.ckpt",
        "MelBand Roformer | De-Reverb Less Aggressive by anvuew": "dereverb_mel_band_roformer_less_aggressive_anvuew_sdr_18.8050.ckpt",
        "MelBand Roformer | De-Reverb Mono by anvuew": "dereverb_mel_band_roformer_mono_anvuew.ckpt",
        "MelBand Roformer | De-Reverb Big by Sucial": "dereverb_big_mbr_ep_362.ckpt",
        "MelBand Roformer | De-Reverb Super Big by Sucial": "dereverb_super_big_mbr_ep_346.ckpt",
        "MelBand Roformer | De-Reverb-Echo by Sucial": "dereverb-echo_mel_band_roformer_sdr_10.0169.ckpt",
        "MelBand Roformer | De-Reverb-Echo V2 by Sucial": "dereverb-echo_mel_band_roformer_sdr_13.4843_v2.ckpt",
        "MelBand Roformer | De-Reverb-Echo Fused by Sucial": "dereverb_echo_mbr_fused.ckpt",
    },
    "Denoise": {
        "Mel-Roformer-Denoise-Aufr33": "denoise_mel_band_roformer_aufr33_sdr_27.9959.ckpt",
        "Mel-Roformer-Denoise-Aufr33-Aggr": "denoise_mel_band_roformer_aufr33_aggr_sdr_27.9768.ckpt",
        "MelBand Roformer | Denoise-Debleed by Gabox": "mel_band_roformer_denoise_debleed_gabox.ckpt",
    },
    "Karaoke": {
        "Mel-Roformer-Karaoke-Aufr33-Viperx": "mel_band_roformer_karaoke_aufr33_viperx_sdr_10.1956.ckpt",
        "MelBand Roformer | Karaoke by Gabox": "mel_band_roformer_karaoke_gabox.ckpt",
    },
    "General Purpose": {
        "BS-Roformer-Viperx-1297": "model_bs_roformer_ep_317_sdr_12.9755.ckpt",
        "BS-Roformer-Viperx-1296": "model_bs_roformer_ep_368_sdr_12.9628.ckpt",
        "BS-Roformer-Viperx-1053": "model_bs_roformer_ep_937_sdr_10.5309.ckpt",
        "Mel-Roformer-Viperx-1143": "model_mel_band_roformer_ep_3005_sdr_11.4360.ckpt",
        "Mel-Roformer-Crowd-Aufr33-Viperx": "mel_band_roformer_crowd_aufr33_viperx_sdr_8.7144.ckpt",
        "MelBand Roformer Kim | FT by unwa": "mel_band_roformer_kim_ft_unwa.ckpt",
        "MelBand Roformer Kim | FT 2 by unwa": "mel_band_roformer_kim_ft2_unwa.ckpt",
        "MelBand Roformer Kim | FT 2 Bleedless by unwa": "mel_band_roformer_kim_ft2_bleedless_unwa.ckpt",
        "MelBand Roformer Kim | SYHFT by SYH99999": "MelBandRoformerSYHFT.ckpt",
        "MelBand Roformer Kim | SYHFT V2 by SYH99999": "MelBandRoformerSYHFTV2.ckpt",
        "MelBand Roformer Kim | SYHFT V2.5 by SYH99999": "MelBandRoformerSYHFTV2.5.ckpt",
        "MelBand Roformer Kim | SYHFT V3 by SYH99999": "MelBandRoformerSYHFTV3Epsilon.ckpt",
        "MelBand Roformer Kim | Big SYHFT V1 by SYH99999": "MelBandRoformerBigSYHFTV1.ckpt",
        "MelBand Roformer Kim | Big Beta 4 FT by unwa": "melband_roformer_big_beta4.ckpt",
        "MelBand Roformer Kim | Big Beta 5e FT by unwa": "melband_roformer_big_beta5e.ckpt",
        "MelBand Roformer | Big Beta 6 by unwa": "melband_roformer_big_beta6.ckpt",
        "MelBand Roformer | Aspiration by Sucial": "aspiration_mel_band_roformer_sdr_18.9845.ckpt",
        "MelBand Roformer | Aspiration Less Aggressive by Sucial": "aspiration_mel_band_roformer_less_aggr_sdr_18.1201.ckpt",
        "MelBand Roformer | Bleed Suppressor V1 by unwa-97chris": "mel_band_roformer_bleed_suppressor_v1.ckpt",
    },
}
|
102 |
+
|
103 |
+
# Output format names listed as choices of the "Format" dropdown.
OUTPUT_FORMATS = [
    "wav",
    "flac",
    "mp3",
    "ogg",
    "opus",
    "m4a",
    "aiff",
    "ac3",
]
|
104 |
+
|
105 |
+
# Stylesheet passed to gr.Blocks(css=...): a narrow, centered, dark-red theme.
# NOTE(review): the body background points at '/content/logo.jpg' -- confirm
# that path is actually served in the target deployment.
CSS = """
/* İnce ve Ortalanmış Tema */
#app-container {
max-width: 600px;
width: 100%;
margin: 0 auto;
padding: 0.2rem;
box-sizing: border-box;
display: flex;
flex-direction: column;
align-items: center;
min-height: 100vh;
background-color: #2d0b0b;
position: relative;
}
body {
background: url('/content/logo.jpg') no-repeat center center fixed;
background-size: cover;
margin: 0;
padding: 0;
font-family: 'Poppins', sans-serif;
color: #C0C0C0;
display: flex;
justify-content: center;
}
body::after {
content: '';
position: fixed;
top: 0;
left: 0;
width: 100%;
height: 100%;
background: rgba(45, 11, 11, 0.85);
z-index: -1;
}
.logo-container {
position: fixed;
top: 0.2rem;
left: 50%;
transform: translateX(-50%);
z-index: 2000;
}
.logo-img {
width: 60px;
height: auto;
}
.header-text {
text-align: center;
padding: 2rem 0.3rem 0.3rem;
color: #ff4040;
font-size: 1.4rem;
font-weight: 700;
text-shadow: 0 0 5px rgba(255, 64, 64, 0.4);
z-index: 1500;
}
.dubbing-theme {
background: linear-gradient(to bottom, #800000, #2d0b0b);
border-radius: 6px;
padding: 0.4rem;
box-shadow: 0 3px 10px rgba(255, 64, 64, 0.2);
width: 100%;
}
.footer {
text-align: center;
padding: 0.2rem;
color: #ff4040;
font-size: 10px;
position: fixed;
bottom: 0;
width: 100%;
max-width: 600px;
background: rgba(45, 11, 11, 0.7);
z-index: 1001;
left: 50%;
transform: translateX(-50%);
}
button {
background: #800000 !important;
border: 1px solid #ff4040 !important;
color: #C0C0C0 !important;
border-radius: 4px !important;
padding: 4px 8px !important;
font-size: 0.75rem !important;
transition: all 0.2s ease !important;
}
button:hover {
transform: scale(1.03) !important;
background: #ff4040 !important;
box-shadow: 0 3px 12px rgba(255, 64, 64, 0.5) !important;
}
.compact-upload.horizontal {
display: inline-flex !important;
align-items: center !important;
gap: 4px !important;
max-width: 200px !important;
height: 28px !important;
padding: 0 5px !important;
border: 1px solid #ff4040 !important;
background: rgba(128, 0, 0, 0.5) !important;
border-radius: 4px !important;
color: #C0C0C0 !important;
}
.compact-upload.horizontal:hover {
border-color: #ff6b6b !important;
background: rgba(128, 0, 0, 0.7) !important;
}
.compact-upload.horizontal button {
padding: 2px 6px !important;
font-size: 0.6rem !important;
height: 20px !important;
min-width: 40px !important;
}
.gr-tab {
background: rgba(128, 0, 0, 0.5) !important;
border-radius: 5px 5px 0 0 !important;
padding: 0.3rem 0.6rem !important;
margin: 0 1px !important;
color: #C0C0C0 !important;
border: 1px solid #ff4040 !important;
z-index: 1500;
font-size: 0.8rem !important;
}
.gr-tab-selected {
background: #800000 !important;
color: #ffffff !important;
border: 1px solid #ff6b6b !important;
box-shadow: 0 2px 5px rgba(255, 64, 64, 0.5) !important;
}
.compact-grid {
gap: 0.15rem !important;
max-height: 30vh;
overflow-y: auto;
padding: 0.3rem;
background: rgba(128, 0, 0, 0.3) !important;
border-radius: 5px;
border: 1px solid #ff4040 !important;
width: 100%;
}
.compact-dropdown {
padding: 4px 6px !important;
border-radius: 5px !important;
border: 1px solid #ff4040 !important;
background: rgba(128, 0, 0, 0.5) !important;
color: #C0C0C0 !important;
width: 100%;
font-size: 0.8rem !important;
}
.gr-slider input[type="range"] {
-webkit-appearance: none !important;
width: 100% !important;
height: 5px !important;
background: #ff4040 !important;
border-radius: 2px !important;
outline: none !important;
}
.gr-slider input[type="range"]::-webkit-slider-thumb {
-webkit-appearance: none !important;
width: 12px !important;
height: 12px !important;
background: #800000 !important;
border: 1px solid #ff6b6b !important;
border-radius: 50% !important;
cursor: pointer !important;
}
.gr-slider input[type="range"]::-moz-range-thumb {
width: 12px !important;
height: 12px !important;
background: #800000 !important;
border: 1px solid #ff6b6b !important;
border-radius: 50% !important;
cursor: pointer !important;
}
@media (max-width: 768px) {
#app-container {
max-width: 100%;
padding: 0.1rem;
}
.header-text {
font-size: 1.2rem;
padding: 1.5rem 0.3rem 0.2rem;
}
.logo-img {
width: 40px;
}
.compact-upload.horizontal {
max-width: 100% !important;
}
.compact-grid {
max-height: 25vh;
}
.footer {
max-width: 100%;
}
}
"""
|
301 |
+
|
302 |
+
# Functions
|
303 |
+
def download_audio(url, output_dir="ytdl"):
    """Download the audio of *url* with yt-dlp and extract it to WAV.

    Args:
        url: Address of the audio/video resource to download.
        output_dir: Directory that receives the file; created if missing.

    Returns:
        Path of the extracted ``.wav`` file inside *output_dir*.

    Raises:
        ValueError: If *url* is empty or None.
        RuntimeError: If the download or the audio extraction fails; the
            original exception is attached as ``__cause__``.
    """
    if not url:
        raise ValueError("No URL provided.")

    # Import the class under its real name. The module-level import binds it
    # to the alias ``yt_dlp``, so the bare name ``YoutubeDL`` would otherwise
    # be undefined here and every download would die with NameError.
    from yt_dlp import YoutubeDL

    os.makedirs(output_dir, exist_ok=True)
    ydl_opts = {
        'format': 'bestaudio/best',
        'postprocessors': [{'key': 'FFmpegExtractAudio', 'preferredcodec': 'wav', 'preferredquality': '192'}],
        'outtmpl': os.path.join(output_dir, '%(title)s.%(ext)s'),
    }
    try:
        with YoutubeDL(ydl_opts) as ydl:
            # One extract_info(download=True) call both downloads the media
            # and returns its metadata; an additional download() call would
            # fetch the same resource a second time.
            info_dict = ydl.extract_info(url, download=True)
            downloaded_path = ydl.prepare_filename(info_dict)
    except Exception as e:
        raise RuntimeError(f"Download failed: {e}") from e

    # The post-processor is configured for WAV output, so swap whatever
    # extension the downloaded container had for '.wav'.
    return os.path.splitext(downloaded_path)[0] + '.wav'
|
319 |
+
|
320 |
+
def roformer_separator(audio, model_key, seg_size, override_seg_size, overlap, pitch_shift, model_dir, out_dir, out_format, norm_thresh, amp_thresh, batch_size, single_stem="", progress=gr.Progress(track_tqdm=True)):
    """Separate *audio* into stems with one Roformer model.

    Args:
        audio: Path of the input audio file.
        model_key: Display name of the model; looked up in every category
            of ``ROFORMER_MODELS``.
        seg_size, override_seg_size, overlap, pitch_shift, batch_size:
            Forwarded to the separator through ``mdxc_params``.
        model_dir: Directory passed to the separator as ``model_file_dir``.
        out_dir: Directory passed to the separator as ``output_dir``; also
            used to build the returned paths.
        out_format: Output format passed to the separator.
        norm_thresh: Normalization threshold passed to the separator.
        amp_thresh: Amplification threshold passed to the separator.
        single_stem: Optional stem name; blank or None requests all stems.
        progress: Gradio progress callback.

    Returns:
        ``(first_stem_path, second_stem_path)``. The second element is None
        when a single stem was requested or only one file was produced.

    Raises:
        ValueError: If *audio* is empty or *model_key* is unknown.
        RuntimeError: If model loading or separation fails, or no output
            file is produced.
    """
    if not audio:
        raise ValueError("No audio file provided.")
    base_name = os.path.splitext(os.path.basename(audio))[0]

    model = next(
        (models[model_key] for models in ROFORMER_MODELS.values() if model_key in models),
        None,
    )
    if model is None:
        raise ValueError(f"Model '{model_key}' not found.")

    # Normalise once: tolerate None and strip surrounding whitespace so the
    # separator receives either a clean stem name or None.
    single_stem = (single_stem or "").strip()

    logger.info(f"Separating {base_name} with {model_key}")
    try:
        separator = Separator(
            log_level=logging.INFO,
            model_file_dir=model_dir,
            output_dir=out_dir,
            output_format=out_format,
            normalization_threshold=norm_thresh,
            amplification_threshold=amp_thresh,
            use_autocast=use_autocast,
            output_single_stem=single_stem or None,
            mdxc_params={"segment_size": seg_size, "override_model_segment_size": override_seg_size, "batch_size": batch_size, "overlap": overlap, "pitch_shift": pitch_shift},
        )
        progress(0.2, desc="Loading model...")
        separator.load_model(model_filename=model)
        progress(0.7, desc="Separating audio...")
        separation = separator.separate(audio)
        stems = [os.path.join(out_dir, file_name) for file_name in separation]
        if not stems:
            # Report an empty result explicitly instead of an IndexError.
            raise RuntimeError("Separator produced no output files.")
        second_stem = stems[1] if len(stems) > 1 and not single_stem else None
        return stems[0], second_stem
    except Exception as e:
        logger.error(f"Separation failed: {e}")
        raise RuntimeError(f"Separation failed: {e}") from e
|
353 |
+
|
354 |
+
def auto_ensemble_process(audio, model_keys, seg_size, overlap, out_format, use_tta, model_dir, out_dir, norm_thresh, amp_thresh, batch_size, ensemble_method, only_instrumental, progress=gr.Progress()):
    """Run several Roformer models on *audio* and collect stems for an ensemble.

    Args:
        audio: Path of the input audio file.
        model_keys: Display names of the models to run; each is looked up in
            every category of ``ROFORMER_MODELS``. Unknown names are skipped
            with a warning.
        seg_size, overlap, use_tta, batch_size: Forwarded to the separator
            through ``mdxc_params``.
        out_format: Output format passed to the separator and used as the
            extension of the result file.
        model_dir: Directory passed to the separator as ``model_file_dir``.
        out_dir: Directory passed to the separator as ``output_dir``; the
            result file is written there too.
        norm_thresh: Normalization threshold passed to the separator.
        amp_thresh: Amplification threshold passed to the separator.
        ensemble_method: Method name; used in the output file name and the
            status message.
        only_instrumental: When true, keep only the stem whose path contains
            "instrumental" (case-insensitive) for each model; otherwise keep
            the first stem of each model.
        progress: Gradio progress callback.

    Returns:
        ``(output_file_path, status_message)``.

    Raises:
        ValueError: If *audio* or *model_keys* is empty, or no stem was
            collected from any model.
    """
    if not audio or not model_keys:
        raise ValueError("Audio or models missing.")
    base_name = os.path.splitext(os.path.basename(audio))[0]
    logger.info(f"Ensemble for {base_name} with {model_keys}")

    all_stems = []
    total_models = len(model_keys)
    # Separation occupies 0.0-0.9 of the bar, split evenly between models,
    # so the reported progress never moves backwards from one model to the
    # next. The final 1.0 is reported after the output file is written.
    model_span = 0.9 / total_models
    for i, model_key in enumerate(model_keys):
        model = next(
            (models[model_key] for models in ROFORMER_MODELS.values() if model_key in models),
            None,
        )
        if model is None:
            logger.warning("Skipping unknown model '%s'", model_key)
            continue

        separator = Separator(
            log_level=logging.INFO,
            model_file_dir=model_dir,
            output_dir=out_dir,
            output_format=out_format,
            normalization_threshold=norm_thresh,
            amplification_threshold=amp_thresh,
            use_autocast=use_autocast,
            mdxc_params={"segment_size": seg_size, "overlap": overlap, "use_tta": use_tta, "batch_size": batch_size},
        )
        model_start = model_span * i
        progress(model_start, desc=f"Loading {model_key}")
        separator.load_model(model_filename=model)
        progress(model_start + model_span / 2, desc=f"Separating with {model_key}")
        separation = separator.separate(audio)
        stems = [os.path.join(out_dir, file_name) for file_name in separation]
        if not stems:
            # Nothing to contribute; avoid IndexError on stems[0] below.
            logger.warning("Model '%s' produced no output files", model_key)
            continue

        if only_instrumental:
            instrumental_stem = next((stem for stem in stems if "instrumental" in stem.lower()), None)
            if instrumental_stem:
                all_stems.append(instrumental_stem)
        else:
            all_stems.append(stems[0])

    if not all_stems:
        raise ValueError("No valid stems for ensemble.")

    output_file = os.path.join(out_dir, f"{base_name}_ensemble_{'instrumental_' if only_instrumental else ''}{ensemble_method}.{out_format}")
    # NOTE(review): this still writes a text placeholder, not audio. The
    # collected stems in ``all_stems`` are never combined, and the
    # ``ensemble_files`` helper imported at module level is not called.
    # TODO: wire in the real ensemble once its call signature is confirmed.
    with open(output_file, 'w') as f:
        f.write("Simulated ensemble output")
    progress(1.0, desc="Ensemble complete")
    return output_file, f"Ensemble completed with {ensemble_method}"
|
399 |
+
|
400 |
+
def update_roformer_models(category):
    """Return a Gradio update whose choices are the model names of *category*.

    Raises:
        KeyError: If *category* is not a key of ``ROFORMER_MODELS``.
    """
    model_names = [*ROFORMER_MODELS[category]]
    return gr.update(choices=model_names)
|
402 |
+
|
403 |
+
def update_ensemble_models(category):
    """Return a Gradio update whose choices are the model names of *category*.

    Raises:
        KeyError: If *category* is not a key of ``ROFORMER_MODELS``.
    """
    models_in_category = ROFORMER_MODELS[category]
    return gr.update(choices=list(models_in_category))
|
405 |
+
|
406 |
+
# Define the interface as a function
|
407 |
+
def create_interface():
    """Build the Gradio Blocks UI and return it without launching.

    The layout has three tabs: shared settings, single-model Roformer
    separation, and multi-model auto ensemble. Event handlers connect the
    widgets to ``download_audio``, ``roformer_separator``,
    ``auto_ensemble_process`` and the two dropdown-refresh callbacks.
    """
    category_names = list(ROFORMER_MODELS)

    with gr.Blocks(title="🎵 Audio-Separator 🎵", css=CSS, elem_id="app-container") as demo:
        gr.Markdown("<h1 class='header-text'>🎵 Audio-Separator 🎵</h1>")

        with gr.Tabs():
            # Settings used as inputs by both processing tabs.
            with gr.Tab("⚙️ Settings"):
                model_file_dir = gr.Textbox(
                    value="/tmp/audio-separator-models/",
                    label="📂 Model Cache",
                    placeholder="/tmp/audio-separator-models/",
                )
                output_dir = gr.Textbox(value="output", label="📤 Output Dir", placeholder="output")
                output_format = gr.Dropdown(value="wav", choices=OUTPUT_FORMATS, label="🎶 Format")
                norm_threshold = gr.Slider(minimum=0.1, maximum=1, value=0.9, step=0.1, label="🔊 Norm Thresh")
                amp_threshold = gr.Slider(minimum=0.1, maximum=1, value=0.6, step=0.1, label="📈 Amp Thresh")
                batch_size = gr.Slider(minimum=1, maximum=16, value=1, step=1, label="⚡ Batch Size")

            # Single-model separation.
            with gr.Tab("🎤 Roformer"):
                roformer_category = gr.Dropdown(
                    label="📚 Category",
                    choices=category_names,
                    value="General Purpose",
                )
                roformer_model = gr.Dropdown(
                    label="🛠️ Model",
                    choices=list(ROFORMER_MODELS["General Purpose"]),
                )
                with gr.Row():
                    roformer_seg_size = gr.Slider(minimum=32, maximum=4000, value=256, step=32, label="📏 Seg Size")
                    roformer_overlap = gr.Slider(minimum=2, maximum=10, value=8, step=1, label="🔄 Overlap")
                with gr.Row():
                    roformer_pitch_shift = gr.Slider(minimum=-12, maximum=12, value=0, step=1, label="🎵 Pitch")
                    roformer_override_seg_size = gr.Checkbox(value=False, label="🔧 Override Seg")
                roformer_single_stem = gr.Textbox(label="🎼 Single Stem", placeholder="e.g., Instrumental")
                roformer_audio = gr.Audio(label="🎧 Input Audio", type="filepath")
                with gr.Row():
                    url_ro = gr.Textbox(label="🔗 URL", placeholder="Audio/Video URL")
                    download_roformer = gr.Button("⬇️ Download")
                roformer_button = gr.Button("✂️ Separate!", variant="primary")
                with gr.Row():
                    roformer_stem1 = gr.Audio(label="🎸 Stem 1", type="filepath", interactive=False)
                    roformer_stem2 = gr.Audio(label="🥁 Stem 2", type="filepath", interactive=False)

            # Multi-model ensemble.
            with gr.Tab("🎚️ Auto Ensemble"):
                ensemble_audio = gr.Audio(label="🎧 Input Audio", type="filepath")
                ensemble_category = gr.Dropdown(
                    label="📚 Category",
                    choices=category_names,
                    value="Instrumentals",
                )
                ensemble_models = gr.Dropdown(
                    label="🛠️ Models",
                    choices=list(ROFORMER_MODELS["Instrumentals"]),
                    multiselect=True,
                )
                with gr.Row():
                    ensemble_seg_size = gr.Slider(minimum=32, maximum=4000, value=256, step=32, label="📏 Seg Size")
                    ensemble_overlap = gr.Slider(minimum=0.1, maximum=0.9, value=0.2, step=0.1, label="🔄 Overlap")
                with gr.Row():
                    ensemble_use_tta = gr.Checkbox(value=False, label="🔍 TTA")
                    only_instrumental = gr.Checkbox(value=False, label="🎸 Only Instr")
                ensemble_method = gr.Dropdown(
                    label="⚙️ Method",
                    choices=['avg_wave', 'median_wave', 'max_wave', 'min_wave'],
                    value='avg_wave',
                )
                with gr.Row():
                    url_ensemble = gr.Textbox(label="🔗 URL", placeholder="Audio/Video URL")
                    download_ensemble = gr.Button("⬇️ Download")
                ensemble_button = gr.Button("🎛️ Run Ensemble!", variant="primary")
                ensemble_output = gr.Audio(label="🎶 Output", type="filepath", interactive=False)
                ensemble_status = gr.Textbox(label="📢 Status", interactive=False)

        gr.HTML("<div class='footer'>Powered by Audio-Separator 🌟🎶</div>")

        # Event handlers. Input order must match each callback's positional
        # parameter order.
        roformer_inputs = [
            roformer_audio,
            roformer_model,
            roformer_seg_size,
            roformer_override_seg_size,
            roformer_overlap,
            roformer_pitch_shift,
            model_file_dir,
            output_dir,
            output_format,
            norm_threshold,
            amp_threshold,
            batch_size,
            roformer_single_stem,
        ]
        ensemble_inputs = [
            ensemble_audio,
            ensemble_models,
            ensemble_seg_size,
            ensemble_overlap,
            output_format,
            ensemble_use_tta,
            model_file_dir,
            output_dir,
            norm_threshold,
            amp_threshold,
            batch_size,
            ensemble_method,
            only_instrumental,
        ]

        roformer_category.change(update_roformer_models, inputs=[roformer_category], outputs=[roformer_model])
        download_roformer.click(fn=download_audio, inputs=[url_ro], outputs=[roformer_audio])
        roformer_button.click(
            roformer_separator,
            inputs=roformer_inputs,
            outputs=[roformer_stem1, roformer_stem2],
        )
        ensemble_category.change(update_ensemble_models, inputs=[ensemble_category], outputs=[ensemble_models])
        download_ensemble.click(fn=download_audio, inputs=[url_ensemble], outputs=[ensemble_audio])
        ensemble_button.click(
            auto_ensemble_process,
            inputs=ensemble_inputs,
            outputs=[ensemble_output, ensemble_status],
        )

    return demo
|
476 |
+
|
477 |
+
if __name__ == "__main__":
    # Build the UI and launch it with Gradio's default launch options.
    create_interface().launch()
|