seawolf2357
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -1,194 +1,153 @@
- # Copyright (c) 2023 Amphion.
- #
- # This source code is licensed under the MIT license found in the
- # LICENSE file in the root directory of this source tree.
-
  import gradio as gr
- import
- import
-     "Eason Chan 陈奕迅": "vocalist_l1_陈奕迅",
-     "David Tao 陶喆": "vocalist_l1_陶喆",
  }
- )
-     [![arXiv](https://img.shields.io/badge/arXiv-Paper-<COLOR>.svg)](https://arxiv.org/abs/2310.11160)
-
-     This demo provides an Amphion [DiffWaveNetSVC](https://github.com/open-mmlab/Amphion/tree/main/egs/svc/MultipleContentsSVC) pretrained model for you to play. The training data has been detailed [here](https://huggingface.co/amphion/singing_voice_conversion).
-     """
- )
-
- gr.Markdown(
-     """
-     ## Source Audio
-     **Hint**: We recommend using dry vocals (e.g., studio recordings or source-separated voices from music) as the input for this demo. At the bottom of this page, we provide some examples for your reference.
-     """
- )
- source_audio_input = gr.Audio(
-     sources=["upload", "microphone"],
-     label="Source Audio",
-     type="filepath",
  )
  )
  )
-     value=
-     step=1,
-     label="Key Shift Values",
-     info='How many semitones you want to transpose. This parameter will work only if you choose "Key Shift"',
  )
      step=1,
-     info="As the step number increases, the synthesis quality will be better while the inference speed will be lower",
- )
- btn = gr.ClearButton(
-     components=[
-         config_target_singer,
-         config_keyshift_choice,
-         config_keyshift_value,
-         config_diff_infer_steps,
-     ]
  )
-         inputs=[
-             source_audio_input,
-             config_target_singer,
-             config_keyshift_choice,
-             config_keyshift_value,
-             config_diff_infer_steps,
-         ],
-         outputs=demo_outputs,
-     )
-
-     gr.Markdown("## Examples")
      gr.Examples(
-         examples=
-                 1000,
-                 "examples/output/chinese_female_recordings_vocalist_l1_JohnMayer.wav",
-             ],
-             [
-                 "examples/chinese_male_seperated.wav",
-                 "Taylor Swift",
-                 "Auto Shift",
-                 1000,
-                 "examples/output/chinese_male_seperated_vocalist_l1_TaylorSwift.wav",
-             ],
-             [
-                 "examples/english_female_seperated.wav",
-                 "Feng Wang 汪峰",
-                 "Auto Shift",
-                 1000,
-                 "examples/output/english_female_seperated_vocalist_l1_汪峰.wav",
-             ],
-             [
-                 "examples/english_male_recordings.wav",
-                 "Yijie Shi 石倚洁",
-                 "Auto Shift",
-                 1000,
-                 "examples/output/english_male_recordings_vocalist_l1_石倚洁.wav",
-             ],
-         ],
-         inputs=[
-             source_audio_input,
-             config_target_singer,
-             config_keyshift_choice,
-             config_diff_infer_steps,
-             demo_outputs,
-         ],
      )

-
-
-
  import gradio as gr
+ import torch
+ from diffusers import StableDiffusionXLPipeline, AutoencoderKL, KDPM2AncestralDiscreteScheduler
+ from huggingface_hub import hf_hub_download
+ import spaces
+ from PIL import Image
+ import requests
+ from translatepy import Translator
+
+ translator = Translator()
+
+ # Constants
+ model = "Corcelio/mobius"
+ vae_model = "madebyollin/sdxl-vae-fp16-fix"
+
+ CSS = """
+ .gradio-container {
+     max-width: 690px !important;
  }
+ footer {
+     visibility: hidden;
+ }
+ """
+
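+ # Reload the page once with "?__theme=dark" appended so the UI always opens in dark mode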
+ JS = """function () {
+     gradioURL = window.location.href
+     if (!gradioURL.endsWith('?__theme=dark')) {
+         window.location.replace(gradioURL + '?__theme=dark');
+     }
+ }"""
+
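+ # madebyollin/sdxl-vae-fp16-fix is an SDXL VAE patched to run in float16 without NaN artifacts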
+ # Load VAE component
+ vae = AutoencoderKL.from_pretrained(
+     vae_model,
+     torch_dtype=torch.float16
+ )
+
+ # Ensure model and scheduler are initialized in GPU-enabled function
+ if torch.cuda.is_available():
+     pipe = StableDiffusionXLPipeline.from_pretrained(model, vae=vae, torch_dtype=torch.float16).to("cuda")
+
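+     # Swap in the KDPM2 Ancestral sampler (inside the CUDA check, since pipe only exists there)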
+     pipe.scheduler = KDPM2AncestralDiscreteScheduler.from_config(pipe.scheduler.config)
+
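+ # On Hugging Face ZeroGPU Spaces, @spaces.GPU() allocates a GPU just for the duration of each call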
+ # Inference function
+ @spaces.GPU()
+ def generate_image(
+         prompt,
+         negative="low quality",
+         width=1024,
+         height=1024,
+         scale=1.5,
+         steps=30,
+         clip=3):
+
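+     # Prompts may arrive in any language; translatepy converts them to English before inference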
+     prompt = str(translator.translate(prompt, 'English'))
+
+     print(f'prompt:{prompt}')
+
+     image = pipe(
+         prompt,
+         negative_prompt=negative,
+         width=width,
+         height=height,
+         guidance_scale=scale,
+         num_inference_steps=steps,
+         clip_skip=clip,
      )
+     return image.images[0]
+
+
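+ # One-click example prompts surfaced in the gr.Examples gallery below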
+ examples = [
+     "Beautiful 20-year-old Korean female model, 'face resembling the Korean singer IU', short black bob hair, large C-cup bust, wide hips, navy uniform, white background, smiling expression, model pose, facing forward, full-body shot, ultra-high-resolution photo",
+     "Beautiful 20-year-old British female model, 'face resembling Emma Watson', short blonde bob hair, evening dress, banquet-hall background, smiling expression, model pose, facing forward, full-body shot, ultra-high-resolution photo",
+     "Beautiful 20-year-old Korean female model, 'face resembling a Korean idol', short black bob hair, bikini swimsuit, swimming-pool background, smiling expression, model pose, facing forward, full-body shot, ultra-high-resolution photo",
+     "Beautiful 23-year-old Chinese female model, long brown straight hair, large C-cup bust, studio background, serious expression, office uniform, model pose, facing forward, ultra-high-resolution photo",
+     "Beautiful 18-year-old Japanese female model, short black bob hair, smiling expression, school uniform, classroom background, model pose, facing forward, ultra-high-resolution photo",
+     "Beautiful 20-year-old Brazilian female model, short black bob hair, large C-cup bust, wide hips, nurse uniform, white background, smiling expression, model pose, facing forward, ultra-high-resolution photo",
+     "Beautiful 20-year-old Swedish female model, long blonde straight hair, large C-cup bust, wide hips, bikini swimsuit, beach background, smiling expression, model pose, facing forward, ultra-high-resolution photo",
+     "Beautiful 18-year-old Russian female model, short blonde bob hair, large C-cup bust, wide hips, bikini swimsuit, swimming-pool background, demure expression, model pose, facing forward, ultra-high-resolution photo",
+     "Beautiful 20-year-old French female model, short brown bob hair, large C-cup bust, wide hips, business suit, office background, broadly smiling expression, model pose, facing forward, ultra-high-resolution photo",
+     "Beautiful Ukrainian female model, long brown straight hair, office uniform, hotel background, happy expression, facing forward, ultra-high-resolution photo"
+ ]
+
+
+ # Gradio Interface
+ with gr.Blocks(css=CSS, js=JS, theme="soft") as demo:
+     gr.HTML("<h1><center>Create Your Own Model Character</center></h1>")
+     with gr.Group():
+         with gr.Row():
+             prompt = gr.Textbox(label='Enter Your Prompt', value="best quality, HD, aesthetic", scale=6)
+             submit = gr.Button(scale=1, variant='primary')
+     img = gr.Image(label='Generated Image')
+     with gr.Accordion("Advanced Options", open=False):
+         with gr.Row():
+             negative = gr.Textbox(label="Negative prompt", value="low quality, (deformed, distorted, disfigured:1.3), poorly drawn, bad anatomy, wrong anatomy, extra limb, missing limb, floating limbs, (mutated hands and fingers:1.4), disconnected limbs, mutation, mutated, ugly, disgusting, blurry, amputation, (NSFW:1.25)")
+         with gr.Row():
+             width = gr.Slider(
+                 label="Width",
+                 minimum=512,
+                 maximum=1280,
+                 step=8,
+                 value=1024,
              )
+             height = gr.Slider(
+                 label="Height",
+                 minimum=512,
+                 maximum=1280,
+                 step=8,
+                 value=1024,
              )
+         with gr.Row():
+             scale = gr.Slider(
+                 label="Guidance",
+                 minimum=3.5,
+                 maximum=7,
+                 step=0.1,
+                 value=7,
              )
+             steps = gr.Slider(
+                 label="Steps",
+                 minimum=1,
+                 maximum=50,
                  step=1,
+                 value=50,
              )
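+             # clip_skip=N skips the last N layers of the CLIP text encoder when encoding the prompt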
+             clip = gr.Slider(
+                 label="Clip Skip",
+                 minimum=1,
+                 maximum=10,
+                 step=1,
+                 value=3,
+             )
      gr.Examples(
+         examples=examples,
+         inputs=prompt,
+         outputs=img,
+         fn=generate_image,
+         cache_examples="lazy",
      )
+     prompt.submit(fn=generate_image,
+                   inputs=[prompt, negative, width, height, scale, steps, clip],
+                   outputs=img,
+                   )
+     submit.click(fn=generate_image,
+                  inputs=[prompt, negative, width, height, scale, steps, clip],
+                  outputs=img,
+                  )
+
+ #demo.queue().launch()
+ demo.queue().launch(auth=("gini", "pick"))