Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -84,10 +84,11 @@ def convert_wav_to_mp3(wav_file):
|
|
| 84 |
os.system(" ".join(command))
|
| 85 |
return output_path_mp3
|
| 86 |
|
| 87 |
-
def tts_generator(text,
|
| 88 |
-
global net_g
|
| 89 |
model_path = models[model]
|
| 90 |
net_g, _, _, _ = utils.load_checkpoint(model_path, net_g, None, skip_optimizer=True)
|
|
|
|
| 91 |
try:
|
| 92 |
with torch.no_grad():
|
| 93 |
audio = infer(text, sdp_ratio=sdp_ratio, noise_scale=noise_scale, noise_scale_w=noise_scale_w, length_scale=length_scale, sid=speaker,model_dir=model)
|
|
@@ -111,10 +112,18 @@ if __name__ == "__main__":
|
|
| 111 |
_ = net_g.eval()
|
| 112 |
|
| 113 |
speaker_ids = hps.data.spk2id
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 118 |
with gr.Column():
|
| 119 |
|
| 120 |
gr.Markdown("测试用")
|
|
@@ -123,8 +132,8 @@ if __name__ == "__main__":
|
|
| 123 |
info="使用huggingface的免费CPU进行推理,因此速度不快,一次性不要输入超过500汉字")
|
| 124 |
model = gr.Radio(choices=list(models.keys()), value=list(models.keys())[0], label='音声模型')
|
| 125 |
#model = gr.Dropdown(choices=models,value=models[0], label='音声模型')
|
| 126 |
-
speaker = gr.Radio(choices=speakers, value=speakers[0], label='Speaker')
|
| 127 |
-
gr.Markdown(value="
|
| 128 |
sdp_ratio = gr.Slider(minimum=0, maximum=1, value=0.2, step=0.01, label='语调变化')
|
| 129 |
noise_scale = gr.Slider(minimum=0.1, maximum=1.5, value=0.5, step=0.01, label='感情变化')
|
| 130 |
noise_scale_w = gr.Slider(minimum=0.1, maximum=1.4, value=0.9, step=0.01, label='音节长度')
|
|
@@ -139,7 +148,7 @@ if __name__ == "__main__":
|
|
| 139 |
""")
|
| 140 |
btn.click(
|
| 141 |
tts_generator,
|
| 142 |
-
inputs=[text,
|
| 143 |
outputs=[text_output, audio_output,MP3_output]
|
| 144 |
)
|
| 145 |
|
|
|
|
| 84 |
os.system(" ".join(command))
|
| 85 |
return output_path_mp3
|
| 86 |
|
| 87 |
+
def tts_generator(text, sdp_ratio, noise_scale, noise_scale_w, length_scale, model):
|
| 88 |
+
global net_g,speakers
|
| 89 |
model_path = models[model]
|
| 90 |
net_g, _, _, _ = utils.load_checkpoint(model_path, net_g, None, skip_optimizer=True)
|
| 91 |
+
|
| 92 |
try:
|
| 93 |
with torch.no_grad():
|
| 94 |
audio = infer(text, sdp_ratio=sdp_ratio, noise_scale=noise_scale, noise_scale_w=noise_scale_w, length_scale=length_scale, sid=speaker,model_dir=model)
|
|
|
|
| 112 |
_ = net_g.eval()
|
| 113 |
|
| 114 |
speaker_ids = hps.data.spk2id
|
| 115 |
+
speaker = list(speaker_ids.keys())[0]
|
| 116 |
+
|
| 117 |
+
css='''
|
| 118 |
+
#main {background-color: #ffffff;opacity: 0.8;background-image: repeating-linear-gradient(45deg, #edffe1 25%, transparent 25%, transparent 75%, #edffe1 75%, #edffe1), repeating-linear-gradient(45deg, #edffe1 25%, #ffffff 25%, #ffffff 75%, #edffe1 75%, #edffe1);
|
| 119 |
+
background-position: 0 0, 40px 40px;background-size: 80px 80px;}
|
| 120 |
+
#mainContainer {max-width: 700px; margin-left: auto; margin-right: auto;background-color:transparent;}
|
| 121 |
+
#btn {border: 2px solid #3ed6e500; margin-left: auto; margin-right: auto;background-color:#3ed6e500;border-radius: 5px;
|
| 122 |
+
:hover{color: #92ccd8; } }
|
| 123 |
+
'''
|
| 124 |
+
|
| 125 |
+
with gr.Blocks(css=css) as app:
|
| 126 |
+
with gr.Row(elem_id="main"):
|
| 127 |
with gr.Column():
|
| 128 |
|
| 129 |
gr.Markdown("测试用")
|
|
|
|
| 132 |
info="使用huggingface的免费CPU进行推理,因此速度不快,一次性不要输入超过500汉字")
|
| 133 |
model = gr.Radio(choices=list(models.keys()), value=list(models.keys())[0], label='音声模型')
|
| 134 |
#model = gr.Dropdown(choices=models,value=models[0], label='音声模型')
|
| 135 |
+
#speaker = gr.Radio(choices=speakers, value=speakers[0], label='Speaker')
|
| 136 |
+
gr.Markdown(value="生成参数")
|
| 137 |
sdp_ratio = gr.Slider(minimum=0, maximum=1, value=0.2, step=0.01, label='语调变化')
|
| 138 |
noise_scale = gr.Slider(minimum=0.1, maximum=1.5, value=0.5, step=0.01, label='感情变化')
|
| 139 |
noise_scale_w = gr.Slider(minimum=0.1, maximum=1.4, value=0.9, step=0.01, label='音节长度')
|
|
|
|
| 148 |
""")
|
| 149 |
btn.click(
|
| 150 |
tts_generator,
|
| 151 |
+
inputs=[text, sdp_ratio, noise_scale, noise_scale_w, length_scale, model],
|
| 152 |
outputs=[text_output, audio_output,MP3_output]
|
| 153 |
)
|
| 154 |
|