VALLE

Runtime error

App Files Files Community

kevinwang676 committed on Aug 26, 2023

Commit

bd045fa

1 Parent(s): 3439fae

Update app.py

Browse files

Files changed (1) hide show

app.py +37 -72

app.py CHANGED Viewed

@@ -160,7 +160,7 @@ def make_npz_prompt(name, uploaded_audio, recorded_audio, transcript_content):
     # save as npz file
     np.savez(os.path.join(tempfile.gettempdir(), f"{name}.npz"),
              audio_tokens=audio_tokens, text_tokens=text_tokens, lang_code=lang2code[lang_pr])
-    return message, os.path.join(tempfile.gettempdir(), f"{name}.npz")
 def make_prompt(name, wav, sr, save=True):
@@ -472,99 +472,64 @@ def infer_long_text(text, preset_prompt, prompt=None, language='auto', accent='n
 def main():
     app = gr.Blocks()
     with app:
-        gr.Markdown(top_md)
-        with gr.Tab("Infer from audio"):
-            gr.Markdown(infer_from_audio_md)
-            with gr.Row():
-                with gr.Column():
-                    textbox = gr.TextArea(label="Text",
-                                          placeholder="Type your sentence here",
-                                          value="Welcome back, Master. What can I do for you today?", elem_id=f"tts-input")
-                    language_dropdown = gr.Dropdown(choices=['auto-detect', 'English', '中文', '日本語'], value='English', label='auto-detect')
-                    accent_dropdown = gr.Dropdown(choices=['no-accent', 'English', '中文', '日本語'], value='no-accent', label='accent')
-                    textbox_transcript = gr.TextArea(label="Transcript",
-                                          placeholder="Write transcript here. (leave empty to use whisper)",
-                                          value="", elem_id=f"prompt-name")
-                    upload_audio_prompt = gr.Audio(label='uploaded audio prompt', source='upload', interactive=True)
-                    record_audio_prompt = gr.Audio(label='recorded audio prompt', source='microphone', interactive=True)
-                with gr.Column():
-                    text_output = gr.Textbox(label="Message")
-                    audio_output = gr.Audio(label="Output Audio", elem_id="tts-audio")
-                    btn = gr.Button("Generate!")
-                    btn.click(infer_from_audio,
-                              inputs=[textbox, language_dropdown, accent_dropdown, upload_audio_prompt, record_audio_prompt, textbox_transcript],
-                              outputs=[text_output, audio_output])
-                    textbox_mp = gr.TextArea(label="Prompt name",
-                                          placeholder="Name your prompt here",
-                                          value="prompt_1", elem_id=f"prompt-name")
-                    btn_mp = gr.Button("Make prompt!")
-                    prompt_output = gr.File(interactive=False)
-                    btn_mp.click(make_npz_prompt,
-                                inputs=[textbox_mp, upload_audio_prompt, record_audio_prompt, textbox_transcript],
-                                outputs=[text_output, prompt_output])
-        with gr.Tab("Make prompt"):
-            gr.Markdown(make_prompt_md)
             with gr.Row():
                 with gr.Column():
                     textbox2 = gr.TextArea(label="Prompt name",
                                           placeholder="Name your prompt here",
-                                          value="prompt_1", elem_id=f"prompt-name")
                     # 添加选择语言和输入台本的地方
                     textbox_transcript2 = gr.TextArea(label="Transcript",
                                           placeholder="Write transcript here. (leave empty to use whisper)",
-                                          value="", elem_id=f"prompt-name")
                     upload_audio_prompt_2 = gr.Audio(label='uploaded audio prompt', source='upload', interactive=True)
                     record_audio_prompt_2 = gr.Audio(label='recorded audio prompt', source='microphone', interactive=True)
                 with gr.Column():
-                    text_output_2 = gr.Textbox(label="Message")
-                    prompt_output_2 = gr.File(interactive=False)
-                    btn_2 = gr.Button("Make!")
                     btn_2.click(make_npz_prompt,
                               inputs=[textbox2, upload_audio_prompt_2, record_audio_prompt_2, textbox_transcript2],
                               outputs=[text_output_2, prompt_output_2])
-        with gr.Tab("Infer from prompt"):
-            gr.Markdown(infer_from_prompt_md)
-            with gr.Row():
-                with gr.Column():
-                    textbox_3 = gr.TextArea(label="Text",
-                                          placeholder="Type your sentence here",
-                                          value="Welcome back, Master. What can I do for you today?", elem_id=f"tts-input")
-                    language_dropdown_3 = gr.Dropdown(choices=['auto-detect', 'English', '中文', '日本語', 'Mix'], value='auto-detect',
-                                                    label='language')
-                    accent_dropdown_3 = gr.Dropdown(choices=['no-accent', 'English', '中文', '日本語'], value='no-accent',
-                                                  label='accent')
-                    preset_dropdown_3 = gr.Dropdown(choices=preset_list, value=None, label='Voice preset')
-                    prompt_file = gr.File(file_count='single', file_types=['.npz'], interactive=True)
-                with gr.Column():
-                    text_output_3 = gr.Textbox(label="Message")
-                    audio_output_3 = gr.Audio(label="Output Audio", elem_id="tts-audio")
-                    btn_3 = gr.Button("Generate!")
-                    btn_3.click(infer_from_prompt,
-                              inputs=[textbox_3, language_dropdown_3, accent_dropdown_3, preset_dropdown_3, prompt_file],
-                              outputs=[text_output_3, audio_output_3])
-        with gr.Tab("Infer long text"):
-            gr.Markdown("This is a long text generation demo. You can use this to generate long audio. ")
             with gr.Row():
                 with gr.Column():
-                    textbox_4 = gr.TextArea(label="Text",
-                                          placeholder="Type your sentence here",
                                           value=long_text_example, elem_id=f"tts-input")
                     language_dropdown_4 = gr.Dropdown(choices=['auto-detect', 'English', '中文', '日本語'], value='auto-detect',
-                                                    label='language')
                     accent_dropdown_4 = gr.Dropdown(choices=['no-accent', 'English', '中文', '日本語'], value='no-accent',
-                                                    label='accent')
-                    preset_dropdown_4 = gr.Dropdown(choices=preset_list, value=None, label='Voice preset')
-                    prompt_file_4 = gr.File(file_count='single', file_types=['.npz'], interactive=True)
                 with gr.Column():
-                    text_output_4 = gr.TextArea(label="Message")
-                    audio_output_4 = gr.Audio(label="Output Audio", elem_id="tts-audio")
-                    btn_4 = gr.Button("Generate!")
                     btn_4.click(infer_long_text,
                               inputs=[textbox_4, preset_dropdown_4, prompt_file_4, language_dropdown_4, accent_dropdown_4],
                               outputs=[text_output_4, audio_output_4])
-    app.launch()
 if __name__ == "__main__":
     formatter = (

     # save as npz file
     np.savez(os.path.join(tempfile.gettempdir(), f"{name}.npz"),
              audio_tokens=audio_tokens, text_tokens=text_tokens, lang_code=lang2code[lang_pr])
+    return "提取音色成功！", os.path.join(tempfile.gettempdir(), f"{name}.npz")
 def make_prompt(name, wav, sr, save=True):
 def main():
     app = gr.Blocks()
     with app:
+        gr.HTML("<center>"
+                "<h1>🌊💕🎶 - VALL-E X 3秒声音克隆，支持中日英三语</h1>"
+                "</center>")
+        gr.Markdown("### <center>⚡ - 只需3秒语音，快速复刻您喜欢的声音；Powered by [VALL-E-X](https://github.com/Plachtaa/VALL-E-X)</center>")
+        gr.Markdown("### <center>更多精彩应用，尽在[滔滔AI](http://www.talktalkai.com)；滔滔AI，为爱滔滔！💕</center>")
+        with gr.Tab("🎶 - 提取音色"):
+            gr.Markdown("请上传一段3~10秒的语音，并点击”提取音色“")
             with gr.Row():
                 with gr.Column():
                     textbox2 = gr.TextArea(label="Prompt name",
                                           placeholder="Name your prompt here",
+                                          value="prompt_1", elem_id=f"prompt-name", visible=False)
                     # 添加选择语言和输入台本的地方
                     textbox_transcript2 = gr.TextArea(label="Transcript",
                                           placeholder="Write transcript here. (leave empty to use whisper)",
+                                          value="", elem_id=f"prompt-name", visible=False)
                     upload_audio_prompt_2 = gr.Audio(label='uploaded audio prompt', source='upload', interactive=True)
                     record_audio_prompt_2 = gr.Audio(label='recorded audio prompt', source='microphone', interactive=True)
                 with gr.Column():
+                    text_output_2 = gr.Textbox(label="音色提取进度")
+                    prompt_output_2 = gr.File(interactive=False, visible=False)
+                    btn_2 = gr.Button("提取音色", variant="primary")
                     btn_2.click(make_npz_prompt,
                               inputs=[textbox2, upload_audio_prompt_2, record_audio_prompt_2, textbox_transcript2],
                               outputs=[text_output_2, prompt_output_2])
+        with gr.Tab("💕 - 声音克隆"):
+            gr.Markdown("现在开始奇妙的声音克隆之旅吧！输入您想合成的文本后，点击”声音克隆“即可快速复刻喜欢的声音！")
             with gr.Row():
                 with gr.Column():
+                    textbox_4 = gr.TextArea(label="请输入您想合成的文本",
+                                          placeholder="说点什么吧...",
                                           value=long_text_example, elem_id=f"tts-input")
                     language_dropdown_4 = gr.Dropdown(choices=['auto-detect', 'English', '中文', '日本語'], value='auto-detect',
+                                                    label='language', visible=False)
                     accent_dropdown_4 = gr.Dropdown(choices=['no-accent', 'English', '中文', '日本語'], value='no-accent',
+                                                    label='accent', visible=False)
+                    preset_dropdown_4 = gr.Dropdown(choices=preset_list, value=None, label='更多语音包', visible=False)
+                    prompt_file_4 = prompt_output_2
                 with gr.Column():
+                    text_output_4 = gr.TextArea(label="Message", visible=False)
+                    audio_output_4 = gr.Audio(label="为您合成的专属语音", elem_id="tts-audio")
+                    btn_4 = gr.Button("声音克隆", variant="primary")
                     btn_4.click(infer_long_text,
                               inputs=[textbox_4, preset_dropdown_4, prompt_file_4, language_dropdown_4, accent_dropdown_4],
                               outputs=[text_output_4, audio_output_4])
+        gr.Markdown("### <center>注意❗：请不要生成会对个人以及组织造成侵害的内容，此程序仅供科研、学习及个人娱乐使用。</center>")
+        gr.Markdown("<center>🧸 - 如何使用此程序：在“提取音色”模块上传一段语音并提取音色之后，就可以在“声音克隆”模块一键克隆您喜欢的声音啦！</center>")
+        gr.HTML('''
+            <div class="footer">
+                        <p>🌊🏞️🎶 - 江水东流急，滔滔无尽声。 明·顾璘
+                        </p>
+            </div>
+        ''')
+    app.launch(show_error=True)
 if __name__ == "__main__":
     formatter = (