kevinwang676 committed on
Commit
bd045fa
·
1 Parent(s): 3439fae

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -72
app.py CHANGED
@@ -160,7 +160,7 @@ def make_npz_prompt(name, uploaded_audio, recorded_audio, transcript_content):
160
  # save as npz file
161
  np.savez(os.path.join(tempfile.gettempdir(), f"{name}.npz"),
162
  audio_tokens=audio_tokens, text_tokens=text_tokens, lang_code=lang2code[lang_pr])
163
- return message, os.path.join(tempfile.gettempdir(), f"{name}.npz")
164
 
165
 
166
  def make_prompt(name, wav, sr, save=True):
@@ -472,99 +472,64 @@ def infer_long_text(text, preset_prompt, prompt=None, language='auto', accent='n
472
  def main():
473
  app = gr.Blocks()
474
  with app:
475
- gr.Markdown(top_md)
476
- with gr.Tab("Infer from audio"):
477
- gr.Markdown(infer_from_audio_md)
478
- with gr.Row():
479
- with gr.Column():
480
-
481
- textbox = gr.TextArea(label="Text",
482
- placeholder="Type your sentence here",
483
- value="Welcome back, Master. What can I do for you today?", elem_id=f"tts-input")
484
- language_dropdown = gr.Dropdown(choices=['auto-detect', 'English', '中文', '日本語'], value='English', label='auto-detect')
485
- accent_dropdown = gr.Dropdown(choices=['no-accent', 'English', '中文', '日本語'], value='no-accent', label='accent')
486
- textbox_transcript = gr.TextArea(label="Transcript",
487
- placeholder="Write transcript here. (leave empty to use whisper)",
488
- value="", elem_id=f"prompt-name")
489
- upload_audio_prompt = gr.Audio(label='uploaded audio prompt', source='upload', interactive=True)
490
- record_audio_prompt = gr.Audio(label='recorded audio prompt', source='microphone', interactive=True)
491
- with gr.Column():
492
- text_output = gr.Textbox(label="Message")
493
- audio_output = gr.Audio(label="Output Audio", elem_id="tts-audio")
494
- btn = gr.Button("Generate!")
495
- btn.click(infer_from_audio,
496
- inputs=[textbox, language_dropdown, accent_dropdown, upload_audio_prompt, record_audio_prompt, textbox_transcript],
497
- outputs=[text_output, audio_output])
498
- textbox_mp = gr.TextArea(label="Prompt name",
499
- placeholder="Name your prompt here",
500
- value="prompt_1", elem_id=f"prompt-name")
501
- btn_mp = gr.Button("Make prompt!")
502
- prompt_output = gr.File(interactive=False)
503
- btn_mp.click(make_npz_prompt,
504
- inputs=[textbox_mp, upload_audio_prompt, record_audio_prompt, textbox_transcript],
505
- outputs=[text_output, prompt_output])
506
- with gr.Tab("Make prompt"):
507
- gr.Markdown(make_prompt_md)
508
  with gr.Row():
509
  with gr.Column():
510
  textbox2 = gr.TextArea(label="Prompt name",
511
  placeholder="Name your prompt here",
512
- value="prompt_1", elem_id=f"prompt-name")
513
  # 添加选择语言和输入台本的地方
514
  textbox_transcript2 = gr.TextArea(label="Transcript",
515
  placeholder="Write transcript here. (leave empty to use whisper)",
516
- value="", elem_id=f"prompt-name")
517
  upload_audio_prompt_2 = gr.Audio(label='uploaded audio prompt', source='upload', interactive=True)
518
  record_audio_prompt_2 = gr.Audio(label='recorded audio prompt', source='microphone', interactive=True)
519
  with gr.Column():
520
- text_output_2 = gr.Textbox(label="Message")
521
- prompt_output_2 = gr.File(interactive=False)
522
- btn_2 = gr.Button("Make!")
523
  btn_2.click(make_npz_prompt,
524
  inputs=[textbox2, upload_audio_prompt_2, record_audio_prompt_2, textbox_transcript2],
525
  outputs=[text_output_2, prompt_output_2])
526
- with gr.Tab("Infer from prompt"):
527
- gr.Markdown(infer_from_prompt_md)
528
- with gr.Row():
529
- with gr.Column():
530
- textbox_3 = gr.TextArea(label="Text",
531
- placeholder="Type your sentence here",
532
- value="Welcome back, Master. What can I do for you today?", elem_id=f"tts-input")
533
- language_dropdown_3 = gr.Dropdown(choices=['auto-detect', 'English', '中文', '日本語', 'Mix'], value='auto-detect',
534
- label='language')
535
- accent_dropdown_3 = gr.Dropdown(choices=['no-accent', 'English', '中文', '日本語'], value='no-accent',
536
- label='accent')
537
- preset_dropdown_3 = gr.Dropdown(choices=preset_list, value=None, label='Voice preset')
538
- prompt_file = gr.File(file_count='single', file_types=['.npz'], interactive=True)
539
- with gr.Column():
540
- text_output_3 = gr.Textbox(label="Message")
541
- audio_output_3 = gr.Audio(label="Output Audio", elem_id="tts-audio")
542
- btn_3 = gr.Button("Generate!")
543
- btn_3.click(infer_from_prompt,
544
- inputs=[textbox_3, language_dropdown_3, accent_dropdown_3, preset_dropdown_3, prompt_file],
545
- outputs=[text_output_3, audio_output_3])
546
- with gr.Tab("Infer long text"):
547
- gr.Markdown("This is a long text generation demo. You can use this to generate long audio. ")
548
  with gr.Row():
549
  with gr.Column():
550
- textbox_4 = gr.TextArea(label="Text",
551
- placeholder="Type your sentence here",
552
  value=long_text_example, elem_id=f"tts-input")
553
  language_dropdown_4 = gr.Dropdown(choices=['auto-detect', 'English', '中文', '日本語'], value='auto-detect',
554
- label='language')
555
  accent_dropdown_4 = gr.Dropdown(choices=['no-accent', 'English', '中文', '日本語'], value='no-accent',
556
- label='accent')
557
- preset_dropdown_4 = gr.Dropdown(choices=preset_list, value=None, label='Voice preset')
558
- prompt_file_4 = gr.File(file_count='single', file_types=['.npz'], interactive=True)
559
  with gr.Column():
560
- text_output_4 = gr.TextArea(label="Message")
561
- audio_output_4 = gr.Audio(label="Output Audio", elem_id="tts-audio")
562
- btn_4 = gr.Button("Generate!")
563
  btn_4.click(infer_long_text,
564
  inputs=[textbox_4, preset_dropdown_4, prompt_file_4, language_dropdown_4, accent_dropdown_4],
565
  outputs=[text_output_4, audio_output_4])
566
-
567
- app.launch()
 
 
 
 
 
 
 
 
568
 
569
  if __name__ == "__main__":
570
  formatter = (
 
160
  # save as npz file
161
  np.savez(os.path.join(tempfile.gettempdir(), f"{name}.npz"),
162
  audio_tokens=audio_tokens, text_tokens=text_tokens, lang_code=lang2code[lang_pr])
163
+ return "提取音色成功!", os.path.join(tempfile.gettempdir(), f"{name}.npz")
164
 
165
 
166
  def make_prompt(name, wav, sr, save=True):
 
472
  def main():
473
  app = gr.Blocks()
474
  with app:
475
+ gr.HTML("<center>"
476
+ "<h1>🌊💕🎶 - VALL-E X 3秒声音克隆,支持中日英三语</h1>"
477
+ "</center>")
478
+ gr.Markdown("### <center>⚡ - 只需3秒语音,快速复刻您喜欢的声音;Powered by [VALL-E-X](https://github.com/Plachtaa/VALL-E-X)</center>")
479
+ gr.Markdown("### <center>更多精彩应用,尽在[滔滔AI](http://www.talktalkai.com);滔滔AI,为爱滔滔!💕</center>")
480
+
481
+
482
+ with gr.Tab("🎶 - 提取音色"):
483
+ gr.Markdown("请上传一段3~10秒的语音,并点击”提取音色“")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
484
  with gr.Row():
485
  with gr.Column():
486
  textbox2 = gr.TextArea(label="Prompt name",
487
  placeholder="Name your prompt here",
488
+ value="prompt_1", elem_id=f"prompt-name", visible=False)
489
  # 添加选择语言和输入台本的地方
490
  textbox_transcript2 = gr.TextArea(label="Transcript",
491
  placeholder="Write transcript here. (leave empty to use whisper)",
492
+ value="", elem_id=f"prompt-name", visible=False)
493
  upload_audio_prompt_2 = gr.Audio(label='uploaded audio prompt', source='upload', interactive=True)
494
  record_audio_prompt_2 = gr.Audio(label='recorded audio prompt', source='microphone', interactive=True)
495
  with gr.Column():
496
+ text_output_2 = gr.Textbox(label="音色提取进度")
497
+ prompt_output_2 = gr.File(interactive=False, visible=False)
498
+ btn_2 = gr.Button("提取音色", variant="primary")
499
  btn_2.click(make_npz_prompt,
500
  inputs=[textbox2, upload_audio_prompt_2, record_audio_prompt_2, textbox_transcript2],
501
  outputs=[text_output_2, prompt_output_2])
502
+
503
+ with gr.Tab("💕 - 声音克隆"):
504
+ gr.Markdown("现在开始奇妙的声音克隆之旅吧!输入您想合成的文本后,点击”声音克隆“即可快速复刻喜欢的声音!")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
505
  with gr.Row():
506
  with gr.Column():
507
+ textbox_4 = gr.TextArea(label="请输入您想合成的文本",
508
+ placeholder="说点什么吧...",
509
  value=long_text_example, elem_id=f"tts-input")
510
  language_dropdown_4 = gr.Dropdown(choices=['auto-detect', 'English', '中文', '日本語'], value='auto-detect',
511
+ label='language', visible=False)
512
  accent_dropdown_4 = gr.Dropdown(choices=['no-accent', 'English', '中文', '日本語'], value='no-accent',
513
+ label='accent', visible=False)
514
+ preset_dropdown_4 = gr.Dropdown(choices=preset_list, value=None, label='更多语音包', visible=False)
515
+ prompt_file_4 = prompt_output_2
516
  with gr.Column():
517
+ text_output_4 = gr.TextArea(label="Message", visible=False)
518
+ audio_output_4 = gr.Audio(label="为您合成的专属语音", elem_id="tts-audio")
519
+ btn_4 = gr.Button("声音克隆", variant="primary")
520
  btn_4.click(infer_long_text,
521
  inputs=[textbox_4, preset_dropdown_4, prompt_file_4, language_dropdown_4, accent_dropdown_4],
522
  outputs=[text_output_4, audio_output_4])
523
+
524
+ gr.Markdown("### <center>注意❗:请不要生成会对个人以及组织造成侵害的内容,此程序仅供科研、学习及个人娱乐使用。</center>")
525
+ gr.Markdown("<center>🧸 - 如何使用此程序:在“提取音色”模块上传一段语音并提取音色之后,就可以在“声音克隆”模块一键克隆您喜欢的声音啦!</center>")
526
+ gr.HTML('''
527
+ <div class="footer">
528
+ <p>🌊🏞️🎶 - 江水东流急,滔滔无尽声。 明·顾璘
529
+ </p>
530
+ </div>
531
+ ''')
532
+ app.launch(show_error=True)
533
 
534
  if __name__ == "__main__":
535
  formatter = (