txya900619 committed on
Commit
d476512
·
1 Parent(s): 787f71f

feat: enhance UI/UX and fix custom_speaker_ref_text_input empty bug

Browse files
Files changed (2) hide show
  1. DEMO.md +3 -2
  2. app.py +66 -24
DEMO.md CHANGED
@@ -3,5 +3,6 @@
3
  ILRDF Formosan Text-To-Speech System
4
 
5
  \
6
- 本系統為初步開發成果的試用版本,仍處於**測試階段**。**合成結果可能在發音、語調或流暢度存在不盡理想之處,甚至可能出現錯誤**。
7
- 我們誠摯邀請您試用本系統,並請務必謹慎**檢視合成結果**,切勿直接作為正式或關鍵資訊使用,感謝您的理解與支持。
 
 
3
  ILRDF Formosan Text-To-Speech System
4
 
5
  \
6
+ 這是「將文字轉換為聲音」的系統,請按照下方步驟操作,或查看操作手冊及操作影片。
7
+ 本系統為初步開發成果的測試版,**合成結果可能於拼寫、斷句處有不盡理想之處,甚至可能出現錯誤**。
8
+ 試用時請務必**謹慎檢視合成結果**,切勿直接作為正式或關鍵資訊使用,感謝您的理解與支持,並請不吝留下系統回報與建議。
app.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import tempfile
2
  from importlib.resources import files
3
 
@@ -123,6 +124,8 @@ examples_config = OmegaConf.to_object(OmegaConf.load("configs/examples.yaml"))
123
 
124
  DEFAULT_MODEL_ID = list(models_config.keys())[0]
125
 
 
 
126
 
127
  @gpu_decorator
128
  def infer(
@@ -219,19 +222,21 @@ with demo:
219
  with gr.Tab("預設配音員"):
220
  with gr.Row():
221
  with gr.Column():
222
- default_speaker_language = gr.Dropdown(
223
- choices=g2p_object.keys(),
224
- label="步驟一:選擇語言",
225
- value="阿美_秀姑巒",
 
226
  )
227
 
228
- def get_refs_by_language(language: str):
229
- return [r for r in refs_config.keys() if r.startswith(language)]
230
 
231
  default_speaker_refs = gr.Dropdown(
232
- choices=get_refs_by_language(default_speaker_language.value),
233
  label="步驟二:選擇配音員",
234
- value=get_refs_by_language(default_speaker_language.value)[0],
 
235
  )
236
 
237
  default_speaker_gen_text_input = gr.Textbox(
@@ -244,26 +249,46 @@ with demo:
244
  )
245
 
246
  with gr.Column():
247
- default_speaker_audio_output = gr.Audio(label="合成結果")
 
 
248
 
249
  with gr.Tab("自己當配音員"):
250
  with gr.Row():
251
  with gr.Column():
 
 
 
 
 
 
 
252
  custom_speaker_language = gr.Dropdown(
253
- choices=g2p_object.keys(),
254
- label="步驟一:選擇語言",
255
- value="阿美_秀姑巒",
 
 
 
 
 
 
 
 
 
256
  )
257
 
258
  custom_speaker_ref_text_input = gr.Textbox(
259
- value=refs_config.get(f"{custom_speaker_language.value}_1", {}).get(
260
- "text", ""
261
- ),
 
262
  label="步驟二:點選🎙️錄製下方句子,或上傳與句子相符的音檔",
263
  )
264
 
265
  custom_speaker_audio_input = gr.Audio(
266
  type="filepath",
 
267
  waveform_options=gr.WaveformOptions(
268
  sample_rate=24000,
269
  ),
@@ -280,23 +305,25 @@ with demo:
280
  )
281
 
282
  with gr.Column():
283
- custom_speaker_audio_output = gr.Audio(label="合成結果")
 
 
284
 
285
- default_speaker_language.change(
286
- lambda lang: gr.Dropdown(
287
- choices=get_refs_by_language(lang),
288
- value=get_refs_by_language(lang)[0],
289
  ),
290
- inputs=[default_speaker_language],
291
  outputs=[default_speaker_refs],
292
  )
293
 
294
  @gpu_decorator
295
  def default_speaker_tts(
296
- language: str,
297
  ref: str,
298
  gen_text_input: str,
299
  ):
 
300
  ref_text_input = refs_config[ref]["text"]
301
  ref_audio_input = refs_config[ref]["wav"]
302
 
@@ -304,6 +331,9 @@ with demo:
304
  if len(gen_text_input) == 0:
305
  raise gr.Error("請勿輸入空字串。")
306
 
 
 
 
307
  ignore_punctuation = False
308
  ipa_with_ng = False
309
 
@@ -325,16 +355,25 @@ with demo:
325
  default_speaker_generate_btn.click(
326
  default_speaker_tts,
327
  inputs=[
328
- default_speaker_language,
329
  default_speaker_refs,
330
  default_speaker_gen_text_input,
331
  ],
332
  outputs=[default_speaker_audio_output],
333
  )
334
 
 
 
 
 
 
 
 
 
 
 
335
  custom_speaker_language.change(
336
  lambda lang: gr.Textbox(
337
- value=refs_config.get(f"{lang}_1", {}).get("text", ""),
338
  ),
339
  inputs=[custom_speaker_language],
340
  outputs=[custom_speaker_ref_text_input],
@@ -358,6 +397,9 @@ with demo:
358
  ignore_punctuation = False
359
  ipa_with_ng = False
360
 
 
 
 
361
  ref_text_input = text_to_ipa(
362
  ref_text_input, language, ignore_punctuation, ipa_with_ng
363
  )
 
1
+ import re
2
  import tempfile
3
  from importlib.resources import files
4
 
 
124
 
125
  DEFAULT_MODEL_ID = list(models_config.keys())[0]
126
 
127
+ ETHNICITIES = list(set([k.split("_")[0] for k in g2p_object.keys()]))
128
+
129
 
130
  @gpu_decorator
131
  def infer(
 
222
  with gr.Tab("預設配音員"):
223
  with gr.Row():
224
  with gr.Column():
225
+ default_speaker_ethnicity = gr.Dropdown(
226
+ choices=ETHNICITIES,
227
+ label="步驟一:選擇族別",
228
+ value="阿美",
229
+ filterable=False,
230
  )
231
 
232
+ def get_refs_by_perfix(prefix: str):
233
+ return [r for r in refs_config.keys() if r.startswith(prefix)]
234
 
235
  default_speaker_refs = gr.Dropdown(
236
+ choices=get_refs_by_perfix(default_speaker_ethnicity.value),
237
  label="步驟二:選擇配音員",
238
+ value=get_refs_by_perfix(default_speaker_ethnicity.value)[0],
239
+ filterable=False,
240
  )
241
 
242
  default_speaker_gen_text_input = gr.Textbox(
 
249
  )
250
 
251
  with gr.Column():
252
+ default_speaker_audio_output = gr.Audio(
253
+ label="合成結果", show_share_button=False, show_download_button=True
254
+ )
255
 
256
  with gr.Tab("自己當配音員"):
257
  with gr.Row():
258
  with gr.Column():
259
+ custom_speaker_ethnicity = gr.Dropdown(
260
+ choices=ETHNICITIES,
261
+ label="步驟一:選擇族別與語別",
262
+ value="阿美",
263
+ filterable=False,
264
+ )
265
+
266
  custom_speaker_language = gr.Dropdown(
267
+ choices=[
268
+ k
269
+ for k in g2p_object.keys()
270
+ if k.startswith(custom_speaker_ethnicity.value)
271
+ ],
272
+ value=[
273
+ k
274
+ for k in g2p_object.keys()
275
+ if k.startswith(custom_speaker_ethnicity.value)
276
+ ][0],
277
+ filterable=False,
278
+ show_label=False,
279
  )
280
 
281
  custom_speaker_ref_text_input = gr.Textbox(
282
+ value=refs_config[
283
+ get_refs_by_perfix(custom_speaker_language.value)[0]
284
+ ]["text"],
285
+ interactive=False,
286
  label="步驟二:點選🎙️錄製下方句子,或上傳與句子相符的音檔",
287
  )
288
 
289
  custom_speaker_audio_input = gr.Audio(
290
  type="filepath",
291
+ sources=["microphone", "upload"],
292
  waveform_options=gr.WaveformOptions(
293
  sample_rate=24000,
294
  ),
 
305
  )
306
 
307
  with gr.Column():
308
+ custom_speaker_audio_output = gr.Audio(
309
+ label="合成結果", show_share_button=False, show_download_button=True
310
+ )
311
 
312
+ default_speaker_ethnicity.change(
313
+ lambda ethnicity: gr.Dropdown(
314
+ choices=get_refs_by_perfix(ethnicity),
315
+ value=get_refs_by_perfix(ethnicity)[0],
316
  ),
317
+ inputs=[default_speaker_ethnicity],
318
  outputs=[default_speaker_refs],
319
  )
320
 
321
  @gpu_decorator
322
  def default_speaker_tts(
 
323
  ref: str,
324
  gen_text_input: str,
325
  ):
326
+ language = re.sub(r"_[男女]聲[12]", "", ref)
327
  ref_text_input = refs_config[ref]["text"]
328
  ref_audio_input = refs_config[ref]["wav"]
329
 
 
331
  if len(gen_text_input) == 0:
332
  raise gr.Error("請勿輸入空字串。")
333
 
334
+ if gen_text_input[-1] not in [".", "?", "!", ",", ";", ":"]:
335
+ gen_text_input += "."
336
+
337
  ignore_punctuation = False
338
  ipa_with_ng = False
339
 
 
355
  default_speaker_generate_btn.click(
356
  default_speaker_tts,
357
  inputs=[
 
358
  default_speaker_refs,
359
  default_speaker_gen_text_input,
360
  ],
361
  outputs=[default_speaker_audio_output],
362
  )
363
 
364
+ custom_speaker_ethnicity.change(
365
+ lambda ethnicity: gr.Dropdown(
366
+ choices=[k for k in g2p_object.keys() if k.startswith(ethnicity)],
367
+ value=[k for k in g2p_object.keys() if k.startswith(ethnicity)][0],
368
+ visible=len([k for k in g2p_object.keys() if k.startswith(ethnicity)]) > 1,
369
+ ),
370
+ inputs=[custom_speaker_ethnicity],
371
+ outputs=[custom_speaker_language],
372
+ )
373
+
374
  custom_speaker_language.change(
375
  lambda lang: gr.Textbox(
376
+ value=refs_config[get_refs_by_perfix(lang)[0]]["text"],
377
  ),
378
  inputs=[custom_speaker_language],
379
  outputs=[custom_speaker_ref_text_input],
 
397
  ignore_punctuation = False
398
  ipa_with_ng = False
399
 
400
+ if gen_text_input[-1] not in [".", "?", "!", ",", ";", ":"]:
401
+ gen_text_input += "."
402
+
403
  ref_text_input = text_to_ipa(
404
  ref_text_input, language, ignore_punctuation, ipa_with_ng
405
  )