Gregniuki committed (verified)
Commit f819e92 · 1 Parent(s): 3d7bc1a

Update app.py

Files changed (1): app.py (+7 −6)
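
The change drops the language keyword argument (whose default, language=language, was bound when the function was defined) from infer_batch and infer, removes it from their call sites, and comments out the language_choice input in the TTS tab; language is now resolved from the enclosing module scope, as the added print(language) shows.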
app.py CHANGED

@@ -223,7 +223,7 @@ def text_to_ipa(text, language=language):
 
 
 @gpu_decorator
-def infer_batch(ref_audio, ref_text, gen_text_batches, exp_name, remove_silence, cross_fade_duration=0.15, progress=gr.Progress(), language=language):
+def infer_batch(ref_audio, ref_text, gen_text_batches, exp_name, remove_silence, cross_fade_duration=0.15, progress=gr.Progress()):
     if exp_name == "Multi":
         ema_model = F5TTS_ema_model
     # elif exp_name == "Polish":
@@ -263,6 +263,7 @@ def infer_batch(ref_audio, ref_text, gen_text_batches, exp_name, remove_silence,
     punctuation_weights = {",": 0, ".": 0, " ": 0}  # Add more punctuation as needed
     progress = tqdm(gen_text_batches)
     ipa_text_ref = text_to_ipa(ref_text, language=language)
+    print(language)
 
     for i, gen_text in enumerate(progress):
 
@@ -411,7 +412,7 @@ def infer_batch(ref_audio, ref_text, gen_text_batches, exp_name, remove_silence,
     return (target_sample_rate, final_wave), spectrogram_path
 
 @gpu_decorator
-def infer(ref_audio_orig, ref_text, gen_text, exp_name, remove_silence, cross_fade_duration=0.15, language=language  # Set the desired language code dynamically
+def infer(ref_audio_orig, ref_text, gen_text, exp_name, remove_silence, cross_fade_duration=0.15  # Set the desired language code dynamically
 ):
 
     print(gen_text)
@@ -469,7 +470,7 @@ def infer(ref_audio_orig, ref_text, gen_text, exp_name, remove_silence, cross_fa
         print(f'gen_text {i}', batch_text)
 
     gr.Info(f"Generating audio using {exp_name} in {len(gen_text_batches)} batches")
-    return infer_batch((audio, sr), ref_text, gen_text_batches, exp_name, remove_silence, cross_fade_duration, language)
+    return infer_batch((audio, sr), ref_text, gen_text_batches, exp_name, remove_silence, cross_fade_duration)
 
 
 @gpu_decorator
@@ -495,7 +496,7 @@ def generate_podcast(script, speaker1_name, ref_audio1, ref_text1, speaker2_name
             continue  # Skip if the speaker is neither speaker1 nor speaker2
 
         # Generate audio for this block
-        audio, _ = infer(ref_audio, ref_text, text, exp_name, remove_silence, language)
+        audio, _ = infer(ref_audio, ref_text, text, exp_name, remove_silence)
 
         # Convert the generated audio to a numpy array
         sr, audio_data = audio
@@ -618,7 +619,7 @@ with gr.Blocks() as app_tts:
             model_choice,
             remove_silence,
             cross_fade_duration_slider,
-            language_choice,
+            # language_choice,
         ],
         outputs=[audio_output, spectrogram_output],
     )
@@ -824,7 +825,7 @@ with gr.Blocks() as app_emotional:
             ref_text = speech_types[current_emotion].get('ref_text', '')
 
             # Generate speech for this segment
-            audio, _ = infer(ref_audio, ref_text, text, model_choice, remove_silence, language)
+            audio, _ = infer(ref_audio, ref_text, text, model_choice, remove_silence)
             sr, audio_data = audio
 
             # generated_audio_segments.append(audio_data)
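
A likely motivation for dropping the language=language defaults: in Python, a default argument value is evaluated once, when the def statement runs, so a default captured from a module-level variable never sees later updates to that variable. The sketch below (hypothetical names, not from app.py) contrasts freezing language in a definition-time default with reading the module-level language at call time, which is what infer_batch and infer do after this commit.

language = "en"

def speak_with_default(text, language=language):
    # The default was evaluated when `def` ran, so it stays "en"
    # no matter how the module-level variable changes afterwards.
    return f"{text} [{language}]"

def speak_with_global(text):
    # Looks up the module-level `language` at call time instead.
    return f"{text} [{language}]"

language = "pl"  # e.g., the user switches language at runtime

print(speak_with_default("czesc"))  # czesc [en] -- stale definition-time default
print(speak_with_global("czesc"))   # czesc [pl] -- current global value

The trade-off is that infer_batch and infer now depend on a module-level global rather than an explicit parameter, which is consistent with the language_choice input in the TTS tab being commented out rather than wired through.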