Spaces:

Gregniuki
/

f5-tts_Polish_English_German

Running on Zero

Gregniuki committed on Nov 29, 2024

Commit

b38b379

verified ·

1 Parent(s): 2eb20e3

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -231,7 +231,7 @@ def infer_batch(ref_audio, ref_text, gen_text_batches, exp_name, remove_silence,
         ref_text = ref_text + " "
 # Define weights for characters
-    punctuation_weights = {",": 0, ".": 0, " ": 0}  # Add more punctuation as needed
     for i, gen_text in enumerate(progress.tqdm(gen_text_batches)):
         # Prepare the text
@@ -383,9 +383,9 @@ def infer(ref_audio_orig, ref_text, gen_text, exp_name, remove_silence, cross_fa
         aseg = non_silent_wave
         audio_duration = len(aseg)
-        if audio_duration > 8000:
-            gr.Warning("Audio is over 8s, clipping to only first 15s.")
-            aseg = aseg[:8000]
         aseg.export(f.name, format="wav")
         ref_audio = f.name
@@ -394,7 +394,7 @@ def infer(ref_audio_orig, ref_text, gen_text, exp_name, remove_silence, cross_fa
         ref_text = pipe(
             ref_audio,
-            chunk_length_s=30,
             batch_size=128,
             generate_kwargs={"task": "transcribe"# ,"language": language_code  # Use the variable here
             },

         ref_text = ref_text + " "
 # Define weights for characters
+    punctuation_weights = {",": 2, ".": 3, " ": 1}  # Add more punctuation as needed
     for i, gen_text in enumerate(progress.tqdm(gen_text_batches)):
         # Prepare the text
         aseg = non_silent_wave
         audio_duration = len(aseg)
+        if audio_duration > 7500:
+            gr.Warning("Audio is over 8s, clipping to only first 8s.")
+            aseg = aseg[:7500]
         aseg.export(f.name, format="wav")
         ref_audio = f.name
         ref_text = pipe(
             ref_audio,
+            chunk_length_s=15,
             batch_size=128,
             generate_kwargs={"task": "transcribe"# ,"language": language_code  # Use the variable here
             },