Spaces:

Gregniuki
/

f5-tts_Polish_English_German

Running on Zero

App Files Files Community

Gregniuki committed on Nov 28, 2024

Commit

dc16460

verified ·

1 Parent(s): 89a95a4

Update app.py

Browse files

Files changed (1) hide show

app.py +16 -23

app.py CHANGED Viewed

@@ -137,11 +137,12 @@ E2TTS_ema_model2 = load_custom(
 )
-def chunk_text(text, max_chars=100):
     """
     Splits the input text into chunks, each with a maximum number of characters.
-    If a chunk exceeds the character limit, it will split at a space after the limit is exceeded,
-    but only if no punctuation mark is present at the split point.
     Args:
         text (str): The text to be split.
@@ -153,28 +154,19 @@ def chunk_text(text, max_chars=100):
     chunks = []
     current_chunk = ""
-    # Split the text into sentences based on punctuation followed by whitespace
-    sentences = re.split(r"(?<=[;:,.!?])\s+|(?<=[；：，。！？])", text)
-    for sentence in sentences:
-        # Check if adding this sentence exceeds the max_chars limit
-        if len(current_chunk) + len(sentence) + 1 <= max_chars:  # +1 for the space
-            current_chunk += sentence + " "
         else:
-            # If the chunk exceeds max_chars and no punctuation at the end, split at the last space
             if current_chunk:
-                # Find the last space in the current chunk and split there
-                split_index = current_chunk.rfind(" ")
-                if split_index != -1:
-                    chunks.append(current_chunk[:split_index].strip())
-                    current_chunk = current_chunk[split_index:].strip() + sentence
-                else:
-                    # If no space is found (unusual case), append the chunk as is
-                    chunks.append(current_chunk.strip())
-            else:
-                # If no chunk is being built, just append the sentence
-                current_chunk = sentence + " "
     # Append any remaining text in current_chunk to chunks
     if current_chunk:
         chunks.append(current_chunk.strip())
@@ -183,6 +175,7 @@ def chunk_text(text, max_chars=100):
 @gpu_decorator
 def infer_batch(ref_audio, ref_text, gen_text_batches, exp_name, remove_silence, cross_fade_duration=0.15, progress=gr.Progress()):
     if exp_name == "English":
@@ -383,7 +376,7 @@ def infer(ref_audio_orig, ref_text, gen_text, exp_name, remove_silence, cross_fa
     # Use the new chunk_text function to split gen_text
     max_chars = int(0.2 * (len(ref_text.encode('utf-8')) / (audio.shape[-1] / sr) * (25 - audio.shape[-1] / sr)))
     print(f"max chars: {max_chars} ")
-    gen_text_batches = chunk_text(gen_text, max_chars=max_chars)
     print('ref_text', ref_text)
     for i, batch_text in enumerate(gen_text_batches):
         print(f'gen_text {i}', batch_text)

 )
+def chunk_text(text, max_chars=110):
     """
     Splits the input text into chunks, each with a maximum number of characters.
+    The text is split on spaces and words are packed greedily, so every chunk
+    ends at a word boundary; punctuation is not taken into account.
     Args:
         text (str): The text to be split.
     chunks = []
     current_chunk = ""
+    # Split the text into words (instead of sentences) to handle cases with no punctuation.
+    words = text.split(" ")
+    for word in words:
+        # Check if adding this word exceeds the max_chars limit
+        if len(current_chunk) + len(word) + 1 <= max_chars:  # +1 for the space
+            current_chunk += word + " "
         else:
+            # Adding this word would exceed max_chars, so close the current chunk before it
             if current_chunk:
+                chunks.append(current_chunk.strip())  # Add the chunk
+            current_chunk = word + " "  # Start a new chunk with the current word
     # Append any remaining text in current_chunk to chunks
     if current_chunk:
         chunks.append(current_chunk.strip())
 @gpu_decorator
 def infer_batch(ref_audio, ref_text, gen_text_batches, exp_name, remove_silence, cross_fade_duration=0.15, progress=gr.Progress()):
     if exp_name == "English":
     # Use the new chunk_text function to split gen_text
     max_chars = int(0.2 * (len(ref_text.encode('utf-8')) / (audio.shape[-1] / sr) * (25 - audio.shape[-1] / sr)))
     print(f"max chars: {max_chars} ")
+    gen_text_batches = chunk_text(gen_text, max_chars=110)
     print('ref_text', ref_text)
     for i, batch_text in enumerate(gen_text_batches):
         print(f'gen_text {i}', batch_text)