Spaces:

Gregniuki
/

f5-tts_Polish_English_German

Running on Zero

App Files Community

Gregniuki committed on Nov 28, 2024

Commit

5c1fc8b

verified ·

1 Parent(s): dc16460

Update app.py

Browse files

Files changed (1) hide show

app.py +22 -14

app.py CHANGED Viewed

@@ -138,34 +138,43 @@ E2TTS_ema_model2 = load_custom(
-def chunk_text(text, max_chars=110):
     """
     Splits the input text into chunks, each with a maximum number of characters.
-    If a chunk exceeds the character limit and there is no punctuation at the end,
-    it will split at the last space.
     Args:
         text (str): The text to be split.
-        max_chars (int): The maximum number of characters per chunk.
     Returns:
         List[str]: A list of text chunks.
     """
     chunks = []
     current_chunk = ""
-    # Split the text into words (instead of sentences) to handle cases with no punctuation.
-    words = text.split(" ")
-    for word in words:
-        # Check if adding this word exceeds the max_chars limit
-        if len(current_chunk) + len(word) + 1 <= max_chars:  # +1 for the space
-            current_chunk += word + " "
         else:
-            # If the chunk exceeds max_chars, split at the last space
             if current_chunk:
-                chunks.append(current_chunk.strip())  # Add the chunk
-            current_chunk = word + " "  # Start a new chunk with the current word
     # Append any remaining text in current_chunk to chunks
     if current_chunk:
@@ -175,7 +184,6 @@ def chunk_text(text, max_chars=110):
 @gpu_decorator
 def infer_batch(ref_audio, ref_text, gen_text_batches, exp_name, remove_silence, cross_fade_duration=0.15, progress=gr.Progress()):
     if exp_name == "English":

+def chunk_text(text, max_chars=100):
     """
     Splits the input text into chunks, each with a maximum number of characters.
+    Splits occur after punctuation marks or after a space when no punctuation is present
+    and the chunk exceeds `split_after_space_chars`.
     Args:
         text (str): The text to be split.
+        max_chars (int): The maximum number of characters per chunk after punctuation.
+        split_after_space_chars (int): The maximum number of characters per chunk when no punctuation is present.
     Returns:
         List[str]: A list of text chunks.
     """
     chunks = []
     current_chunk = ""
+    split_after_space_chars = 135
+    # Split the text into sentences based on punctuation followed by whitespace
+    sentences = re.split(r"(?<=[;:,.!?])\s+|(?<=[；：，。！？])", text)
+    for sentence in sentences:
+        # If adding this sentence does not exceed max_chars, add it to the current chunk
+        if len(current_chunk) + len(sentence) + 1 <= max_chars:  # +1 for space
+            current_chunk += sentence + " "
         else:
+            # Add the current chunk if it's over the max_chars limit
             if current_chunk:
+                chunks.append(current_chunk.strip())
+            current_chunk = sentence + " "
+        # If current chunk exceeds split_after_space_chars and no punctuation, split at space
+        if len(current_chunk) > split_after_space_chars and re.search(r"\w", current_chunk):
+            split_index = current_chunk.rfind(" ")
+            if split_index != -1:
+                chunks.append(current_chunk[:split_index].strip())  # Add the chunk before the space
+                current_chunk = current_chunk[split_index:].strip()  # Start new chunk after the space
     # Append any remaining text in current_chunk to chunks
     if current_chunk:
 @gpu_decorator
 def infer_batch(ref_audio, ref_text, gen_text_batches, exp_name, remove_silence, cross_fade_duration=0.15, progress=gr.Progress()):
     if exp_name == "English":