Spaces:

Gregniuki
/

f5-tts_Polish_English_German

Running on Zero

App Files Files Community

Gregniuki committed on Nov 28, 2024

Commit

f28f6b8

verified ·

1 Parent(s): adac44f

Update app.py

Browse files

Files changed (1) hide show

app.py +23 -20

app.py CHANGED Viewed

@@ -141,9 +141,10 @@ E2TTS_ema_model2 = load_custom(
 def chunk_text(text, max_chars=100):
     """
-    Splits the input text into chunks, each with a maximum number of characters.
-    Splits occur after punctuation marks or after a space when no punctuation is present
-    and the chunk exceeds `split_after_space_chars`.
     Args:
         text (str): The text to be split.
@@ -161,31 +162,32 @@ def chunk_text(text, max_chars=100):
     sentences = re.split(r"(?<=[;:,.!?])\s+|(?<=[；：，。！？])", text)
     for sentence in sentences:
-        # If adding this sentence does not exceed max_chars, add it to the current chunk
         if len(current_chunk) + len(sentence) + 1 <= max_chars:  # +1 for space
             current_chunk += sentence + " "
         else:
-            # Add the current chunk if it's over the max_chars limit
             if current_chunk:
                 chunks.append(current_chunk.strip())
             current_chunk = sentence + " "
-        # If current chunk exceeds split_after_space_chars and no punctuation, split at space
-        if len(current_chunk) > split_after_space_chars:
-            # Check if the chunk has punctuation
-            if not re.search(r"[;:,.!?；：，。！？]", current_chunk):
-                # No punctuation; split after the last space before 135 characters
-                split_index = current_chunk.rfind(" ", 0, split_after_space_chars)
-                if split_index != -1:
-                    chunks.append(current_chunk[:split_index].strip())  # Add the chunk before the space
-                    current_chunk = current_chunk[split_index:].strip()  # Start new chunk after the space
-            else:
-                # Chunk contains punctuation; just add it to chunks
-                if current_chunk:
-                    chunks.append(current_chunk.strip())
-                current_chunk = ""
-    # Append any remaining text in current_chunk to chunks
     if current_chunk:
         chunks.append(current_chunk.strip())
@@ -194,6 +196,7 @@ def chunk_text(text, max_chars=100):
 @gpu_decorator
 def infer_batch(ref_audio, ref_text, gen_text_batches, exp_name, remove_silence, cross_fade_duration=0.15, progress=gr.Progress()):
     if exp_name == "English":

 def chunk_text(text, max_chars=100):
     """
+    Splits the input text into chunks, ensuring:
+    - Chunks are split by punctuation where possible.
+    - If no punctuation is found and the chunk exceeds `split_after_space_chars`,
+      it is split into smaller chunks of up to `split_after_space_chars`.
     Args:
         text (str): The text to be split.
     sentences = re.split(r"(?<=[;:,.!?])\s+|(?<=[；：，。！？])", text)
     for sentence in sentences:
+        # If adding this sentence doesn't exceed max_chars, append it to the current chunk
         if len(current_chunk) + len(sentence) + 1 <= max_chars:  # +1 for space
             current_chunk += sentence + " "
         else:
+            # If current chunk exceeds split_after_space_chars, handle the splitting
+            while len(current_chunk) > split_after_space_chars:
+                split_index = current_chunk.rfind(" ", 0, split_after_space_chars)
+                if split_index == -1:  # No spaces to split; force split at 135 characters
+                    split_index = split_after_space_chars
+                chunks.append(current_chunk[:split_index].strip())
+                current_chunk = current_chunk[split_index:].strip()
+            # Add the current chunk to the list and start a new chunk
             if current_chunk:
                 chunks.append(current_chunk.strip())
             current_chunk = sentence + " "
+    # If the remaining chunk exceeds split_after_space_chars, split it further
+    while len(current_chunk) > split_after_space_chars:
+        split_index = current_chunk.rfind(" ", 0, split_after_space_chars)
+        if split_index == -1:  # No spaces to split; force split at 135 characters
+            split_index = split_after_space_chars
+        chunks.append(current_chunk[:split_index].strip())
+        current_chunk = current_chunk[split_index:].strip()
+    # Add any leftover chunk
     if current_chunk:
         chunks.append(current_chunk.strip())
 @gpu_decorator
 def infer_batch(ref_audio, ref_text, gen_text_batches, exp_name, remove_silence, cross_fade_duration=0.15, progress=gr.Progress()):
     if exp_name == "English":