Gregniuki committed on
Commit
5c1fc8b
·
verified ·
1 Parent(s): dc16460

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -14
app.py CHANGED
@@ -138,34 +138,43 @@ E2TTS_ema_model2 = load_custom(
138
 
139
 
140
 
141
- def chunk_text(text, max_chars=110):
142
  """
143
  Splits the input text into chunks, each with a maximum number of characters.
144
- If a chunk exceeds the character limit and there is no punctuation at the end,
145
- it will split at the last space.
146
 
147
  Args:
148
  text (str): The text to be split.
149
- max_chars (int): The maximum number of characters per chunk.
 
150
 
151
  Returns:
152
  List[str]: A list of text chunks.
153
  """
154
  chunks = []
155
  current_chunk = ""
 
156
 
157
- # Split the text into words (instead of sentences) to handle cases with no punctuation.
158
- words = text.split(" ")
159
 
160
- for word in words:
161
- # Check if adding this word exceeds the max_chars limit
162
- if len(current_chunk) + len(word) + 1 <= max_chars: # +1 for the space
163
- current_chunk += word + " "
164
  else:
165
- # If the chunk exceeds max_chars, split at the last space
166
  if current_chunk:
167
- chunks.append(current_chunk.strip()) # Add the chunk
168
- current_chunk = word + " " # Start a new chunk with the current word
 
 
 
 
 
 
 
169
 
170
  # Append any remaining text in current_chunk to chunks
171
  if current_chunk:
@@ -175,7 +184,6 @@ def chunk_text(text, max_chars=110):
175
 
176
 
177
 
178
-
179
  @gpu_decorator
180
  def infer_batch(ref_audio, ref_text, gen_text_batches, exp_name, remove_silence, cross_fade_duration=0.15, progress=gr.Progress()):
181
  if exp_name == "English":
 
138
 
139
 
140
 
141
+ def chunk_text(text, max_chars=100):
142
  """
143
  Splits the input text into chunks, each with a maximum number of characters.
144
+ Splits occur after punctuation marks or after a space when no punctuation is present
145
+ and the chunk exceeds `split_after_space_chars`.
146
 
147
  Args:
148
  text (str): The text to be split.
149
+ max_chars (int): The maximum number of characters per chunk after punctuation.
150
+ split_after_space_chars (int): The maximum number of characters per chunk when no punctuation is present.
151
 
152
  Returns:
153
  List[str]: A list of text chunks.
154
  """
155
  chunks = []
156
  current_chunk = ""
157
+ split_after_space_chars = 135
158
 
159
+ # Split the text into sentences based on punctuation followed by whitespace
160
+ sentences = re.split(r"(?<=[;:,.!?])\s+|(?<=[;:,。!?])", text)
161
 
162
+ for sentence in sentences:
163
+ # If adding this sentence does not exceed max_chars, add it to the current chunk
164
+ if len(current_chunk) + len(sentence) + 1 <= max_chars: # +1 for space
165
+ current_chunk += sentence + " "
166
  else:
167
+ # Adding this sentence would exceed max_chars, so flush the current chunk first
168
  if current_chunk:
169
+ chunks.append(current_chunk.strip())
170
+ current_chunk = sentence + " "
171
+
172
+ # If current chunk exceeds split_after_space_chars and no punctuation, split at space
173
+ if len(current_chunk) > split_after_space_chars and re.search(r"\w", current_chunk):
174
+ split_index = current_chunk.rfind(" ")
175
+ if split_index != -1:
176
+ chunks.append(current_chunk[:split_index].strip()) # Add the chunk before the space
177
+ current_chunk = current_chunk[split_index:].strip() # Start new chunk after the space
178
 
179
  # Append any remaining text in current_chunk to chunks
180
  if current_chunk:
 
184
 
185
 
186
 
 
187
  @gpu_decorator
188
  def infer_batch(ref_audio, ref_text, gen_text_batches, exp_name, remove_silence, cross_fade_duration=0.15, progress=gr.Progress()):
189
  if exp_name == "English":