Gregniuki committed on
Commit
dc16460
·
verified ·
1 Parent(s): 89a95a4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -23
app.py CHANGED
@@ -137,11 +137,12 @@ E2TTS_ema_model2 = load_custom(
137
  )
138
 
139
 
140
- def chunk_text(text, max_chars=100):
 
141
  """
142
  Splits the input text into chunks, each with a maximum number of characters.
143
- If a chunk exceeds the character limit, it will split at a space after the limit is exceeded,
144
- but only if no punctuation mark is present at the split point.
145
 
146
  Args:
147
  text (str): The text to be split.
@@ -153,28 +154,19 @@ def chunk_text(text, max_chars=100):
153
  chunks = []
154
  current_chunk = ""
155
 
156
- # Split the text into sentences based on punctuation followed by whitespace
157
- sentences = re.split(r"(?<=[;:,.!?])\s+|(?<=[;:,。!?])", text)
158
 
159
- for sentence in sentences:
160
- # Check if adding this sentence exceeds the max_chars limit
161
- if len(current_chunk) + len(sentence) + 1 <= max_chars: # +1 for the space
162
- current_chunk += sentence + " "
163
  else:
164
- # If the chunk exceeds max_chars and no punctuation at the end, split at the last space
165
  if current_chunk:
166
- # Find the last space in the current chunk and split there
167
- split_index = current_chunk.rfind(" ")
168
- if split_index != -1:
169
- chunks.append(current_chunk[:split_index].strip())
170
- current_chunk = current_chunk[split_index:].strip() + sentence
171
- else:
172
- # If no space is found (unusual case), append the chunk as is
173
- chunks.append(current_chunk.strip())
174
- else:
175
- # If no chunk is being built, just append the sentence
176
- current_chunk = sentence + " "
177
-
178
  # Append any remaining text in current_chunk to chunks
179
  if current_chunk:
180
  chunks.append(current_chunk.strip())
@@ -183,6 +175,7 @@ def chunk_text(text, max_chars=100):
183
 
184
 
185
 
 
186
  @gpu_decorator
187
  def infer_batch(ref_audio, ref_text, gen_text_batches, exp_name, remove_silence, cross_fade_duration=0.15, progress=gr.Progress()):
188
  if exp_name == "English":
@@ -383,7 +376,7 @@ def infer(ref_audio_orig, ref_text, gen_text, exp_name, remove_silence, cross_fa
383
  # Use the new chunk_text function to split gen_text
384
  max_chars = int(0.2 * (len(ref_text.encode('utf-8')) / (audio.shape[-1] / sr) * (25 - audio.shape[-1] / sr)))
385
  print(f"max chars: {max_chars} ")
386
- gen_text_batches = chunk_text(gen_text, max_chars=max_chars)
387
  print('ref_text', ref_text)
388
  for i, batch_text in enumerate(gen_text_batches):
389
  print(f'gen_text {i}', batch_text)
 
137
  )
138
 
139
 
140
+
141
+ def chunk_text(text, max_chars=110):
142
  """
143
  Splits the input text into chunks, each with a maximum number of characters.
144
+ If a chunk exceeds the character limit and there is no punctuation at the end,
145
+ it will split at the last space.
146
 
147
  Args:
148
  text (str): The text to be split.
 
154
  chunks = []
155
  current_chunk = ""
156
 
157
+ # Split the text into words (instead of sentences) to handle cases with no punctuation.
158
+ words = text.split(" ")
159
 
160
+ for word in words:
161
+ # Check if adding this word exceeds the max_chars limit
162
+ if len(current_chunk) + len(word) + 1 <= max_chars: # +1 for the space
163
+ current_chunk += word + " "
164
  else:
165
+ # If the chunk exceeds max_chars, split at the last space
166
  if current_chunk:
167
+ chunks.append(current_chunk.strip()) # Add the chunk
168
+ current_chunk = word + " " # Start a new chunk with the current word
169
+
 
 
 
 
 
 
 
 
 
170
  # Append any remaining text in current_chunk to chunks
171
  if current_chunk:
172
  chunks.append(current_chunk.strip())
 
175
 
176
 
177
 
178
+
179
  @gpu_decorator
180
  def infer_batch(ref_audio, ref_text, gen_text_batches, exp_name, remove_silence, cross_fade_duration=0.15, progress=gr.Progress()):
181
  if exp_name == "English":
 
376
  # Use the new chunk_text function to split gen_text
377
  max_chars = int(0.2 * (len(ref_text.encode('utf-8')) / (audio.shape[-1] / sr) * (25 - audio.shape[-1] / sr)))
378
  print(f"max chars: {max_chars} ")
379
+ gen_text_batches = chunk_text(gen_text, max_chars=110)
380
  print('ref_text', ref_text)
381
  for i, batch_text in enumerate(gen_text_batches):
382
  print(f'gen_text {i}', batch_text)