Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -137,11 +137,12 @@ E2TTS_ema_model2 = load_custom(
|
|
137 |
)
|
138 |
|
139 |
|
140 |
-
|
|
|
141 |
"""
|
142 |
Splits the input text into chunks, each with a maximum number of characters.
|
143 |
-
If a chunk exceeds the character limit
|
144 |
-
|
145 |
|
146 |
Args:
|
147 |
text (str): The text to be split.
|
@@ -153,28 +154,19 @@ def chunk_text(text, max_chars=100):
|
|
153 |
chunks = []
|
154 |
current_chunk = ""
|
155 |
|
156 |
-
# Split the text into sentences
|
157 |
-
|
158 |
|
159 |
-
for
|
160 |
-
# Check if adding this
|
161 |
-
if len(current_chunk) + len(
|
162 |
-
current_chunk +=
|
163 |
else:
|
164 |
-
# If the chunk exceeds max_chars
|
165 |
if current_chunk:
|
166 |
-
#
|
167 |
-
|
168 |
-
|
169 |
-
chunks.append(current_chunk[:split_index].strip())
|
170 |
-
current_chunk = current_chunk[split_index:].strip() + sentence
|
171 |
-
else:
|
172 |
-
# If no space is found (unusual case), append the chunk as is
|
173 |
-
chunks.append(current_chunk.strip())
|
174 |
-
else:
|
175 |
-
# If no chunk is being built, just append the sentence
|
176 |
-
current_chunk = sentence + " "
|
177 |
-
|
178 |
# Append any remaining text in current_chunk to chunks
|
179 |
if current_chunk:
|
180 |
chunks.append(current_chunk.strip())
|
@@ -183,6 +175,7 @@ def chunk_text(text, max_chars=100):
|
|
183 |
|
184 |
|
185 |
|
|
|
186 |
@gpu_decorator
|
187 |
def infer_batch(ref_audio, ref_text, gen_text_batches, exp_name, remove_silence, cross_fade_duration=0.15, progress=gr.Progress()):
|
188 |
if exp_name == "English":
|
@@ -383,7 +376,7 @@ def infer(ref_audio_orig, ref_text, gen_text, exp_name, remove_silence, cross_fa
|
|
383 |
# Use the new chunk_text function to split gen_text
|
384 |
max_chars = int(0.2 * (len(ref_text.encode('utf-8')) / (audio.shape[-1] / sr) * (25 - audio.shape[-1] / sr)))
|
385 |
print(f"max chars: {max_chars} ")
|
386 |
-
gen_text_batches = chunk_text(gen_text, max_chars=
|
387 |
print('ref_text', ref_text)
|
388 |
for i, batch_text in enumerate(gen_text_batches):
|
389 |
print(f'gen_text {i}', batch_text)
|
|
|
137 |
)
|
138 |
|
139 |
|
140 |
+
|
141 |
+
def chunk_text(text, max_chars=110):
|
142 |
"""
|
143 |
Splits the input text into chunks, each with a maximum number of characters.
|
144 |
+
If a chunk exceeds the character limit and there is no punctuation at the end,
|
145 |
+
it will split at the last space.
|
146 |
|
147 |
Args:
|
148 |
text (str): The text to be split.
|
|
|
154 |
chunks = []
|
155 |
current_chunk = ""
|
156 |
|
157 |
+
# Split the text into words (instead of sentences) to handle cases with no punctuation.
|
158 |
+
words = text.split(" ")
|
159 |
|
160 |
+
for word in words:
|
161 |
+
# Check if adding this word exceeds the max_chars limit
|
162 |
+
if len(current_chunk) + len(word) + 1 <= max_chars: # +1 for the space
|
163 |
+
current_chunk += word + " "
|
164 |
else:
|
165 |
+
# If the chunk exceeds max_chars, split at the last space
|
166 |
if current_chunk:
|
167 |
+
chunks.append(current_chunk.strip()) # Add the chunk
|
168 |
+
current_chunk = word + " " # Start a new chunk with the current word
|
169 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
170 |
# Append any remaining text in current_chunk to chunks
|
171 |
if current_chunk:
|
172 |
chunks.append(current_chunk.strip())
|
|
|
175 |
|
176 |
|
177 |
|
178 |
+
|
179 |
@gpu_decorator
|
180 |
def infer_batch(ref_audio, ref_text, gen_text_batches, exp_name, remove_silence, cross_fade_duration=0.15, progress=gr.Progress()):
|
181 |
if exp_name == "English":
|
|
|
376 |
# Use the new chunk_text function to split gen_text
|
377 |
max_chars = int(0.2 * (len(ref_text.encode('utf-8')) / (audio.shape[-1] / sr) * (25 - audio.shape[-1] / sr)))
|
378 |
print(f"max chars: {max_chars} ")
|
379 |
+
gen_text_batches = chunk_text(gen_text, max_chars=110)
|
380 |
print('ref_text', ref_text)
|
381 |
for i, batch_text in enumerate(gen_text_batches):
|
382 |
print(f'gen_text {i}', batch_text)
|