Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -138,34 +138,43 @@ E2TTS_ema_model2 = load_custom(
|
|
138 |
|
139 |
|
140 |
|
141 |
-
def chunk_text(text, max_chars=
|
142 |
"""
|
143 |
Splits the input text into chunks, each with a maximum number of characters.
|
144 |
-
|
145 |
-
|
146 |
|
147 |
Args:
|
148 |
text (str): The text to be split.
|
149 |
-
max_chars (int): The maximum number of characters per chunk.
|
|
|
150 |
|
151 |
Returns:
|
152 |
List[str]: A list of text chunks.
|
153 |
"""
|
154 |
chunks = []
|
155 |
current_chunk = ""
|
|
|
156 |
|
157 |
-
# Split the text into
|
158 |
-
|
159 |
|
160 |
-
for
|
161 |
-
#
|
162 |
-
if len(current_chunk) + len(
|
163 |
-
current_chunk +=
|
164 |
else:
|
165 |
-
#
|
166 |
if current_chunk:
|
167 |
-
chunks.append(current_chunk.strip())
|
168 |
-
current_chunk =
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
169 |
|
170 |
# Append any remaining text in current_chunk to chunks
|
171 |
if current_chunk:
|
@@ -175,7 +184,6 @@ def chunk_text(text, max_chars=110):
|
|
175 |
|
176 |
|
177 |
|
178 |
-
|
179 |
@gpu_decorator
|
180 |
def infer_batch(ref_audio, ref_text, gen_text_batches, exp_name, remove_silence, cross_fade_duration=0.15, progress=gr.Progress()):
|
181 |
if exp_name == "English":
|
|
|
138 |
|
139 |
|
140 |
|
141 |
+
def chunk_text(text, max_chars=100):
|
142 |
"""
|
143 |
Splits the input text into chunks, each with a maximum number of characters.
|
144 |
+
Splits occur after punctuation marks or after a space when no punctuation is present
|
145 |
+
and the chunk exceeds `split_after_space_chars`.
|
146 |
|
147 |
Args:
|
148 |
text (str): The text to be split.
|
149 |
+
max_chars (int): The maximum number of characters per chunk after punctuation.
|
150 |
+
split_after_space_chars (int): Not a call argument; a local threshold fixed at 135 characters inside the function, used as the chunk length limit for the split-at-space fallback.
|
151 |
|
152 |
Returns:
|
153 |
List[str]: A list of text chunks.
|
154 |
"""
|
155 |
chunks = []
|
156 |
current_chunk = ""
|
157 |
+
split_after_space_chars = 135
|
158 |
|
159 |
+
# Split the text into sentences after punctuation followed by whitespace, or directly after punctuation in the second character set (no whitespace required)
|
160 |
+
sentences = re.split(r"(?<=[;:,.!?])\s+|(?<=[;:,。!?])", text)
|
161 |
|
162 |
+
for sentence in sentences:
|
163 |
+
# If adding this sentence does not exceed max_chars, add it to the current chunk
|
164 |
+
if len(current_chunk) + len(sentence) + 1 <= max_chars: # +1 for space
|
165 |
+
current_chunk += sentence + " "
|
166 |
else:
|
167 |
+
# Adding this sentence would exceed max_chars, so flush the current chunk (if any) first
|
168 |
if current_chunk:
|
169 |
+
chunks.append(current_chunk.strip())
|
170 |
+
current_chunk = sentence + " "
|
171 |
+
|
172 |
+
# If the current chunk exceeds split_after_space_chars and contains a word character, split at the last space
|
173 |
+
if len(current_chunk) > split_after_space_chars and re.search(r"\w", current_chunk):
|
174 |
+
split_index = current_chunk.rfind(" ")
|
175 |
+
if split_index != -1:
|
176 |
+
chunks.append(current_chunk[:split_index].strip()) # Add the chunk before the space
|
177 |
+
current_chunk = current_chunk[split_index:].strip() # Start new chunk after the space
|
178 |
|
179 |
# Append any remaining text in current_chunk to chunks
|
180 |
if current_chunk:
|
|
|
184 |
|
185 |
|
186 |
|
|
|
187 |
@gpu_decorator
|
188 |
def infer_batch(ref_audio, ref_text, gen_text_batches, exp_name, remove_silence, cross_fade_duration=0.15, progress=gr.Progress()):
|
189 |
if exp_name == "English":
|