Jimmy Vu committed
Commit 0210dff · 1 Parent(s): 7c7451a

Update split_sentence

Files changed (2)
  1. gradio_app.py +3 -4
  2. utils/sentence.py +6 -2
gradio_app.py CHANGED
@@ -235,8 +235,7 @@ def inference(input_text, language, speaker_id=None, gpt_cond_latent=None, speak
         sentence = normalize_vietnamese_text(sentence)
         text_tokens = torch.IntTensor(xtts_model.tokenizer.encode(sentence, lang=lang)).unsqueeze(0).to(xtts_model.device)
         num_of_tokens += text_tokens.shape[-1]
-        txts = [sentence]
-        if len(sentence) >= max_text_length: txts = split_sentence(sentence)
+        txts = split_sentence(sentence, max_text_length=max_text_length)
         for txt in txts:
             logger.info(f"[{lang}] {txt}")
             try:
@@ -250,11 +249,11 @@ def inference(input_text, language, speaker_id=None, gpt_cond_latent=None, speak
                     top_k=top_k,
                     repetition_penalty=repetition_penalty,
                     length_penalty=dynamic_length_penalty(len(sentence)),
-                    enable_text_splitting=True,
+                    enable_text_splitting=False,
                 )
                 out_wavs.append(out["wav"])
             except Exception as e:
-                logger.error(f"Error processing text: {txt} - {e}")
+                logger.error(f"Error processing text: {e}")
     return np.concatenate(out_wavs), num_of_tokens
 
 
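Net effect in the caller: the length guard moves into split_sentence itself, which now always returns a list (a single-element one for short inputs), and the model-side splitter is switched off (enable_text_splitting=False) since chunking now happens before inference. A quick check of the short-input equivalence, assuming the updated utils/sentence.py is on the import path:

from utils.sentence import split_sentence

sentence = "Xin chào."  # well under the default max_text_length
# Old caller: txts = [sentence], and the length guard was skipped.
# New caller: split_sentence short-circuits to the same single-element list.
assert split_sentence(sentence, max_text_length=180) == [sentence]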
utils/sentence.py CHANGED
@@ -1,5 +1,5 @@
 
-def split_sentence(sentence, delimiters=",;-!?"):
+def split_sentence(sentence, max_text_length=180, delimiters=",;-!?"):
     """
     Splits a sentence into two halves, prioritizing the delimiter closest to the middle.
     If no delimiter is found, it ensures words are not split in the middle.
@@ -11,6 +11,9 @@ def split_sentence(sentence, delimiters=",;-!?"):
     Returns:
         tuple: A tuple containing the two halves of the sentence.
     """
+    if len(sentence) < max_text_length:
+        return [sentence]
+
     # Find all delimiter indices in the sentence
     delimiter_indices = [i for i, char in enumerate(sentence) if char in delimiters]
 
@@ -50,7 +53,8 @@ def split_sentence(sentence, delimiters=",;-!?"):
     first_half = sentence[:split_index].strip()
     second_half = sentence[split_index:].strip()
 
-    return first_half, second_half
+    return split_sentence(first_half, max_text_length=max_text_length) \
+        + split_sentence(second_half, max_text_length=max_text_length)
 
 
 def merge_sentences(sentences):
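Two things worth noting: the docstring still says the function returns a tuple of two halves, but it now returns a flat list of chunks, each shorter than max_text_length; and the split is now recursive, so very long sentences are divided more than once. The sketch below is a minimal, self-contained approximation of the updated function; the diff elides the delimiter-selection middle (roughly old lines 17-49), so that part is reconstructed from the docstring and is an assumption, not the repository's exact code:

def split_sentence(sentence, max_text_length=180, delimiters=",;-!?"):
    # New short-circuit: anything under the limit passes through unchanged.
    if len(sentence) < max_text_length:
        return [sentence]

    # Find all delimiter indices in the sentence.
    delimiter_indices = [i for i, char in enumerate(sentence) if char in delimiters]

    # Assumed middle section: pick the delimiter closest to the midpoint,
    # falling back to the nearest space so words are not cut in half.
    middle = len(sentence) // 2
    if delimiter_indices:
        split_index = min(delimiter_indices, key=lambda i: abs(i - middle)) + 1
    else:
        split_index = sentence.rfind(" ", 0, middle)
    if split_index <= 0 or split_index >= len(sentence):
        split_index = middle

    first_half = sentence[:split_index].strip()
    second_half = sentence[split_index:].strip()

    # New recursive step: keep splitting until every chunk is under the limit.
    return split_sentence(first_half, max_text_length=max_text_length) \
        + split_sentence(second_half, max_text_length=max_text_length)


print(split_sentence("one, two, three, four, five, six, seven, eight",
                     max_text_length=20))
# With this sketch: ['one, two,', 'three, four,', 'five, six,', 'seven, eight']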