Dupaja committed on
Commit
279f06b
·
1 Parent(s): ea60bf3

Added a function that converts digits to written-out numbers so that SpeechT5 can handle them

Browse files
Files changed (1) hide show
  1. handler.py +31 -2
handler.py CHANGED
@@ -5,9 +5,35 @@ from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5Hif
5
  from datasets import load_dataset
6
  import time
7
  import re
 
8
  from typing import Dict, List, Any
9
 
10
- #from tourtise utils
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  def split_and_recombine_text(text, desired_length=200, max_length=300):
12
  """Split text it into chunks of a desired length trying to keep sentences intact."""
13
  # normalize text, remove redundant whitespace and convert non-ascii quotes to ascii
@@ -107,6 +133,8 @@ class EndpointHandler:
107
  given_text = data.get("inputs", "")
108
 
109
  start_time = time.time()
 
 
110
 
111
  texts = split_and_recombine_text(given_text)
112
  audios = []
@@ -115,7 +143,8 @@ class EndpointHandler:
115
  inputs = self.processor(text=t, return_tensors="pt")
116
  speech = self.model.generate_speech(inputs["input_ids"], self.speaker_embeddings, vocoder=self.vocoder)
117
 
118
- audios.append(speech.numpy())
 
119
 
120
 
121
  final_speech = np.concatenate(audios)
 
5
  from datasets import load_dataset
6
  import time
7
  import re
8
+ import inflect
9
  from typing import Dict, List, Any
10
 
11
# A whitespace-delimited token counts as a number when it is ASCII digits
# (optionally grouped with commas), possibly wrapped in sentence punctuation
# such as "(1999)," or "2,500.".  ASCII-only on purpose: str.isdigit() also
# accepts characters like "²" that int() cannot parse.
_NUMBER_TOKEN = re.compile(
    r"""(?P<lead>[(\["']*)"""
    r"""(?P<digits>[0-9](?:[0-9,]*[0-9])?)"""
    r"""(?P<trail>[.,;:!?)\]"']*)"""
)


def convert_numbers_to_text(input_string):
    """Replace standalone digit tokens in *input_string* with English words.

    The text is split on whitespace and re-joined with single spaces, so any
    run of whitespace in the input collapses to one space.

    Conversion rules, per token:

    * Four bare digits between 1001 and 1999 are read as a year, in pairs:
      "1999" -> "nineteen ninety-nine", "1900" -> "nineteen hundred",
      "1905" -> "nineteen oh five".
    * Every other integer (including 1000, 2000-9999 and zero-padded
      values such as "0042") is spelled out as a plain number. Thousands
      separators in the input ("1,250") are accepted and ignored.
    * Sentence punctuation attached to the number ("1999.", "(2020),") is
      kept around the spelled-out words.
    * Anything else (words, decimals, "$5", "1st", non-ASCII digits) is
      passed through untouched.

    Args:
        input_string: The text to normalise.

    Returns:
        The text with number tokens spelled out.
    """
    engine = inflect.engine()

    def spell(number):
        # inflect separates thousands groups with commas
        # ("one thousand, two hundred"); strip them so the result is a plain
        # run of words regardless of which branch produced it.
        return engine.number_to_words(number).replace(",", "")

    def spell_four_digits(value):
        # Only 1001-1999 get the "pairs" year reading; everything else
        # (leading zeros, exactly 1000, 2000 and later) reads better as a
        # plain number.
        if not 1000 < value < 2000:
            return spell(value)
        century, remainder = divmod(value, 100)
        if remainder == 0:
            return spell(century) + " hundred"
        if remainder < 10:
            return spell(century) + " oh " + spell(remainder)
        return spell(century) + " " + spell(remainder)

    converted = []
    for token in input_string.split():
        match = _NUMBER_TOKEN.fullmatch(token)
        if match is None:
            converted.append(token)
            continue

        digits = match.group("digits")
        if len(digits) == 4 and "," not in digits:
            spoken = spell_four_digits(int(digits))
        else:
            spoken = spell(int(digits.replace(",", "")))

        converted.append(match.group("lead") + spoken + match.group("trail"))

    return " ".join(converted)
36
+
37
  def split_and_recombine_text(text, desired_length=200, max_length=300):
38
  """Split text it into chunks of a desired length trying to keep sentences intact."""
39
  # normalize text, remove redundant whitespace and convert non-ascii quotes to ascii
 
133
  given_text = data.get("inputs", "")
134
 
135
  start_time = time.time()
136
+
137
+ given_text = convert_numbers_to_text(given_text)
138
 
139
  texts = split_and_recombine_text(given_text)
140
  audios = []
 
143
  inputs = self.processor(text=t, return_tensors="pt")
144
  speech = self.model.generate_speech(inputs["input_ids"], self.speaker_embeddings, vocoder=self.vocoder)
145
 
146
+ audios.append(speech)
147
+ #audios.append(speech.numpy())
148
 
149
 
150
  final_speech = np.concatenate(audios)