Add handling for fractions and hopefully other number uses
Browse files- handler.py +39 -3
handler.py
CHANGED
@@ -8,14 +8,46 @@ import re
|
|
8 |
import inflect
|
9 |
from typing import Dict, List, Any
|
10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
def convert_numbers_to_text(input_string):
|
12 |
p = inflect.engine()
|
13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
new_words = []
|
15 |
|
16 |
for word in words:
|
17 |
|
18 |
-
|
|
|
|
|
|
|
|
|
19 |
year = int(word)
|
20 |
if year < 2000:
|
21 |
# Split the year into two parts
|
@@ -30,7 +62,9 @@ def convert_numbers_to_text(input_string):
|
|
30 |
word = word.replace(',','')
|
31 |
number = int(word)
|
32 |
word = p.number_to_words(number).replace(',', '')
|
33 |
-
|
|
|
|
|
34 |
|
35 |
return ' '.join(new_words)
|
36 |
|
@@ -131,6 +165,8 @@ class EndpointHandler:
|
|
131 |
def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
|
132 |
|
133 |
given_text = data.get("inputs", "")
|
|
|
|
|
134 |
|
135 |
start_time = time.time()
|
136 |
|
|
|
8 |
import inflect
|
9 |
from typing import Dict, List, Any
|
10 |
|
11 |
+
def contains_special_characters(s):
    """Report whether ``s`` contains any character from the special set.

    Args:
        s: The string to scan.

    Returns:
        ``True`` if at least one character of ``s`` belongs to the
        character class below, ``False`` otherwise (including for ``''``).
    """
    # NOTE(review): the character class looks mis-encoded (possibly
    # multi-byte symbols decoded with a single-byte codec) -- confirm the
    # intended characters against the original file before relying on it.
    match = re.search(r'[π΅πΎπππ]', s)
    return match is not None
|
13 |
+
|
14 |
+
def check_punctuation(s):
    """Return the punctuation mark that ``s`` ends with, if any.

    Only ``.``, ``,``, ``!`` and ``?`` are recognised.

    Args:
        s: The string (typically a single word) to inspect.

    Returns:
        The trailing mark as a one-character string, or ``''`` when ``s``
        is empty or ends with anything else.
    """
    for mark in ('.', ',', '!', '?'):
        if s.endswith(mark):
            return mark
    return ''
|
25 |
+
|
26 |
def convert_numbers_to_text(input_string):
|
27 |
p = inflect.engine()
|
28 |
+
new_string = input_string
|
29 |
+
|
30 |
+
# Find patterns like [6/7] or other number-character combinations
|
31 |
+
mixed_patterns = re.findall(r'\[?\b\d+[^)\] ]*\]?', new_string)
|
32 |
+
for pattern in mixed_patterns:
|
33 |
+
# Isolate numbers from other characters
|
34 |
+
numbers = re.findall(r'\d+', pattern)
|
35 |
+
# Replace numbers with words within the pattern
|
36 |
+
for number in numbers:
|
37 |
+
number_word = p.number_to_words(number)
|
38 |
+
pattern_with_words = re.sub(number_word, number, pattern, 1)
|
39 |
+
new_string = new_string.replace(pattern, pattern_with_words)
|
40 |
+
|
41 |
+
words = new_string.split()
|
42 |
new_words = []
|
43 |
|
44 |
for word in words:
|
45 |
|
46 |
+
punct = check_punctuation(word)
|
47 |
+
|
48 |
+
if contains_special_characters(word):
|
49 |
+
pass
|
50 |
+
elif word.isdigit() and len(word) == 4: # Check for years (4-digit numbers)
|
51 |
year = int(word)
|
52 |
if year < 2000:
|
53 |
# Split the year into two parts
|
|
|
62 |
word = word.replace(',','')
|
63 |
number = int(word)
|
64 |
word = p.number_to_words(number).replace(',', '')
|
65 |
+
|
66 |
+
|
67 |
+
new_words.append(word+punct)
|
68 |
|
69 |
return ' '.join(new_words)
|
70 |
|
|
|
165 |
def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
|
166 |
|
167 |
given_text = data.get("inputs", "")
|
168 |
+
given_text = given_text.replace('&','and')
|
169 |
+
given_text = given_text.replace('-',' ')
|
170 |
|
171 |
start_time = time.time()
|
172 |
|