Dupaja committed on
Commit
5f8cefd
·
1 Parent(s): e641a54

Add handling for fractions and hopefully other number uses

Browse files
Files changed (1) hide show
  1. handler.py +39 -3
handler.py CHANGED
@@ -8,14 +8,46 @@ import re
8
  import inflect
9
  from typing import Dict, List, Any
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  def convert_numbers_to_text(input_string):
12
  p = inflect.engine()
13
- words = input_string.split()
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  new_words = []
15
 
16
  for word in words:
17
 
18
- if word.isdigit() and len(word) == 4: # Check for years (4-digit numbers)
 
 
 
 
19
  year = int(word)
20
  if year < 2000:
21
  # Split the year into two parts
@@ -30,7 +62,9 @@ def convert_numbers_to_text(input_string):
30
  word = word.replace(',','')
31
  number = int(word)
32
  word = p.number_to_words(number).replace(',', '')
33
- new_words.append(word)
 
 
34
 
35
  return ' '.join(new_words)
36
 
@@ -131,6 +165,8 @@ class EndpointHandler:
131
  def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
132
 
133
  given_text = data.get("inputs", "")
 
 
134
 
135
  start_time = time.time()
136
 
 
8
  import inflect
9
  from typing import Dict, List, Any
10
 
11
def contains_special_characters(s):
    """Return True when *s* contains at least one character from the special set.

    The set is the regex character class below; any single member anywhere
    in *s* is enough for a positive result.
    """
    # NOTE(review): the characters in this class look mis-encoded (mojibake);
    # confirm the intended code points against the original handler.py.
    found = re.search(r'[π“΅π–Ύπ“žπšŸπ”Ÿ]', s)
    return found is not None
13
+
14
def check_punctuation(s):
    """Return the trailing punctuation mark of *s*, or '' when there is none.

    Only '.', ',', '!' and '?' are recognised; any other ending (including
    an empty string) yields the empty string.
    """
    for mark in ('.', ',', '!', '?'):
        if s.endswith(mark):
            return mark
    return ''
25
+
26
  def convert_numbers_to_text(input_string):
27
  p = inflect.engine()
28
+ new_string = input_string
29
+
30
+ # Find patterns like [6/7] or other number-character combinations
31
+ mixed_patterns = re.findall(r'\[?\b\d+[^)\] ]*\]?', new_string)
32
+ for pattern in mixed_patterns:
33
+ # Isolate numbers from other characters
34
+ numbers = re.findall(r'\d+', pattern)
35
+ # Replace numbers with words within the pattern
36
+ for number in numbers:
37
+ number_word = p.number_to_words(number)
38
+ pattern_with_words = re.sub(number_word, number, pattern, 1)
39
+ new_string = new_string.replace(pattern, pattern_with_words)
40
+
41
+ words = new_string.split()
42
  new_words = []
43
 
44
  for word in words:
45
 
46
+ punct = check_punctuation(word)
47
+
48
+ if contains_special_characters(word):
49
+ pass
50
+ elif word.isdigit() and len(word) == 4: # Check for years (4-digit numbers)
51
  year = int(word)
52
  if year < 2000:
53
  # Split the year into two parts
 
62
  word = word.replace(',','')
63
  number = int(word)
64
  word = p.number_to_words(number).replace(',', '')
65
+
66
+
67
+ new_words.append(word+punct)
68
 
69
  return ' '.join(new_words)
70
 
 
165
  def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
166
 
167
  given_text = data.get("inputs", "")
168
+ given_text = given_text.replace('&','and')
169
+ given_text = given_text.replace('-',' ')
170
 
171
  start_time = time.time()
172