Spaces:

cdactvm
/

Tamil_ASR_Demo

Sleeping

App Files Files Community

cdactvm committed on Jan 1

Commit

0655b26

verified ·

1 Parent(s): c298401

Update text2int.py

Browse files

Files changed (1) hide show

text2int.py +89 -200

text2int.py CHANGED Viewed

@@ -1,200 +1,89 @@
-#!/usr/bin/env python
-# coding: utf-8
-# In[ ]:
-# # Function to convert Hindi text to numerical representation
-# from isNumber import is_number
-# def text_to_int (textnum, numwords={}):
-#     units = ['zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight',
-#             'nine', 'ten', 'eleven', 'twelve', 'thirteen', 'fourteen', 'fifteen',
-#             'sixteen', 'seventeen', 'eighteen', 'nineteen']
-#     tens = ['', '', 'twenty', 'thirty', 'forty', 'fifty', 'sixty', 'seventy', 'eighty', 'ninety']
-#     scales = ['hundred', 'thousand', 'lac','million', 'billion', 'trillion']
-#     ordinal_words = {'first':1, 'second':2, 'third':3, 'fifth':5, 'eighth':8, 'ninth':9, 'twelfth':12}
-#     ordinal_endings = [('ieth', 'y'), ('th', '')]
-#     if not numwords:
-#         numwords['and'] = (1, 0)
-#         for idx, word in enumerate(units): numwords[word] = (1, idx)
-#         for idx, word in enumerate(tens): numwords[word] = (1, idx * 10)
-#         for idx, word in enumerate(scales): numwords[word] = (10 ** (idx * 3 or 2), 0)
-#     textnum = textnum.replace('-', ' ')
-#     current = result = 0
-#     curstring = ''
-#     onnumber = False
-#     lastunit = False
-#     lastscale = False
-#     def is_numword(x):
-#         if is_number(x):
-#             return True
-#         if word in numwords:
-#             return True
-#         return False
-#     def from_numword(x):
-#         if is_number(x):
-#             scale = 0
-#             increment = int(x.replace(',', ''))
-#             return scale, increment
-#         return numwords[x]
-#     for word in textnum.split():
-#         if word in ordinal_words:
-#             scale, increment = (1, ordinal_words[word])
-#             current = current * scale + increment
-#             if scale > 100:
-#                 result += current
-#                 current = 0
-#             onnumber = True
-#             lastunit = False
-#             lastscale = False
-#         else:
-#             for ending, replacement in ordinal_endings:
-#                 if word.endswith(ending):
-#                     word = "%s%s" % (word[:-len(ending)], replacement)
-#             if (not is_numword(word)) or (word == 'and' and not lastscale):
-#                 if onnumber:
-#                     # Flush the current number we are building
-#                     curstring += repr(result + current) + " "
-#                 curstring += word + " "
-#                 result = current = 0
-#                 onnumber = False
-#                 lastunit = False
-#                 lastscale = False
-#             else:
-#                 scale, increment = from_numword(word)
-#                 onnumber = True
-#                 if lastunit and (word not in scales):
-#                     # Assume this is part of a string of individual numbers to
-#                     # be flushed, such as a zipcode "one two three four five"
-#                     curstring += repr(result + current)
-#                     result = current = 0
-#                 if scale > 1:
-#                     current = max(1, current)
-#                 current = current * scale + increment
-#                 if scale > 100:
-#                     result += current
-#                     current = 0
-#                 lastscale = False
-#                 lastunit = False
-#                 if word in scales:
-#                     lastscale = True
-#                 elif word in units:
-#                     lastunit = True
-#     if onnumber:
-#         curstring += repr(result + current)
-#     return curstring
-# In[3]:
-import nbimporter
-from isNumber import is_number  # Remove or replace this if unnecessary
-def text_to_int(textnum, numwords={}):  # numwords doubles as a cache: the shared default dict is filled on the first call
-    # Define units, tens, and scales including "lac"
-    units = ['zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight',
-            'nine', 'ten', 'eleven', 'twelve', 'thirteen', 'fourteen', 'fifteen',
-            'sixteen', 'seventeen', 'eighteen', 'nineteen']
-    tens = ['', '', 'twenty', 'thirty', 'forty', 'fifty', 'sixty', 'seventy', 'eighty', 'ninety']  # NOTE(review): the '' placeholders also end up as keys of numwords
-    scales = ['hundred', 'thousand', 'lac', 'million', 'billion', 'trillion']  # "lac" added
-    ordinal_words = {'first': 1, 'second': 2, 'third': 3, 'fifth': 5, 'eighth': 8, 'ninth': 9, 'twelfth': 12}  # irregular ordinals mapped straight to their value
-    ordinal_endings = [('ieth', 'y'), ('th', '')]  # (suffix, replacement) pairs turning regular ordinals into cardinals
-    if not numwords:
-        numwords['and'] = (1, 0)  # Handle "one hundred and twenty"
-        # Add units, tens, and scales to numwords
-        for idx, word in enumerate(units):
-            numwords[word] = (1, idx)
-        for idx, word in enumerate(tens):
-            numwords[word] = (1, idx * 10)
-        for idx, word in enumerate(scales):
-            numwords[word] = (10 ** (5 if word == 'lac' else idx * 3 or 2), 0)  # "lac" is 10**5, 'hundred' falls back to 10**2, others use 10**(idx*3). NOTE(review): with 'lac' at index 2, 'million' becomes 10**9
-    # Remove hyphens and normalize input
-    textnum = textnum.replace('-', ' ')
-    current = result = 0  # current: group being built; result: groups already closed by a large scale
-    curstring = ''  # output text accumulated so far
-    onnumber = False  # True while a number is in progress and not yet written out
-    lastunit = False  # previous word was in units
-    lastscale = False  # previous word was in scales
-    def is_numword(x):
-        return is_number(x) or x in numwords
-    def from_numword(x):
-        if is_number(x):
-            return 0, int(x.replace(',', ''))  # scale 0: a numeric token overwrites 'current' instead of extending it
-        return numwords[x]
-    for word in textnum.split():
-        if word in ordinal_words:
-            scale, increment = (1, ordinal_words[word])
-            current = current * scale + increment
-            if scale > 100:  # never true here: scale is always 1 in this branch
-                result += current
-                current = 0
-            onnumber = True
-            lastunit = False
-            lastscale = False
-        else:
-            for ending, replacement in ordinal_endings:
-                if word.endswith(ending):
-                    word = f"{word[:-len(ending)]}{replacement}"  # NOTE(review): applied to every word, number or not (e.g. "with" -> "wi")
-            if not is_numword(word) or (word == 'and' and not lastscale):  # 'and' only counts as part of a number right after a scale word
-                if onnumber:
-                    curstring += repr(result + current) + " "  # write out the finished number before the plain word
-                curstring += word + " "
-                result = current = 0
-                onnumber = False
-                lastunit = False
-                lastscale = False
-            else:
-                scale, increment = from_numword(word)
-                onnumber = True
-                if lastunit and word not in scales:  # unit after unit: treat as separate numbers ("one two" -> "1 2")
-                    curstring += repr(result + current) + " "
-                    result = current = 0
-                if scale > 1:
-                    current = max(1, current)  # a bare scale word ("hundred") counts as one of that scale
-                current = current * scale + increment
-                if scale >= 100:  # NOTE(review): 'hundred' is closed here too, so a following larger scale does not multiply it
-                    result += current
-                    current = 0
-                lastscale = word in scales
-                lastunit = word in units
-    if onnumber:
-        curstring += repr(result + current)  # write out a number that ends the text
-    return curstring.strip()
-# In[ ]:

+import nbimporter
+from isNumber import is_number  # Remove or replace this if unnecessary
+def text_to_int(textnum, numwords={}):
+    # Define units, tens, and scales including "lac"
+    units = ['zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight',
+            'nine', 'ten', 'eleven', 'twelve', 'thirteen', 'fourteen', 'fifteen',
+            'sixteen', 'seventeen', 'eighteen', 'nineteen']
+    tens = ['', '', 'twenty', 'thirty', 'forty', 'fifty', 'sixty', 'seventy', 'eighty', 'ninety']
+    scales = ['hundred', 'thousand', 'lac', 'million', 'billion', 'trillion']  # "lac" added
+    ordinal_words = {'first': 1, 'second': 2, 'third': 3, 'fifth': 5, 'eighth': 8, 'ninth': 9, 'twelfth': 12}
+    ordinal_endings = [('ieth', 'y'), ('th', '')]
+    if not numwords:
+        numwords['and'] = (1, 0)  # Handle "one hundred and twenty"
+        # Add units, tens, and scales to numwords
+        for idx, word in enumerate(units):
+            numwords[word] = (1, idx)
+        for idx, word in enumerate(tens):
+            numwords[word] = (1, idx * 10)
+        for idx, word in enumerate(scales):
+            numwords[word] = (10 ** (5 if word == 'lac' else idx * 3 or 2), 0)  # Handle "lac" as 10^5
+    # Remove hyphens and normalize input
+    textnum = textnum.replace('-', ' ')
+    current = result = 0
+    curstring = ''
+    onnumber = False
+    lastunit = False
+    lastscale = False
+    def is_numword(x):
+        return is_number(x) or x in numwords
+    def from_numword(x):
+        if is_number(x):
+            return 0, int(x.replace(',', ''))
+        return numwords[x]
+    for word in textnum.split():
+        if word in ordinal_words:
+            scale, increment = (1, ordinal_words[word])
+            current = current * scale + increment
+            if scale > 100:
+                result += current
+                current = 0
+            onnumber = True
+            lastunit = False
+            lastscale = False
+        else:
+            for ending, replacement in ordinal_endings:
+                if word.endswith(ending):
+                    word = f"{word[:-len(ending)]}{replacement}"
+            if not is_numword(word) or (word == 'and' and not lastscale):
+                if onnumber:
+                    curstring += repr(result + current) + " "
+                curstring += word + " "
+                result = current = 0
+                onnumber = False
+                lastunit = False
+                lastscale = False
+            else:
+                scale, increment = from_numword(word)
+                onnumber = True
+                if lastunit and word not in scales:
+                    curstring += repr(result + current) + " "
+                    result = current = 0
+                if scale > 1:
+                    current = max(1, current)
+                current = current * scale + increment
+                if scale >= 100:
+                    result += current
+                    current = 0
+                lastscale = word in scales
+                lastunit = word in units
+    if onnumber:
+        curstring += repr(result + current)
+    return curstring.strip()