Spaces:

cdactvm
/

Punjabi_ASR_Demo

Sleeping

App Files Files Community

cdactvm committed on Jan 24

Commit

4088cbb

verified ·

1 Parent(s): 223e61e

Upload 6 files

Browse files

Files changed (6) hide show

Text2List.py +91 -0
convert2list.py +55 -0
isNumber.py +22 -0
processDoubles.py +31 -0
replaceWords.py +137 -0
text2int.py +102 -0

Text2List.py ADDED Viewed

	@@ -0,0 +1,91 @@

+#!/usr/bin/env python
+# coding: utf-8
+# In[1]:
def text_to_list():
    """Return a fresh list of number words written in Gurmukhi script.

    The list mixes English number names transliterated into Gurmukhi with
    native Punjabi numerals. It covers 0-99 plus the words for hundred and
    thousand. A new list is built on every call, and the order of the
    entries is fixed: teens, round tens, the 21-99 rows decade by decade,
    0-10, then the magnitude words.
    """
    # 11-19: transliterated English first, native Punjabi second.
    teens = (
        ['ਏਲੈਵਨ', 'ਟਵੈਲਵ', 'ਥਰਟੀਨ', 'ਫੋਰਟੀਨ', 'ਫਿਫਟੀਨ', 'ਸਿਕਸਟੀਨ', 'ਸੈਵਨਟੀਨ', 'ਏਟੀਨ', 'ਨਾਈਨਟੀਨ'],
        ['ਗਿਆਰਹ', 'ਬਾਰਾਂ', 'ਤੇਹਰਾਂ', 'ਚੌਦਾਂਹ', 'ਪੰਦਰਾਂ', 'ਸੋਲਾਹ', 'ਸਤਾਰਾਂ', 'ਅਠਾਰਾਂ', 'ਉੱਨ੍ਹੀ'],
    )

    # 20, 30, ..., 90: transliterated English first, native Punjabi second.
    round_tens = (
        ['ਟਵੈਂਟੀ', 'ਥਰਟੀ', 'ਫੋਰਟੀ', 'ਫਿਫਟੀ', 'ਸਿਕਸਟੀ', 'ਸੇਵੰਟੀ', 'ਏਟੀ', 'ਨਾਈਂਟੀ'],
        ['ਵੀਹ', 'ਤੀਹ', 'ਚਾਲੀ', 'ਪੰਜਾਹ', 'ਸੱਠ', 'ਸਤੱਰ', 'ਅੱਸੀ', 'ਨੱਬੇ'],
    )

    # One (transliterated English, native Punjabi) pair of rows per decade,
    # from 21-29 up to 91-99.
    decade_rows = (
        (
            ['ਟਵੈਂਟੀ ਵਨ', 'ਟਵੈਂਟੀ ਟੂ', 'ਟਵੈਂਟੀ ਥ੍ਰੀ', 'ਟਵੈਂਟੀ ਫੋਰ', 'ਟਵੈਂਟੀ ਫਾਈਵ', 'ਟਵੈਂਟੀ ਸਿਕਸ', 'ਟਵੈਂਟੀ ਸੇਵਨ', 'ਟਵੈਂਟੀ ਏਟ', 'ਟਵੈਂਟੀ ਨਾਈਨ'],
            ['ਇੱਕੀ', 'ਬਾਈ', 'ਤੇਈ', 'ਚੌਵੀ', 'ਪੱਚੀ', 'ਛੱਬੀ', 'ਸਤਾਈ', 'ਅਠਾਈ', 'ਉਂਣਤੀ'],
        ),
        (
            ['ਥਰਟੀ ਵਨ', 'ਥਰਟੀ ਟੂ', 'ਥਰਟੀ ਥ੍ਰੀ', 'ਥਰਟੀ ਫੋਰ', 'ਥਰਟੀ ਫਾਈਵ', 'ਥਰਟੀ ਸਿਕਸ', 'ਥਰਟੀ ਸੇਵਨ', 'ਥਰਟੀ ਏਟ', 'ਥਰਟੀ ਨਾਈਨ'],
            ['ਇਕੱਤੀ', 'ਬੱਤੀ', 'ਤੇਂਤੀ', 'ਚੋਨਤੀ', 'ਪੈਂਤੀ', 'ਛੱਤੀ', 'ਸੈਂਤੀ', 'ਅਠੱਤੀ', 'ਉਨਤਾਲੀ'],
        ),
        (
            ['ਫੋਰਟੀ ਵਨ', 'ਫੋਰਟੀ ਟੂ', 'ਫੋਰਟੀ ਥ੍ਰੀ', 'ਫੋਰਟੀ ਫੋਰ', 'ਫੋਰਟੀ ਫਾਈਵ', 'ਫੋਰਟੀ ਸਿਕਸ', 'ਫੋਰਟੀ ਸੇਵਨ', 'ਫੋਰਟੀ ਏਟ', 'ਫੋਰਟੀ ਨਾਈਨ'],
            ['ਇਕਤਾਲੀ', 'ਬਿਆਲੀ', 'ਤਰਤਾਲੀ', 'ਚੋਤਾਲੀ', 'ਪੰਤਾਲੀ', 'ਛਿਆਲੀ', 'ਸੰਤਾਲੀ', 'ਅੜਤਾਲੀ', 'ਉਣੰਜਾ'],
        ),
        (
            ['ਫਿਫਟੀ ਵਨ', 'ਫਿਫਟੀ ਟੂ', 'ਫਿਫਟੀ ਥ੍ਰੀ', 'ਫਿਫਟੀ ਫੋਰ', 'ਫਿਫਟੀ ਫਾਈਵ', 'ਫਿਫਟੀ ਸਿਕਸ', 'ਫਿਫਟੀ ਸੇਵਨ', 'ਫਿਫਟੀ ਏਟ', 'ਫਿਫਟੀ ਨਾਈਨ'],
            ['ਅਕਵੰਜਾ', 'ਬਵੰਜਾ', 'ਤਰਵੰਜਾ', 'ਚੁਰੰਜਾ', 'ਪਚਵੰਜਾ', 'ਛਪੰਜਾ', 'ਸਤਵੰਜਾ', 'ਅਠਵੰਜਾ', 'ਉਣਹਾਟ'],
        ),
        (
            ['ਸਿਕਸਟੀ ਵਨ', 'ਸਿਕਸਟੀ ਟੂ', 'ਸਿਕਸਟੀ ਥ੍ਰੀ', 'ਸਿਕਸਟੀ ਫੋਰ', 'ਸਿਕਸਟੀ ਫਾਈਵ', 'ਸਿਕਸਟੀ ਸਿਕਸ', 'ਸਿਕਸਟੀ ਸੇਵਨ', 'ਸਿਕਸਟੀ ਏਟ', 'ਸਿਕਸਟੀ ਨਾਈਨ'],
            ['ਇਕਹਾਟ', 'ਬਾਹਟ', 'ਤ੍ਰੇਹਟ', 'ਚੋਹਟ', 'ਪਹਿਨਟ', 'ਛੇਹਾਟ', 'ਸਤਾਹਟ', 'ਅਠਾਹਠ', 'ਉਂਣਹਤਰ'],
        ),
        (
            ['ਸੇਵੰਟੀ ਵਨ', 'ਸੇਵੰਟੀ ਟੂ', 'ਸੇਵੰਟੀ ਥ੍ਰੀ', 'ਸੇਵੰਟੀ ਫੋਰ', 'ਸੇਵੰਟੀ ਫਾਈਵ', 'ਸੇਵੰਟੀ ਸਿਕਸ', 'ਸੇਵੰਟੀ ਸੇਵਨ', 'ਸੇਵੰਟੀ ਏਟ', 'ਸੇਵੰਟੀ ਨਾਈਨ'],
            ['ਇਕਹੱਤਰ', 'ਬਹੱਤਰ', 'ਤਿਹੱਤਰ', 'ਚੌਹੱਤਰ', 'ਪਚਹੱਤਰ', 'ਛਿਹੱਤਰ', 'ਸੱਤਹੱਤਰ', 'ਅਠਹੱਤਰ', 'ਉਣਾਸੀ'],
        ),
        (
            ['ਏਟੀ ਵਨ', 'ਏਟੀ ਟੂ', 'ਏਟੀ ਥ੍ਰੀ', 'ਏਟੀ ਫੋਰ', 'ਏਟੀ ਫਾਈਵ', 'ਏਟੀ ਸਿਕਸ', 'ਏਟੀ ਸੇਵਨ', 'ਏਟੀ ਏਟ', 'ਏਟੀ ਨਾਈਨ'],
            ['ਇੱਕਿਆਸੀ', 'ਬਿਆਸੀ', 'ਤਰਾਸੀ', 'ਚੌਰਾਸੀ', 'ਪਚਾਸੀ', 'ਛਿਆਸੀ', 'ਸਤਾਸੀ', 'ਅਠਾਸੀ', 'ਉਣੰਨਵੇਂ'],
        ),
        (
            ['ਨਾਈਂਟੀ ਵਨ', 'ਨਾਈਂਟੀ ਟੂ', 'ਨਾਈਂਟੀ ਥ੍ਰੀ', 'ਨਾਈਂਟੀ ਫੋਰ', 'ਨਾਈਂਟੀ ਫਾਈਵ', 'ਨਾਈਂਟੀ ਸਿਕਸ', 'ਨਾਈਂਟੀ ਸੇਵਨ', 'ਨਾਈਂਟੀ ਏਟ', 'ਨਾਈਂਟੀ ਨਾਈਨ'],
            ['ਇੱਕਿਆਨਵੇ', 'ਬਨਵੇਂ', 'ਤੇਰਾਨਵੇਂ', 'ਚੌਰਨਵੇ', 'ਪਚੰਨਵੇਂ', 'ਛਿਆਨਵੇ', 'ਸਤੰਨਵੇ', 'ਅਠੰਨਵੇ', 'ਨੜ੍ਹੀਨਵੇਂ'],
        ),
    )

    # 0-10: transliterated English first, native Punjabi second.
    zero_to_ten = (
        ['ਜ਼ੀਰੋ', 'ਵਨ', 'ਟੂ', 'ਥ੍ਰੀ', 'ਫੋਰ', 'ਫਾਈਵ', 'ਸਿਕਸ', 'ਸੇਵਨ', 'ਏਟ', 'ਨਾਈਨ', 'ਟੈਨ'],
        ['ਸਿਫ਼ਰ', 'ਇੱਕ', 'ਦੋ', 'ਤਿੰਨ', 'ਚਾਰ', 'ਪੰਜ', 'ਛੇ', 'ਸੱਤ', 'ਅੱਠ', 'ਨੌ', 'ਦਸ'],
    )

    # Transliterated "hundred", Punjabi hundred, Punjabi thousand.
    magnitudes = ['ਹੰਡਰਡ', 'ਸੌ', 'ਹਜ਼ਾਰ']

    vocabulary = []
    for row in teens + round_tens:
        vocabulary.extend(row)
    for transliterated, native in decade_rows:
        vocabulary.extend(transliterated)
        vocabulary.extend(native)
    for row in zero_to_ten:
        vocabulary.extend(row)
    vocabulary.extend(magnitudes)
    return vocabulary
+# In[ ]:

convert2list.py ADDED Viewed

	@@ -0,0 +1,55 @@

+#!/usr/bin/env python
+# coding: utf-8
+# In[2]:
+# import nbimporter
+import nbimporter
+from Text2List import text_to_list
def convert_to_list(text, text_list):
    """Segment ``text`` into known words separated by single spaces.

    The text is scanned left to right. At each position the longest entry of
    ``text_list`` that starts there is taken as one token. Characters that
    begin no known word are collected into a run, and each run is emitted as
    one token. Whitespace inside ``text`` is not special: it lands in the
    unmatched runs like any other character.

    Args:
        text: String to segment. A falsy value yields ``''``.
        text_list: Iterable of known words. Empty strings are ignored,
            because an empty word matches everywhere without consuming
            input and would make the scan loop forever.

    Returns:
        The tokens joined with a single space.
    """
    if not text:
        return ''

    # Longest first, so that e.g. "ab" wins over "a" at the same position.
    candidates = sorted((word for word in text_list if word), key=len, reverse=True)

    tokens = []
    pending = []  # characters of the current unmatched run
    position = 0
    end = len(text)
    # Walk an offset through the string instead of re-slicing it, which
    # avoids copying the remaining text on every step.
    while position < end:
        hit = next(
            (word for word in candidates if text.startswith(word, position)),
            None,
        )
        if hit is None:
            pending.append(text[position])
            position += 1
            continue
        if pending:
            # Flush the unmatched run that preceded this word.
            tokens.append(''.join(pending))
            pending = []
        tokens.append(hit)
        position += len(hit)

    if pending:
        tokens.append(''.join(pending))
    return ' '.join(tokens)
+# text = "जीरोएकदोतीनचारपांचछहसातआठनौदसजीरोएकदोतीनचारपांच"
+# if __name__=="__main__":
+#     converted=convert_to_list(text, text_to_list())
+#     print(converted)
+# In[ ]:

isNumber.py ADDED Viewed

	@@ -0,0 +1,22 @@

+#!/usr/bin/env python
+# coding: utf-8
+# In[1]:
+# Function to check if the string is a number
def is_number(x):
    """Return True if ``x`` can be converted with ``float()``.

    Commas are removed from strings first, so ``"1,234.5"`` counts as a
    number. Anything ``float()`` accepts is a number here, including
    exponent notation and the special strings ``"inf"`` and ``"nan"``.

    Args:
        x: Value to test; usually a string token.

    Returns:
        ``True`` when the conversion succeeds, ``False`` otherwise.
    """
    if isinstance(x, str):
        x = x.replace(',', '')
    try:
        float(x)
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures mean "not a number". A bare ``except``
        # would also swallow KeyboardInterrupt and SystemExit.
        return False
    return True
+# In[ ]:

processDoubles.py ADDED Viewed

	@@ -0,0 +1,31 @@

+#!/usr/bin/env python
+# coding: utf-8
+# In[ ]:
+import re
def process_doubles(sentence):
    """Expand the marker word 'ਡਬਲ' by repeating the token that follows it.

    A marker glued to the next characters is split off first, so "ਡਬਲ5" is
    treated like "ਡਬਲ 5". Each marker token is then dropped and the token
    after it is emitted twice. A marker that is the last token has nothing
    to repeat and is kept as it is.

    Args:
        sentence: Whitespace-separated text.

    Returns:
        The processed tokens joined with single spaces.
    """
    marker = "ਡਬਲ"
    # Insert a space between the marker and whatever is attached to it.
    spaced = re.sub(r'(ਡਬਲ)(\S+)', r'\1 \2', sentence)

    expanded = []
    stream = iter(spaced.split())
    for token in stream:
        if token != marker:
            expanded.append(token)
            continue
        # Consume the token after the marker, if there is one.
        following = next(stream, None)
        if following is None:
            expanded.append(token)
        else:
            expanded.extend((following, following))
    return ' '.join(expanded)

replaceWords.py ADDED Viewed

	@@ -0,0 +1,137 @@

+#!/usr/bin/env python
+# coding: utf-8
+# In[9]:
+import re
# English number word -> Gurmukhi spellings that should be rewritten to it.
# Multi-word spellings such as 'ਟਵੈਂਟੀ ਵਨ' can never equal a single
# whitespace-separated token; they are kept for reference only, because the
# per-token replacements ('twenty' + 'one') already give the same output.
# Spellings that appear in Text2List.text_to_list but were missing here are
# listed as extra variants so both tables agree.
_NUMBER_WORD_SPELLINGS = {
    'zero': ['ਜ਼ੀਰੋ', 'ਸਿਫ਼ਰ'],
    'one': ['ਵਨ', 'ਇੱਕ'],
    'two': ['ਟੂ', 'ਦੋ'],
    'three': ['ਥ੍ਰੀ', 'ਤਿੰਨ'],
    'four': ['ਫੋਰ', 'ਚਾਰ'],
    'five': ['ਫਾਈਵ', 'ਪੰਜ'],
    'six': ['ਸਿਕਸ', 'ਛੇ'],
    'seven': ['ਸੇਵਨ', 'ਸੱਤ'],
    'eight': ['ਏਟ', 'ਅੱਠ'],
    'nine': ['ਨਾਈਨ', 'ਨੌ'],
    'ten': ['ਟੈਨ', 'ਦਸ'],
    'eleven': ['ਇਲੇਵਨ', 'ਗਿਆਰਹ', 'ਏਲੈਵਨ'],
    'twelve': ['ਟਵੈਲਵ', 'ਬਾਰਾਂ'],
    'thirteen': ['ਥਰਟੀਨ', 'ਤੇਹਰਾਂ'],
    'fourteen': ['ਫੋਰਟੀਨ', 'ਚੌਦਾਂਹ'],
    'fifteen': ['ਫਿਫਟੀਨ', 'ਪੰਦਰਾਂ'],
    'sixteen': ['ਸਿਕਸਟਿਨ', 'ਸੋਲਾਹ', 'ਸਿਕਸਟੀਨ'],
    'seventeen': ['ਸੈਵਨਟੀਨ', 'ਸਤਾਰਾਂ'],
    'eighteen': ['ਏਟੀਨ', 'ਅਠਾਰਾਂ'],
    'nineteen': ['ਨਾਈਨਟੀਨ', 'ਉੱਨ੍ਹੀ'],
    'twenty': ['ਟਵੈਂਟੀ', 'ਵੀਹ'],
    'twenty one': ['ਟਵੈਂਟੀ ਵਨ', 'ਇੱਕੀ'],
    'twenty two': ['ਟਵੈਂਟੀ ਟੂ', 'ਬਾਈ'],
    'twenty three': ['ਟਵੈਂਟੀ ਥ੍ਰੀ', 'ਤੇਈ'],
    'twenty four': ['ਟਵੈਂਟੀ ਫੋਰ', 'ਚੋਵੀ', 'ਚੌਵੀ'],
    'twenty five': ['ਟਵੈਂਟੀ ਫਾਈਵ', 'ਪੱਚੀ'],
    'twenty six': ['ਟਵੈਂਟੀ ਸਿਕਸ', 'ਛੱਬੀ'],
    'twenty seven': ['ਟਵੈਂਟੀ ਸੇਵਨ', 'ਸਤਾਈ'],
    'twenty eight': ['ਟਵੈਂਟੀ ਏਟ', 'ਅਠਾਈ'],
    'twenty nine': ['ਟਵੈਂਟੀ ਨਾਈਨ', 'ਉਂਣਤੀ'],
    'thirty': ['ਥਰਟੀ', 'ਤੀਹ'],
    'thirty one': ['ਥਰਟੀ ਵਨ', 'ਇਕੱਤੀ'],
    'thirty two': ['ਥਰਟੀ ਟੂ', 'ਬੱਤੀ'],
    'thirty three': ['ਥਰਟੀ ਥ੍ਰੀ', 'ਤੇਤੀ', 'ਤੇਂਤੀ'],
    'thirty four': ['ਥਰਟੀ ਫੋਰ', 'ਚੋਨਤੀ'],
    'thirty five': ['ਥਰਟੀ ਫਾਈਵ', 'ਪੈਂਤੀ'],
    'thirty six': ['ਥਰਟੀ ਸਿਕਸ', 'ਛੱਤੀ'],
    'thirty seven': ['ਥਰਟੀ ਸੇਵਨ', 'ਸੈਂਤੀ'],
    'thirty eight': ['ਥਰਟੀ ਏਟ', 'ਅਠੱਤੀ'],
    'thirty nine': ['ਥਰਟੀ ਨਾਈਨ', 'ਉਨਤਾਲੀ'],
    'forty': ['ਫੋਰਟੀ', 'ਚਾਲੀ'],
    'forty one': ['ਫੋਰਟੀ ਵਨ', 'ਇਕਤਾਲੀ'],
    'forty two': ['ਫੋਰਟੀ ਟੂ', 'ਬਿਆਲੀ'],
    'forty three': ['ਫੋਰਟੀ ਥ੍ਰੀ', 'ਤਰਤਾਲੀ'],
    'forty four': ['ਫੋਰਟੀ ਫੋਰ', 'ਚੋਤਾਲੀ'],
    'forty five': ['ਫੋਰਟੀ ਫਾਈਵ', 'ਪੰਤਾਲੀ'],
    'forty six': ['ਫੋਰਟੀ ਸਿਕਸ', 'ਛਿਆਲੀ'],
    'forty seven': ['ਫੋਰਟੀ ਸੇਵਨ', 'ਸੈਂਤਾਲੀ', 'ਸੰਤਾਲੀ'],
    'forty eight': ['ਫੋਰਟੀ ਏਟ', 'ਅੜਤਾਲੀ'],
    'forty nine': ['ਫੋਰਟੀ ਨਾਈਨ', 'ਉਣੰਜਾ'],
    'fifty': ['ਫਿਫਟੀ', 'ਪੰਜਾਹ'],
    'fifty one': ['ਫਿਫਟੀ ਵਨ', 'ਅਕਵੰਜਾ'],
    'fifty two': ['ਫਿਫਟੀ ਟੂ', 'ਬਵੰਜਾ'],
    'fifty three': ['ਫਿਫਟੀ ਥ੍ਰੀ', 'ਤਰਵੰਜਾ'],
    'fifty four': ['ਫਿਫਟੀ ਫੋਰ', 'ਚੁਰੰਜਾ'],
    'fifty five': ['ਫਿਫਟੀ ਫਾਈਵ', 'ਪਚਵੰਜਾ'],
    'fifty six': ['ਫਿਫਟੀ ਸਿਕਸ', 'ਛਪੰਜਾ'],
    'fifty seven': ['ਫਿਫਟੀ ਸੇਵਨ', 'ਸਤਵੰਜਾ'],
    'fifty eight': ['ਫਿਫਟੀ ਏਟ', 'ਅਠਵੰਜਾ'],
    'fifty nine': ['ਫਿਫਟੀ ਨਾਈਨ', 'ਉਣਹਾਟ'],
    'sixty': ['ਸਿਕਸਟੀ', 'ਸੱਠ'],
    'sixty one': ['ਸਿਕਸਟੀ ਵਨ', 'ਇਕਹਾਟ'],
    'sixty two': ['ਸਿਕਸਟੀ ਟੂ', 'ਬਾਹਟ'],
    'sixty three': ['ਸਿਕਸਟੀ ਥ੍ਰੀ', 'ਤ੍ਰੇਹਟ'],
    'sixty four': ['ਸਿਕਸਟੀ ਫੋਰ', 'ਚੋਹਟ'],
    'sixty five': ['ਸਿਕਸਟੀ ਫਾਈਵ', 'ਪਹਿਨਟ'],
    'sixty six': ['ਸਿਕਸਟੀ ਸਿਕਸ', 'ਛੇਹਾਟ'],
    'sixty seven': ['ਸਿਕਸਟੀ ਸੇਵਨ', 'ਸਤਾਹਟ'],
    'sixty eight': ['ਸਿਕਸਟੀ ਏਟ', 'ਅਠਾਹਠ'],
    'sixty nine': ['ਸਿਕਸਟੀ ਨਾਈਨ', 'ਉਂਣਹਤਰ'],
    'seventy': ['ਸੇਵੰਟੀ', 'ਸੱਤਰ', 'ਸਤੱਰ'],
    'seventy one': ['ਸੇਵੰਟੀ ਵਨ', 'ਇਕਹੱਤਰ'],
    'seventy two': ['ਸੇਵੰਟੀ ਟੂ', 'ਬਹੱਤਰ'],
    'seventy three': ['ਸੇਵੰਟੀ ਥ੍ਰੀ', 'ਤਿਹੱਤਰ'],
    'seventy four': ['ਸੇਵੰਟੀ ਫੋਰ', 'ਚੌਹੱਤਰ'],
    'seventy five': ['ਸੇਵੰਟੀ ਫਾਈਵ', 'ਪਚਹੱਤਰ'],
    'seventy six': ['ਸੇਵੰਟੀ ਸਿਕਸ', 'ਛਿਹੱਤਰ'],
    'seventy seven': ['ਸੇਵੰਟੀ ਸੇਵਨ', 'ਸਤਹੱਤਰ', 'ਸੱਤਹੱਤਰ'],
    'seventy eight': ['ਸੇਵੰਟੀ ਏਟ', 'ਅਠਹੱਤਰ'],
    'seventy nine': ['ਸੇਵੰਟੀ ਨਾਈਨ', 'ਉਣਾਸੀ'],
    'eighty': ['ਏਟੀ', 'ਅਸੀ', 'ਅੱਸੀ'],
    'eighty one': ['ਏਟੀ ਵਨ', 'ਇੱਕਿਆਸੀ'],
    'eighty two': ['ਏਟੀ ਟੂ', 'ਬਿਆਸੀ'],
    'eighty three': ['ਏਟੀ ਥ੍ਰੀ', 'ਤਿਰਾਸੀ', 'ਤਰਾਸੀ'],
    'eighty four': ['ਏਟੀ ਫੋਰ', 'ਚੌਰਾਸੀ'],
    'eighty five': ['ਏਟੀ ਫਾਈਵ', 'ਪਚਾਸੀ'],
    'eighty six': ['ਏਟੀ ਸਿਕਸ', 'ਛਿਆਸੀ'],
    'eighty seven': ['ਏਟੀ ਸੇਵਨ', 'ਸਤਾਸੀ'],
    'eighty eight': ['ਏਟੀ ਏਟ', 'ਅਠਾਸੀ'],
    'eighty nine': ['ਏਟੀ ਨਾਈਨ', 'ਨਵਾਸੀ', 'ਉਣੰਨਵੇਂ'],
    'ninety': ['ਨਾਇੰਟੀ', 'ਨੱਬੇ', 'ਨਾਈਂਟੀ'],
    'ninety one': ['ਨਾਇੰਟੀ ਵਨ', 'ਇੱਕਿਆਨਵੇ'],
    'ninety two': ['ਨਾਇੰਟੀ ਟੂ', 'ਬਨਵੇਂ'],
    'ninety three': ['ਨਾਇੰਟੀ ਥ੍ਰੀ', 'ਤੇਰਾਨਵੇਂ'],
    'ninety four': ['ਨਾਇੰਟੀ ਫੋਰ', 'ਚੌਰਾਨਵੇ', 'ਚੌਰਨਵੇ'],
    'ninety five': ['ਨਾਇੰਟੀ ਫਾਈਵ', 'ਪਚੰਨਵੇਂ'],
    'ninety six': ['ਨਾਇੰਟੀ ਸਿਕਸ', 'ਛਿਆਨਵੇ'],
    'ninety seven': ['ਨਾਇੰਟੀ ਸੇਵਨ', 'ਸਤੰਨਵੇ'],
    'ninety eight': ['ਨਾਇੰਟੀ ਏਟ', 'ਅਠੰਨਵੇ'],
    'ninety nine': ['ਨਾਇੰਟੀ ਨਾਈਨ', 'ਨੜ੍ਹੀਨਵੇਂ'],
    'hundred': ['ਹੰਡਰਡ', 'ਸੌ'],
    'thousand': ['ਹਜ਼ਾਰ'],
}

# Reverse index, built once: Gurmukhi spelling -> English replacement.
# If a spelling were ever listed under two keys, the later key wins, which
# matches the original scan that did not stop at the first hit.
_SPELLING_TO_ENGLISH = {
    spelling: english
    for english, spellings in _NUMBER_WORD_SPELLINGS.items()
    for spelling in spellings
}


def replace_words(sentence):
    """Rewrite Gurmukhi number words in ``sentence`` as English number words.

    The sentence is split on whitespace and each token is looked up as a
    whole; tokens without a mapping are kept unchanged. Native Punjabi
    compounds map to two English words, e.g. 'ਇੱਕੀ' becomes 'twenty one'.

    Args:
        sentence: Whitespace-separated text.

    Returns:
        The tokens, replaced where a mapping exists, joined with single
        spaces.
    """
    return ' '.join(
        _SPELLING_TO_ENGLISH.get(token, token) for token in sentence.split()
    )
+# In[ ]:

text2int.py ADDED Viewed

	@@ -0,0 +1,102 @@

+#!/usr/bin/env python
+# coding: utf-8
+# In[3]:
+import nbimporter
+from isNumber import is_number  # Remove or replace this if unnecessary
def text_to_int(textnum, numwords=None):
    """Replace spelled-out English numbers in ``textnum`` with digits.

    Hyphens are treated as spaces and the text is split on whitespace. Runs
    of number words are folded into one integer; every other token is copied
    through unchanged. A unit word (zero..nineteen) directly followed by
    another non-scale number word starts a new number, so "one two" becomes
    "1 2". "and" counts as part of a number only right after a scale word
    ("one hundred and twenty").

    Args:
        textnum: Text containing English number words.
        numwords: Optional mapping ``word -> (scale, increment)``. ``None``
            or an empty mapping selects the built-in table; an empty mapping
            supplied by the caller is filled in place.

    Returns:
        The converted text, tokens separated by single spaces.
    """
    units = ['zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven',
             'eight', 'nine', 'ten', 'eleven', 'twelve', 'thirteen',
             'fourteen', 'fifteen', 'sixteen', 'seventeen', 'eighteen',
             'nineteen']
    tens = ['twenty', 'thirty', 'forty', 'fifty', 'sixty', 'seventy',
            'eighty', 'ninety']
    # Explicit exponents: deriving them from list positions broke once
    # 'lac' was inserted (million became 10**9, billion 10**12, ...).
    scales = {'hundred': 2, 'thousand': 3, 'lac': 5, 'million': 6,
              'billion': 9, 'trillion': 12}
    ordinal_words = {'first': 1, 'second': 2, 'third': 3, 'fifth': 5,
                     'eighth': 8, 'ninth': 9, 'twelfth': 12}
    ordinal_endings = [('ieth', 'y'), ('th', '')]

    # ``None`` replaces the former mutable default argument.
    if numwords is None:
        numwords = {}
    if not numwords:
        numwords['and'] = (1, 0)  # allows "one hundred and twenty"
        for value, word in enumerate(units):
            numwords[word] = (1, value)
        for multiple, word in enumerate(tens, start=2):
            numwords[word] = (1, multiple * 10)
        for word, exponent in scales.items():
            numwords[word] = (10 ** exponent, 0)

    def parse_literal(token):
        # Only integer literals (commas allowed) count as numbers. Float-like
        # tokens such as "1.5" or "inf" are left alone as plain words.
        try:
            return int(token.replace(',', ''))
        except ValueError:
            return None

    def is_numword(token):
        return parse_literal(token) is not None or token in numwords

    def from_numword(token):
        literal = parse_literal(token)
        if literal is not None:
            return 0, literal
        return numwords[token]

    textnum = textnum.replace('-', ' ')

    pieces = []
    # The running value is kept in three parts so that "two hundred thousand"
    # can multiply the hundreds by the later, larger scale:
    #   finished - groups closed by a scale above one hundred
    #   hundreds - amounts closed by a scale of exactly one hundred
    #   current  - tens and units not yet closed by any scale
    finished = hundreds = current = 0
    onnumber = False
    lastunit = False
    lastscale = False

    for word in textnum.split():
        if word in ordinal_words:
            current += ordinal_words[word]
            onnumber = True
            lastunit = False
            lastscale = False
            continue

        # Strip an ordinal ending only when what remains is a number word.
        # Otherwise ordinary words would be mangled ("month" -> "mon").
        stem = word
        for ending, replacement in ordinal_endings:
            if stem.endswith(ending):
                stem = f"{stem[:-len(ending)]}{replacement}"
        if stem != word and is_numword(stem):
            word = stem

        if not is_numword(word) or (word == 'and' and not lastscale):
            # Not part of a number: emit any pending number, then the word.
            if onnumber:
                pieces.append(repr(finished + hundreds + current))
            pieces.append(word)
            finished = hundreds = current = 0
            onnumber = False
            lastunit = False
            lastscale = False
            continue

        scale, increment = from_numword(word)
        onnumber = True
        if lastunit and word not in scales:
            # A unit followed by another non-scale word starts a new number.
            pieces.append(repr(finished + hundreds + current))
            finished = hundreds = current = 0
        if scale > 100:
            # A larger scale multiplies the whole pending group, hundreds
            # included.
            current += hundreds
            hundreds = 0
        if scale > 1:
            current = max(1, current)  # a bare "hundred" means 100
        current = current * scale + increment
        if scale > 100:
            finished += current
            current = 0
        elif scale == 100:
            hundreds += current
            current = 0
        lastscale = word in scales
        lastunit = word in units

    if onnumber:
        pieces.append(repr(finished + hundreds + current))
    return ' '.join(pieces)
+# In[ ]: