|
import re
|
|
|
|
# Words for 0-19, indexed by value. Index 0 is empty so that a zero
# contributes nothing; every other entry carries a trailing space so that
# pieces can be concatenated directly.
one = [""] + [
    word + " "
    for word in (
        "one two three four five six seven eight nine ten "
        "eleven twelve thirteen fourteen fifteen sixteen "
        "seventeen eighteen nineteen"
    ).split()
]

# Words for the tens digit, indexed by that digit. Indices 0 and 1 are empty
# because values below twenty are taken from `one` instead.
ten = ["", ""] + [
    word + " "
    for word in "twenty thirty forty fifty sixty seventy eighty ninety".split()
]
|
|
|
|
|
|
def numToWords(n, s):
    """Spell out a number below one hundred, followed by a scale word.

    Args:
        n: Integer to spell out. Values up to 19 are read directly from
            ``one``; larger values combine ``ten[n // 10]`` with
            ``one[n % 10]``.
        s: Text appended after the words when ``n`` is non-zero (for
            example ``"thousand "``).

    Returns:
        The words for ``n`` (each followed by a space) and then ``s``, or an
        empty string when ``n`` is zero.
    """
    words = one[n] if n <= 19 else ten[n // 10] + one[n % 10]
    # A zero group contributes no words, so its scale word is dropped too.
    return words + s if n else words
|
|
|
|
|
|
def intToWord(n):
    """Spell out an integer in English words using crore/lakh grouping.

    Args:
        n: An integer, or any value ``int()`` accepts (such as a digit
            string).

    Returns:
        Lower-case words separated by single spaces with no trailing space,
        e.g. ``"twelve thousand three hundred and forty five"``. Zero gives
        ``"zero"`` and negative values are prefixed with ``"minus "``.

    Raises:
        ValueError, TypeError: Propagated from ``int(n)`` when ``n`` cannot
            be converted.
    """
    n = int(n)

    if n == 0:
        # Every group below would be empty and the result would be "".
        return "zero"
    if n < 0:
        # Negative values would otherwise index the word tables from the end.
        return "minus " + intToWord(-n)

    crores = n // 10000000
    if crores > 99:
        # numToWords only covers 0-99; spell larger crore counts recursively
        # instead of running off the end of the tens table.
        out = intToWord(crores) + " crore "
    else:
        out = numToWords(crores, "crore ")

    out += numToWords((n // 100000) % 100, "lakh ")
    out += numToWords((n // 1000) % 100, "thousand ")
    out += numToWords((n // 100) % 10, "hundred ")

    # Join a trailing two-digit part with "and" once higher groups exist.
    if n > 100 and n % 100:
        out += "and "

    out += numToWords(n % 100, "")

    return out.strip()
|
|
|
|
|
|
def preprocess_text(text):
    """Normalize text into a list of upper-case tokens.

    The text is upper-cased, periods and commas are split off into their own
    tokens, whitespace runs are collapsed to single spaces, and every
    character other than ``A-Z``, ``0-9``, space, ``.`` and ``,`` is removed.
    Each all-digit token is then replaced by its spelled-out words (via
    ``intToWord``), one token per word.

    Example:
        Input:
            Hello, World! This is a sample text with numbers 12345 and symbols #$%.
        Output:
            ['HELLO', ',', 'WORLD', 'THIS', 'IS', 'A', 'SAMPLE', 'TEXT',
             'WITH', 'NUMBERS', 'TWELVE', 'THOUSAND', 'THREE', 'HUNDRED',
             'AND', 'FORTY', 'FIVE', 'AND', 'SYMBOLS', '.']

    Args:
        text: Input string. ``None`` is treated like empty input.

    Returns:
        The list of tokens, or a single-element list holding a prompt
        message (in Russian) when ``text`` is ``None``, empty or only
        whitespace.
    """
    if not text or text.isspace():
        return ['текст введи :(']

    text = text.upper()
    text = re.sub(r'([.,])', r' \1 ', text)
    # Turn tabs/newlines into spaces BEFORE stripping disallowed characters;
    # otherwise they are deleted and neighbouring words get glued together.
    text = re.sub(r'\s+', ' ', text)
    # Note: no '^' inside the class -- a caret there would be kept literally.
    text = re.sub(r'[^A-Z .,0-9]', '', text)

    result = []
    for word in text.split():
        if word.isdigit():
            result.extend(intToWord(word).upper().split())
        else:
            result.append(word)
    return result
|
|
|
|
|
|
if __name__ == "__main__":
    # Demo run: normalize one sample sentence and print the resulting tokens.
    sample_text = "Hello, World! This is a sample text with numbers 12345 and symbols #$%."
    print("Processed text:", preprocess_text(sample_text))
|
|
|