File size: 2,426 Bytes
9ba7d3b b3ef6db 9ba7d3b b3ef6db 9ba7d3b b3ef6db 9ba7d3b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 |
import re
# Words for 0-19, indexed by value. Each word carries its own trailing space
# so pieces can be concatenated directly; index 0 is empty because a zero
# group is never spoken.
one = [
    "",
    "one ", "two ", "three ", "four ", "five ",
    "six ", "seven ", "eight ", "nine ", "ten ",
    "eleven ", "twelve ", "thirteen ", "fourteen ", "fifteen ",
    "sixteen ", "seventeen ", "eighteen ", "nineteen ",
]

# Words for the tens digit, indexed by that digit. Slots 0 and 1 are
# placeholders that are never read (values below 20 come from `one`); they
# exist only so that the tens digit can be used directly as the index.
ten = [
    "", "",
    "twenty ", "thirty ", "forty ", "fifty ",
    "sixty ", "seventy ", "eighty ", "ninety ",
]
def numToWords(n, s):
    """Spell out one two-digit group and append its scale word.

    Args:
        n: Integer in the range 0-99 (a single digit group of a larger
            number).
        s: Scale suffix appended after the words when ``n`` is non-zero,
            e.g. ``"thousand "``; may be empty.

    Returns:
        The words for ``n`` followed by ``s``, every word ending in a single
        space. An empty string when ``n`` is 0, so that empty groups
        contribute nothing to the final phrase.

    Raises:
        ValueError: If ``n`` is negative.
        IndexError: If ``n`` is greater than 99 (outside the lookup tables).
    """
    if n < 0:
        # A negative value would index the tables from the end and quietly
        # return the wrong words, so reject it explicitly.
        raise ValueError("numToWords expects a value between 0 and 99, got %r" % (n,))
    if n == 0:
        # Zero groups are silent: no words and no scale suffix.
        return ""

    if n <= 19:
        words = one[n]
    else:
        # 20-99: tens word followed by the units word (empty for x0).
        words = ten[n // 10] + one[n % 10]
    return words + s
def intToWord(n):
    """Convert an integer to English words using crore/lakh grouping.

    Args:
        n: The number to convert; anything accepted by ``int()`` (an int or
            a string of digits such as ``"12345"``).

    Returns:
        A lower-case phrase with single spaces and no leading or trailing
        whitespace, e.g. ``"twelve thousand three hundred and forty five"``.
        Zero is returned as ``"zero"`` and negative values are prefixed with
        ``"minus "``.

    Raises:
        ValueError: If ``n`` cannot be parsed by ``int()``.
    """
    n = int(n)
    if n == 0:
        # Every group below is silent for zero, which would yield "".
        return "zero"
    if n < 0:
        return "minus " + intToWord(-n)

    parts = []

    crores = n // 10000000
    if crores > 99:
        # numToWords only covers 0-99; spell larger crore counts with a
        # recursive call instead of overrunning its lookup tables.
        parts.append(intToWord(crores) + " crore ")
    else:
        parts.append(numToWords(crores, "crore "))

    parts.append(numToWords((n // 100000) % 100, "lakh "))
    parts.append(numToWords((n // 1000) % 100, "thousand "))
    parts.append(numToWords((n // 100) % 10, "hundred "))

    # "and" joins the higher groups to a non-zero tens/ones remainder.
    if n > 100 and n % 100:
        parts.append("and ")

    # Tens and ones places (if any).
    parts.append(numToWords(n % 100, ""))

    return "".join(parts).strip()
def preprocess_text(text):
    """Normalize text into a list of upper-case tokens.

    The text is upper-cased, every ``.`` and ``,`` is split off into its own
    token, all whitespace runs are collapsed, and any character other than
    ``A-Z``, ``0-9``, space, ``.``, ``,`` and ``^`` is removed. Tokens made
    up only of digits are replaced by their English words via ``intToWord``.

    Example:
        Input:
            Hello, World! This is a sample text with numbers 12345 and symbols #$%.
        Output:
            ['HELLO', ',', 'WORLD', 'THIS', 'IS', 'A', 'SAMPLE', 'TEXT',
             'WITH', 'NUMBERS', 'TWELVE', 'THOUSAND', 'THREE', 'HUNDRED',
             'AND', 'FORTY', 'FIVE', 'AND', 'SYMBOLS', '.']

    Args:
        text: The raw input string (``None`` is tolerated).

    Returns:
        The list of tokens. For ``None``, empty or whitespace-only input a
        one-element list holding a placeholder message is returned instead.
    """
    # Test truthiness first so that None never reaches .isspace().
    if not text or text.isspace():
        return ['текст введи :(']

    text = text.upper()
    text = re.sub(r'([.,])', r' \1 ', text)
    # Turn tabs/newlines into spaces BEFORE filtering; the filter below would
    # otherwise delete them and glue neighbouring words together.
    text = re.sub(r'\s+', ' ', text)
    # NOTE(review): the '^' after ',' is a literal caret inside the character
    # class, so '^' characters survive the filter - confirm this is intended.
    text = re.sub(r'[^A-Z .,^0-9]', '', text)

    result = []
    for word in text.split():
        if word.isdigit():
            # A number may expand into several word tokens.
            result.extend(intToWord(word).upper().split())
        else:
            result.append(word)
    return result
if __name__ == "__main__":
    # Small demonstration, run only when the file is executed as a script:
    # tokenize one sample sentence and show the resulting token list.
    sample_text = "Hello, World! This is a sample text with numbers 12345 and symbols #$%."
    processed_text = preprocess_text(sample_text)
    print("Processed text:", processed_text)
|