raw
history blame contribute delete
566 Bytes
from re import sub
def clean_text(text):
    """
    Apply some pre-processing to the given text.

    Steps, in order:
    - lowercase the text
    - remove backslashes
    - remove every occurrence of 'a.'
    - replace every occurrence of 'b.', 'c.' and 'd.' with a comma ','
    - replace every run of digits with the token '<num>'
    - collapse all whitespace runs into single spaces and strip both ends

    NOTE: the marker handling is plain substring replacement, so it also
    hits words that merely end in those characters (e.g. 'data.' -> 'dat').

    Args:
        text: the string to clean.

    Returns:
        The cleaned string.
    """
    text = text.lower()
    text = text.replace('\\', '')  # generic replace was advised by Danit
    # Drop the first answer marker, then turn the remaining markers into
    # separators between the answers.
    text = text.replace('a.', '')
    for marker in ('b.', 'c.', 'd.'):
        text = text.replace(marker, ',')
    # Raw string: '\d' is not a valid escape in a normal string literal.
    text = sub(r"\d+", "<num>", text)
    # split() with no arguments splits on any whitespace and drops empties.
    return ' '.join(text.split())