import re

# Speaker header such as "Comprador (...)", "Mediador (...)" or "Vendedor (...)".
# The same pattern is used to find the headers and to split the text between
# them, so both results line up one-to-one (the pattern has no capture groups).
_SPEAKER_HEADER_RE = re.compile(
    r"Comprador*.?\(.*?\)|Mediador*.?\(.*?\)|Vendedor*.?\(.*?\)"
)

# Anything on a line up to and including an attachment file extension.
_ATTACHMENT_RE = re.compile(r".*?\.jpeg|.*?\.jpg|.*?\.png|.*?\.pdf")


def preprocessing_text(text):
    """Flatten a chat transcript into ``Speaker:body`` chunks.

    The transcript is cut at every speaker header (``Comprador``,
    ``Mediador`` or ``Vendedor`` followed by a parenthesised group). Anything
    before the first header is dropped. Each message is then processed as
    follows:

    * messages whose first line contains ``"para Vendedor"`` are discarded;
    * attachment file names (``.jpeg``, ``.jpg``, ``.png``, ``.pdf``) are
      stripped from the message;
    * the speaker label is the message text up to the first space;
    * the body is everything after the second newline, i.e. the header line
      and the line right after it are not part of the output.

    Args:
        text: Raw transcript as a single string.

    Returns:
        The concatenation of ``"<speaker>:<body>"`` for every kept message,
        or an empty string when no speaker header is found. A message that
        has no body lines contributes ``"<speaker>:"``.
    """
    headers = _SPEAKER_HEADER_RE.findall(text)
    # The first split element is whatever precedes the first header.
    tails = _SPEAKER_HEADER_RE.split(text)[1:]
    messages = [header + tail for header, tail in zip(headers, tails)]

    # Build a new list instead of removing items while iterating: in-place
    # removal skips the element that follows each removed one, which let
    # consecutive "para Vendedor" messages slip through.
    kept = [
        message
        for message in messages
        if "para Vendedor" not in message.split("\n", 2)[0]
    ]

    chunks = []
    for message in kept:
        cleaned = _ATTACHMENT_RE.sub("", message)
        speaker = cleaned.split(" ")[0]
        lines = cleaned.split("\n", 2)
        # A message with fewer than three lines has no body; treat it as
        # empty rather than raising IndexError.
        body = lines[2] if len(lines) > 2 else ""
        chunks.append(speaker + ":" + body)
    return "".join(chunks)