File size: 689 Bytes
3e39b83
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
import re

# Matches a speaker header: one of the three role words followed (optionally
# after one character) by a parenthesised note on the same line.
_SPEAKER_HEADER_RE = re.compile(
    r"Comprador*.?\(.*?\)|Mediador*.?\(.*?\)|Vendedor*.?\(.*?\)"
)

# Matches everything from the start of a line segment up to and including an
# attachment file extension, so attachment file names are stripped.
_ATTACHMENT_RE = re.compile(r".*?\.jpeg|.*?\.jpg|.*?\.png|.*?\.pdf")


def preprocessing_text(text: str) -> str:
    """Condense a conversation transcript into ``Speaker:message`` chunks.

    The transcript is cut at every speaker header (``Comprador``,
    ``Mediador`` or ``Vendedor`` followed by a parenthesised note). For each
    resulting message:

    * messages whose first line contains ``"para Vendedor"`` are discarded;
    * attachment file names (``.jpeg``, ``.jpg``, ``.png``, ``.pdf``) are
      removed;
    * the output chunk is the first space-delimited word of the message
      (the speaker), a colon, and everything after the message's second
      newline. The line between header and body is dropped -- presumably a
      timestamp; TODO confirm against real transcripts.

    Any text preceding the first header is ignored.

    Args:
        text: Raw conversation transcript.

    Returns:
        The concatenation of all kept ``Speaker:message`` chunks, or an
        empty string when no speaker header is found.
    """
    headers = _SPEAKER_HEADER_RE.findall(text)
    # Element 0 of the split is whatever precedes the first header.
    bodies = _SPEAKER_HEADER_RE.split(text)[1:]

    chunks = []
    for header, body in zip(headers, bodies):
        message = header + body

        # Filter while building the output instead of removing items from
        # the list being iterated: in-place removal skipped the element
        # right after each removed one, so consecutive messages addressed
        # to the seller were not all dropped.
        if "para Vendedor" in message.split("\n", 2)[0]:
            continue

        message = _ATTACHMENT_RE.sub("", message)
        speaker = message.split(" ")[0]

        # A header without the two following newlines (e.g. a truncated
        # transcript) has no body; emit an empty one rather than raising
        # IndexError.
        parts = message.split("\n", 2)
        content = parts[2] if len(parts) > 2 else ""

        chunks.append(speaker + ":" + content)

    return "".join(chunks)