cdgranadillo committed on
Commit
3e39b83
·
1 Parent(s): f89f27f

Create custom_functions.py

Browse files
Files changed (1) hide show
  1. custom_functions.py +19 -0
custom_functions.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+
3
def preprocessing_text(text):
    """Flatten a buyer/mediator/seller chat transcript into "Speaker:body" text.

    The transcript is cut into messages at every speaker header, i.e. every
    match of ``Comprador``/``Mediador``/``Vendedor`` followed by a
    parenthesised group (e.g. ``"Mediador (10:01)"``). Anything before the
    first header is discarded. For each message:

    * it is dropped when its first line contains ``"para Vendedor"``
      (messages sent to the seller);
    * every run of characters on a line that ends in ``.jpeg``, ``.jpg``,
      ``.png`` or ``.pdf`` is removed;
    * the output fragment is the text before the first space, a colon, and
      everything after the message's second newline.

    Args:
        text: Raw transcript as a single string.

    Returns:
        The concatenation of all kept ``"Speaker:body"`` fragments, or ``""``
        when the transcript contains no speaker header.
    """
    # NOTE(review): "r*" matches zero or more "r" and ".?" one optional
    # character; the patterns are kept exactly as they were so matching
    # behavior is unchanged.
    speaker_header = re.compile(
        r"Comprador*.?\(.*?\)|Mediador*.?\(.*?\)|Vendedor*.?\(.*?\)"
    )
    attachment_name = re.compile(r".*?\.jpeg|.*?\.jpg|.*?\.png|.*?\.pdf")

    # A message spans from its own header up to the next header (or the end
    # of the transcript); this equals "separator + following split chunk".
    starts = [match.start() for match in speaker_header.finditer(text)]
    bounds = starts + [len(text)]
    messages = [text[begin:end] for begin, end in zip(bounds, bounds[1:])]

    fragments = []
    for message in messages:
        # Skip text sent to the seller. Filtering while building a new list
        # (instead of removing from the list being iterated) guarantees that
        # consecutive seller-bound messages are all dropped.
        if "para Vendedor" in message.split("\n", 1)[0]:
            continue

        cleaned = attachment_name.sub("", message)
        speaker = cleaned.split(" ", 1)[0]

        # The body starts after the second newline; a message too short to
        # have one (e.g. a truncated last message) yields an empty body
        # rather than raising IndexError.
        parts = cleaned.split("\n", 2)
        body = parts[2] if len(parts) > 2 else ""

        fragments.append(speaker + ":" + body)

    return "".join(fragments)