Spaces:
Runtime error
Runtime error
Commit
·
3e39b83
1
Parent(s):
f89f27f
Create custom_functions.py
Browse files- custom_functions.py +19 -0
custom_functions.py
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import re
|
2 |
+
|
3 |
+
def preprocessing_text(text):
    """Flatten a chat transcript into concatenated ``Speaker:body`` chunks.

    The transcript is cut at every message header, i.e. a speaker label
    (``Comprador``, ``Mediador`` or ``Vendedor``) followed by a
    parenthesised group. Each message is the header plus everything up to
    the next header. Anything before the first header is discarded.

    For every message:

    * it is dropped when its first line contains ``"para Vendedor"``
      (text sent to the seller);
    * on each line, everything up to and including a ``.jpeg``, ``.jpg``,
      ``.png`` or ``.pdf`` suffix is stripped;
    * the output chunk is the text before the first space of the message,
      a colon, and everything after the message's second newline (the
      first two lines are not part of the body).

    Args:
        text: The raw transcript.

    Returns:
        The chunks of all kept messages concatenated in their original
        order, or ``""`` when no message header is found. A message with
        fewer than three lines contributes an empty body.
    """
    # NOTE(review): "r*" quantifies only the final "r" of each label and
    # ".?" allows at most one character before "(" -- confirm this is the
    # intended header shape. The patterns are kept exactly as they were.
    header_pattern = re.compile(
        r"Comprador*.?\(.*?\)|Mediador*.?\(.*?\)|Vendedor*.?\(.*?\)"
    )
    attachment_pattern = re.compile(r".*?\.jpeg|.*?\.jpg|.*?\.png|.*?\.pdf")

    headers = header_pattern.findall(text)
    # Element 0 of the split is whatever precedes the first header.
    remainders = header_pattern.split(text)[1:]

    chunks = []
    for header, remainder in zip(headers, remainders):
        message = header + remainder

        # Remove text sent to the seller. Filtering while building the
        # result (instead of calling list.remove() on the list being
        # iterated) guarantees consecutive matches are all dropped.
        if "para Vendedor" in message.split("\n", 2)[0]:
            continue

        message = attachment_pattern.sub("", message)
        speaker = message.split(" ")[0]
        lines = message.split("\n", 2)
        # Guard against messages that end before a third line exists.
        body = lines[2] if len(lines) > 2 else ""
        chunks.append(speaker + ":" + body)

    return "".join(chunks)
|