Joshua1808 committed on
Commit
bc97bc5
·
1 Parent(s): e5f8353

preprocess

Browse files
Files changed (1) hide show
  1. app.py +28 -0
app.py CHANGED
@@ -31,6 +31,33 @@ auth = tw.OAuthHandler(consumer_key, consumer_secret)
31
  auth.set_access_token(access_token, access_token_secret)
32
  api = tw.API(auth, wait_on_rate_limit=True)
33
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  def highlight_survived(s):
35
  return ['background-color: red']*len(s) if (s.Sexista == 1) else ['background-color: green']*len(s)
36
 
@@ -38,6 +65,7 @@ def color_survived(val):
38
  color = 'red' if val=='Sexista' else 'white'
39
  return f'background-color: {color}'
40
 
 
41
  st.set_page_config(layout="wide")
42
  st.markdown('<style>body{background-color: Blue;}</style>',unsafe_allow_html=True)
43
 
 
31
  auth.set_access_token(access_token, access_token_secret)
32
  api = tw.API(auth, wait_on_rate_limit=True)
33
 
34
def preprocess(text):
    """Clean a piece of tweet-like text and return the normalized string.

    Steps, in order: lowercase; drop URLs; decode the HTML entities
    ``&amp;``, ``&lt;`` and ``&gt;``; drop ``@mentions``; drop non-ASCII
    characters; strip punctuation other than ``.``, ``!`` and ``?``;
    collapse runs of ``.``, ``!`` and ``?`` to a single character; remove
    apostrophes and parentheses; squeeze all whitespace to single spaces.

    Args:
        text: The raw string to clean.

    Returns:
        The cleaned, lowercase, single-spaced string ("" for empty input).
    """
    text = text.lower()

    # Remove hyperlinks. Only the URL token itself is dropped, so any words
    # that follow the link on the same line are preserved.
    text = re.sub(r'https?://\S+', '', text)

    # Decode HTML entities. "&amp;" (with or without the trailing ";") becomes
    # the word "and"; this needs re.sub because str.replace matches its
    # argument literally. "&lt;"/"&gt;" become "<"/">", which the punctuation
    # pass below then strips.
    text = re.sub(r'&amp;?', ' and ', text)
    text = text.replace('&lt;', '<')
    text = text.replace('&gt;', '>')

    # Remove mentions (the "@" together with the handle that follows it).
    text = re.sub(r'@\w+', '', text)

    # Remove non-ASCII characters.
    # NOTE(review): this also drops accented letters and "ñ" entirely
    # (e.g. "niña" -> "nia") -- confirm that is what the downstream consumer
    # expects.
    text = text.encode('ascii', errors='ignore').decode()

    # Remove punctuation except . ! ? -- the hyphen is escaped so it is a
    # literal "-" rather than a ",-/" range (which would also delete ".").
    text = re.sub(r'[:"#$%&*+,\-/;<=>@\\^_`{|}~]+', '', text)

    # Collapse repeated sentence punctuation to a single character.
    text = re.sub(r'!+', '!', text)
    text = re.sub(r'\?+', '?', text)
    text = re.sub(r'\.+', '.', text)

    # Drop apostrophes and parentheses.
    text = re.sub(r"['()]", '', text)

    # Normalize every run of whitespace to one space and trim the ends.
    return ' '.join(text.split())
60
+
61
  def highlight_survived(s):
62
  return ['background-color: red']*len(s) if (s.Sexista == 1) else ['background-color: green']*len(s)
63
 
 
65
  color = 'red' if val=='Sexista' else 'white'
66
  return f'background-color: {color}'
67
 
68
+
69
  st.set_page_config(layout="wide")
70
  st.markdown('<style>body{background-color: Blue;}</style>',unsafe_allow_html=True)
71