Joshua1808 committed on
Commit
5613506
·
1 Parent(s): 6d38699

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -167
app.py DELETED
@@ -1,167 +0,0 @@
1
- import tweepy as tw
2
- import streamlit as st
3
- import pandas as pd
4
- import torch
5
- import numpy as np
6
- import re
7
-
8
- from pysentimiento.preprocessing import preprocess_tweet
9
-
10
- from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
11
- from transformers import AutoTokenizer, AutoModelForSequenceClassification,AdamW
12
# Hugging Face Hub id of the fine-tuned checkpoint; the tokenizer and the
# classifier are both loaded from this same id so they stay in sync.
MODEL_NAME = 'hackathon-pln-es/twitter_sexismo-finetuned-robertuito-exist2021'

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
14
-
15
- import torch
16
# Select the compute device: the first CUDA GPU when one is available,
# otherwise the CPU. The choice is reported on stdout.
if torch.cuda.is_available():
    device = torch.device("cuda")
    print('I will use the GPU:', torch.cuda.get_device_name(0))
else:
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")

# The input tensors are moved to `device` at prediction time, so the model
# weights must live there too; otherwise the forward pass fails with a
# device-mismatch error on GPU hosts.
model.to(device)
23
-
24
# Twitter API credentials are looked up BY NAME in Streamlit's secrets store.
# The store must define the four entries below; the credential values
# themselves must never appear in source code.
# NOTE(review): values that looked like real credentials were previously
# hard-coded on these lines (used as the lookup keys) — treat them as
# compromised and rotate them in the Twitter developer portal.
consumer_key = st.secrets["consumer_key"]
consumer_secret = st.secrets["consumer_secret"]
access_token = st.secrets["access_token"]
access_token_secret = st.secrets["access_token_secret"]

# OAuth 1.0a user-context client; wait_on_rate_limit makes Tweepy sleep
# instead of raising when the rate limit is hit.
auth = tw.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tw.API(auth, wait_on_rate_limit=True)
31
-
32
def preprocess(text):
    """Normalize a raw tweet into lower-case, ASCII-only, lightly punctuated text.

    Steps, in order: lower-case; drop URLs; decode the ``&amp;``, ``&lt;`` and
    ``&gt;`` HTML entities; drop ``@mentions``; drop every non-ASCII character
    (this also removes accented letters such as ``ñ``); drop punctuation other
    than ``. ! ?``; collapse repeated ``! ? .``; drop quotes and parentheses;
    collapse whitespace.

    Args:
        text: The tweet text.

    Returns:
        The cleaned text, with single spaces between tokens and no leading or
        trailing whitespace.
    """
    text = text.lower()
    # Remove hyperlinks. Only the URL token itself is dropped, so words that
    # follow a link on the same line are kept.
    text = re.sub(r'https?://\S+', '', text)
    # Decode HTML entities. '&amp;' (with or without the trailing ';') needs a
    # regex; str.replace would look for the literal text '&amp;?'.
    text = re.sub(r'&amp;?', 'and', text)
    # '<' and '>' are stripped again by the punctuation pass below; decoding
    # first avoids leaving the residue "lt"/"gt" behind.
    text = text.replace('&lt;', '<')
    text = text.replace('&gt;', '>')
    # Remove mentions.
    text = re.sub(r'@\w+', '', text)
    # Remove non-ASCII characters.
    text = text.encode("ascii", errors="ignore").decode()
    # Remove punctuation except . ! ?  The hyphen is escaped so it is a
    # literal '-' and not a ',' to '/' range, which would also delete '.'.
    text = re.sub(r'[:"#$%&*+,\-/;<=>@\\^_`{|}~]+', '', text)
    # Collapse runs of the sentence punctuation that is kept.
    text = re.sub(r'!+', '!', text)
    text = re.sub(r'\?+', '?', text)
    text = re.sub(r'\.+', '.', text)
    # Drop apostrophes and parentheses.
    text = re.sub(r"['()]", '', text)
    # Collapse all whitespace runs to single spaces and trim the ends.
    return " ".join(text.split())
58
-
59
def highlight_survived(s):
    """Return one background-color CSS declaration per cell of a result row.

    Intended for ``df.style.apply(highlight_survived, axis=1)``.

    Args:
        s: A row exposing a ``Sexista`` attribute and a length (for example a
            pandas Series). ``Sexista`` may be the numeric class ``1`` or the
            display label ``'Sexista'``; both mark the row as sexist, so the
            function works before and after the column is relabelled.

    Returns:
        A list of ``len(s)`` identical CSS strings: red for a sexist row,
        green otherwise.
    """
    is_sexist = s.Sexista in (1, 'Sexista')
    color = 'red' if is_sexist else 'green'
    return [f'background-color: {color}'] * len(s)
61
-
62
def color_survived(val):
    """Return the background-color CSS declaration for a single label cell.

    Args:
        val: The cell value.

    Returns:
        ``'background-color: red'`` when ``val`` equals ``'Sexista'``,
        ``'background-color: white'`` for any other value.
    """
    if val == 'Sexista':
        return 'background-color: red'
    return 'background-color: white'
65
-
66
# CSS snippets injected into the page through st.markdown.
_PAGE_CSS = '<style>body{background-color: Blue;}</style>'
_TITLE_CSS = """ <style> .font {
font-size:40px ; font-family: 'Cooper Black'; color: #FF9633;}
</style> """
_FONT1_CSS = """ <style> .font1 {
font-size:28px ; font-family: 'Times New Roman'; color: #8d33ff;}
</style> """
_FONT2_CSS = """ <style> .font2 {
font-size:16px ; font-family: 'Times New Roman'; color: #3358ff;}
</style> """
_TITLE_HTML = '<p class="font">Análisis de comentarios sexistas en Twitter</p>'

# Wide layout, then the page background rule.
st.set_page_config(layout="wide")
st.markdown(_PAGE_CSS, unsafe_allow_html=True)

# Two columns with a 2:8 width ratio; the title goes in the wider one.
# NOTE(review): the original indentation was lost, so which of the calls
# below sat inside the column context is reconstructed — confirm.
colT1, colT2 = st.columns([2, 8])
with colT2:
    st.markdown(_TITLE_CSS, unsafe_allow_html=True)
    st.markdown(_TITLE_HTML, unsafe_allow_html=True)
    st.markdown(_FONT1_CSS, unsafe_allow_html=True)
    st.markdown(_FONT2_CSS, unsafe_allow_html=True)
86
-
87
def _fetch_tweet_texts(search_words, number_of_tweets, by_term, date_since="2020-09-14"):
    """Return the text of up to ``number_of_tweets`` tweets for a term or a user.

    Args:
        search_words: The search term, or the screen name when ``by_term`` is
            false.
        number_of_tweets: Maximum number of tweets to retrieve.
        by_term: True to run a Spanish-language search excluding retweets,
            False to read the timeline of the user ``search_words``.
        date_since: Value passed as ``since`` to the search call.

    Returns:
        A list with the ``text`` attribute of every tweet returned.
    """
    if by_term:
        new_search = search_words + " -filter:retweets"
        tweets = tw.Cursor(api.search_tweets, q=new_search, lang="es",
                           since=date_since).items(number_of_tweets)
    else:
        tweets = api.user_timeline(screen_name=search_words, count=number_of_tweets)
    return [tweet.text for tweet in tweets]


def _predict_labels(texts, batch_size=25, max_length=128):
    """Classify ``texts`` with the module-level model and return class indices.

    Args:
        texts: Non-empty list of already preprocessed strings.
        batch_size: Number of texts per forward pass.
        max_length: Length every text is padded or truncated to, in tokens.

    Returns:
        A 1-D numpy array holding, for each text, the index of its
        highest-scoring logit.
    """
    encoded = tokenizer.batch_encode_plus(
        texts,
        max_length=max_length,
        add_special_tokens=True,
        return_attention_mask=True,
        padding='max_length',  # replaces the deprecated pad_to_max_length=True
        truncation=True,
    )
    dataset = TensorDataset(torch.tensor(encoded["input_ids"]),
                            torch.tensor(encoded["attention_mask"]))
    dataloader = DataLoader(dataset, sampler=SequentialSampler(dataset),
                            batch_size=batch_size)
    print('Predicting labels for {:,} test sentences...'.format(len(dataset)))

    # The batches are moved to `device`, so the weights must be there as well
    # (this is a no-op when the model already lives on that device).
    model.to(device)
    model.eval()

    batch_logits = []
    # Inference only: skip gradient tracking to save memory and time.
    with torch.no_grad():
        for batch in dataloader:
            input_ids, attention_mask = (t.to(device) for t in batch)
            outputs = model(input_ids, token_type_ids=None, attention_mask=attention_mask)
            # outputs[0] holds the logits; bring them back to the CPU as numpy.
            batch_logits.append(outputs[0].detach().cpu().numpy())
    return np.argmax(np.concatenate(batch_logits, axis=0), axis=1).flatten()


def run():
    """Render the search form and, on submit, show the classified tweets.

    The user enters a term or a user name, picks exactly one of the
    'Término' / 'Usuario' checkboxes and a tweet count (0-50). On submit the
    tweets are fetched, preprocessed with ``preprocess_tweet``, classified,
    and the first 20 are shown in a table whose 'Sexista' column is colored
    by ``color_survived``.
    """
    with st.form(key='Introduzca Texto'):
        col, _buff1, _buff2 = st.columns([2, 2, 1])
        search_words = col.text_input("Introduzca el termino o usuario para analizar y pulse el check correspondiente")
        number_of_tweets = col.number_input('Introduzca número de tweets a analizar. Máximo 50', 0, 50, 10)
        termino = st.checkbox('Término')
        usuario = st.checkbox('Usuario')
        submit_button = col.form_submit_button(label='Analizar')

    # NOTE(review): the original indentation was lost; the result handling is
    # assumed to sit outside the form container — confirm.
    if not submit_button:
        return

    # Exactly one of the two checkboxes must be selected.
    if not termino and not usuario:
        st.text('Error no se ha seleccionado ningun check')
        return
    if termino and usuario:
        st.text('Error se han seleccionado los dos check')
        return

    try:
        tweet_list = _fetch_tweet_texts(search_words, number_of_tweets, by_term=termino)
    except tw.TweepyException as exc:
        # E.g. unknown user or rejected credentials: report it in the page
        # instead of crashing with a traceback.
        st.text('Error al consultar Twitter: ' + str(exc))
        return

    # Nothing to classify (count of 0 or no matches); an empty batch would
    # otherwise fail further down.
    if not tweet_list:
        st.text('No se han encontrado tweets')
        return

    cleaned = [preprocess_tweet(tweet) for tweet in tweet_list]
    labels = _predict_labels(cleaned)

    # The table shows the original (not preprocessed) tweet text.
    df = pd.DataFrame(list(zip(tweet_list, labels)),
                      columns=['Últimos ' + str(number_of_tweets) + ' Tweets' + ' de ' + search_words, 'Sexista'])
    # Class 0 is shown as 'No Sexista'; any other class as 'Sexista'.
    df['Sexista'] = np.where(df['Sexista'] == 0, 'No Sexista', 'Sexista')

    st.table(df.reset_index(drop=True).head(20).style.applymap(color_survived, subset=['Sexista']))


run()