Runtime error
Runtime error
Browse files
@@ -58,6 +58,7 @@ def preprocess(text):
58 |
text=" ".join(text.split())
59 |
return text
60 |
61 |
def highlight_survived(s):
    """Return one CSS background style per element of ``s``.

    Args:
        s: A row-like object exposing a ``Sexista`` attribute and ``len()``
            (for example a DataFrame row handed over by a pandas Styler).

    Returns:
        A list of ``len(s)`` identical style strings: red background when
        ``s.Sexista == 1``, green background otherwise.
    """
    color = 'red' if s.Sexista == 1 else 'green'
    return ['background-color: ' + color] * len(s)
63 |
@@ -85,6 +86,118 @@ with colT2:
85 |
font-size:16px ; font-family: 'Times New Roman'; color: #3358ff;}
86 |
</style> """, unsafe_allow_html=True)
87 |
88 |
89 |
def run():
90 |
with st.form("my_form"):
@@ -107,65 +220,10 @@ def run():
107 |
108 |
if (error == False):
109 |
if (termino):
110 |
111 |
#tweets =tw.Cursor(api.search_tweets,q=new_search,lang="es").items(number_of_tweets)
112 |
# Tokenizar la frase
113 |
tokens = tokenizer.tokenize(search_words)
114 |
# Convertir los tokens a un formato compatible con el modelo
115 |
input_ids = tokenizer.convert_tokens_to_ids(tokens)
116 |
attention_masks = [1] * len(input_ids)
117 |
# Pasar los tokens al modelo
118 |
outputs = model(torch.tensor([input_ids]), token_type_ids=None, attention_mask=torch.tensor([attention_masks]))
119 |
120 |
# Obtener la probabilidad de que la frase sea "sexista"
121 |
probabilidad_sexista = outputs[0][0][1].item()
122 |
123 |
# Crear un Dataframe
124 |
text= pd.DataFrame({'palabra': [search_words],'probabilidad':[probabilidad_sexista]})
125 |
126 |
127 |
128 |
elif (usuario):
129 |
130 |
131 |
text= pd.DataFrame(tweet_list)
132 |
text[0] = text[0].apply(preprocess_tweet)
133 |
134 |
indices1=tokenizer.batch_encode_plus(text1.tolist(),max_length=128,add_special_tokens=True, return_attention_mask=True,pad_to_max_length=True,truncation=True)
135 |
136 |
137 |
prediction_inputs1= torch.tensor(input_ids1)
138 |
prediction_masks1 = torch.tensor(attention_masks1)
139 |
# Set the batch size.
140 |
batch_size = 25
141 |
# Create the DataLoader.
142 |
prediction_data1 = TensorDataset(prediction_inputs1, prediction_masks1)
143 |
prediction_sampler1 = SequentialSampler(prediction_data1)
144 |
prediction_dataloader1 = DataLoader(prediction_data1, sampler=prediction_sampler1, batch_size=batch_size)
145 |
print('Predicting labels for {:,} test sentences...'.format(len(prediction_inputs1)))
146 |
# Pone el modelo en modo evaluación
147 |
148 |
# Variables de Seguimiento
149 |
predictions = []
150 |
# Predict
151 |
for batch in prediction_dataloader1:
152 |
batch = tuple( for t in batch)
153 |
# Descomprimir las entradas de nuestro cargador de datos
154 |
b_input_ids1, b_input_mask1 = batch
155 |
# Decirle al modelo que no calcule ni almacene gradientes, ahorrando memoria y # acelerando la predicción.
156 |
with torch.no_grad():
157 |
# Forward pass, calculate logit predictions
158 |
outputs1 = model(b_input_ids1, token_type_ids=None,attention_mask=b_input_mask1)
159 |
logits1 = outputs1[0]
160 |
# Move logits and labels to CPU
161 |
logits1 = logits1.detach().cpu().numpy()
162 |
# Store predictions and true labels
163 |
164 |
flat_predictions = [item for sublist in predictions for item in sublist]
165 |
flat_predictions = np.argmax(flat_predictions, axis=1).flatten()#p = [i for i in classifier(tweet_list)]
166 |
df = pd.DataFrame(list(zip(tweet_list, flat_predictions)),columns =['Últimos '+ str(number_of_tweets)+' Tweets'+' de '+search_words, 'Sexista'])
167 |
df['Sexista']= np.where(df['Sexista']== 0, 'No Sexistas', 'Sexistas')
168 |
169 |
st.table(df.reset_index(drop=True).head(20).style.applymap(color_survived, subset=['Sexista']))
170 |
171 |
58 |
text=" ".join(text.split())
59 |
return text
60 |
61 |
62 |
def highlight_survived(s):
    """Return one CSS background style per element of ``s``.

    Args:
        s: A row-like object exposing a ``Sexista`` attribute and ``len()``
            (for example a DataFrame row handed over by a pandas Styler).

    Returns:
        A list of ``len(s)`` identical style strings: red background when
        ``s.Sexista == 1``, green background otherwise.
    """
    color = 'red' if s.Sexista == 1 else 'green'
    return ['background-color: ' + color] * len(s)
64 |
86 |
font-size:16px ; font-family: 'Times New Roman'; color: #3358ff;}
87 |
</style> """, unsafe_allow_html=True)
88 |
89 |
90 |
91 |
92 |
93 |
def analizar_tweets(search_words, number_of_tweets):
    """Classify a user's most recent tweets as sexist / not sexist.

    Requests up to ``number_of_tweets`` tweets from the timeline of the
    account ``search_words``, cleans each one with ``preprocess_tweet``,
    scores them in batches with the module-level ``tokenizer`` and ``model``,
    renders the first 20 rows through ``st.table`` and returns the full table.

    Args:
        search_words: Screen name passed to ``api.user_timeline``.
        number_of_tweets: Maximum number of tweets to request.

    Returns:
        A DataFrame with columns ``Tweets`` (cleaned text), ``Sexista``
        ('No Sexista' / 'Sexista') and ``Probabilidad`` (softmax probability
        of the predicted class). The frame is empty, and nothing is rendered,
        when the timeline yields no tweets.
    """
    columns = ['Tweets', 'Sexista', 'Probabilidad']

    tweets = api.user_timeline(screen_name=search_words, count=number_of_tweets)
    tweet_list = [tweet.text for tweet in tweets]
    if not tweet_list:
        # Nothing to encode or score; avoid building tensors from empty input.
        return pd.DataFrame(columns=columns)

    cleaned = [preprocess_tweet(tweet) for tweet in tweet_list]

    # padding='max_length' is the current spelling of the deprecated
    # pad_to_max_length=True; every sequence ends up exactly 128 ids long.
    encoded = tokenizer.batch_encode_plus(
        cleaned,
        max_length=128,
        add_special_tokens=True,
        return_attention_mask=True,
        padding='max_length',
        truncation=True,
    )
    prediction_inputs = torch.tensor(encoded['input_ids'])
    prediction_masks = torch.tensor(encoded['attention_mask'])

    # Sequential sampling keeps the scores aligned with the order of `cleaned`.
    batch_size = 25
    prediction_data = TensorDataset(prediction_inputs, prediction_masks)
    prediction_dataloader = DataLoader(
        prediction_data,
        sampler=SequentialSampler(prediction_data),
        batch_size=batch_size,
    )

    # Run the batches on whatever device the model already lives on
    # (assumes `model` is a torch module with at least one parameter).
    device = next(model.parameters()).device

    # Put the model in evaluation mode.
    model.eval()

    batch_logits = []
    for batch in prediction_dataloader:
        b_input_ids, b_input_mask = (t.to(device) for t in batch)
        # No gradients are needed for inference; this saves memory and time.
        with torch.no_grad():
            outputs = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask)
        # Keep every batch: the scores must cover all tweets, not just the
        # last batch.
        batch_logits.append(outputs[0].detach().cpu())

    # Softmax turns the raw logits into class probabilities; argmax is
    # unaffected, so the predicted label is the same as with raw logits.
    probabilities = torch.softmax(torch.cat(batch_logits, dim=0), dim=1).numpy()
    flat_predictions = probabilities.argmax(axis=1)
    probability = probabilities.max(axis=1)

    df = pd.DataFrame(
        list(zip(cleaned, flat_predictions, probability)),
        columns=columns,
    )
    df['Sexista'] = np.where(df['Sexista'] == 0, 'No Sexista', 'Sexista')
    # 'RT|@' is a regular expression; pandas >= 2.0 treats the pattern as a
    # literal unless regex=True is passed explicitly.
    df['Tweets'] = df['Tweets'].str.replace('RT|@', '', regex=True)

    st.table(df.reset_index(drop=True).head(20).style.applymap(color_survived, subset=['Sexista']))

    return df
145 |
146 |
def analizar_frase(frase):
    """Classify a single sentence as sexist / not sexist.

    Encodes ``frase`` with the module-level ``tokenizer`` (special tokens
    included), runs one forward pass of ``model`` without gradients, renders
    the one-row result through ``st.table`` and returns it.

    Args:
        frase: The sentence to classify.

    Returns:
        A one-row DataFrame with columns ``Frase`` (the input), ``Prediccion``
        ('No Sexista' / 'Sexista') and ``Probabilidad`` (softmax probability
        of the predicted class).
    """
    # padding='max_length' is the current spelling of the deprecated
    # pad_to_max_length=True.
    encoded = tokenizer.batch_encode_plus(
        [frase],
        max_length=128,
        add_special_tokens=True,
        return_attention_mask=True,
        padding='max_length',
        truncation=True,
    )

    # Run on whatever device the model already lives on
    # (assumes `model` is a torch module with at least one parameter).
    device = next(model.parameters()).device
    input_ids = torch.tensor(encoded['input_ids']).to(device)
    attention_mask = torch.tensor(encoded['attention_mask']).to(device)

    # Put the model in evaluation mode.
    model.eval()
    # No gradients are needed for inference; this saves memory and time.
    with torch.no_grad():
        outputs = model(input_ids, token_type_ids=None, attention_mask=attention_mask)

    # One pass supplies both the label and its score, so the two cannot
    # disagree; softmax converts the raw logits into probabilities.
    probabilities = torch.softmax(outputs[0].detach().cpu(), dim=1)
    predicted_class = int(probabilities.argmax(dim=1).item())
    probabilidad = float(probabilities.max(dim=1).values.item())

    # Build the one-row result table.
    text = pd.DataFrame({
        'Frase': [frase],
        'Prediccion': ['No Sexista' if predicted_class == 0 else 'Sexista'],
        'Probabilidad': [probabilidad],
    })

    # Style the column that actually exists in this table.
    st.table(text.reset_index(drop=True).head(20).style.applymap(color_survived, subset=['Prediccion']))

    return text
200 |
201 |
202 |
def run():
203 |
with st.form("my_form"):
220 |
221 |
if (error == False):
222 |
if (termino):
223 |
224 |
225 |
elif (usuario):
226 |
227 |
228 |
229 |