Spaces:
Runtime error
Runtime error
Commit
·
a76f382
1
Parent(s):
2511137
Update app.py
Browse files
app.py
CHANGED
@@ -9,12 +9,16 @@ import geopy
|
|
9 |
|
10 |
from pysentimiento.preprocessing import preprocess_tweet
|
11 |
from geopy.geocoders import Nominatim
|
|
|
12 |
|
13 |
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
|
14 |
from transformers import AutoTokenizer, AutoModelForSequenceClassification,AdamW
|
15 |
tokenizer = AutoTokenizer.from_pretrained('hackathon-pln-es/twitter_sexismo-finetuned-robertuito-exist2021')
|
16 |
model = AutoModelForSequenceClassification.from_pretrained("hackathon-pln-es/twitter_sexismo-finetuned-robertuito-exist2021")
|
17 |
|
|
|
|
|
|
|
18 |
import torch
|
19 |
if torch.cuda.is_available():
|
20 |
device = torch.device( "cuda")
|
@@ -148,57 +152,19 @@ def analizar_tweets(search_words, number_of_tweets ):
|
|
148 |
def analizar_frase(frase):
|
149 |
#palabra = frase.split()
|
150 |
palabra = [frase]
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
pad_to_max_length=True,
|
155 |
-
truncation=True)
|
156 |
-
input_ids1=indices1["input_ids"]
|
157 |
-
attention_masks1=indices1["attention_mask"]
|
158 |
-
prediction_inputs1= torch.tensor(input_ids1)
|
159 |
-
prediction_masks1 = torch.tensor(attention_masks1)
|
160 |
-
batch_size = 25
|
161 |
-
prediction_data1 = TensorDataset(prediction_inputs1, prediction_masks1)
|
162 |
-
prediction_sampler1 = SequentialSampler(prediction_data1)
|
163 |
-
prediction_dataloader1 = DataLoader(prediction_data1, sampler=prediction_sampler1, batch_size=batch_size)
|
164 |
-
model.eval()
|
165 |
-
predictions = []
|
166 |
-
# Predict
|
167 |
-
for batch in prediction_dataloader1:
|
168 |
-
batch = tuple(t.to(device) for t in batch)
|
169 |
-
# Unpack the inputs from our dataloader
|
170 |
-
b_input_ids1, b_input_mask1 = batch
|
171 |
-
# Telling the model not to compute or store gradients, saving memory and # speeding up prediction
|
172 |
-
with torch.no_grad():
|
173 |
-
# Forward pass, calculate logit predictions
|
174 |
-
outputs1 = model(b_input_ids1, token_type_ids=None,attention_mask=b_input_mask1)
|
175 |
-
logits1 = outputs1[0]
|
176 |
-
# Move logits and labels to CPU
|
177 |
-
logits1 = logits1.detach().cpu().numpy()
|
178 |
-
# Store predictions and true labels
|
179 |
-
predictions.append(logits1)
|
180 |
-
flat_predictions = [item for sublist in predictions for item in sublist]
|
181 |
-
flat_predictions = np.argmax(flat_predictions, axis=1).flatten()
|
182 |
-
tokens = tokenizer.tokenize(frase)
|
183 |
-
# Convertir los tokens a un formato compatible con el modelo
|
184 |
-
input_ids = tokenizer.convert_tokens_to_ids(tokens)
|
185 |
-
attention_masks = [1] * len(input_ids)
|
186 |
-
|
187 |
-
# Pasar los tokens al modelo
|
188 |
-
outputs = model(torch.tensor([input_ids]), token_type_ids=None, attention_mask=torch.tensor([attention_masks]))
|
189 |
-
scores = outputs[0]
|
190 |
-
#prediccion = scores.argmax(dim=1).item()
|
191 |
-
# Obtener la probabilidad de que la frase sea "sexista"
|
192 |
-
probabilidad_sexista = scores.amax(dim=1).item()
|
193 |
-
#print(probabilidad_sexista)
|
194 |
-
|
195 |
-
# Crear un Dataframe
|
196 |
-
text= pd.DataFrame({'Frase': [frase], 'Prediccion':[flat_predictions], 'Probabilidad':[probabilidad_sexista]})
|
197 |
-
text['Prediccion'] = np.where(text['Prediccion'] == 0 , 'No Sexista', 'Sexista')
|
198 |
|
|
|
|
|
199 |
|
200 |
-
|
201 |
-
|
|
|
|
|
|
|
|
|
202 |
return tabla
|
203 |
|
204 |
def tweets_localidad(buscar_localidad):
|
|
|
9 |
|
10 |
from pysentimiento.preprocessing import preprocess_tweet
|
11 |
from geopy.geocoders import Nominatim
|
12 |
+
from transformers import pipeline
|
13 |
|
14 |
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
|
15 |
from transformers import AutoTokenizer, AutoModelForSequenceClassification,AdamW
|
16 |
tokenizer = AutoTokenizer.from_pretrained('hackathon-pln-es/twitter_sexismo-finetuned-robertuito-exist2021')
|
17 |
model = AutoModelForSequenceClassification.from_pretrained("hackathon-pln-es/twitter_sexismo-finetuned-robertuito-exist2021")
|
18 |
|
19 |
+
model_checkpoint = "hackathon-pln-es/twitter_sexismo-finetuned-robertuito-exist2021"
|
20 |
+
pipeline_nlp = pipeline("text-classification", model=model_checkpoint)
|
21 |
+
|
22 |
import torch
|
23 |
if torch.cuda.is_available():
|
24 |
device = torch.device( "cuda")
|
|
|
152 |
def analizar_frase(frase):
    """Classify a single sentence and display the result as a table.

    Runs the module-level ``pipeline_nlp`` text-classification pipeline on
    ``frase``, collects each prediction into a DataFrame with the columns
    ``text``, ``label`` and ``score``, and renders that DataFrame.

    Args:
        frase: The sentence to classify.

    Returns:
        The value returned by ``st.table`` for the rendered DataFrame.
    """
    # The pipeline is fed a one-element list so that it yields a list of
    # prediction dicts (each read below through its 'label'/'score' keys).
    palabra = [frase]

    # Run inference once and reuse the result for both logging and the table;
    # calling the pipeline a second time only repeated the same forward pass.
    predictions = pipeline_nlp(palabra)
    print(predictions)

    # Convert the predictions into a list of row dictionaries. The 'text'
    # cell holds the sentence itself, not the wrapper list.
    data = [
        {'text': frase, 'label': prediction['label'], 'score': prediction['score']}
        for prediction in predictions
    ]

    # Build a DataFrame from the list of dictionaries.
    df = pd.DataFrame(data)

    # Display the DataFrame. Assigning the bare ``st.text`` attribute returned
    # a function object and never showed ``df``.
    # NOTE(review): assumes ``st`` is the Streamlit module imported elsewhere
    # in this file and that callers only need the rendered element - confirm.
    tabla = st.table(df)

    return tabla
|
169 |
|
170 |
def tweets_localidad(buscar_localidad):
|