Joshua1808 committed on
Commit
a76f382
·
1 Parent(s): 2511137

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -49
app.py CHANGED
@@ -9,12 +9,16 @@ import geopy
9
 
10
  from pysentimiento.preprocessing import preprocess_tweet
11
  from geopy.geocoders import Nominatim
 
12
 
13
  from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
14
  from transformers import AutoTokenizer, AutoModelForSequenceClassification,AdamW
15
  tokenizer = AutoTokenizer.from_pretrained('hackathon-pln-es/twitter_sexismo-finetuned-robertuito-exist2021')
16
  model = AutoModelForSequenceClassification.from_pretrained("hackathon-pln-es/twitter_sexismo-finetuned-robertuito-exist2021")
17
 
 
 
 
18
  import torch
19
  if torch.cuda.is_available():
20
  device = torch.device( "cuda")
@@ -148,57 +152,19 @@ def analizar_tweets(search_words, number_of_tweets ):
148
  def analizar_frase(frase):
149
  #palabra = frase.split()
150
  palabra = [frase]
151
-
152
- indices1=tokenizer.batch_encode_plus(palabra,max_length=128,add_special_tokens=True,
153
- return_attention_mask=True,
154
- pad_to_max_length=True,
155
- truncation=True)
156
- input_ids1=indices1["input_ids"]
157
- attention_masks1=indices1["attention_mask"]
158
- prediction_inputs1= torch.tensor(input_ids1)
159
- prediction_masks1 = torch.tensor(attention_masks1)
160
- batch_size = 25
161
- prediction_data1 = TensorDataset(prediction_inputs1, prediction_masks1)
162
- prediction_sampler1 = SequentialSampler(prediction_data1)
163
- prediction_dataloader1 = DataLoader(prediction_data1, sampler=prediction_sampler1, batch_size=batch_size)
164
- model.eval()
165
- predictions = []
166
- # Predict
167
- for batch in prediction_dataloader1:
168
- batch = tuple(t.to(device) for t in batch)
169
- # Unpack the inputs from our dataloader
170
- b_input_ids1, b_input_mask1 = batch
171
- # Telling the model not to compute or store gradients, saving memory and # speeding up prediction
172
- with torch.no_grad():
173
- # Forward pass, calculate logit predictions
174
- outputs1 = model(b_input_ids1, token_type_ids=None,attention_mask=b_input_mask1)
175
- logits1 = outputs1[0]
176
- # Move logits and labels to CPU
177
- logits1 = logits1.detach().cpu().numpy()
178
- # Store predictions and true labels
179
- predictions.append(logits1)
180
- flat_predictions = [item for sublist in predictions for item in sublist]
181
- flat_predictions = np.argmax(flat_predictions, axis=1).flatten()
182
- tokens = tokenizer.tokenize(frase)
183
- # Convertir los tokens a un formato compatible con el modelo
184
- input_ids = tokenizer.convert_tokens_to_ids(tokens)
185
- attention_masks = [1] * len(input_ids)
186
-
187
- # Pasar los tokens al modelo
188
- outputs = model(torch.tensor([input_ids]), token_type_ids=None, attention_mask=torch.tensor([attention_masks]))
189
- scores = outputs[0]
190
- #prediccion = scores.argmax(dim=1).item()
191
- # Obtener la probabilidad de que la frase sea "sexista"
192
- probabilidad_sexista = scores.amax(dim=1).item()
193
- #print(probabilidad_sexista)
194
-
195
- # Crear un Dataframe
196
- text= pd.DataFrame({'Frase': [frase], 'Prediccion':[flat_predictions], 'Probabilidad':[probabilidad_sexista]})
197
- text['Prediccion'] = np.where(text['Prediccion'] == 0 , 'No Sexista', 'Sexista')
198
 
 
 
199
 
200
- tabla = st.table(text.reset_index(drop=True).head(30).style.applymap(color_survived, subset=['Prediccion']))
201
-
 
 
 
 
202
  return tabla
203
 
204
  def tweets_localidad(buscar_localidad):
 
9
 
10
  from pysentimiento.preprocessing import preprocess_tweet
11
  from geopy.geocoders import Nominatim
12
+ from transformers import pipeline
13
 
14
  from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
15
  from transformers import AutoTokenizer, AutoModelForSequenceClassification,AdamW
16
  tokenizer = AutoTokenizer.from_pretrained('hackathon-pln-es/twitter_sexismo-finetuned-robertuito-exist2021')
17
  model = AutoModelForSequenceClassification.from_pretrained("hackathon-pln-es/twitter_sexismo-finetuned-robertuito-exist2021")
18
 
19
+ model_checkpoint = "hackathon-pln-es/twitter_sexismo-finetuned-robertuito-exist2021"
20
+ pipeline_nlp = pipeline("text-classification", model=model_checkpoint)
21
+
22
  import torch
23
  if torch.cuda.is_available():
24
  device = torch.device( "cuda")
 
152
  def analizar_frase(frase):
153
  #palabra = frase.split()
154
  palabra = [frase]
155
+ labels = pipeline_nlp (palabra)
156
+ print(labels)
157
+ predictions = pipeline_nlp(palabra)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
158
 
159
+ # convierte las predicciones en una lista de diccionarios
160
+ data = [{'text': palabra, 'label': prediction['label'], 'score': prediction['score']} for prediction in predictions]
161
 
162
+ # crea un DataFrame a partir de la lista de diccionarios
163
+ df = pd.DataFrame(data)
164
+
165
+ # muestra el DataFrame
166
+ tabla = st.text
167
+
168
  return tabla
169
 
170
  def tweets_localidad(buscar_localidad):