Spaces:

Joshua1808
/

PaginaWeb

Runtime error

App Files Files Community

Joshua1808 committed on Jan 31, 2023

Commit

a76f382

1 Parent(s): 2511137

Update app.py

Browse files

Files changed (1) hide show

app.py +15 -49

app.py CHANGED Viewed

@@ -9,12 +9,16 @@ import geopy
 from pysentimiento.preprocessing import preprocess_tweet
 from geopy.geocoders import Nominatim
 from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
 from transformers import AutoTokenizer, AutoModelForSequenceClassification,AdamW
 tokenizer = AutoTokenizer.from_pretrained('hackathon-pln-es/twitter_sexismo-finetuned-robertuito-exist2021')
 model = AutoModelForSequenceClassification.from_pretrained("hackathon-pln-es/twitter_sexismo-finetuned-robertuito-exist2021")
 import torch
 if torch.cuda.is_available():
     device = torch.device(	"cuda")
@@ -148,57 +152,19 @@ def analizar_tweets(search_words, number_of_tweets ):
 def analizar_frase(frase):
   #palabra = frase.split()
   palabra = [frase]
-  indices1=tokenizer.batch_encode_plus(palabra,max_length=128,add_special_tokens=True,
-                                         return_attention_mask=True,
-                                         pad_to_max_length=True,
-                                         truncation=True)
-  input_ids1=indices1["input_ids"]
-  attention_masks1=indices1["attention_mask"]
-  prediction_inputs1= torch.tensor(input_ids1)
-  prediction_masks1 = torch.tensor(attention_masks1)
-  batch_size = 25
-  prediction_data1 = TensorDataset(prediction_inputs1, prediction_masks1)
-  prediction_sampler1 = SequentialSampler(prediction_data1)
-  prediction_dataloader1 = DataLoader(prediction_data1, sampler=prediction_sampler1, batch_size=batch_size)
-  model.eval()
-  predictions = []
-  # Predict
-  for batch in prediction_dataloader1:
-    batch = tuple(t.to(device) for t in batch)
-    # Unpack the inputs from our dataloader
-    b_input_ids1, b_input_mask1 = batch
-    # Telling the model not to compute or store gradients, saving memory and   # speeding up prediction
-    with torch.no_grad():
-      # Forward pass, calculate logit predictions
-      outputs1 = model(b_input_ids1, token_type_ids=None,attention_mask=b_input_mask1)
-    logits1 = outputs1[0]
-    # Move logits and labels to CPU
-    logits1 = logits1.detach().cpu().numpy()
-    # Store predictions and true labels
-    predictions.append(logits1)
-  flat_predictions = [item for sublist in predictions for item in sublist]
-  flat_predictions = np.argmax(flat_predictions, axis=1).flatten()
-  tokens = tokenizer.tokenize(frase)
-  # Convertir los tokens a un formato compatible con el modelo
-  input_ids = tokenizer.convert_tokens_to_ids(tokens)
-  attention_masks = [1] * len(input_ids)
-  # Pasar los tokens al modelo
-  outputs = model(torch.tensor([input_ids]), token_type_ids=None, attention_mask=torch.tensor([attention_masks]))
-  scores = outputs[0]
-  #prediccion = scores.argmax(dim=1).item()
-  # Obtener la probabilidad de que la frase sea "sexista"
-  probabilidad_sexista = scores.amax(dim=1).item()
-  #print(probabilidad_sexista)
-  # Crear un Dataframe
-  text= pd.DataFrame({'Frase': [frase], 'Prediccion':[flat_predictions], 'Probabilidad':[probabilidad_sexista]})
-  text['Prediccion'] = np.where(text['Prediccion'] == 0 , 'No Sexista', 'Sexista')
-  tabla = st.table(text.reset_index(drop=True).head(30).style.applymap(color_survived, subset=['Prediccion']))
   return tabla
 def tweets_localidad(buscar_localidad):

 from pysentimiento.preprocessing import preprocess_tweet
 from geopy.geocoders import Nominatim
+from transformers import pipeline
 from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
 from transformers import AutoTokenizer, AutoModelForSequenceClassification,AdamW
 tokenizer = AutoTokenizer.from_pretrained('hackathon-pln-es/twitter_sexismo-finetuned-robertuito-exist2021')
 model = AutoModelForSequenceClassification.from_pretrained("hackathon-pln-es/twitter_sexismo-finetuned-robertuito-exist2021")
+model_checkpoint = "hackathon-pln-es/twitter_sexismo-finetuned-robertuito-exist2021"
+pipeline_nlp = pipeline("text-classification", model=model_checkpoint)
 import torch
 if torch.cuda.is_available():
     device = torch.device(	"cuda")
 def analizar_frase(frase):
   #palabra = frase.split()
   palabra = [frase]
+  labels = pipeline_nlp (palabra)
+  print(labels)
+  predictions = pipeline_nlp(palabra)
+  # convierte las predicciones en una lista de diccionarios
+  data = [{'text': palabra, 'label': prediction['label'], 'score': prediction['score']} for prediction in predictions]
+  # crea un DataFrame a partir de la lista de diccionarios
+  df = pd.DataFrame(data)
+  # muestra el DataFrame
+  tabla = st.text
   return tabla
 def tweets_localidad(buscar_localidad):