Spaces:
Sleeping
Sleeping
| import numpy as np | |
| import nltk | |
| from nltk.corpus import sentiwordnet as swn | |
| from nltk.corpus import stopwords | |
def flatten(l):
    """Return one flat list holding the items of every sub-iterable in ``l`` (one level deep)."""
    merged = []
    for sublist in l:
        merged.extend(sublist)
    return merged
# Maps the first two characters of a POS tag (as produced by nltk.pos_tag_sents
# in get_sentiment) to the part-of-speech letter passed to swn.senti_synsets.
# Tags whose prefix is missing here are skipped by get_sentiment.
tagsswn = dict(
    NN="n",
    VB="v",
    JJ="a",
    RB="r",
)
def get_sentiment(aval, stopwords):
    """
    Compute the SentiWordNet sentiment totals of a text.

    The text is split into sentences, tokenized and POS-tagged. Tokens whose
    lowercase form is in ``stopwords``, whose tag prefix has no entry in
    ``tagsswn``, or that have no SentiWordNet synset are ignored. Each
    remaining token contributes the scores of its first synset only.

    Args:
        aval (str): Text to analyse.
        stopwords: Container of lowercase words to skip (membership-tested).

    Returns:
        tuple: ``(positive_total, negative_total)``. Each total is ``0`` when
        no token was scored.
    """
    positives = []
    negatives = []

    tokenized = [nltk.word_tokenize(sent) for sent in nltk.sent_tokenize(aval)]
    for token, tag in flatten(nltk.pos_tag_sents(tokenized)):
        lowered = token.lower()
        if lowered in stopwords:
            continue

        # Only the first two characters of the tag are used for the lookup.
        wordnet_pos = tagsswn.get(tag[:2])
        if not wordnet_pos:
            continue

        candidates = list(swn.senti_synsets(lowered, wordnet_pos))
        if not candidates:
            continue

        # Score the first synset returned for the word; the others are ignored.
        first = candidates[0]
        positives.append(first.pos_score())
        negatives.append(first.neg_score())

    if positives:
        total_pos = np.sum(positives)
    else:
        total_pos = 0
    if negatives:
        total_neg = np.sum(negatives)
    else:
        total_neg = 0
    return total_pos, total_neg
def classify_sentiment(aval, stopwords):
    """
    Label a text as positive or negative from its sentiment totals.

    Args:
        aval (str): Text to classify.
        stopwords: Container of lowercase words forwarded to ``get_sentiment``.

    Returns:
        str: "positive" when the positive total is strictly greater than the
        negative total, "negative" otherwise (ties included).
    """
    positive_total, negative_total = get_sentiment(aval, stopwords)
    if positive_total > negative_total:
        return "positive"
    return "negative"
def wordnet_pipeline(df, column):
    """
    Classify every text in ``df[column]`` as "positive" or "negative".

    Downloads the NLTK resources used by the tokenizer, POS tagger, stopword
    list and SentiWordNet lookup, then runs ``classify_sentiment`` on each
    entry with the English stopword set.

    Args:
        df: Object indexable by ``column`` that yields the texts when iterated
            (presumably a pandas DataFrame -- confirm against the caller).
        column: Key of the column holding the texts.

    Returns:
        list: One label per entry of ``df[column]``, in iteration order.
    """
    resources = (
        'sentiwordnet',
        'wordnet',
        'stopwords',
        'punkt',
        'averaged_perceptron_tagger',
        # Newer NLTK releases look up these resource names instead of the two
        # legacy ones above; without them sent_tokenize / pos_tag_sents raise
        # LookupError. Fetching both sets keeps old and new versions working.
        'punkt_tab',
        'averaged_perceptron_tagger_eng',
    )
    for resource in resources:
        nltk.download(resource)

    # Build the stopword set once so every review shares the same O(1) lookup.
    stpwrds = set(stopwords.words("english"))
    return [classify_sentiment(review, stpwrds) for review in df[column]]