# Spaces: Sleeping (page-status residue captured with the source; not part of the program)
import nltk | |
from nltk.tokenize import word_tokenize | |
from nltk.corpus import stopwords | |
import string | |
from transformers import BertTokenizer, TFBertForSequenceClassification, TextClassificationPipeline | |
import tensorflow as tf | |
# Download the NLTK resources this module relies on. These calls run each
# time the module is imported.
nltk.download('punkt')
# Newer NLTK releases load the Punkt tokenizer data from 'punkt_tab', so
# word_tokenize raises LookupError there unless this package is present too.
nltk.download('punkt_tab')
nltk.download('stopwords')

# Lookup sets used to filter tokens during preprocessing: English stopwords
# and the single punctuation characters from string.punctuation.
stop_words = set(stopwords.words('english'))
punctuations = set(string.punctuation)
def preprocess_text(text):
    """Lowercase and tokenize text, dropping stopwords and punctuation tokens.

    The input is coerced with ``str`` first, so non-string values are
    accepted. Tokens found in the module-level ``stop_words`` or
    ``punctuations`` sets are discarded; the remaining tokens are joined
    back together with single spaces.
    """
    lowered = str(text).lower()

    kept = []
    for word in word_tokenize(lowered):
        # Skip anything that is a stopword or a punctuation token.
        if word in stop_words or word in punctuations:
            continue
        kept.append(word)

    return ' '.join(kept)
# Checkpoint identifier shared by the tokenizer and the classification model.
_SENTIMENT_CHECKPOINT = 'mainakhf/bert-base-uncased-sentiment-analysis'

# Load tokenizer
bert_tokenizer = BertTokenizer.from_pretrained(_SENTIMENT_CHECKPOINT)
# Load model
bert_model = TFBertForSequenceClassification.from_pretrained(_SENTIMENT_CHECKPOINT)
def Get_sentiment(Review, Tokenizer=bert_tokenizer, Model=bert_model):
    """Predict a sentiment label for one or more reviews.

    Args:
        Review: A single review or a list of reviews. A value that is not
            a list is wrapped in a one-element list before classification.
        Tokenizer: Tokenizer passed to the classification pipeline.
            Defaults to the module-level ``bert_tokenizer``.
        Model: Sequence-classification model passed to the pipeline.
            Defaults to the module-level ``bert_model``.

    Returns:
        A list with one label ("Negative" or "Positive") per review, in
        input order. A single review therefore yields a one-element list,
        and an empty list yields an empty list.
    """
    reviews = Review if isinstance(Review, list) else [Review]
    if not reviews:
        # Nothing to classify; avoid running the pipeline on empty input.
        return []

    # Give the class indices readable names so the pipeline reports
    # "Negative"/"Positive" as the label. Note this updates the config of
    # the model object that was passed in.
    Model.config.id2label = {0: "Negative", 1: "Positive"}

    # Honour the caller-supplied tokenizer and model rather than always
    # falling back to the module-level objects.
    classifier = TextClassificationPipeline(model=Model, tokenizer=Tokenizer)

    # One label per input review, not just the first prediction.
    return [prediction['label'] for prediction in classifier(reviews)]