stevillis's picture
feat: create Glassdoor reviews sentiment classification app
78b7771
"""
This module provides a Streamlit web application for classifying Glassdoor
reviews into sentiment categories using a fine-tuned BERT model.
"""
import numpy as np
import streamlit as st
import torch
from transformers import BertTokenizer
from config import (BERTIMBAU_FINETUNED_MODEL, BERTIMBAU_MODEL,
SENTIMENT_MAPPING)
from glassdoor_reviews_classifier import GlassdoorReviewsClassifier
@st.cache_resource
def load_model():
    """
    Build the sentiment classifier and restore its fine-tuned weights.

    The classifier is moved to the module-level ``device``, its state dict is
    read from ``BERTIMBAU_FINETUNED_MODEL`` and it is switched to eval mode.

    Returns:
        model (GlassdoorReviewsClassifier | None): The ready-to-use model, or
        ``None`` when any step fails (the error is shown with ``st.error``).
    """
    try:
        classifier = GlassdoorReviewsClassifier().to(device)
        state_dict = torch.load(BERTIMBAU_FINETUNED_MODEL, map_location=device)
        classifier.load_state_dict(state_dict)
        classifier.eval()
    except Exception as err:
        # Report the failure in the UI instead of crashing; callers get None.
        st.error(f"Error loading model: {err}")
        return None

    return classifier
@st.cache_resource
def load_tokenizer():
    """
    Create the BERT tokenizer from the ``BERTIMBAU_MODEL`` identifier.

    Returns:
        tokenizer (BertTokenizer): The tokenizer returned by
        ``BertTokenizer.from_pretrained``.
    """
    bert_tokenizer = BertTokenizer.from_pretrained(BERTIMBAU_MODEL)
    return bert_tokenizer
def predict_sentiment(text):
    """
    Predicts the sentiment of a given text.

    The text is tokenized (truncated and padded to 512 tokens), passed through
    the module-level ``model`` with gradient tracking disabled, and the model
    output is converted to probabilities with a softmax over dim 1.

    Args:
        text (str): The input text to classify.

    Returns:
        np.ndarray: The predicted probabilities for each sentiment class.
    """
    encoded_text = tokenizer(
        text=text,
        max_length=512,
        add_special_tokens=True,
        return_token_type_ids=False,
        padding="max_length",
        truncation=True,
        return_attention_mask=True,
        return_tensors="pt",
    )

    # Inputs must live on the same device as the model.
    input_ids = encoded_text["input_ids"].to(device)
    attention_mask = encoded_text["attention_mask"].to(device)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        output = model(input_ids, attention_mask)
        probabilities = torch.nn.functional.softmax(output, dim=1)

    # A single forward pass yields a single array, so it is returned directly
    # rather than being appended to a list and concatenated back.
    return probabilities.cpu().numpy()
def get_sentiment_and_score(user_input):
    """
    Classify the user's text and report the winning class with its score.

    Args:
        user_input (str): The input text from the user.

    Returns:
        tuple: ``(sentiment, score)`` where ``sentiment`` is the
        ``SENTIMENT_MAPPING`` entry for the highest-probability index (``None``
        if the index is not in the mapping) and ``score`` is that highest
        probability.
    """
    class_probabilities = predict_sentiment(user_input)

    # argmax/max operate on the flattened probabilities array.
    best_index = np.argmax(class_probabilities)
    return SENTIMENT_MAPPING.get(best_index), np.max(class_probabilities)
if __name__ == "__main__":
    # Run on the GPU when CUDA is available, otherwise fall back to the CPU.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model = load_model()
    if model is None:
        # load_model() returns None after reporting the failure with st.error.
        # Halt here so a later model(...) call does not raise a TypeError.
        st.stop()

    tokenizer = load_tokenizer()

    user_input = st.text_input("Glassdoor Review Text")

    # Only classify once the user has actually typed something.
    if user_input:
        predicted_sentiment, sentiment_score = get_sentiment_and_score(user_input)
        st.write(
            f"**Sentiment:** {predicted_sentiment}, **Score:** {sentiment_score:.4f} "
        )