"""
This module provides a Streamlit web application for classifying Glassdoor
reviews into sentiment categories using a fine-tuned BERT model.
"""
import numpy as np
import streamlit as st
import torch
from transformers import BertTokenizer
from config import (BERTIMBAU_FINETUNED_MODEL, BERTIMBAU_MODEL,
                    SENTIMENT_MAPPING)
from glassdoor_reviews_classifier import GlassdoorReviewsClassifier
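
# For reference, config.py is expected to provide the names imported above.
# A hypothetical example (the real values live in the repository's config module):
#
#     BERTIMBAU_MODEL = "neuralmind/bert-base-portuguese-cased"
#     BERTIMBAU_FINETUNED_MODEL = "models/bertimbau_finetuned.pt"
#     SENTIMENT_MAPPING = {0: "negative", 1: "neutral", 2: "positive"}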


@st.cache_resource
def load_model():
    """
    Loads the fine-tuned BERT model for sentiment classification.

    Returns:
        model (GlassdoorReviewsClassifier): The loaded model, or None if
            loading fails.
    """
    try:
        model = GlassdoorReviewsClassifier().to(device)
        model.load_state_dict(
            torch.load(BERTIMBAU_FINETUNED_MODEL, map_location=device)
        )
        model.eval()
        return model
    except Exception as e:
        st.error(f"Error loading model: {e}")
        return None


@st.cache_resource
def load_tokenizer():
    """
    Loads the BERT tokenizer.

    Returns:
        tokenizer (BertTokenizer): The loaded tokenizer.
    """
    return BertTokenizer.from_pretrained(BERTIMBAU_MODEL)


def predict_sentiment(text):
    """
    Predicts the sentiment of a given text.

    Args:
        text (str): The input text to classify.

    Returns:
        np.ndarray: The predicted probabilities for each sentiment class,
            with one row per input.
    """
    encoded_text = tokenizer(
        text=text,
        max_length=512,
        add_special_tokens=True,
        return_token_type_ids=False,
        padding="max_length",
        truncation=True,
        return_attention_mask=True,
        return_tensors="pt",
    )
    input_ids = encoded_text["input_ids"].to(device)
    attention_mask = encoded_text["attention_mask"].to(device)
    with torch.no_grad():
        output = model(input_ids, attention_mask)
        probabilities = torch.nn.functional.softmax(output, dim=1)
    return probabilities.cpu().numpy()
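
# Illustrative call (hypothetical values; the number of classes depends on the
# fine-tuned classifier head and SENTIMENT_MAPPING):
#
#     predict_sentiment("Ótimo ambiente de trabalho")
#     # -> array([[0.02, 0.08, 0.90]])  # one row of class probabilities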


def get_sentiment_and_score(user_input):
    """
    Gets the sentiment and score for a given user input.

    Args:
        user_input (str): The input text from the user.

    Returns:
        tuple: The predicted sentiment and its corresponding score.
    """
    output_probabilities = predict_sentiment(user_input)
    predicted_index = np.argmax(output_probabilities)
    predicted_sentiment = SENTIMENT_MAPPING.get(predicted_index)
    sentiment_score = np.max(output_probabilities)
    return predicted_sentiment, sentiment_score
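
# Illustrative call (hypothetical output; the label string comes from SENTIMENT_MAPPING):
#
#     get_sentiment_and_score("Great benefits, poor management")
#     # -> ("neutral", 0.87)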


if __name__ == "__main__":
    # Module-level names used by the functions above.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = load_model()
    tokenizer = load_tokenizer()

    if model is None:
        # load_model already reported the error; stop rendering the rest of the app.
        st.stop()

    user_input = st.text_input("Glassdoor Review Text")
    if user_input:
        predicted_sentiment, sentiment_score = get_sentiment_and_score(user_input)
        st.write(
            f"**Sentiment:** {predicted_sentiment}, **Score:** {sentiment_score:.4f}"
        )