Spaces:

stevillis
/

bertimbau-finetuned-glassdoor-reviews

Running

App Files Files Community

bertimbau-finetuned-glassdoor-reviews / app.py

stevillis

feat: create Glassdoor reviews sentiment classification app

78b7771 20 days ago

raw

history blame contribute delete

3.05 kB

	"""
	This module provides a Streamlit web application for classifying Glassdoor
	reviews into sentiment categories using a fine-tuned BERT model.
	"""

	import numpy as np
	import streamlit as st
	import torch
	from transformers import BertTokenizer

	from config import (BERTIMBAU_FINETUNED_MODEL, BERTIMBAU_MODEL,
	SENTIMENT_MAPPING)
	from glassdoor_reviews_classifier import GlassdoorReviewsClassifier


	@st.cache_resource
	def _load_model_cached():
	    """
	    Builds the classifier and loads the fine-tuned weights (cached).

	    Any exception propagates to the caller, so only a successfully loaded
	    model is stored by ``st.cache_resource``.

	    Returns:
	    model (GlassdoorReviewsClassifier): The loaded model, in eval mode.
	    """
	    model = GlassdoorReviewsClassifier().to(device)
	    # NOTE(review): torch.load unpickles the checkpoint; if the installed
	    # torch version supports it, consider weights_only=True - TODO confirm.
	    model.load_state_dict(
	        torch.load(BERTIMBAU_FINETUNED_MODEL, map_location=device)
	    )

	    model.eval()

	    return model


	def load_model():
	    """
	    Loads the fine-tuned BERT model for sentiment classification.

	    The load itself is delegated to a cached helper. Failures are handled
	    here, outside the cache, so a failed attempt is reported but not
	    memoized: the next script run tries again instead of getting a cached
	    ``None``.

	    Returns:
	    model (GlassdoorReviewsClassifier): The loaded model, or None if
	    loading failed (the error is shown with ``st.error``).
	    """
	    try:
	        return _load_model_cached()
	    except Exception as e:
	        # UI boundary: report the problem in the app rather than crash here.
	        st.error(f"Error loading model: {e}")
	        return None


	@st.cache_resource
	def load_tokenizer():
	    """
	    Creates the BERT tokenizer used to encode review text.

	    The tokenizer is obtained with ``BertTokenizer.from_pretrained`` using
	    the ``BERTIMBAU_MODEL`` identifier from the config module.

	    Returns:
	    tokenizer (BertTokenizer): The loaded tokenizer.
	    """
	    bert_tokenizer = BertTokenizer.from_pretrained(BERTIMBAU_MODEL)
	    return bert_tokenizer


	def predict_sentiment(text):
	    """
	    Runs the classifier on one piece of text.

	    The text is encoded with the module-level ``tokenizer`` (padded and
	    truncated to 512 tokens), moved to the module-level ``device``, and
	    passed through the module-level ``model`` with gradients disabled.
	    A softmax over dimension 1 is applied to the model output.

	    Args:
	    text (str): The input text to classify.

	    Returns:
	    np.ndarray: The predicted probabilities for each sentiment class.
	    """
	    tokenizer_options = {
	        "max_length": 512,
	        "add_special_tokens": True,
	        "return_token_type_ids": False,
	        "padding": "max_length",
	        "truncation": True,
	        "return_attention_mask": True,
	        "return_tensors": "pt",
	    }
	    encoding = tokenizer(text=text, **tokenizer_options)

	    ids = encoding["input_ids"].to(device)
	    mask = encoding["attention_mask"].to(device)

	    # Inference only: no autograd bookkeeping is needed.
	    with torch.no_grad():
	        logits = model(ids, mask)
	        probs = torch.nn.functional.softmax(logits, dim=1)
	        batches = [probs.cpu().numpy()]

	    return np.concatenate(batches, axis=0)


	def get_sentiment_and_score(user_input):
	    """
	    Classifies the user's text and reports the winning label with its score.

	    Calls ``predict_sentiment`` and takes the position of the largest
	    probability (``np.argmax`` over the whole array) as the lookup key
	    into ``SENTIMENT_MAPPING``. The largest probability is the score.

	    Args:
	    user_input (str): The input text from the user.

	    Returns:
	    tuple: ``(sentiment, score)``. ``sentiment`` is whatever
	    ``SENTIMENT_MAPPING.get`` returns for the winning index (None when the
	    index is not in the mapping).
	    """
	    probabilities = predict_sentiment(user_input)

	    best_index = np.argmax(probabilities)
	    best_score = np.max(probabilities)

	    return SENTIMENT_MAPPING.get(best_index), best_score


	if __name__ == "__main__":
	    # Script entry point. `device`, `model` and `tokenizer` are assigned at
	    # module level because the functions above read them as globals.
	    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

	    model = load_model()
	    if model is None:
	        # load_model() has already reported the failure with st.error.
	        # Halt this run so inference is never attempted with a missing model.
	        st.stop()

	    tokenizer = load_tokenizer()

	    user_input = st.text_input("Glassdoor Review Text")

	    # Skip empty and whitespace-only input: there is nothing to classify.
	    if user_input and user_input.strip():
	        predicted_sentiment, sentiment_score = get_sentiment_and_score(user_input)

	        st.write(
	            f"Sentiment: {predicted_sentiment}, Score: {sentiment_score:.4f} "
	        )