import streamlit as st
import joblib
import re
import nltk
from nltk.corpus import stopwords
nltk.download('stopwords')
stop_words = set(stopwords.words('english'))
# LOAD MODELS AND VECTORIZER
models = {
    "Random Forest": "RFCsentimentAnalysis_model.pkl",
    "Logistic Regression": "LRsentimentAnalysis_model.pkl",
    "Multinomial Naïve Bayes": "MNBsentimentAnalysis_model.pkl"
}

with open("vectorizer.pkl", "rb") as vectorizer_file:
    vectorizer = joblib.load(vectorizer_file)
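# NOTE: all three models are assumed to expect features produced by this exact
# vectorizer (the one fitted during training); loading a different vectorizer
# file would produce mismatched feature indices.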
app, model_eval = st.tabs(["Application", "Model Evaluation"])
# STREAMLIT APP TAB 1
with app:
    # Sidebar for model selection
    st.sidebar.header("Select Model")
    model_choice = st.sidebar.selectbox("Choose a model:", list(models.keys()))

    # Load selected model
    with open(models[model_choice], "rb") as model_file:
        model = joblib.load(model_file)
    # MAPPING RESULTS
    sentiment_mapping = {0: "Neutral", 1: "Positive", 2: "Negative"}
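    # NOTE: this mapping is assumed to match the label encoding used when the
    # models were trained (0 = Neutral, 1 = Positive, 2 = Negative).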
    # FUNCTION TO REDUCE TEXT TO ITS MOST BASIC FORM
    def clean_text(text):
        text = text.lower()  # lowercase
        text = re.sub(r'[^a-zA-Z\s]', '', text)  # strip digits and punctuation
        text = ' '.join([word for word in text.split() if word not in stop_words])  # drop stopwords
        return text
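    # e.g. clean_text("I LOVE this product!!!") -> "love product"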
    # STREAMLIT UI
    st.title("Sentiment Analysis App")
    st.write("Enter text below to analyze its sentiment.")

    user_input = st.text_area("Enter text:")

    if st.button("Analyze Sentiment"):
        if user_input:
            cleaned_input = clean_text(user_input)
            transformed_input = vectorizer.transform([cleaned_input])
            prediction = model.predict(transformed_input)[0]
            sentiment = sentiment_mapping[prediction]
            st.write(f"Predicted Sentiment: **{sentiment}**")
        else:
            st.write("Please enter some text to analyze.")
with model_eval:
    st.header("Model Evaluation")
    st.write("The sentiment analysis models were trained to detect whether a text is positive, negative, or neutral. The dataset was taken from Kaggle.")
    st.write("Dataset by Ismiel Hossen Abir. Link: https://www.kaggle.com/datasets/mdismielhossenabir/sentiment-analysis")
    # SENTIMENT DISTRIBUTION
    st.header("Sentiment Distribution")
    st.write("The models were trained on a dataset with the following number of texts per sentiment label:")
    st.image("sentiment_distribution.png")
    # Confusion Matrix
    st.title("Confusion Matrix")
    st.write("The confusion matrix displays actual vs. predicted labels. For a given sentiment class, consider the following when interpreting it:")
    st.write("- **True Positives (TP):** texts correctly predicted as that sentiment class")
    st.write("- **True Negatives (TN):** texts correctly predicted as not belonging to that class")
    st.write("- **False Positives (FP):** texts predicted as that class but actually belonging to another (Type I error)")
    st.write("- **False Negatives (FN):** texts of that class predicted as another class (Type II error)")
st.header("Naive Bayes Confusion Matrix")
st.write("The image below represents the Confusion Matrix of the Naive Bayes model.")
st.image("MNBConfusion Matrix.png")
st.header("Logistic Regression Confusion Matrix")
st.write("The image below represents the Confusion Matrix of the Logistic Regression model.")
st.image("LRconfusion_matrix.png")
st.header("Random Forest Confusion Matrix")
st.write("The image below represents the Confusion Matrix of the Random Forest model.")
st.image("RFCConfusion Matrix.png")
    # Evaluation Metrics
    st.title("Evaluation Metrics")
    st.write("Evaluation metrics help assess the performance of the sentiment analysis models.")
st.header("Naive Bayes Evaluation Metrics")
st.write("The image below represents the **Accuracy, F1 score, and classification report** of the Naive Bayes model.")
st.image("MNBclassification_report.png")
st.header("Logistic Regression Evaluation Metrics")
st.write("The image below represents the **Accuracy, F1 score, and classification report** of the Logistic Regression model.")
st.image("LRclassification_report.png")
st.header("Random Forest Evaluation Metrics")
st.write("The image below represents the **Accuracy, F1 score, and classification report** of the Random Forest Classifier model.")
st.image("RFCclassification_report.png")
    # COMPARISON
    st.header("Comparison")
    st.write("Based on the confusion matrices and evaluation metrics, we can conclude that, of the three classification algorithms chosen, Logistic Regression and Random Forest perform better than Naive Bayes.")