import streamlit as st
import joblib
import re
import nltk
from nltk.corpus import stopwords

# Download the NLTK English stop-word list (a no-op if it is already available)
nltk.download('stopwords')
stop_words = set(stopwords.words('english'))
# LOAD MODELS AND VECTORIZER
models = {
    "Random Forest": "RFCsentimentAnalysis_model.pkl",
    "Logistic Regression": "LRsentimentAnalysis_model.pkl",
    "Multinomial Naïve Bayes": "MNBsentimentAnalysis_model.pkl"
}
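# The vectorizer is assumed to be the same fitted vectorizer (e.g. TF-IDF or CountVectorizer)
# used to build the training features, so its transform() output matches what the saved
# models expect.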
with open("vectorizer.pkl", "rb") as vectorizer_file: | |
vectorizer = joblib.load(vectorizer_file) | |
app, model_eval = st.tabs(["Application", "Model Evaluation"])
# STREAMLIT APP TAB 1
with app:
    # Sidebar for model selection
    st.sidebar.header("Select Model")
    model_choice = st.sidebar.selectbox("Choose a model:", list(models.keys()))

    # Load selected model
    with open(models[model_choice], "rb") as model_file:
        model = joblib.load(model_file)

    # MAPPING RESULTS
    sentiment_mapping = {0: "Neutral", 1: "Positive", 2: "Negative"}
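    # (This mapping is assumed to match the integer label encoding used when the models
    # were trained; a different encoding would make the displayed sentiment wrong.)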
    # FUNCTION TO REDUCE TEXT TO ITS MOST BASIC FORM:
    # lowercase, strip non-letter characters, and remove English stopwords
    def clean_text(text):
        text = text.lower()
        text = re.sub(r'[^a-zA-Z\s]', '', text)
        text = ' '.join([word for word in text.split() if word not in stop_words])
        return text
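    # Illustrative example: clean_text("I LOVED the movie, 10/10!") -> "loved movie"
    # ("i" and "the" are stopwords; punctuation and digits are stripped)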
    # STREAMLIT UI
    st.title("Sentiment Analysis App")
    st.write("Enter text below to analyze its sentiment.")

    user_input = st.text_area("Enter text:")

    if st.button("Analyze Sentiment"):
        if user_input:
            cleaned_input = clean_text(user_input)
            transformed_input = vectorizer.transform([cleaned_input])
            prediction = model.predict(transformed_input)[0]
            sentiment = sentiment_mapping[prediction]
            st.write(f"Predicted Sentiment: **{sentiment}**")
        else:
            st.write("Please enter some text to analyze.")
# STREAMLIT APP TAB 2
with model_eval:
    st.header("Model Evaluation")
    st.write("The sentiment analysis models were trained to classify a text as positive, negative, or neutral. The dataset was taken from Kaggle.")
    st.write("Dataset by Ismiel Hossen Abir. Link: https://www.kaggle.com/datasets/mdismielhossenabir/sentiment-analysis")

    # SENTIMENT DISTRIBUTION
    st.header("Sentiment Distribution")
    st.write("The models were trained on a dataset with the following distribution of texts per sentiment label.")
    st.image("sentiment_distribution.png")
    # Confusion Matrix
    st.title("Confusion Matrix")
    st.write("A confusion matrix compares actual labels against predicted labels. For each sentiment class, consider the following when interpreting it:")
    st.write("- **True Positives (TP):** Texts of that class correctly predicted as that class")
    st.write("- **True Negatives (TN):** Texts of other classes correctly predicted as other classes")
    st.write("- **False Positives (FP):** Texts of other classes incorrectly predicted as that class (Type I error)")
    st.write("- **False Negatives (FN):** Texts of that class incorrectly predicted as another class (Type II error)")
st.header("Naive Bayes Confusion Matrix") | |
st.write("The image below represents the Confusion Matrix of the Naive Bayes model.") | |
st.image("MNBConfusion Matrix.png") | |
st.header("Logistic Regression Confusion Matrix") | |
st.write("The image below represents the Confusion Matrix of the Logistic Regression model.") | |
st.image("LRconfusion_matrix.png") | |
st.header("Random Forest Confusion Matrix") | |
st.write("The image below represents the Confusion Matrix of the Random Forest model.") | |
st.image("RFCConfusion Matrix.png") | |
    # Evaluation Metrics
    st.title("Evaluation Metrics")
    st.write("Evaluation metrics help assess the performance of each sentiment analysis model.")

    st.header("Naive Bayes Evaluation Metrics")
    st.write("The image below shows the **accuracy, F1 score, and classification report** of the Naive Bayes model.")
    st.image("MNBclassification_report.png")

    st.header("Logistic Regression Evaluation Metrics")
    st.write("The image below shows the **accuracy, F1 score, and classification report** of the Logistic Regression model.")
    st.image("LRclassification_report.png")

    st.header("Random Forest Evaluation Metrics")
    st.write("The image below shows the **accuracy, F1 score, and classification report** of the Random Forest Classifier model.")
    st.image("RFCclassification_report.png")
    # COMPARISON
    st.header("Comparison")
    st.write("Based on the confusion matrices and evaluation metrics, of the three classification algorithms chosen, Logistic Regression and Random Forest perform better than Naive Bayes.")