"""Streamlit sentiment-analysis app.

Tab 1 ("Application") lets the user pick one of three pre-trained
classifiers and predict the sentiment (Neutral / Positive / Negative)
of free text. Tab 2 ("Model Evaluation") displays static evaluation
artifacts (class distribution, confusion matrices, classification
reports) saved as images next to this script.
"""

import re

import joblib
import nltk
import streamlit as st
from nltk.corpus import stopwords

# One-time corpus fetch; no-op on later reruns once the corpus is cached.
nltk.download('stopwords')
stop_words = set(stopwords.words('english'))

# LOAD MODELS AND VECTORIZER
# Display name -> pickled estimator file (expected alongside this script).
models = {
    "Random Forest": "RFCsentimentAnalysis_model.pkl",
    "Logistic Regression": "LRsentimentAnalysis_model.pkl",
    "Multinomial Naïve Bayes": "MNBsentimentAnalysis_model.pkl",
}

# Shared vectorizer fitted together with the models; all three expect
# its feature space as input.
with open("vectorizer.pkl", "rb") as vectorizer_file:
    vectorizer = joblib.load(vectorizer_file)

app, model_eval = st.tabs(["Application", "Model Evaluation"])

# STREAMLIT APP TAB 1
with app:
    # Sidebar for model selection
    st.sidebar.header("Select Model")
    model_choice = st.sidebar.selectbox("Choose a model:", list(models.keys()))

    # Load the estimator the user selected (re-read on every rerun).
    with open(models[model_choice], "rb") as model_file:
        model = joblib.load(model_file)

    # MAPPING RESULTS
    # Integer class labels emitted by the models -> human-readable sentiment.
    sentiment_mapping = {0: "Neutral", 1: "Positive", 2: "Negative"}

    # FUNCTION TO REDUCE TEXT TO ITS MOST BASIC FORM
    def clean_text(text):
        """Lower-case *text*, strip non-letter characters, and drop
        English stopwords, mirroring the preprocessing used at training
        time (assumed — confirm against the training pipeline)."""
        text = text.lower()
        text = re.sub(r'[^a-zA-Z\s]', '', text)
        text = ' '.join([word for word in text.split() if word not in stop_words])
        return text

    # STREAMLIT UI
    st.title("Sentiment Analysis App")
    st.write("Enter text below to analyze its sentiment.")
    user_input = st.text_area("Enter text:")

    if st.button("Analyze Sentiment"):
        if user_input:
            cleaned_input = clean_text(user_input)
            transformed_input = vectorizer.transform([cleaned_input])
            prediction = model.predict(transformed_input)[0]
            sentiment = sentiment_mapping[prediction]
            st.write(f"Predicted Sentiment: **{sentiment}**")
        else:
            st.write("Please enter some text to analyze.")

# STREAMLIT APP TAB 2 — static evaluation report
with model_eval:
    st.header("Model Evaluation")
    st.write(
        "The Sentiment Analysis model was trained in order to detect if a "
        "text is positive, negative, or neutral. The dataset was taken from "
        "kaggle."
    )
    # NOTE: this literal was severed across lines in the mangled original;
    # rejoined here into one valid string.
    st.write(
        "Dataset by Ismiel Hossen Abir. "
        "Link: https://www.kaggle.com/datasets/mdismielhossenabir/sentiment-analysis"
    )

    # SENTIMENT DISTRIBUTION
    st.header("Sentiment Distribution")
    st.write(
        "The model was trained using a dataset with the total amount of text "
        "equivalent to the following labels"
    )
    st.image("sentiment_distribution.png")

    # Confusion Matrix
    st.title("Confusion Matrix")
    st.write(
        "The confusion matrix displays actual vs. predicted labels. "
        "Consider the following when interpreting it:"
    )
    # FIX: the original bullets described "Spam"/"Not Spam" — leftover copy
    # from a spam classifier; reworded for this sentiment task.
    st.write("- **True Positives (TP):** Texts whose sentiment class was correctly predicted")
    st.write("- **True Negatives (TN):** Texts correctly predicted as not belonging to a given sentiment class")
    st.write("- **False Positives (FP):** Texts predicted as a sentiment class they do not belong to (Type I error)")
    st.write("- **False Negatives (FN):** Texts belonging to a sentiment class that were predicted otherwise (Type II error)")

    st.header("Naive Bayes Confusion Matrix")
    st.write("The image below represents the Confusion Matrix of the Naive Bayes model.")
    st.image("MNBConfusion Matrix.png")

    st.header("Logistic Regression Confusion Matrix")
    st.write("The image below represents the Confusion Matrix of the Logistic Regression model.")
    st.image("LRconfusion_matrix.png")

    st.header("Random Forest Confusion Matrix")
    st.write("The image below represents the Confusion Matrix of the Random Forest model.")
    st.image("RFCConfusion Matrix.png")

    # Evaluation Metrics
    st.title("Evaluation Metrics")
    st.write("Evaluation metrics help assess the performance of the sentiment analysis.")

    st.header("Naive Bayes Evaluation Metrics")
    st.write(
        "The image below represents the **Accuracy, F1 score, and "
        "classification report** of the Naive Bayes model."
    )
    st.image("MNBclassification_report.png")

    st.header("Logistic Regression Evaluation Metrics")
    st.write(
        "The image below represents the **Accuracy, F1 score, and "
        "classification report** of the Logistic Regression model."
    )
    st.image("LRclassification_report.png")

    st.header("Random Forest Evaluation Metrics")
    # NOTE: this literal was also severed across lines in the mangled
    # original; rejoined into one valid string.
    st.write(
        "The image below represents the **Accuracy, F1 score, and "
        "classification report** of the Random Forest Classifier model."
    )
    st.image("RFCclassification_report.png")

    # COMPARISON
    st.header("Comparison")
    st.write(
        "Based on the confusion matrix and evaluation metrics, we can assume "
        "that out of the three classification algorithms chosen, Logistic "
        "Regression and Random Forests performs better than the Naive Bayes."
    )