import streamlit as st
import numpy as np
import pandas as pd
import io
import matplotlib.pyplot as plt
from matplotlib.ticker import PercentFormatter
import seaborn as sns
from sklearn.preprocessing import (
    OneHotEncoder,
    OrdinalEncoder,
    StandardScaler,
    MinMaxScaler,
)
from sklearn.model_selection import train_test_split
from imblearn.under_sampling import RandomUnderSampler
from imblearn.over_sampling import RandomOverSampler, SMOTE
from sklearn.linear_model import Ridge, Lasso, LogisticRegression
from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.svm import SVR, SVC
from sklearn.naive_bayes import MultinomialNB
from xgboost import XGBRFRegressor, XGBRFClassifier
from lightgbm import LGBMRegressor, LGBMClassifier
from sklearn.metrics import (
    mean_absolute_error,
    mean_squared_error,
    r2_score,
)
from sklearn.metrics import (
    accuracy_score,
    f1_score,
    confusion_matrix,
    precision_score,
    recall_score,
)
import pickle

st.set_page_config(page_title="Tabular Data Analysis and Auto ML", page_icon="🤖")
sns.set_style("white")
sns.set_context("poster", font_scale=0.7)
palette = [
    "#1d7874",
    "#679289",
    "#f4c095",
    "#ee2e31",
    "#ffb563",
    "#918450",
    "#f85e00",
    "#a41623",
    "#9a031e",
    "#d6d6d6",
    "#ffee32",
    "#ffd100",
    "#333533",
    "#202020",
]


def main():
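    """Drive the Streamlit app: upload a CSV, cache it in st.session_state,
    and route to EDA, preprocessing, or training and evaluation from the sidebar."""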
    file = st.sidebar.file_uploader("Upload Your CSV File Here: ", type=["csv"])
    process = st.sidebar.button("Process")
    option = st.sidebar.radio(
        "Select an Option: ",
        (
            "Basic EDA",
            "Univariate Analysis",
            "Bivariate Analysis",
            "Preprocess",
            "Training and Evaluation",
        ),
    )
    placeholder = st.empty()
    placeholder.markdown(
        "<h1 style='text-align: center;'>Welcome to Tabular Data Analysis and Auto ML🤖</h1>",
        unsafe_allow_html=True,
    )


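    # Load the uploaded file once on "Process" and keep it in session_state so it survives reruns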
    if file is not None and process:
        data = load_csv(file)
        st.session_state["data"] = data

    if "data" in st.session_state:
        data = st.session_state["data"]
        placeholder.empty()

        if option == "Basic EDA":
            st.markdown(
                "<h1 style='text-align: center;'>Basic EDA</h1>", unsafe_allow_html=True
            )

            st.subheader("Data Overview")
            st.write(data_overview(data))
            st.write(duplicate(data))
            st.dataframe(data.head())

            st.subheader("Data Types and Unique Value Counts")
            display_data_info(data)

            st.subheader("Missing Data")
            missing_data(data)

            st.subheader("Value Counts")
            value_counts(data)

            st.subheader("Descriptive Statistics")
            st.write(data.describe().T)

        if option == "Univariate Analysis":
            st.markdown(
                "<h1 style='text-align: center;'>Univariate Analysis</h1>",
                unsafe_allow_html=True,
            )
            plot = st.radio(
                "Select a chart: ",
                ("Count Plot", "Pie Chart", "Histogram", "Violin Plot", "Scatter Plot"),
            )

            if plot == "Count Plot":
                column = st.selectbox(
                    "Select a column", [""] + list(data.select_dtypes("O"))
                )
                if column:
                    countplot(data, column)

            if plot == "Pie Chart":
                column = st.selectbox(
                    "Select a column", [""] + list(data.select_dtypes("O"))
                )
                if column:
                    piechart(data, column)

            if plot == "Histogram":
                column = st.selectbox(
                    "Select a column",
                    [""] + list(data.select_dtypes(include=["int", "float"])),
                )
                if column:
                    histogram(data, column)

            if plot == "Violin Plot":
                column = st.selectbox(
                    "Select a column",
                    [""] + list(data.select_dtypes(include=["int", "float"])),
                )
                if column:
                    violinplot(data, column)

            if plot == "Scatter Plot":
                column = st.selectbox(
                    "Select a column",
                    [""] + list(data.select_dtypes(include=["int", "float"])),
                )
                if column:
                    scatterplot(data, column)

        if option == "Bivariate Analysis":
            st.markdown(
                "<h1 style='text-align: center;'>Bivariate Analysis</h1>",
                unsafe_allow_html=True,
            )
            plot = st.radio(
                "Select a chart: ",
                ("Scatter Plot", "Bar Plot", "Box Plot", "Pareto Chart"),
            )

            if plot == "Scatter Plot":
                columns = st.multiselect(
                    "Select two columns",
                    list(data.select_dtypes(include=["int", "float"])),
                )

                if columns:
                    biscatterplot(data, columns)

            if plot == "Bar Plot":
                columns = st.multiselect("Select two columns", list(data.columns))

                if columns:
                    bibarplot(data, columns)

            if plot == "Box Plot":
                columns = st.multiselect("Select two columns", list(data.columns))

                if columns:
                    biboxplot(data, columns)

            if plot == "Pareto Chart":
                column = st.selectbox(
                    "Select a column",
                    [""] + list(data.select_dtypes(include="object")),
                )

                if column:
                    paretoplot(data, column)

        if option == "Preprocess":
            st.markdown(
                "<h1 style='text-align: center;'>Data Preprocessing</h1>",
                unsafe_allow_html=True,
            )

            operation = st.radio(
                "Select preprocessing step: ",
                (
                    "Drop Columns",
                    "Handling Missing Values",
                    "Encode Categorical Features",
                ),
            )

            if operation == "Drop Columns":
                columns = st.multiselect("Select Columns to drop: ", (data.columns))
                drop_columns = st.button("Drop Columns")
                if drop_columns:
                    data.drop(columns, axis=1, inplace=True)
                    st.success("Dropped selected columns✅✅✅")

            elif operation == "Handling Missing Values":
                num_missing = st.selectbox(
                    "Select an Approach (Numerical columns only): ",
                    ("", "Drop", "Backward Fill", "Forward Fill", "Mean", "Median"),
                ).lower()

                cat_missing = st.selectbox(
                    "Select an Approach (Categorical columns only): ",
                    ("", "Drop", "Most Frequent Values", "Replace with 'Unknown'"),
                ).lower()
                hmv = st.button("Handle Missing Values")

                if hmv:
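                    # Impute numerical and categorical columns separately, using the strategies chosen above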
                    if num_missing:
                        num_data = data.select_dtypes(include=["int64", "float64"])

                        if num_missing == "drop":
                            data = data.dropna(subset=num_data.columns)

                        elif num_missing in [
                            "mean",
                            "median",
                            "backward fill",
                            "forward fill",
                        ]:
                            if num_missing == "mean":
                                fill_values = num_data.mean()
                            elif num_missing == "median":
                                fill_values = num_data.median()
                            elif num_missing == "backward fill":
                                fill_values = num_data.bfill()
                            elif num_missing == "forward fill":
                                fill_values = num_data.ffill()

                            data.fillna(value=fill_values, inplace=True)

                            st.success(
                                "Imputed missing values in numerical columns with selected approach."
                            )

                    if cat_missing:
                        cat_data = data.select_dtypes(exclude=["int", "float"])

                        if cat_missing == "drop":
                            data = data.dropna(subset=cat_data.columns)

                        elif cat_missing == "most frequent values":
                            mode_values = data[cat_data.columns].mode().iloc[0]
                            data[cat_data.columns] = data[cat_data.columns].fillna(
                                mode_values
                            )

                        elif cat_missing == "replace with 'unknown'":
                            data[cat_data.columns] = data[cat_data.columns].fillna(
                                "Unknown"
                            )

                        st.success(
                            "Imputed missing values in categorical columns with selected approach."
                        )

            elif operation == "Encode Categorical Features":
                oe_columns = st.multiselect(
                    "Choose Columns for Ordinal Encoding",
                    list(data.select_dtypes(include="object")),
                )
                st.info("Other columns will be One Hot Encoded.")

                encode_columns = st.button("Encode Columns")

                if encode_columns:
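                    # Cast boolean columns to 0/1 first so they are not treated as objects during encoding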
                    bool_columns = data.select_dtypes(include=bool).columns
                    data[bool_columns] = data[bool_columns].astype(int)
                    if oe_columns:
                        oe = OrdinalEncoder()
                        data[oe_columns] = oe.fit_transform(
                            data[oe_columns].astype("str")
                        )

                    remaining_cat_cols = [
                        col
                        for col in data.select_dtypes(include="object")
                        if col not in oe_columns
                    ]

                    if len(remaining_cat_cols) > 0:
                        data = pd.get_dummies(
                            data, columns=remaining_cat_cols, drop_first=False
                        )
                        st.success("Encoded categorical columns")

                    # get_dummies may create boolean dummy columns; cast them to 0/1 as well
                    bool_columns = data.select_dtypes(include=bool).columns
                    data[bool_columns] = data[bool_columns].astype(int)

            st.session_state["data"] = data

            preprocessed_data_bytes = data.to_csv(index=False).encode()
            if st.download_button(
                label="Download Preprocessed Data",
                key="preprocessed_data",
                data=preprocessed_data_bytes,
                file_name="preprocessed_data.csv",
                mime="text/csv",
            ):
                st.success("Data Downloaded")


        if option == "Training and Evaluation":
            st.markdown(
                "<h1 style='text-align: center;'>Training and Evaluation</h1>",
                unsafe_allow_html=True,
            )
            algo = st.selectbox("Choose Algorithm Type:", ("", "Regression", "Classification"))

            if algo == "Regression":
                target = st.selectbox("Choose Target Variable (Y): ", list(data.columns))

                try:
                    X = data.drop(target, axis=1)
                    Y = data[target]
                except Exception as e:
                    st.write(str(e))
                    st.stop()

                st.write(
                    "80% of the data will be used for training the model; the remaining 20% will be used to evaluate it."
                )
                X_train, X_test, y_train, y_test = train_test_split(
                    X, Y, test_size=0.2, random_state=42
                )

                scale = st.selectbox(
                    "Choose how you want to scale features:",
                    ("", "Standard Scaler", "Min Max Scaler"),
                )

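                # Fit the scaler on the training split only, then apply the same transform to the test split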
                if scale == "Standard Scaler":
                    scaler = StandardScaler()
                    X_train = scaler.fit_transform(X_train)
                    X_test = scaler.transform(X_test)

                elif scale == "Min Max Scaler":
                    scaler = MinMaxScaler()
                    X_train = scaler.fit_transform(X_train)
                    X_test = scaler.transform(X_test)

                model = st.selectbox(
                    "Choose Regression Model for training: ",
                    (
                        "",
                        "Ridge Regression",
                        "Decision Tree Regressor",
                        "Random Forest Regressor",
                        "SVR",
                        "XGBRF Regressor",
                        "LGBM Regressor",
                    ),
                )

                if model == "Ridge Regression":
                    reg = Ridge(alpha=1.0)
                    reg.fit(X_train, y_train)
                    pred = reg.predict(X_test)
                    st.write(
                        "Mean Absolute Error (MAE): {:.4f}".format(
                            mean_absolute_error(y_test, pred)
                        )
                    )
                    st.write(
                        "Mean Squared Error (MSE): {:.4f}".format(
                            mean_squared_error(y_test, pred)
                        )
                    )
                    st.write(
                        "Root Mean Squared Error (RMSE): {:.4f}".format(
                            mean_squared_error(y_test, pred, squared=False)
                        )
                    )
                    st.write("R-squared (R²): {:.4f}".format(r2_score(y_test, pred)))

                    if st.download_button(
                        label="Download Trained Model",
                        key="trained_model",
                        on_click=None,
                        data=pickle.dumps(reg),
                        file_name="ridge_regression_model.pkl",
                        mime="application/octet-stream",
                    ):
                        with open("ridge_regression_model.pkl", "wb") as model_file:
                            pickle.dump(reg, model_file)

                elif model == "Decision Tree Regressor":
                    reg = DecisionTreeRegressor(max_depth=10)
                    reg.fit(X_train, y_train)
                    pred = reg.predict(X_test)
                    st.write(
                        "Mean Absolute Error (MAE): {:.4f}".format(
                            mean_absolute_error(y_test, pred)
                        )
                    )
                    st.write(
                        "Mean Squared Error (MSE): {:.4f}".format(
                            mean_squared_error(y_test, pred)
                        )
                    )
                    st.write(
                        "Root Mean Squared Error (RMSE): {:.4f}".format(
                            mean_squared_error(y_test, pred, squared=False)
                        )
                    )
                    st.write("R-squared (R²): {:.4f}".format(r2_score(y_test, pred)))

                    if st.download_button(
                        label="Download Trained Model",
                        key="trained_model",
                        on_click=None,
                        data=pickle.dumps(reg),
                        file_name="decision_tree_regression_model.pkl",
                        mime="application/octet-stream",
                    ):
                        with open(
                            "decision_tree_regression_model.pkl", "wb"
                        ) as model_file:
                            pickle.dump(reg, model_file)

                elif model == "Random Forest Regressor":
                    reg = RandomForestRegressor(max_depth=10, n_estimators=100)
                    reg.fit(X_train, y_train)
                    pred = reg.predict(X_test)
                    st.write(
                        "Mean Absolute Error (MAE): {:.4f}".format(
                            mean_absolute_error(y_test, pred)
                        )
                    )
                    st.write(
                        "Mean Squared Error (MSE): {:.4f}".format(
                            mean_squared_error(y_test, pred)
                        )
                    )
                    st.write(
                        "Root Mean Squared Error (RMSE): {:.4f}".format(
                            mean_squared_error(y_test, pred, squared=False)
                        )
                    )
                    st.write("R-squared (R²): {:.4f}".format(r2_score(y_test, pred)))

                    if st.download_button(
                        label="Download Trained Model",
                        key="trained_model",
                        on_click=None,
                        data=pickle.dumps(reg),
                        file_name="random_forest_regression_model.pkl",
                        mime="application/octet-stream",
                    ):
                        with open(
                            "random_forest_regression_model.pkl", "wb"
                        ) as model_file:
                            pickle.dump(reg, model_file)

                elif model == "SVR":
                    reg = SVR(C=1.0, epsilon=0.2)
                    reg.fit(X_train, y_train)
                    pred = reg.predict(X_test)
                    st.write(
                        "Mean Absolute Error (MAE): {:.4f}".format(
                            mean_absolute_error(y_test, pred)
                        )
                    )
                    st.write(
                        "Mean Squared Error (MSE): {:.4f}".format(
                            mean_squared_error(y_test, pred)
                        )
                    )
                    st.write(
                        "Root Mean Squared Error (RMSE): {:.4f}".format(
                            mean_squared_error(y_test, pred, squared=False)
                        )
                    )
                    st.write("R-squared (R²): {:.4f}".format(r2_score(y_test, pred)))

                    if st.download_button(
                        label="Download Trained Model",
                        key="trained_model",
                        on_click=None,
                        data=pickle.dumps(reg),
                        file_name="svr_model.pkl",
                        mime="application/octet-stream",
                    ):
                        with open("svr_model.pkl", "wb") as model_file:
                            pickle.dump(reg, model_file)

                elif model == "XGBRF Regressor":
                    reg = XGBRFRegressor(reg_lambda=1)
                    reg.fit(X_train, y_train)
                    pred = reg.predict(X_test)
                    st.write(
                        "Mean Absolute Error (MAE): {:.4f}".format(
                            mean_absolute_error(y_test, pred)
                        )
                    )
                    st.write(
                        "Mean Squared Error (MSE): {:.4f}".format(
                            mean_squared_error(y_test, pred)
                        )
                    )
                    st.write(
                        "Root Mean Squared Error (RMSE): {:.4f}".format(
                            mean_squared_error(y_test, pred, squared=False)
                        )
                    )
                    st.write("R-squared (R²): {:.4f}".format(r2_score(y_test, pred)))

                    if st.download_button(
                        label="Download Trained Model",
                        key="trained_model",
                        on_click=None,
                        data=pickle.dumps(reg),
                        file_name="xgbrf_regression_model.pkl",
                        mime="application/octet-stream",
                    ):
                        with open("xgbrf_regression_model.pkl", "wb") as model_file:
                            pickle.dump(reg, model_file)

                elif model == "LGBM Regressor":
                    reg = LGBMRegressor(reg_lambda=1)
                    reg.fit(X_train, y_train)
                    pred = reg.predict(X_test)
                    st.write(
                        "Mean Absolute Error (MAE): {:.4f}".format(
                            mean_absolute_error(y_test, pred)
                        )
                    )
                    st.write(
                        "Mean Squared Error (MSE): {:.4f}".format(
                            mean_squared_error(y_test, pred)
                        )
                    )
                    st.write(
                        "Root Mean Squared Error (RMSE): {:.4f}".format(
                            mean_squared_error(y_test, pred, squared=False)
                        )
                    )
                    st.write("R-squared (R²): {:.4f}".format(r2_score(y_test, pred)))

                    if st.download_button(
                        label="Download Trained Model",
                        key="trained_model",
                        on_click=None,
                        data=pickle.dumps(reg),
                        file_name="lgbm_regression_model.pkl",
                        mime="application/octet-stream",
                    ):
                        with open("lgbm_regression_model.pkl", "wb") as model_file:
                            pickle.dump(reg, model_file)

            elif algo == "Classification":
                target = st.selectbox("Choose Target Variable (Y): ", list(data.columns))

                try:
                    X = data.drop(target, axis=1)
                    Y = data[target]
                except Exception as e:
                    st.write(str(e))
                    st.stop()

                st.write(
                    "80% of the data will be used for training the model; the remaining 20% will be used to evaluate it."
                )
                X_train, X_test, y_train, y_test = train_test_split(
                    X, Y, test_size=0.2, random_state=42
                )

                balance = st.selectbox(
                    "Do you want to balance the dataset?", ("", "Yes", "No")
                )
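                # Resampling is applied to the training split only; the test split keeps its original class balance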
                if balance == "Yes":
                    piechart(data, target)

                    sample = st.selectbox(
                        "Which approach do you want to use?",
                        ("", "Random Under Sampling", "Random Over Sampling", "SMOTE"),
                    )

                    if sample == "Random Under Sampling":
                        rus = RandomUnderSampler(random_state=42)
                        X_train, y_train = rus.fit_resample(X_train, y_train)

                    elif sample == "Random Over Sampling":
                        ros = RandomOverSampler(random_state=42)
                        X_train, y_train = ros.fit_resample(X_train, y_train)

                    elif sample == "SMOTE":
                        smote = SMOTE(random_state=42)
                        X_train, y_train = smote.fit_resample(X_train, y_train)

                scale = st.selectbox(
                    "Choose how you want to scale features:",
                    ("", "Standard Scaler", "Min Max Scaler"),
                )


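                # As in the regression flow, fit the scaler on the training split and reuse it for the test split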
                if scale == "Standard Scaler":
                    scaler = StandardScaler()
                    X_train = scaler.fit_transform(X_train)
                    X_test = scaler.transform(X_test)

                elif scale == "Min Max Scaler":
                    scaler = MinMaxScaler()
                    X_train = scaler.fit_transform(X_train)
                    X_test = scaler.transform(X_test)

                model = st.selectbox(
                    "Choose Classification Model for training: ",
                    (
                        "",
                        "Logistic Regression",
                        "Decision Tree Classifier",
                        "Random Forest Classifier",
                        "SVC",
                        "XGBRF Classifier",
                        "LGBM Classifier",
                    ),
                )

                if model == "Logistic Regression":
                    clf = LogisticRegression(penalty="l2")
                    clf.fit(X_train, y_train)
                    pred = clf.predict(X_test)
                    st.write(
                        "Accuracy Score: {:.4f}".format(accuracy_score(y_test, pred))
                    )

                    try:
                        st.write("F1 Score: {:.4f}".format(f1_score(y_test, pred)))
                        st.write("Precision Score: {:.4f}".format(precision_score(y_test, pred)))
                        st.write("Recall Score: {:.4f}".format(recall_score(y_test, pred)))
                    except ValueError:
                        st.write("Macro Precision Score: {:.4f}".format(precision_score(y_test, pred, average="macro")))
                        st.write("Macro Recall Score: {:.4f}".format(recall_score(y_test, pred, average="macro")))
                        st.write("Macro F1 Score: {:.4f}".format(f1_score(y_test, pred, average="macro")))

                    plot_confusion_matrix(
                        y_test, pred, "Logistic Regression Confusion Matrix"
                    )

                    if st.download_button(
                        label="Download Trained Model",
                        key="trained_model",
                        on_click=None,
                        data=pickle.dumps(clf),
                        file_name="logistic_regression_model.pkl",
                        mime="application/octet-stream",
                    ):
                        with open("logistic_regression_model.pkl", "wb") as model_file:
                            pickle.dump(clf, model_file)

                if model == "Decision Tree Classifier":
                    clf = DecisionTreeClassifier(max_depth=5)
                    clf.fit(X_train, y_train)
                    pred = clf.predict(X_test)
                    st.write(
                        "Accuracy Score: {:.4f}".format(accuracy_score(y_test, pred))
                    )
                    try:
                        st.write("F1 Score: {:.4f}".format(f1_score(y_test, pred)))
                        st.write("Precision Score: {:.4f}".format(precision_score(y_test, pred)))
                        st.write("Recall Score: {:.4f}".format(recall_score(y_test, pred)))
                    except ValueError:
                        st.write("Macro Precision Score: {:.4f}".format(precision_score(y_test, pred, average="macro")))
                        st.write("Macro Recall Score: {:.4f}".format(recall_score(y_test, pred, average="macro")))
                        st.write("Macro F1 Score: {:.4f}".format(f1_score(y_test, pred, average="macro")))

                    plot_confusion_matrix(
                        y_test, pred, "Decision Tree Classifier Confusion Matrix"
                    )

                    if st.download_button(
                        label="Download Trained Model",
                        key="trained_model",
                        on_click=None,
                        data=pickle.dumps(clf),
                        file_name="decision_tree_classifier_model.pkl",
                        mime="application/octet-stream",
                    ):
                        with open(
                            "decision_tree_classifier_model.pkl", "wb"
                        ) as model_file:
                            pickle.dump(clf, model_file)

                if model == "Random Forest Classifier":
                    clf = RandomForestClassifier(n_estimators=100, max_depth=5)
                    clf.fit(X_train, y_train)
                    pred = clf.predict(X_test)
                    st.write(
                        "Accuracy Score: {:.4f}".format(accuracy_score(y_test, pred))
                    )
                    try:
                        st.write("F1 Score: {:.4f}".format(f1_score(y_test, pred)))
                        st.write("Precision Score: {:.4f}".format(precision_score(y_test, pred)))
                        st.write("Recall Score: {:.4f}".format(recall_score(y_test, pred)))
                    except ValueError:
                        st.write("Macro Precision Score: {:.4f}".format(precision_score(y_test, pred, average="macro")))
                        st.write("Macro Recall Score: {:.4f}".format(recall_score(y_test, pred, average="macro")))
                        st.write("Macro F1 Score: {:.4f}".format(f1_score(y_test, pred, average="macro")))

                    plot_confusion_matrix(
                        y_test, pred, "Random Forest Classifier Confusion Matrix"
                    )

                    if st.download_button(
                        label="Download Trained Model",
                        key="trained_model",
                        on_click=None,
                        data=pickle.dumps(clf),
                        file_name="random_forest_classifier_model.pkl",
                        mime="application/octet-stream",
                    ):
                        with open(
                            "random_forest_classifier_model.pkl", "wb"
                        ) as model_file:
                            pickle.dump(clf, model_file)

                if model == "SVC":
                    clf = SVC(C=1.5)
                    clf.fit(X_train, y_train)
                    pred = clf.predict(X_test)
                    st.write(
                        "Accuracy Score: {:.4f}".format(accuracy_score(y_test, pred))
                    )
                    try:
                        st.write("F1 Score: {:.4f}".format(f1_score(y_test, pred)))
                        st.write("Precision Score: {:.4f}".format(precision_score(y_test, pred)))
                        st.write("Recall Score: {:.4f}".format(recall_score(y_test, pred)))
                    except ValueError:
                        st.write("Macro Precision Score: {:.4f}".format(precision_score(y_test, pred, average="macro")))
                        st.write("Macro Recall Score: {:.4f}".format(recall_score(y_test, pred, average="macro")))
                        st.write("Macro F1 Score: {:.4f}".format(f1_score(y_test, pred, average="macro")))

                    plot_confusion_matrix(y_test, pred, "SVC Confusion Matrix")

                    if st.download_button(
                        label="Download Trained Model",
                        key="trained_model",
                        on_click=None,
                        data=pickle.dumps(clf),
                        file_name="svc_model.pkl",
                        mime="application/octet-stream",
                    ):
                        with open("svc_model.pkl", "wb") as model_file:
                            pickle.dump(clf, model_file)

                if model == "XGBRF Classifier":
                    clf = XGBRFClassifier(reg_lambda=1.0)
                    clf.fit(X_train, y_train)
                    pred = clf.predict(X_test)
                    st.write(
                        "Accuracy Score: {:.4f}".format(accuracy_score(y_test, pred))
                    )
                    try:
                        st.write("F1 Score: {:.4f}".format(f1_score(y_test, pred)))
                        st.write("Precision Score: {:.4f}".format(precision_score(y_test, pred)))
                        st.write("Recall Score: {:.4f}".format(recall_score(y_test, pred)))
                    except ValueError:
                        st.write("Macro Precision Score: {:.4f}".format(precision_score(y_test, pred, average="macro")))
                        st.write("Macro Recall Score: {:.4f}".format(recall_score(y_test, pred, average="macro")))
                        st.write("Macro F1 Score: {:.4f}".format(f1_score(y_test, pred, average="macro")))

                    plot_confusion_matrix(
                        y_test, pred, "XGBRF Classifier Confusion Matrix"
                    )

                    if st.download_button(
                        label="Download Trained Model",
                        key="trained_model",
                        on_click=None,
                        data=pickle.dumps(clf),
                        file_name="xgbrf_classifier_model.pkl",
                        mime="application/octet-stream",
                    ):
                        with open("xgbrf_classifier_model.pkl", "wb") as model_file:
                            pickle.dump(clf, model_file)

                if model == "LGBM Classifier":
                    clf = LGBMClassifier(reg_lambda=1.0)
                    clf.fit(X_train, y_train)
                    pred = clf.predict(X_test)
                    st.write(
                        "Accuracy Score: {:.4f}".format(accuracy_score(y_test, pred))
                    )
                    try:
                        st.write("F1 Score: {:.4f}".format(f1_score(y_test, pred)))
                        st.write("Precision Score: {:.4f}".format(precision_score(y_test, pred)))
                        st.write("Recall Score: {:.4f}".format(recall_score(y_test, pred)))
                    except ValueError:
                        st.write("Macro Precision Score: {:.4f}".format(precision_score(y_test, pred, average="macro")))
                        st.write("Macro Recall Score: {:.4f}".format(recall_score(y_test, pred, average="macro")))
                        st.write("Macro F1 Score: {:.4f}".format(f1_score(y_test, pred, average="macro")))

                    plot_confusion_matrix(
                        y_test, pred, "LGBM Classifier Confusion Matrix"
                    )

                    if st.download_button(
                        label="Download Trained Model",
                        key="trained_model",
                        on_click=None,
                        data=pickle.dumps(clf),
                        file_name="lgbm_classifier_model.pkl",
                        mime="application/octet-stream",
                    ):
                        with open("lgbm_classifier_model.pkl", "wb") as model_file:
                            pickle.dump(clf, model_file)


def load_csv(file):
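    """Read the uploaded file-like object into a pandas DataFrame."""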
    data = pd.read_csv(file)
    return data


def data_overview(data):
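    """Write the row count to the app and return the column count as a string."""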
    r, c = data.shape
    st.write(f"Number of Rows: {r}")
    return f"Number of Columns: {c}"


def missing_data(data):
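    """Render a table of missing-value counts and percentages for columns that contain any."""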
    missing_values = data.isna().sum()
    missing_values = missing_values[missing_values > 0]
    missing_value_per = (missing_values / data.shape[0]) * 100
    missing_value_per = missing_value_per.round(2).astype(str) + "%"
    missing_df = pd.DataFrame(
        {"Missing Values": missing_values, "Percentage": missing_value_per}
    )
    missing_df_html = missing_df.to_html(
        classes="table table-striped", justify="center"
    )
    return st.markdown(missing_df_html, unsafe_allow_html=True)


def display_data_info(data):
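    """Render a table of each column's data type and number of unique values."""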
    dtypes = pd.DataFrame(data.dtypes, columns=["Data Type"])
    dtypes.reset_index(inplace=True)
    nunique = pd.DataFrame(data.nunique(), columns=["Unique Counts"])
    nunique.reset_index(inplace=True)
    dtypes.columns = ["Column", "Data Type"]
    nunique.columns = ["Column", "Unique Counts"]
    combined_df = pd.merge(dtypes, nunique, on="Column")
    combined_df_html = combined_df.to_html(
        classes="table table-striped", justify="center"
    )
    return st.markdown(combined_df_html, unsafe_allow_html=True)


def value_counts(data):
    column = st.selectbox("Select a Column", [""] + list(data.columns))
    if column:
        st.write(data[column].value_counts())


def duplicate(data):
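    """Report duplicate rows and drop them in place, keeping the first occurrence."""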
    if data.duplicated().any():
        st.write(
            f"There are {data.duplicated().sum()} duplicate row(s) in the DataFrame. Duplicate rows will be dropped."
        )
        data.drop_duplicates(keep="first", inplace=True)
        return ""

    else:
        return "There are no duplicate rows in the DataFrame."

def countplot(data, col):
    plt.figure(figsize=(10, 6))
    sns.countplot(y=data[col], palette=palette[1:], edgecolor="#1c1c1c", linewidth=2)
    plt.title(f"Countplot of {col} Column")
    st.pyplot(plt)


def piechart(data, col):
    value_counts = data[col].value_counts()
    plt.figure(figsize=(8, 6))
    plt.pie(
        value_counts,
        labels=value_counts.index,
        autopct="%1.1f%%",
        colors=palette,
        shadow=False,
        wedgeprops=dict(edgecolor="#1c1c1c"),
    )
    plt.title(f"Pie Chart of {col} Column")
    st.pyplot(plt)


def histogram(data, col):
    plt.figure(figsize=(10, 6))
    sns.histplot(
        data[col],
        kde=True,
        color=palette[4],
        fill=True,
        edgecolor="#1c1c1c",
        linewidth=2,
    )
    plt.title(f"Histogram of {col} Column")
    st.pyplot(plt)


def violinplot(data, col):
    plt.figure(figsize=(10, 6))
    sns.violinplot(data[col], color=palette[8])
    plt.title(f"Violin Plot of {col} Column")
    st.pyplot(plt)


def scatterplot(data, col):
    plt.figure(figsize=(10, 8))
    sns.scatterplot(data[col], color=palette[3])
    plt.title(f"Scatter Plot of {col} Column")
    st.pyplot(plt)


def biscatterplot(data, cols):
    try:
        plt.figure(figsize=(10, 8))
        sns.scatterplot(
            data=data,
            x=cols[0],
            y=cols[1],
            palette=palette[1:],
            edgecolor="#1c1c1c",
            linewidth=2,
        )
        plt.title(f"Scatter Plot of {cols[0]} and {cols[1]} Columns")
        st.pyplot(plt)
    except Exception as e:
        st.write(str(e))


def bibarplot(data, cols):
    try:
        plt.figure(figsize=(10, 8))
        sns.barplot(
            data=data,
            x=cols[0],
            y=cols[1],
            palette=palette[1:],
            edgecolor="#1c1c1c",
            linewidth=2,
        )
        plt.title(f"Bar Plot of {cols[0]} and {cols[1]} Columns")
        st.pyplot(plt)
    except Exception as e:
        st.write(str(e))


def biboxplot(data, cols):
    try:
        plt.figure(figsize=(10, 8))
        sns.boxplot(data=data, x=cols[0], y=cols[1], palette=palette[1:], linewidth=2)
        plt.title(f"Box Plot of {cols[0]} and {cols[1]} Columns")
        st.pyplot(plt)
    except Exception as e:
        st.write(str(e))


def paretoplot(data, categorical_col):
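    """Plot a Pareto chart: category frequencies as bars with a cumulative-percentage line on a secondary axis."""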
    try:
        value_counts = data[categorical_col].value_counts()
        cumulative_percentage = (value_counts / value_counts.sum()).cumsum()
        pareto_df = pd.DataFrame(
            {
                "Categories": value_counts.index,
                "Frequency": value_counts.values,
                "Cumulative Percentage": cumulative_percentage.values * 100,
            }
        )
        pareto_df = pareto_df.sort_values(by="Frequency", ascending=False)

        fig, ax1 = plt.subplots(figsize=(10, 8))
        ax1.bar(
            pareto_df["Categories"],
            pareto_df["Frequency"],
            color=palette[1:],
            edgecolor="#1c1c1c",
            linewidth=2,
        )
        ax2 = ax1.twinx()
        ax2.yaxis.set_major_formatter(PercentFormatter())
        ax2.plot(
            pareto_df["Categories"],
            pareto_df["Cumulative Percentage"],
            color=palette[3],
            marker="D",
            ms=10,
        )
        ax1.set_xlabel(categorical_col)
        ax1.set_ylabel("Frequency", color=palette[0])
        ax2.set_ylabel("Cumulative Percentage", color=palette[3])
        st.pyplot(fig)

    except Exception as e:
        st.write(str(e))


def plot_confusion_matrix(y_true, y_pred, title):
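    """Plot a confusion-matrix heatmap; expects true labels first and predictions second."""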
    cm = confusion_matrix(y_true, y_pred)
    plt.figure(figsize=(6, 4))
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", cbar=False)
    plt.xlabel("Predicted Label")
    plt.ylabel("True Label")
    plt.title(title)
    st.pyplot(plt)


if __name__ == "__main__":
    main()