import streamlit as st
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.model_selection import train_test_split

# Load
train_df = pd.read_csv("datasets/train.csv")

# Preprocess
# Fill missing values by assigning the result back to the column. Calling
# fillna(inplace=True) on a column selection is chained assignment: pandas 2.x
# emits a FutureWarning for it, and under Copy-on-Write it only updates a
# temporary object, leaving the missing values in train_df.
train_df["Age"] = train_df["Age"].fillna(train_df["Age"].median())
train_df["Embarked"] = train_df["Embarked"].fillna(train_df["Embarked"].mode()[0])
train_df["Sex"] = train_df["Sex"].map({"male": 0, "female": 1})
# One-hot encode the port; the resulting Embarked_* columns are appended at the end.
train_df = pd.get_dummies(train_df, columns=["Embarked"])
# FamilySize is created after the dummies, so it ends up as the last column.
train_df["FamilySize"] = train_df["SibSp"] + train_df["Parch"] + 1
train_df = train_df.drop(columns=["Name", "Ticket", "Cabin", "PassengerId"])

# Train
X = train_df.drop("Survived", axis=1)
y = train_df["Survived"]
# Hold out 20% of the rows for validation; fixed seeds keep the metrics reproducible.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=1)
model = RandomForestClassifier(n_estimators=50, max_depth=10, random_state=1)
model.fit(X_train, y_train)
y_pred = model.predict(X_val)

# App
# Page heading plus a pointer to where the data came from.
st.title("Classification: Titanic Survival Prediction")
st.caption(
    "dataset: https://www.kaggle.com/code/mrisdal/exploring-survival-on-the-titanic/input -> test.csv || train.csv"
)

# One tab per section of the app; the three handles are used as context managers below.
tab1, tab2, tab3 = st.tabs(
    [
        "Model Performance",
        "Dataset",
        "Survival Chance  Predictor",
    ]
)

with tab1:
    # Model Assessment
    st.header("Model Performance")

    # Validation-set metrics for the fitted model.
    st.write(f"**Accuracy:** {accuracy_score(y_val, y_pred):.2f}")
    st.write(f"**Precision:** {precision_score(y_val, y_pred):.2f}")
    st.write(f"**Recall:** {recall_score(y_val, y_pred):.2f}")
    st.write(f"**F1 Score:** {f1_score(y_val, y_pred):.2f}")

    # Confusion matrix: rows are the true labels, columns the predicted ones.
    fig, ax = plt.subplots()
    sns.heatmap(confusion_matrix(y_val, y_pred), annot=True, fmt="d", cmap="Blues", ax=ax)
    ax.set_xlabel("Predicted")
    ax.set_ylabel("Actual")
    st.pyplot(fig)
    # The script reruns on every interaction; close the figure so pyplot does
    # not keep accumulating open figures in the server process.
    plt.close(fig)

    # Feature Importance
    st.header("Feature Importance")

    feature_importances = pd.Series(model.feature_importances_, index=X.columns)

    # get_dummies split Embarked into several Embarked_* columns; fold their
    # importances back into a single "Embarked" entry. Matching on the prefix
    # avoids a KeyError when one of the dummy columns is absent.
    embarked_columns = [
        name for name in feature_importances.index if str(name).startswith("Embarked_")
    ]
    if embarked_columns:
        embarked_importance = feature_importances[embarked_columns].sum()
        feature_importances = feature_importances.drop(embarked_columns)
        feature_importances["Embarked"] = embarked_importance
    feature_importances = feature_importances.sort_values(ascending=False)

    fig, ax = plt.subplots()
    sns.barplot(x=feature_importances, y=feature_importances.index, ax=ax)
    ax.set_xlabel("Importance")
    ax.set_ylabel("Feature")
    ax.set_title("Feature Importance in Titanic Survival Prediction")
    st.pyplot(fig)
    plt.close(fig)
    st.divider()

with tab2:
    # Dataset
    st.header("Dataset")

    @st.cache_data
    def load() -> pd.DataFrame:
        """Read the training CSV from disk; the result is cached by Streamlit."""
        raw_frame = pd.read_csv("datasets/train.csv")
        return raw_frame

    # The frame prepared for training, shown next to the raw file further down.
    dataset_processed = train_df
    dataset = load()
    
    # Quick preprocess, just for display
    def preprocess(data):
        """Return a copy of ``data`` with ``Sex`` encoded and ``FamilySize`` added.

        ``Sex`` is mapped from "male"/"female" to 0/1 and ``FamilySize`` is
        ``SibSp + Parch + 1``. The input frame is left untouched, so the caller
        can still show the raw data afterwards and calling this twice on the
        same frame does not turn ``Sex`` into NaN.
        """
        data = data.copy()
        data["Sex"] = data["Sex"].map({"male": 0, "female": 1})
        data["FamilySize"] = data["SibSp"] + data["Parch"] + 1
        return data

    def corr(data, title):
        """Render a correlation heatmap of the numeric columns of ``data``.

        Args:
            data: DataFrame to correlate; non-numeric columns are ignored.
            title: Title drawn above the heatmap.
        """
        numeric = data.select_dtypes(include=["number"])
        fig, ax = plt.subplots(figsize=(8, 6))
        sns.heatmap(numeric.corr(), annot=True, fmt=".2f", cmap="coolwarm", linewidths=0.5, ax=ax)
        ax.set_title(title)
        st.pyplot(fig)
        # Close the figure once rendered; otherwise every rerun leaves another
        # open figure registered with pyplot.
        plt.close(fig)
    corr(preprocess(dataset), "Correlation Matrix")
    st.caption("'tis all the correlatable data, minus the other irrelevant, non-correlatable data. [Embarked] feature is missing cos' it is string data, and was one-hotted.")

    # Toggle order
    view_type = st.radio("Order:", ["Top -> Bottom", "Bottom -> Top"])

    # Show both frames in full, reversing the row order when requested.
    # (head(len(df)) / tail(len(df)) select every row, so the frames are used directly.)
    bottom_up = view_type == "Bottom -> Top"
    for table_caption, table in (("datasets/train.csv", dataset), ("df", dataset_processed)):
        st.caption(table_caption)
        st.dataframe(table.iloc[::-1] if bottom_up else table)
    st.caption("There's a test.csv file in my datasets/ folder, but it's just an artifact from my Kaggle download. Can't be bothered to organize.")
    st.divider()
    
with tab3:
    # Predictor
    st.header("Survival Chance Prediction")

    # Preset Character Data
    def set_character(pclass, sex, age, sibsp, parch, fare, embarked):
        """Store one passenger profile in ``st.session_state``.

        Used as the ``on_click`` callback of the preset buttons; each argument
        is written under the session-state key of the same name.
        """
        profile = {
            "pclass": pclass,
            "sex": sex,
            "age": age,
            "sibsp": sibsp,
            "parch": parch,
            "fare": fare,
            "embarked": embarked,
        }
        for field, value in profile.items():
            st.session_state[field] = value
        
    with st.expander("Movie Characters"):
        col1, col2 = st.columns(2)
        with col1:
            # Rose: 17, first class, travelling with her mother; $300 is a rough average first-class fare.
            st.button("Rose DeWitt Bukater", on_click=set_character, args=("First Class", "Female", 17, 0, 1, 300.0, "South Hampton"))
            # Jack: 20, third class, alone; $10 is an arbitrary stand-in for the poker bet that won him the ticket.
            st.button("Jack Dawson", on_click=set_character, args=("Third Class", "Male", 20, 0, 0, 10.0, "South Hampton"))
        with col2:
            # Caledon: 30, first class, alone; $500 is a high-end first-class fare estimate.
            st.button("Caledon Hockley", on_click=set_character, args=("First Class", "Male", 30, 0, 0, 500.0, "Cherbourg"))
            # Ruth: 45, first class, travelling with her daughter; same fare estimate as Rose.
            st.button("Ruth DeWitt Bukater", on_click=set_character, args=("First Class", "Female", 45, 0, 1, 300.0, "South Hampton"))

    # Seed the starting values once per session. The widgets below are bound to
    # these session-state keys, which are the same keys set_character writes,
    # so clicking a preset always overwrites every input. Feeding the presets
    # in through the widgets' default arguments instead left a widget on the
    # user's edited value whenever the preset's value for it had not changed.
    input_defaults = {
        "pclass": "Second Class",
        "sex": "Male",
        "age": 30,
        "sibsp": 0,
        "parch": 0,
        "fare": 30.0,
        "embarked": "South Hampton",
    }
    for state_key, default_value in input_defaults.items():
        if state_key not in st.session_state:
            st.session_state[state_key] = default_value

    pclass = st.radio("Passenger Class", ["First Class", "Second Class", "Third Class"], key="pclass")
    sex = st.radio("Sex", ["Male", "Female"], key="sex")
    age = st.slider("Age", 0, 100, key="age")
    sibsp = st.slider("Siblings/Spouses Aboard", 0, 8, key="sibsp")
    parch = st.slider("Parents/Children Aboard", 0, 6, key="parch")
    fare = st.slider("Fare", 0.0, 500.0, step=5.0, key="fare")
    embarked = st.radio("Port of Embarkation", ["Cherbourg", "Queenstown", "South Hampton"], key="embarked")

    # Process Input
    # Encode the widget values the same way the training data was encoded.
    sex = 1 if sex == "Female" else 0
    embarked_C = 1 if embarked == "Cherbourg" else 0
    embarked_Q = 1 if embarked == "Queenstown" else 0
    embarked_S = 1 if embarked == "South Hampton" else 0
    pclass = {"First Class": 1, "Second Class": 2}.get(pclass, 3)
    family_size = sibsp + parch + 1

    # Build the row by column name and order it exactly like the training
    # matrix. FamilySize is the LAST training column (it is added after the
    # Embarked dummies), so a positional array with family_size ahead of the
    # embarked flags feeds four values into the wrong features.
    # NOTE(review): assumes train.csv provides "Pclass" and "Fare" columns
    # (standard Kaggle Titanic schema); a KeyError here means the schema differs.
    feature_row = {
        "Pclass": pclass,
        "Sex": sex,
        "Age": age,
        "SibSp": sibsp,
        "Parch": parch,
        "Fare": fare,
        "Embarked_C": embarked_C,
        "Embarked_Q": embarked_Q,
        "Embarked_S": embarked_S,
        "FamilySize": family_size,
    }
    input_data = pd.DataFrame([feature_row])[list(X.columns)]

    # Predict and Output
    # Second entry of the first (only) row of predict_proba, i.e. the second class.
    survival_probability = model.predict_proba(input_data)[0][1]
    st.subheader("Survival Probability")
    # Thresholds to add color probability: the first band whose upper limit
    # exceeds the probability wins; anything from 0.90 upwards is green.
    color_bands = (
        (0.25, "red"),
        (0.50, "orange"),
        (0.75, "yellow"),
        (0.90, "#FFFF99"),
    )
    color = next(
        (shade for upper_limit, shade in color_bands if survival_probability < upper_limit),
        "green",
    )
    # The heading tag is closed explicitly so the emitted HTML is well-formed.
    st.markdown(f"<h1 style='font-size:50px; color:{color};'>{survival_probability:.2%}</h1>", unsafe_allow_html=True)
    st.divider()