import streamlit as st
import joblib
import pandas as pd
import os
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.decomposition import PCA
from datasets import load_dataset

def load_clustered_data(path="FIFA_Standardized_Data.joblib"):
    """Load the pre-clustered FIFA dataset and validate its schema.

    Args:
        path: Location of the joblib file holding the clustered DataFrame.
            Defaults to the file name the app has always used, resolved
            relative to the current working directory.

    Returns:
        The loaded DataFrame, or ``None`` when the file is missing or lacks
        any of the columns the visualizations need. In both failure cases an
        error is shown in the Streamlit UI so callers only have to check for
        ``None``.
    """
    try:
        # NOTE: joblib.load unpickles arbitrary objects; only point this at
        # files produced by a trusted clustering run.
        df = joblib.load(path)
    except FileNotFoundError:
        # Report a missing file the same way as a bad schema instead of
        # letting the page die with a traceback.
        st.error(f"⚠️ Clustered dataset not found at '{path}'. Please re-run clustering and save the dataset.")
        return None

    # Ensure required clustering columns exist
    required_columns = ["DBSCAN_Cluster", "PCA1", "PCA2", "TSNE1", "TSNE2"]
    missing_columns = [col for col in required_columns if col not in df.columns]

    if missing_columns:
        st.error(f"⚠️ Missing columns in dataset: {', '.join(missing_columns)}. Please re-run clustering and save the dataset.")
        return None

    return df

def load_fifa_dataset():
    """Return the train split of the "Ci-Dave/FIFA2019" dataset as a DataFrame.

    Three skill columns are renamed to the shorter labels used by the app:
    ShortPassing -> Passing, StandingTackle -> Defending, Strength -> Physical.
    """
    column_aliases = {
        "ShortPassing": "Passing",
        "StandingTackle": "Defending",
        "Strength": "Physical",
    }
    train_split = load_dataset("Ci-Dave/FIFA2019")["train"]
    return pd.DataFrame(train_split).rename(columns=column_aliases)

def home_page():
    """Render the landing page: a title followed by a short feature overview."""
    st.title("⚽ FIFA 2019 Clustering Analysis")
    # Markdown body kept as a single literal so the rendered text is unchanged.
    overview = """
    This Streamlit app demonstrates unsupervised learning using **clustering techniques** on the FIFA 2019 dataset.
    
    **Key Features:**
    - Displays the dataset
    - Allows user interaction for visualizing clusters
    - Uses models like **DBSCAN, PCA, and t-SNE**
    """
    st.write(overview)

def dataset_page():
    """Render the dataset page: a title and the FIFA table as an interactive grid."""
    st.title("📊 FIFA 2019 Dataset")
    st.dataframe(load_fifa_dataset())

def _scatter_by_cluster(df, x_col, y_col):
    """Render a scatter of two embedding columns coloured by DBSCAN cluster."""
    # Use an explicit figure so nothing leaks through pyplot's global state.
    fig, ax = plt.subplots(figsize=(8, 5))
    sns.scatterplot(x=df[x_col], y=df[y_col], hue=df["DBSCAN_Cluster"], palette="coolwarm", ax=ax)
    st.pyplot(fig)
    # st.pyplot does not clear a figure that is passed explicitly; close it
    # so figures do not accumulate across Streamlit reruns.
    plt.close(fig)


def visualization_page():
    """Render the visualization page.

    Loads the clustered dataset, lets the user pick one of three views and
    draws the matching scatter plot, always coloured by ``DBSCAN_Cluster``:

    * "DBSCAN" - the stored ``PCA1``/``PCA2`` coordinates.
    * "PCA"    - a fresh 2-component PCA fitted on the feature columns.
    * "t-SNE"  - the stored ``TSNE1``/``TSNE2`` coordinates.

    Returns early, drawing nothing, when the loader returns ``None``.
    """
    st.title("📈 Clustering Visualization")
    df = load_clustered_data()

    if df is None:
        return  # The loader has already reported the problem in the UI.

    clustering_algorithms = ["DBSCAN", "PCA", "t-SNE"]
    selected_algo = st.selectbox("Choose a Clustering Algorithm:", clustering_algorithms)

    if selected_algo == "DBSCAN":
        st.subheader("DBSCAN Clustering")
        _scatter_by_cluster(df, "PCA1", "PCA2")

    elif selected_algo == "PCA":
        st.subheader("PCA Visualization")
        # Fit PCA on the original features only. Positional slicing
        # (iloc[:, :-1]) fed the cluster labels and the previously computed
        # PCA/t-SNE coordinates back into the decomposition.
        derived_columns = ["DBSCAN_Cluster", "PCA1", "PCA2", "TSNE1", "TSNE2"]
        features = df.drop(columns=derived_columns).select_dtypes(include="number")
        if features.shape[1] < 2:
            st.error("⚠️ At least two numeric feature columns are required to compute a 2-component PCA.")
            return

        pca_result = PCA(n_components=2).fit_transform(features)

        # A dedicated figure keeps this plot from being drawn on top of
        # whatever figure a previous selection left behind.
        fig, ax = plt.subplots(figsize=(8, 5))
        ax.scatter(pca_result[:, 0], pca_result[:, 1], c=df["DBSCAN_Cluster"], cmap="plasma")
        ax.set_xlabel("PCA Component 1")
        ax.set_ylabel("PCA Component 2")
        st.pyplot(fig)
        plt.close(fig)

    elif selected_algo == "t-SNE":
        st.subheader("t-SNE Visualization")
        _scatter_by_cluster(df, "TSNE1", "TSNE2")

def main():
    """Draw the sidebar navigation and render whichever page is selected."""
    st.sidebar.title("Navigation")

    # Sidebar label -> page renderer; insertion order is the display order.
    routes = {
        "🏠 Home": home_page,
        "📊 Dataset": dataset_page,
        "📈 Visualizations": visualization_page,
    }

    selected_label = st.sidebar.radio("Go to", list(routes))
    render_page = routes[selected_label]
    render_page()
    
# Run the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()