"""Streamlit app that browses the FIFA 2019 dataset and plots its clustering.

Three pages are offered through the sidebar: a home page, a raw dataset
view, and a visualization page that plots the DBSCAN / PCA / t-SNE columns
stored in a pre-computed joblib artifact.
"""

import os

import joblib
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import streamlit as st
from datasets import load_dataset
from sklearn.decomposition import PCA

# Path of the pre-computed clustering artifact read by load_clustered_data().
_CLUSTERED_DATA_PATH = "FIFA_Standardized_Data.joblib"

# Derived columns the artifact must contain. They are outputs of earlier
# clustering / embedding steps, so they must not be fed back in as features.
_CLUSTER_COLUMNS = ["DBSCAN_Cluster", "PCA1", "PCA2", "TSNE1", "TSNE2"]


def load_clustered_data():
    """Load the pre-clustered dataset and check it has the derived columns.

    Returns:
        The object stored in the joblib artifact (callers use it as a pandas
        DataFrame), or ``None`` after a Streamlit error has been shown because
        the file is missing or lacks one of the required columns.
    """
    # NOTE(review): joblib.load unpickles the file, so it must only ever be
    # pointed at an artifact produced by a trusted pipeline.
    try:
        df = joblib.load(_CLUSTERED_DATA_PATH)
    except FileNotFoundError:
        # Report through the same "error + None" path the caller already
        # handles instead of surfacing a raw traceback in the app.
        st.error(
            f"⚠️ Clustered dataset '{_CLUSTERED_DATA_PATH}' was not found. "
            "Please re-run clustering and save the dataset."
        )
        return None

    missing_columns = [col for col in _CLUSTER_COLUMNS if col not in df.columns]
    if missing_columns:
        st.error(
            f"⚠️ Missing columns in dataset: {', '.join(missing_columns)}. "
            "Please re-run clustering and save the dataset."
        )
        return None
    return df


def load_fifa_dataset():
    """Fetch the ``Ci-Dave/FIFA2019`` dataset and return its train split.

    Returns:
        A DataFrame built from the ``train`` split, with ``ShortPassing``,
        ``StandingTackle`` and ``Strength`` renamed to ``Passing``,
        ``Defending`` and ``Physical``.
    """
    dataset = load_dataset("Ci-Dave/FIFA2019")
    df = pd.DataFrame(dataset["train"])
    return df.rename(
        columns={
            "ShortPassing": "Passing",
            "StandingTackle": "Defending",
            "Strength": "Physical",
        }
    )


def home_page():
    """Render the landing page: a title plus a short description of the app."""
    st.title("⚽ FIFA 2019 Clustering Analysis")
    # Each bullet sits on its own line (with a blank line before the heading)
    # so Markdown renders a real list rather than one run-on paragraph.
    st.write(
        "This Streamlit app demonstrates unsupervised learning using "
        "**clustering techniques** on the FIFA 2019 dataset.\n"
        "\n"
        "**Key Features:**\n"
        "- Displays the dataset\n"
        "- Allows user interaction for visualizing clusters\n"
        "- Uses models like **DBSCAN, PCA, and t-SNE**\n"
    )


def dataset_page():
    """Render the dataset page: the FIFA 2019 table as an interactive grid."""
    st.title("📊 FIFA 2019 Dataset")
    df = load_fifa_dataset()
    st.dataframe(df)


def _scatter_by_cluster(ax, df, x_col, y_col):
    """Scatter columns *x_col* / *y_col* of *df* on *ax*, hued by DBSCAN cluster."""
    sns.scatterplot(
        x=df[x_col],
        y=df[y_col],
        hue=df["DBSCAN_Cluster"],
        palette="coolwarm",
        ax=ax,
    )


def _scatter_fresh_pca(ax, df):
    """Fit a 2-component PCA on the feature columns of *df* and plot it on *ax*.

    The derived clustering / embedding columns are excluded and only numeric
    columns are kept, so the projection is computed from the features alone
    rather than from "every column except the last one".
    """
    features = df.drop(columns=_CLUSTER_COLUMNS).select_dtypes(include="number")
    pca_result = PCA(n_components=2).fit_transform(features)
    ax.scatter(
        pca_result[:, 0],
        pca_result[:, 1],
        c=df["DBSCAN_Cluster"],
        cmap="plasma",
    )
    ax.set_xlabel("PCA Component 1")
    ax.set_ylabel("PCA Component 2")


def visualization_page():
    """Render the visualization page for the view chosen in the select box.

    Returns early (after ``load_clustered_data`` has reported the problem)
    when the clustered dataset is unavailable.
    """
    st.title("📈 Clustering Visualization")
    df = load_clustered_data()
    if df is None:
        return  # Nothing to plot; the loader already showed the error.

    clustering_algorithms = ["DBSCAN", "PCA", "t-SNE"]
    selected_algo = st.selectbox("Choose a Clustering Algorithm:", clustering_algorithms)

    # Draw on an explicit Figure/Axes instead of pyplot's implicit "current
    # figure": the script is re-executed on every interaction, so implicit
    # state would pile up open figures and let a plot land on a stale one.
    fig, ax = plt.subplots(figsize=(8, 5))
    try:
        if selected_algo == "DBSCAN":
            st.subheader("DBSCAN Clustering")
            _scatter_by_cluster(ax, df, "PCA1", "PCA2")
        elif selected_algo == "PCA":
            st.subheader("PCA Visualization")
            _scatter_fresh_pca(ax, df)
        elif selected_algo == "t-SNE":
            st.subheader("t-SNE Visualization")
            _scatter_by_cluster(ax, df, "TSNE1", "TSNE2")
        st.pyplot(fig)
    finally:
        # Always release the figure, even if plotting raised.
        plt.close(fig)


def main():
    """Build the sidebar navigation and render the selected page."""
    st.sidebar.title("Navigation")
    pages = {
        "🏠 Home": home_page,
        "📊 Dataset": dataset_page,
        "📈 Visualizations": visualization_page,
    }
    choice = st.sidebar.radio("Go to", list(pages.keys()))
    pages[choice]()


if __name__ == "__main__":
    main()