# Ci-Dave's picture
# Added files
# ad53465
import streamlit as st
import joblib
import pandas as pd
import os
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.decomposition import PCA
from datasets import load_dataset
def load_clustered_data():
    """Load the pre-clustered FIFA dataset from disk.

    Reads ``FIFA_Standardized_Data.joblib`` from the current working
    directory and verifies that the clustering/embedding columns the
    visualizations rely on are present.

    Returns:
        The loaded DataFrame, or ``None`` when the file is missing or lacks
        a required column. In both failure cases an error is shown in the
        Streamlit UI so callers only need to check for ``None``.
    """
    data_path = "FIFA_Standardized_Data.joblib"
    try:
        # NOTE(security): joblib.load unpickles the file, so it must come
        # from a trusted source.
        df = joblib.load(data_path)
    except FileNotFoundError:
        # Report a missing file the same way as missing columns instead of
        # crashing the page with a traceback.
        st.error(f"⚠️ Dataset file '{data_path}' not found. Please re-run clustering and save the dataset.")
        return None

    # Ensure required clustering columns exist
    required_columns = ["DBSCAN_Cluster", "PCA1", "PCA2", "TSNE1", "TSNE2"]
    missing_columns = [col for col in required_columns if col not in df.columns]
    if missing_columns:
        st.error(f"⚠️ Missing columns in dataset: {', '.join(missing_columns)}. Please re-run clustering and save the dataset.")
        return None
    return df
def load_fifa_dataset():
    """Return the train split of the ``Ci-Dave/FIFA2019`` dataset as a DataFrame.

    Three columns are renamed on the way out: ``ShortPassing`` becomes
    ``Passing``, ``StandingTackle`` becomes ``Defending`` and ``Strength``
    becomes ``Physical``.
    """
    column_aliases = {
        "ShortPassing": "Passing",
        "StandingTackle": "Defending",
        "Strength": "Physical",
    }
    train_split = load_dataset("Ci-Dave/FIFA2019")["train"]
    players = pd.DataFrame(train_split)
    return players.rename(columns=column_aliases)
def home_page():
    """Render the landing page: the app title followed by a short overview."""
    # Markdown overview shown under the title.
    overview = """
This Streamlit app demonstrates unsupervised learning using **clustering techniques** on the FIFA 2019 dataset.
**Key Features:**
- Displays the dataset
- Allows user interaction for visualizing clusters
- Uses models like **DBSCAN, PCA, and t-SNE**
"""
    st.title("⚽ FIFA 2019 Clustering Analysis")
    st.write(overview)
def dataset_page():
    """Render the dataset page: a title followed by the FIFA 2019 table."""
    # The title emoji was mis-encoded (UTF-8 bytes shown as single-byte
    # characters); restored to the intended bar-chart emoji.
    st.title("📊 FIFA 2019 Dataset")
    df = load_fifa_dataset()
    st.dataframe(df)
def visualization_page():
    """Render the clustering visualization page.

    Loads the pre-clustered dataset, lets the user pick a view (DBSCAN, PCA
    or t-SNE) and draws the matching scatter plot, coloured by the
    ``DBSCAN_Cluster`` column. Returns early, drawing nothing, when the
    dataset cannot be loaded.
    """
    # The title emoji was mis-encoded; restored to the intended chart emoji.
    st.title("📈 Clustering Visualization")

    df = load_clustered_data()
    if df is None:
        return  # Stop execution if dataset is missing required columns

    clustering_algorithms = ["DBSCAN", "PCA", "t-SNE"]
    selected_algo = st.selectbox("Choose a Clustering Algorithm:", clustering_algorithms)

    # Draw on an explicit figure instead of pyplot's implicit global one.
    # Previously the PCA branch created no figure, so it painted on top of
    # whatever figure was current, and figures were never closed.
    fig, ax = plt.subplots(figsize=(8, 5))
    try:
        if selected_algo == "DBSCAN":
            st.subheader("DBSCAN Clustering")
            sns.scatterplot(
                x=df["PCA1"], y=df["PCA2"], hue=df["DBSCAN_Cluster"],
                palette="coolwarm", ax=ax,
            )
        elif selected_algo == "PCA":
            st.subheader("PCA Visualization")
            # Fit PCA on the numeric feature columns only. The cluster
            # labels and precomputed embeddings are outputs of earlier
            # steps, not features, so they are excluded.
            derived_columns = ["DBSCAN_Cluster", "PCA1", "PCA2", "TSNE1", "TSNE2"]
            features = df.drop(columns=derived_columns, errors="ignore").select_dtypes(include="number")
            if features.shape[1] < 2:
                st.error("⚠️ Not enough numeric feature columns to compute a 2-component PCA.")
                return
            pca_result = PCA(n_components=2).fit_transform(features)
            ax.scatter(pca_result[:, 0], pca_result[:, 1], c=df["DBSCAN_Cluster"], cmap="plasma")
            ax.set_xlabel("PCA Component 1")
            ax.set_ylabel("PCA Component 2")
        elif selected_algo == "t-SNE":
            st.subheader("t-SNE Visualization")
            sns.scatterplot(
                x=df["TSNE1"], y=df["TSNE2"], hue=df["DBSCAN_Cluster"],
                palette="coolwarm", ax=ax,
            )
        st.pyplot(fig)
    finally:
        # Release the figure so repeated reruns do not accumulate open figures.
        plt.close(fig)
def main():
    """Render the sidebar navigation and dispatch to the selected page."""
    st.sidebar.title("Navigation")
    # Maps each sidebar label to the function that renders that page.
    # Two labels had mis-encoded emoji; restored to the intended characters.
    pages = {
        "🏠 Home": home_page,
        "📊 Dataset": dataset_page,
        "📈 Visualizations": visualization_page,
    }
    choice = st.sidebar.radio("Go to", list(pages))
    pages[choice]()
# Script entry point: start the app only when this file is run directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()