|
import streamlit as st |
|
import joblib |
|
import pandas as pd |
|
import os |
|
import matplotlib.pyplot as plt |
|
import seaborn as sns |
|
from sklearn.decomposition import PCA |
|
from datasets import load_dataset |
|
|
|
def load_clustered_data():
    """Load the pre-clustered dataset and check its derived columns exist.

    Reads ``FIFA_Standardized_Data.joblib`` (path relative to the current
    working directory) and verifies that the DBSCAN label column and the
    PCA / t-SNE coordinate columns are all present.

    Returns:
        The loaded object (expected to expose ``.columns`` like a pandas
        DataFrame), or ``None`` when the file cannot be found or a required
        column is missing. In both failure cases an ``st.error`` message is
        displayed so the caller only has to check for ``None``.
    """
    data_path = "FIFA_Standardized_Data.joblib"

    # NOTE: joblib.load unpickles the file, so this path must only ever point
    # at a trusted, locally produced artifact.
    try:
        df = joblib.load(data_path)
    except FileNotFoundError:
        # Report a missing file the same way as missing columns instead of
        # letting the exception surface as a raw traceback in the app.
        st.error(
            f"⚠️ Dataset file '{data_path}' not found. "
            "Please re-run clustering and save the dataset."
        )
        return None

    required_columns = ["DBSCAN_Cluster", "PCA1", "PCA2", "TSNE1", "TSNE2"]
    missing_columns = [col for col in required_columns if col not in df.columns]

    if missing_columns:
        # The warning-sign emoji was stored as mis-decoded text ("β οΈ") in the
        # source; it is restored to the intended character here.
        st.error(
            f"⚠️ Missing columns in dataset: {', '.join(missing_columns)}. "
            "Please re-run clustering and save the dataset."
        )
        return None

    return df
|
|
|
def load_fifa_dataset():
    """Fetch the FIFA 2019 dataset and return its ``train`` split as a DataFrame.

    The split is loaded via ``datasets.load_dataset("Ci-Dave/FIFA2019")`` and
    three attribute columns are renamed to shorter display names:
    ``ShortPassing`` -> ``Passing``, ``StandingTackle`` -> ``Defending`` and
    ``Strength`` -> ``Physical``. Columns not present are left untouched
    (``DataFrame.rename`` ignores unknown keys by default).

    Returns:
        A pandas DataFrame holding the renamed ``train`` split.
    """
    dataset = load_dataset("Ci-Dave/FIFA2019")

    # Use the library's own converter rather than pd.DataFrame(split), which
    # has to iterate the split from Python to build the frame.
    frame = dataset["train"].to_pandas()

    display_names = {
        "ShortPassing": "Passing",
        "StandingTackle": "Defending",
        "Strength": "Physical",
    }
    return frame.rename(columns=display_names)
|
|
|
def home_page():
    """Render the landing page: the app title followed by a Markdown overview."""
    # The soccer-ball emoji was stored as mis-decoded text ("β½") in the
    # source; it is restored to the intended character here.
    st.title("⚽ FIFA 2019 Clustering Analysis")

    st.write("""
    This Streamlit app demonstrates unsupervised learning using **clustering techniques** on the FIFA 2019 dataset.

    **Key Features:**
    - Displays the dataset
    - Allows user interaction for visualizing clusters
    - Uses models like **DBSCAN, PCA, and t-SNE**
    """)
|
|
|
def dataset_page():
    """Render the dataset page: a title and the full FIFA table."""
    # NOTE(review): the leading glyph in this title looks like a mis-encoded
    # emoji — confirm the intended character.
    st.title("π FIFA 2019 Dataset")

    # Load the table and hand it straight to Streamlit's dataframe widget.
    st.dataframe(load_fifa_dataset())
|
|
|
def _scatter_by_cluster(df, x_col, y_col):
    """Draw a seaborn scatter of two columns coloured by ``DBSCAN_Cluster``."""
    # Use an explicit figure instead of pyplot's global state so that plots
    # from earlier reruns can never bleed into this one.
    fig, ax = plt.subplots(figsize=(8, 5))
    sns.scatterplot(
        x=df[x_col],
        y=df[y_col],
        hue=df["DBSCAN_Cluster"],
        palette="coolwarm",
        ax=ax,
    )
    st.pyplot(fig)
    # Release the figure once Streamlit has rendered it.
    plt.close(fig)


def visualization_page():
    """Render the visualization page with a selectable 2-D cluster plot.

    Loads the clustered dataset via ``load_clustered_data`` and returns early
    if that yields ``None`` (the loader has already shown an error). Otherwise
    a select box offers three views, all coloured by ``DBSCAN_Cluster``:

    * ``DBSCAN`` - scatter of the stored ``PCA1`` / ``PCA2`` columns.
    * ``PCA``    - a fresh 2-component PCA fitted on the feature columns.
    * ``t-SNE``  - scatter of the stored ``TSNE1`` / ``TSNE2`` columns.
    """
    # NOTE(review): the leading glyph in this title looks like a mis-encoded
    # emoji — confirm the intended character.
    st.title("π Clustering Visualization")

    df = load_clustered_data()
    if df is None:
        return

    clustering_algorithms = ["DBSCAN", "PCA", "t-SNE"]
    selected_algo = st.selectbox("Choose a Clustering Algorithm:", clustering_algorithms)

    if selected_algo == "DBSCAN":
        st.subheader("DBSCAN Clustering")
        _scatter_by_cluster(df, "PCA1", "PCA2")

    elif selected_algo == "PCA":
        st.subheader("PCA Visualization")

        # Fit PCA on the original features only. The previous slice
        # df.iloc[:, :-1] dropped just the last column, so the cluster labels
        # and the already-computed embeddings were fed back in as features.
        derived_columns = ["DBSCAN_Cluster", "PCA1", "PCA2", "TSNE1", "TSNE2"]
        features = df.drop(columns=derived_columns).select_dtypes(include="number")

        if features.shape[1] < 2:
            st.error("At least two numeric feature columns are required to compute a 2-component PCA.")
            return

        pca_result = PCA(n_components=2).fit_transform(features)

        # The original drew on whatever pyplot figure happened to be current,
        # which could be a leftover from another view; create a fresh one.
        fig, ax = plt.subplots()
        ax.scatter(pca_result[:, 0], pca_result[:, 1], c=df["DBSCAN_Cluster"], cmap="plasma")
        ax.set_xlabel("PCA Component 1")
        ax.set_ylabel("PCA Component 2")
        st.pyplot(fig)
        plt.close(fig)

    elif selected_algo == "t-SNE":
        st.subheader("t-SNE Visualization")
        _scatter_by_cluster(df, "TSNE1", "TSNE2")
|
|
|
def main():
    """Draw the sidebar navigation and render whichever page is selected."""
    st.sidebar.title("Navigation")

    # Label -> page renderer; the mapping's insertion order is the order of
    # the radio options.
    # NOTE(review): the leading glyphs in these labels look like mis-encoded
    # emoji — confirm the intended characters.
    routes = {
        "π Home": home_page,
        "π Dataset": dataset_page,
        "π Visualizations": visualization_page,
    }

    selected_label = st.sidebar.radio("Go to", list(routes))
    render_page = routes[selected_label]
    render_page()
|
|
|
# Script entry point: build the UI only when this file is executed as the
# main module, not when it is imported.
if __name__ == "__main__":
    main()
|
|