File size: 4,606 Bytes
a487e3e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 |
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
# Load dataset
@st.cache_data
def load_data():
    """Read the mall-customers CSV into a DataFrame.

    The function is wrapped in ``st.cache_data``, so Streamlit serves the
    cached result instead of re-reading the file on every script rerun.
    The path is relative, so it is resolved against the process's current
    working directory.
    """
    return pd.read_csv("Mall_Customers.csv")
# Dataset shared by every section rendered further down.
df = load_data()

# Sidebar navigation: the label picked here selects which section is drawn.
st.sidebar.title("K-Means Clustering App")
section = st.sidebar.radio(
    "Go to",
    [
        "Introduction",
        "Data Exploration",
        "K-Means Model",
        "Test Model",
    ],
)
if section == "Introduction":
    # Static landing page: describes the model, the dataset columns, how to
    # navigate the app, and the takeaways. Nothing here reads `df`.
    st.title("Introduction to K-Means Clustering")

    st.write("### About the Model")
    st.write(
        "K-Means Clustering is an unsupervised machine learning algorithm used "
        "for customer segmentation. It helps identify different groups of "
        "customers based on their spending behavior and income."
    )

    st.write("### About the Dataset")
    st.write("The dataset consists of customer information, including:")
    for column_note in (
        "- **CustomerID**: Unique identifier for each customer.",
        "- **Gender**: Male or Female.",
        "- **Age**: Age of the customer.",
        "- **Annual Income (k$)**: Customer's yearly income.",
        "- **Spending Score (1-100)**: A score assigned based on spending behavior.",
    ):
        st.markdown(column_note)

    st.write("### How to Use the App")
    for usage_step in (
        "1. **Go to 'Data Exploration'**: Understand the dataset using statistics and visualizations.",
        "2. **Go to 'K-Means Model'**: Train the model and visualize clusters.",
        "3. **Go to 'Test Model'**: Input values to predict customer cluster.",
    ):
        st.markdown(usage_step)

    st.write("### Insights")
    for insight in (
        "- Customers can be grouped into different segments based on their income and spending habits.",
        "- The Elbow Method helps determine the optimal number of clusters.",
        "- Businesses can use these insights to tailor marketing strategies and improve customer engagement.",
    ):
        st.markdown(insight)
elif section == "Data Exploration":
st.title("Data Exploration")
st.write("### First 5 rows of dataset")
st.dataframe(df.head())
st.write("### Summary Statistics")
st.write(df.describe())
st.write("### Pairplot")
sns.pairplot(df.drop(columns=["CustomerID", "Gender"]), diag_kind="kde")
st.pyplot()
st.write("### Correlation Heatmap")
plt.figure(figsize=(8, 6))
sns.heatmap(df.drop(columns=["CustomerID", "Gender"]).corr(), annot=True, cmap="coolwarm")
st.pyplot()
elif section == "K-Means Model":
st.title("K-Means Clustering")
# Selecting features for clustering
features = df[["Annual Income (k$)", "Spending Score (1-100)"]]
scaler = StandardScaler()
scaled_features = scaler.fit_transform(features)
# Finding the optimal number of clusters using Elbow Method
st.write("### Elbow Method")
inertia = []
for k in range(1, 11):
kmeans = KMeans(n_clusters=k, random_state=42, n_init=10)
kmeans.fit(scaled_features)
inertia.append(kmeans.inertia_)
plt.figure(figsize=(8, 5))
plt.plot(range(1, 11), inertia, marker='o')
plt.xlabel('Number of Clusters')
plt.ylabel('Inertia')
plt.title('Elbow Method for Optimal k')
st.pyplot()
# Train K-Means Model
k = st.slider("Select Number of Clusters", 2, 10, 5)
kmeans = KMeans(n_clusters=k, random_state=42, n_init=10)
df['Cluster'] = kmeans.fit_predict(scaled_features)
st.write("### Clustered Data")
st.dataframe(df)
# Visualization of clusters
plt.figure(figsize=(8, 6))
sns.scatterplot(x=df["Annual Income (k$)"], y=df["Spending Score (1-100)"], hue=df['Cluster'], palette='viridis')
plt.xlabel("Annual Income (k$)")
plt.ylabel("Spending Score (1-100)")
plt.title("Customer Segmentation using K-Means")
st.pyplot()
# Store the model and scaler globally
st.session_state['scaler'] = scaler
st.session_state['kmeans'] = kmeans
elif section == "Test Model":
st.title("Test K-Means Model")
income = st.number_input("Enter Annual Income (k$)", min_value=0, max_value=200, value=50)
score = st.number_input("Enter Spending Score (1-100)", min_value=1, max_value=100, value=50)
if 'scaler' in st.session_state and 'kmeans' in st.session_state:
input_data = st.session_state['scaler'].transform([[income, score]])
prediction = st.session_state['kmeans'].predict(input_data)
st.write(f"### Predicted Cluster: {prediction[0]}")
else:
st.write("### Please run the K-Means Model section first.")
|