Commit
·
322abb6
1
Parent(s):
0409b2d
uploading initial files
Browse files- app.ipynb +0 -0
- app.py +85 -0
- datasets/Mall_Customers.csv +201 -0
- requirements.txt +6 -0
app.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|
app.py
ADDED
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import numpy as np
|
4 |
+
import matplotlib.pyplot as plt
|
5 |
+
import seaborn as sns
|
6 |
+
from sklearn.cluster import KMeans
|
7 |
+
from sklearn.preprocessing import StandardScaler, LabelEncoder
|
8 |
+
from sklearn.metrics import silhouette_score
|
9 |
+
|
10 |
+
# Load dataset
df = pd.read_csv("datasets/Mall_Customers.csv")
df.drop(columns=['CustomerID'], inplace=True)

# Encode Gender. The fitted encoder is kept and reused for the user's input
# in the predictor tab, so training data and input always share one mapping
# (LabelEncoder numbers labels in sorted order, i.e. Female=0, Male=1 here).
le = LabelEncoder()
df['Genre'] = le.fit_transform(df['Genre'])

# Columns the model is trained on, captured before 'Cluster' is appended to df.
feature_columns = list(df.columns)

# Standardize numerical columns
scaler = StandardScaler()
df_scaled = scaler.fit_transform(df[feature_columns])

# Number of clusters is fixed here; the elbow plot in the first tab shows the
# WCSS curve it can be judged against.
k_optimal = 5
# n_init is passed explicitly so the number of restarts does not depend on the
# default of whichever scikit-learn version is installed.
kmeans = KMeans(n_clusters=k_optimal, init='k-means++', n_init=10, random_state=42)
kmeans.fit(df_scaled)
df['Cluster'] = kmeans.labels_

sil_score = silhouette_score(df_scaled, kmeans.labels_)


@st.cache_data
def compute_wcss(data, k_values):
    """Return the within-cluster sum of squares (inertia) for each K.

    Cached because Streamlit re-executes the whole script on every widget
    interaction and this sweep fits one KMeans model per candidate K.

    Args:
        data: Scaled feature matrix to cluster.
        k_values: Iterable of cluster counts to evaluate.

    Returns:
        A list with one inertia value per entry of ``k_values``, in order.
    """
    return [
        KMeans(n_clusters=k, init='k-means++', n_init=10, random_state=42)
        .fit(data)
        .inertia_
        for k in k_values
    ]


# Streamlit App
st.title("Customer Segmentation using K-Means")
st.caption("Dataset: Mall Customers")

tab1, tab2, tab3 = st.tabs(["Model Performance", "Dataset", "Customer Predictor"])

with tab1:
    st.header("Model Performance")
    st.write(f"**Silhouette Score:** {sil_score:.4f}")

    k_values = tuple(range(1, 11))
    wcss = compute_wcss(df_scaled, k_values)

    fig, ax = plt.subplots()
    ax.plot(k_values, wcss, marker='o', linestyle='--')
    ax.set_xlabel('Number of Clusters (K)')
    ax.set_ylabel('WCSS')
    ax.set_title('Elbow Method for Optimal K')
    st.pyplot(fig)
    # pyplot keeps every figure it creates alive until closed; without this
    # each rerun of the script would leave another figure in memory.
    plt.close(fig)

with tab2:
    st.header("Dataset")
    st.dataframe(df.head())

    fig, ax = plt.subplots(figsize=(8, 6))
    sns.heatmap(df.corr(), annot=True, fmt=".2f", cmap="coolwarm", linewidths=0.5, ax=ax)
    ax.set_title("Correlation Matrix")
    st.pyplot(fig)
    plt.close(fig)

with tab3:
    st.header("Customer Segment Prediction")

    annual_income = st.slider("Annual Income (k$)", min_value=int(df['Annual Income (k$)'].min()), max_value=int(df['Annual Income (k$)'].max()), value=50)
    spending_score = st.slider("Spending Score (1-100)", min_value=int(df['Spending Score (1-100)'].min()), max_value=int(df['Spending Score (1-100)'].max()), value=50)
    gender = st.radio("Gender", ["Male", "Female"], index=0)
    # Age is one of the features the model was trained on, so it has to be
    # supplied for a prediction as well; it defaults to the dataset median.
    age = st.slider("Age", min_value=int(df['Age'].min()), max_value=int(df['Age'].max()), value=int(df['Age'].median()))

    # Encode with the encoder fitted on the training column rather than a
    # hand-written mapping, so the code always matches the training data.
    gender_encoded = int(le.transform([gender])[0])

    # Build the row by column name and order it like the training frame, so
    # each value is scaled with the statistics of its own feature.
    input_data = pd.DataFrame([{
        'Genre': gender_encoded,
        'Age': age,
        'Annual Income (k$)': annual_income,
        'Spending Score (1-100)': spending_score,
    }])[feature_columns]
    input_scaled = scaler.transform(input_data)
    cluster_prediction = kmeans.predict(input_scaled)[0]

    st.subheader("Predicted Customer Segment")
    st.markdown(f"<h1 style='color:blue;'>{cluster_prediction}</h1>", unsafe_allow_html=True)

    fig, ax = plt.subplots()
    # Draw on the explicit axes instead of relying on pyplot's current axes.
    sns.scatterplot(x=df['Annual Income (k$)'], y=df['Spending Score (1-100)'], hue=df['Cluster'], palette='viridis', alpha=0.6, ax=ax)
    ax.scatter(annual_income, spending_score, color='red', s=100, edgecolors='black', label='Input Customer')
    ax.set_xlabel('Annual Income (k$)')
    ax.set_ylabel('Spending Score')
    ax.legend()
    st.pyplot(fig)
    plt.close(fig)
    st.divider()
|
datasets/Mall_Customers.csv
ADDED
@@ -0,0 +1,201 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
CustomerID,Genre,Age,Annual Income (k$),Spending Score (1-100)
|
2 |
+
0001,Male,19,15,39
|
3 |
+
0002,Male,21,15,81
|
4 |
+
0003,Female,20,16,6
|
5 |
+
0004,Female,23,16,77
|
6 |
+
0005,Female,31,17,40
|
7 |
+
0006,Female,22,17,76
|
8 |
+
0007,Female,35,18,6
|
9 |
+
0008,Female,23,18,94
|
10 |
+
0009,Male,64,19,3
|
11 |
+
0010,Female,30,19,72
|
12 |
+
0011,Male,67,19,14
|
13 |
+
0012,Female,35,19,99
|
14 |
+
0013,Female,58,20,15
|
15 |
+
0014,Female,24,20,77
|
16 |
+
0015,Male,37,20,13
|
17 |
+
0016,Male,22,20,79
|
18 |
+
0017,Female,35,21,35
|
19 |
+
0018,Male,20,21,66
|
20 |
+
0019,Male,52,23,29
|
21 |
+
0020,Female,35,23,98
|
22 |
+
0021,Male,35,24,35
|
23 |
+
0022,Male,25,24,73
|
24 |
+
0023,Female,46,25,5
|
25 |
+
0024,Male,31,25,73
|
26 |
+
0025,Female,54,28,14
|
27 |
+
0026,Male,29,28,82
|
28 |
+
0027,Female,45,28,32
|
29 |
+
0028,Male,35,28,61
|
30 |
+
0029,Female,40,29,31
|
31 |
+
0030,Female,23,29,87
|
32 |
+
0031,Male,60,30,4
|
33 |
+
0032,Female,21,30,73
|
34 |
+
0033,Male,53,33,4
|
35 |
+
0034,Male,18,33,92
|
36 |
+
0035,Female,49,33,14
|
37 |
+
0036,Female,21,33,81
|
38 |
+
0037,Female,42,34,17
|
39 |
+
0038,Female,30,34,73
|
40 |
+
0039,Female,36,37,26
|
41 |
+
0040,Female,20,37,75
|
42 |
+
0041,Female,65,38,35
|
43 |
+
0042,Male,24,38,92
|
44 |
+
0043,Male,48,39,36
|
45 |
+
0044,Female,31,39,61
|
46 |
+
0045,Female,49,39,28
|
47 |
+
0046,Female,24,39,65
|
48 |
+
0047,Female,50,40,55
|
49 |
+
0048,Female,27,40,47
|
50 |
+
0049,Female,29,40,42
|
51 |
+
0050,Female,31,40,42
|
52 |
+
0051,Female,49,42,52
|
53 |
+
0052,Male,33,42,60
|
54 |
+
0053,Female,31,43,54
|
55 |
+
0054,Male,59,43,60
|
56 |
+
0055,Female,50,43,45
|
57 |
+
0056,Male,47,43,41
|
58 |
+
0057,Female,51,44,50
|
59 |
+
0058,Male,69,44,46
|
60 |
+
0059,Female,27,46,51
|
61 |
+
0060,Male,53,46,46
|
62 |
+
0061,Male,70,46,56
|
63 |
+
0062,Male,19,46,55
|
64 |
+
0063,Female,67,47,52
|
65 |
+
0064,Female,54,47,59
|
66 |
+
0065,Male,63,48,51
|
67 |
+
0066,Male,18,48,59
|
68 |
+
0067,Female,43,48,50
|
69 |
+
0068,Female,68,48,48
|
70 |
+
0069,Male,19,48,59
|
71 |
+
0070,Female,32,48,47
|
72 |
+
0071,Male,70,49,55
|
73 |
+
0072,Female,47,49,42
|
74 |
+
0073,Female,60,50,49
|
75 |
+
0074,Female,60,50,56
|
76 |
+
0075,Male,59,54,47
|
77 |
+
0076,Male,26,54,54
|
78 |
+
0077,Female,45,54,53
|
79 |
+
0078,Male,40,54,48
|
80 |
+
0079,Female,23,54,52
|
81 |
+
0080,Female,49,54,42
|
82 |
+
0081,Male,57,54,51
|
83 |
+
0082,Male,38,54,55
|
84 |
+
0083,Male,67,54,41
|
85 |
+
0084,Female,46,54,44
|
86 |
+
0085,Female,21,54,57
|
87 |
+
0086,Male,48,54,46
|
88 |
+
0087,Female,55,57,58
|
89 |
+
0088,Female,22,57,55
|
90 |
+
0089,Female,34,58,60
|
91 |
+
0090,Female,50,58,46
|
92 |
+
0091,Female,68,59,55
|
93 |
+
0092,Male,18,59,41
|
94 |
+
0093,Male,48,60,49
|
95 |
+
0094,Female,40,60,40
|
96 |
+
0095,Female,32,60,42
|
97 |
+
0096,Male,24,60,52
|
98 |
+
0097,Female,47,60,47
|
99 |
+
0098,Female,27,60,50
|
100 |
+
0099,Male,48,61,42
|
101 |
+
0100,Male,20,61,49
|
102 |
+
0101,Female,23,62,41
|
103 |
+
0102,Female,49,62,48
|
104 |
+
0103,Male,67,62,59
|
105 |
+
0104,Male,26,62,55
|
106 |
+
0105,Male,49,62,56
|
107 |
+
0106,Female,21,62,42
|
108 |
+
0107,Female,66,63,50
|
109 |
+
0108,Male,54,63,46
|
110 |
+
0109,Male,68,63,43
|
111 |
+
0110,Male,66,63,48
|
112 |
+
0111,Male,65,63,52
|
113 |
+
0112,Female,19,63,54
|
114 |
+
0113,Female,38,64,42
|
115 |
+
0114,Male,19,64,46
|
116 |
+
0115,Female,18,65,48
|
117 |
+
0116,Female,19,65,50
|
118 |
+
0117,Female,63,65,43
|
119 |
+
0118,Female,49,65,59
|
120 |
+
0119,Female,51,67,43
|
121 |
+
0120,Female,50,67,57
|
122 |
+
0121,Male,27,67,56
|
123 |
+
0122,Female,38,67,40
|
124 |
+
0123,Female,40,69,58
|
125 |
+
0124,Male,39,69,91
|
126 |
+
0125,Female,23,70,29
|
127 |
+
0126,Female,31,70,77
|
128 |
+
0127,Male,43,71,35
|
129 |
+
0128,Male,40,71,95
|
130 |
+
0129,Male,59,71,11
|
131 |
+
0130,Male,38,71,75
|
132 |
+
0131,Male,47,71,9
|
133 |
+
0132,Male,39,71,75
|
134 |
+
0133,Female,25,72,34
|
135 |
+
0134,Female,31,72,71
|
136 |
+
0135,Male,20,73,5
|
137 |
+
0136,Female,29,73,88
|
138 |
+
0137,Female,44,73,7
|
139 |
+
0138,Male,32,73,73
|
140 |
+
0139,Male,19,74,10
|
141 |
+
0140,Female,35,74,72
|
142 |
+
0141,Female,57,75,5
|
143 |
+
0142,Male,32,75,93
|
144 |
+
0143,Female,28,76,40
|
145 |
+
0144,Female,32,76,87
|
146 |
+
0145,Male,25,77,12
|
147 |
+
0146,Male,28,77,97
|
148 |
+
0147,Male,48,77,36
|
149 |
+
0148,Female,32,77,74
|
150 |
+
0149,Female,34,78,22
|
151 |
+
0150,Male,34,78,90
|
152 |
+
0151,Male,43,78,17
|
153 |
+
0152,Male,39,78,88
|
154 |
+
0153,Female,44,78,20
|
155 |
+
0154,Female,38,78,76
|
156 |
+
0155,Female,47,78,16
|
157 |
+
0156,Female,27,78,89
|
158 |
+
0157,Male,37,78,1
|
159 |
+
0158,Female,30,78,78
|
160 |
+
0159,Male,34,78,1
|
161 |
+
0160,Female,30,78,73
|
162 |
+
0161,Female,56,79,35
|
163 |
+
0162,Female,29,79,83
|
164 |
+
0163,Male,19,81,5
|
165 |
+
0164,Female,31,81,93
|
166 |
+
0165,Male,50,85,26
|
167 |
+
0166,Female,36,85,75
|
168 |
+
0167,Male,42,86,20
|
169 |
+
0168,Female,33,86,95
|
170 |
+
0169,Female,36,87,27
|
171 |
+
0170,Male,32,87,63
|
172 |
+
0171,Male,40,87,13
|
173 |
+
0172,Male,28,87,75
|
174 |
+
0173,Male,36,87,10
|
175 |
+
0174,Male,36,87,92
|
176 |
+
0175,Female,52,88,13
|
177 |
+
0176,Female,30,88,86
|
178 |
+
0177,Male,58,88,15
|
179 |
+
0178,Male,27,88,69
|
180 |
+
0179,Male,59,93,14
|
181 |
+
0180,Male,35,93,90
|
182 |
+
0181,Female,37,97,32
|
183 |
+
0182,Female,32,97,86
|
184 |
+
0183,Male,46,98,15
|
185 |
+
0184,Female,29,98,88
|
186 |
+
0185,Female,41,99,39
|
187 |
+
0186,Male,30,99,97
|
188 |
+
0187,Female,54,101,24
|
189 |
+
0188,Male,28,101,68
|
190 |
+
0189,Female,41,103,17
|
191 |
+
0190,Female,36,103,85
|
192 |
+
0191,Female,34,103,23
|
193 |
+
0192,Female,32,103,69
|
194 |
+
0193,Male,33,113,8
|
195 |
+
0194,Female,38,113,91
|
196 |
+
0195,Female,47,120,16
|
197 |
+
0196,Female,35,120,79
|
198 |
+
0197,Female,45,126,28
|
199 |
+
0198,Male,32,126,74
|
200 |
+
0199,Male,32,137,18
|
201 |
+
0200,Male,30,137,83
|
requirements.txt
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
matplotlib==3.8.0
|
2 |
+
numpy
|
3 |
+
pandas==2.2.3
|
4 |
+
scikit_learn==1.2.2
|
5 |
+
seaborn==0.13.2
|
6 |
+
streamlit==1.30.0
|