allantacuelwvsu committed on
Commit
322abb6
·
1 Parent(s): 0409b2d

uploading initial files

Browse files
Files changed (4) hide show
  1. app.ipynb +0 -0
  2. app.py +85 -0
  3. datasets/Mall_Customers.csv +201 -0
  4. requirements.txt +6 -0
app.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
app.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import matplotlib.pyplot as plt
5
+ import seaborn as sns
6
+ from sklearn.cluster import KMeans
7
+ from sklearn.preprocessing import StandardScaler, LabelEncoder
8
+ from sklearn.metrics import silhouette_score
9
+
# Load the Mall Customers dataset. CustomerID is a row identifier, not a
# feature, so it is dropped before any modelling.
df = pd.read_csv("datasets/Mall_Customers.csv")
df = df.drop(columns=['CustomerID'])

# Encode gender (stored in the 'Genre' column) as integers.
# LabelEncoder assigns codes in sorted class order, so with the values in this
# dataset Female -> 0 and Male -> 1. Keep `le` around so user input can be
# encoded with the exact same mapping later on.
le = LabelEncoder()
df['Genre'] = le.fit_transform(df['Genre'])

# Standardize every remaining column. The scaler is fitted on the DataFrame,
# so it expects inputs in this column order:
# Genre, Age, Annual Income (k$), Spending Score (1-100).
scaler = StandardScaler()
df_scaled = scaler.fit_transform(df)

# Fit the final K-Means model.
# n_init is set explicitly: relying on the default emits a FutureWarning on
# scikit-learn 1.2 and would silently change the clustering once the default
# switches to 'auto' in 1.4.
k_optimal = 5
kmeans = KMeans(n_clusters=k_optimal, init='k-means++', n_init=10, random_state=42)
kmeans.fit(df_scaled)
df['Cluster'] = kmeans.labels_

# Silhouette score of the fitted clustering, measured in the scaled space.
sil_score = silhouette_score(df_scaled, kmeans.labels_)
29
+
# Streamlit App
st.title("Customer Segmentation using K-Means")
st.caption("Dataset: Mall Customers")

tab1, tab2, tab3 = st.tabs(["Model Performance", "Dataset", "Customer Predictor"])


@st.cache_data
def _elbow_wcss(data, max_k):
    """Return the WCSS (inertia) of K-Means fits for K = 1..max_k.

    Cached because Streamlit re-executes the whole script on every widget
    interaction; without caching, all the models would be refitted each time
    a slider moves.
    """
    return [
        KMeans(n_clusters=k, init='k-means++', n_init=10, random_state=42)
        .fit(data)
        .inertia_
        for k in range(1, max_k + 1)
    ]


with tab1:
    st.header("Model Performance")
    st.write(f"**Silhouette Score:** {sil_score:.4f}")

    # Elbow method: within-cluster sum of squares for K = 1..10.
    k_values = range(1, 11)
    wcss = _elbow_wcss(df_scaled, max(k_values))

    fig, ax = plt.subplots()
    ax.plot(k_values, wcss, marker='o', linestyle='--')
    ax.set_xlabel('Number of Clusters (K)')
    ax.set_ylabel('WCSS')
    ax.set_title('Elbow Method for Optimal K')
    st.pyplot(fig)
    # pyplot keeps a global reference to every figure it creates; close it so
    # figures do not accumulate across script reruns.
    plt.close(fig)
53
+
with tab2:
    st.header("Dataset")
    st.dataframe(df.head())

    # Correlate only the actual features. 'Cluster' holds arbitrary nominal
    # labels assigned by K-Means, so a Pearson correlation against it is not
    # meaningful. errors='ignore' keeps this working if the column is absent.
    feature_corr = df.drop(columns=['Cluster'], errors='ignore').corr()

    fig, ax = plt.subplots(figsize=(8, 6))
    sns.heatmap(feature_corr, annot=True, fmt=".2f", cmap="coolwarm", linewidths=0.5, ax=ax)
    ax.set_title("Correlation Matrix")
    st.pyplot(fig)
    # Release the figure; pyplot would otherwise keep it alive across reruns.
    plt.close(fig)
62
+
with tab3:
    st.header("Customer Segment Prediction")

    # The model was trained on Genre, Age, Annual Income and Spending Score,
    # so all four values are needed to place a customer in a cluster.
    age = st.slider(
        "Age",
        min_value=int(df['Age'].min()),
        max_value=int(df['Age'].max()),
        value=int(df['Age'].median()),
    )
    annual_income = st.slider(
        "Annual Income (k$)",
        min_value=int(df['Annual Income (k$)'].min()),
        max_value=int(df['Annual Income (k$)'].max()),
        value=50,
    )
    spending_score = st.slider(
        "Spending Score (1-100)",
        min_value=int(df['Spending Score (1-100)'].min()),
        max_value=int(df['Spending Score (1-100)'].max()),
        value=50,
    )
    gender = st.radio("Gender", ["Male", "Female"], index=0)
    # Reuse the fitted encoder so the code matches the training data
    # (LabelEncoder sorts classes: Female -> 0, Male -> 1).
    gender_encoded = int(le.transform([gender])[0])

    # Build the input row by column name and then order it like the training
    # frame, so each value reaches the scaler in the slot it was fitted on.
    feature_columns = [col for col in df.columns if col != 'Cluster']
    input_values = {
        'Genre': gender_encoded,
        'Age': age,
        'Annual Income (k$)': annual_income,
        'Spending Score (1-100)': spending_score,
    }
    input_data = pd.DataFrame([input_values])[feature_columns]
    input_scaled = scaler.transform(input_data)
    cluster_prediction = kmeans.predict(input_scaled)[0]

    st.subheader("Predicted Customer Segment")
    st.markdown(f"<h1 style='color:blue;'>{cluster_prediction}</h1>", unsafe_allow_html=True)

    # Show the input customer against the clustered dataset in the
    # income/spending plane.
    fig, ax = plt.subplots()
    sns.scatterplot(
        x=df['Annual Income (k$)'],
        y=df['Spending Score (1-100)'],
        hue=df['Cluster'],
        palette='viridis',
        alpha=0.6,
        ax=ax,  # draw on this figure explicitly instead of the implicit current axes
    )
    ax.scatter(annual_income, spending_score, color='red', s=100, edgecolors='black', label='Input Customer')
    ax.set_xlabel('Annual Income (k$)')
    ax.set_ylabel('Spending Score')
    ax.legend()
    st.pyplot(fig)
    # Release the figure; pyplot would otherwise keep it alive across reruns.
    plt.close(fig)
    st.divider()
datasets/Mall_Customers.csv ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ CustomerID,Genre,Age,Annual Income (k$),Spending Score (1-100)
2
+ 0001,Male,19,15,39
3
+ 0002,Male,21,15,81
4
+ 0003,Female,20,16,6
5
+ 0004,Female,23,16,77
6
+ 0005,Female,31,17,40
7
+ 0006,Female,22,17,76
8
+ 0007,Female,35,18,6
9
+ 0008,Female,23,18,94
10
+ 0009,Male,64,19,3
11
+ 0010,Female,30,19,72
12
+ 0011,Male,67,19,14
13
+ 0012,Female,35,19,99
14
+ 0013,Female,58,20,15
15
+ 0014,Female,24,20,77
16
+ 0015,Male,37,20,13
17
+ 0016,Male,22,20,79
18
+ 0017,Female,35,21,35
19
+ 0018,Male,20,21,66
20
+ 0019,Male,52,23,29
21
+ 0020,Female,35,23,98
22
+ 0021,Male,35,24,35
23
+ 0022,Male,25,24,73
24
+ 0023,Female,46,25,5
25
+ 0024,Male,31,25,73
26
+ 0025,Female,54,28,14
27
+ 0026,Male,29,28,82
28
+ 0027,Female,45,28,32
29
+ 0028,Male,35,28,61
30
+ 0029,Female,40,29,31
31
+ 0030,Female,23,29,87
32
+ 0031,Male,60,30,4
33
+ 0032,Female,21,30,73
34
+ 0033,Male,53,33,4
35
+ 0034,Male,18,33,92
36
+ 0035,Female,49,33,14
37
+ 0036,Female,21,33,81
38
+ 0037,Female,42,34,17
39
+ 0038,Female,30,34,73
40
+ 0039,Female,36,37,26
41
+ 0040,Female,20,37,75
42
+ 0041,Female,65,38,35
43
+ 0042,Male,24,38,92
44
+ 0043,Male,48,39,36
45
+ 0044,Female,31,39,61
46
+ 0045,Female,49,39,28
47
+ 0046,Female,24,39,65
48
+ 0047,Female,50,40,55
49
+ 0048,Female,27,40,47
50
+ 0049,Female,29,40,42
51
+ 0050,Female,31,40,42
52
+ 0051,Female,49,42,52
53
+ 0052,Male,33,42,60
54
+ 0053,Female,31,43,54
55
+ 0054,Male,59,43,60
56
+ 0055,Female,50,43,45
57
+ 0056,Male,47,43,41
58
+ 0057,Female,51,44,50
59
+ 0058,Male,69,44,46
60
+ 0059,Female,27,46,51
61
+ 0060,Male,53,46,46
62
+ 0061,Male,70,46,56
63
+ 0062,Male,19,46,55
64
+ 0063,Female,67,47,52
65
+ 0064,Female,54,47,59
66
+ 0065,Male,63,48,51
67
+ 0066,Male,18,48,59
68
+ 0067,Female,43,48,50
69
+ 0068,Female,68,48,48
70
+ 0069,Male,19,48,59
71
+ 0070,Female,32,48,47
72
+ 0071,Male,70,49,55
73
+ 0072,Female,47,49,42
74
+ 0073,Female,60,50,49
75
+ 0074,Female,60,50,56
76
+ 0075,Male,59,54,47
77
+ 0076,Male,26,54,54
78
+ 0077,Female,45,54,53
79
+ 0078,Male,40,54,48
80
+ 0079,Female,23,54,52
81
+ 0080,Female,49,54,42
82
+ 0081,Male,57,54,51
83
+ 0082,Male,38,54,55
84
+ 0083,Male,67,54,41
85
+ 0084,Female,46,54,44
86
+ 0085,Female,21,54,57
87
+ 0086,Male,48,54,46
88
+ 0087,Female,55,57,58
89
+ 0088,Female,22,57,55
90
+ 0089,Female,34,58,60
91
+ 0090,Female,50,58,46
92
+ 0091,Female,68,59,55
93
+ 0092,Male,18,59,41
94
+ 0093,Male,48,60,49
95
+ 0094,Female,40,60,40
96
+ 0095,Female,32,60,42
97
+ 0096,Male,24,60,52
98
+ 0097,Female,47,60,47
99
+ 0098,Female,27,60,50
100
+ 0099,Male,48,61,42
101
+ 0100,Male,20,61,49
102
+ 0101,Female,23,62,41
103
+ 0102,Female,49,62,48
104
+ 0103,Male,67,62,59
105
+ 0104,Male,26,62,55
106
+ 0105,Male,49,62,56
107
+ 0106,Female,21,62,42
108
+ 0107,Female,66,63,50
109
+ 0108,Male,54,63,46
110
+ 0109,Male,68,63,43
111
+ 0110,Male,66,63,48
112
+ 0111,Male,65,63,52
113
+ 0112,Female,19,63,54
114
+ 0113,Female,38,64,42
115
+ 0114,Male,19,64,46
116
+ 0115,Female,18,65,48
117
+ 0116,Female,19,65,50
118
+ 0117,Female,63,65,43
119
+ 0118,Female,49,65,59
120
+ 0119,Female,51,67,43
121
+ 0120,Female,50,67,57
122
+ 0121,Male,27,67,56
123
+ 0122,Female,38,67,40
124
+ 0123,Female,40,69,58
125
+ 0124,Male,39,69,91
126
+ 0125,Female,23,70,29
127
+ 0126,Female,31,70,77
128
+ 0127,Male,43,71,35
129
+ 0128,Male,40,71,95
130
+ 0129,Male,59,71,11
131
+ 0130,Male,38,71,75
132
+ 0131,Male,47,71,9
133
+ 0132,Male,39,71,75
134
+ 0133,Female,25,72,34
135
+ 0134,Female,31,72,71
136
+ 0135,Male,20,73,5
137
+ 0136,Female,29,73,88
138
+ 0137,Female,44,73,7
139
+ 0138,Male,32,73,73
140
+ 0139,Male,19,74,10
141
+ 0140,Female,35,74,72
142
+ 0141,Female,57,75,5
143
+ 0142,Male,32,75,93
144
+ 0143,Female,28,76,40
145
+ 0144,Female,32,76,87
146
+ 0145,Male,25,77,12
147
+ 0146,Male,28,77,97
148
+ 0147,Male,48,77,36
149
+ 0148,Female,32,77,74
150
+ 0149,Female,34,78,22
151
+ 0150,Male,34,78,90
152
+ 0151,Male,43,78,17
153
+ 0152,Male,39,78,88
154
+ 0153,Female,44,78,20
155
+ 0154,Female,38,78,76
156
+ 0155,Female,47,78,16
157
+ 0156,Female,27,78,89
158
+ 0157,Male,37,78,1
159
+ 0158,Female,30,78,78
160
+ 0159,Male,34,78,1
161
+ 0160,Female,30,78,73
162
+ 0161,Female,56,79,35
163
+ 0162,Female,29,79,83
164
+ 0163,Male,19,81,5
165
+ 0164,Female,31,81,93
166
+ 0165,Male,50,85,26
167
+ 0166,Female,36,85,75
168
+ 0167,Male,42,86,20
169
+ 0168,Female,33,86,95
170
+ 0169,Female,36,87,27
171
+ 0170,Male,32,87,63
172
+ 0171,Male,40,87,13
173
+ 0172,Male,28,87,75
174
+ 0173,Male,36,87,10
175
+ 0174,Male,36,87,92
176
+ 0175,Female,52,88,13
177
+ 0176,Female,30,88,86
178
+ 0177,Male,58,88,15
179
+ 0178,Male,27,88,69
180
+ 0179,Male,59,93,14
181
+ 0180,Male,35,93,90
182
+ 0181,Female,37,97,32
183
+ 0182,Female,32,97,86
184
+ 0183,Male,46,98,15
185
+ 0184,Female,29,98,88
186
+ 0185,Female,41,99,39
187
+ 0186,Male,30,99,97
188
+ 0187,Female,54,101,24
189
+ 0188,Male,28,101,68
190
+ 0189,Female,41,103,17
191
+ 0190,Female,36,103,85
192
+ 0191,Female,34,103,23
193
+ 0192,Female,32,103,69
194
+ 0193,Male,33,113,8
195
+ 0194,Female,38,113,91
196
+ 0195,Female,47,120,16
197
+ 0196,Female,35,120,79
198
+ 0197,Female,45,126,28
199
+ 0198,Male,32,126,74
200
+ 0199,Male,32,137,18
201
+ 0200,Male,30,137,83
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ matplotlib==3.8.0
2
+ numpy
3
+ pandas==2.2.3
4
+ scikit_learn==1.2.2
5
+ seaborn==0.13.2
6
+ streamlit==1.30.0