initial commit
Browse files- Mall_Customers.csv +201 -0
- app.py +116 -0
- requirements.txt +5 -0
Mall_Customers.csv
ADDED
@@ -0,0 +1,201 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
CustomerID,Gender,Age,Annual Income (k$),Spending Score (1-100)
|
2 |
+
1,Male,19,15,39
|
3 |
+
2,Male,21,15,81
|
4 |
+
3,Female,20,16,6
|
5 |
+
4,Female,23,16,77
|
6 |
+
5,Female,31,17,40
|
7 |
+
6,Female,22,17,76
|
8 |
+
7,Female,35,18,6
|
9 |
+
8,Female,23,18,94
|
10 |
+
9,Male,64,19,3
|
11 |
+
10,Female,30,19,72
|
12 |
+
11,Male,67,19,14
|
13 |
+
12,Female,35,19,99
|
14 |
+
13,Female,58,20,15
|
15 |
+
14,Female,24,20,77
|
16 |
+
15,Male,37,20,13
|
17 |
+
16,Male,22,20,79
|
18 |
+
17,Female,35,21,35
|
19 |
+
18,Male,20,21,66
|
20 |
+
19,Male,52,23,29
|
21 |
+
20,Female,35,23,98
|
22 |
+
21,Male,35,24,35
|
23 |
+
22,Male,25,24,73
|
24 |
+
23,Female,46,25,5
|
25 |
+
24,Male,31,25,73
|
26 |
+
25,Female,54,28,14
|
27 |
+
26,Male,29,28,82
|
28 |
+
27,Female,45,28,32
|
29 |
+
28,Male,35,28,61
|
30 |
+
29,Female,40,29,31
|
31 |
+
30,Female,23,29,87
|
32 |
+
31,Male,60,30,4
|
33 |
+
32,Female,21,30,73
|
34 |
+
33,Male,53,33,4
|
35 |
+
34,Male,18,33,92
|
36 |
+
35,Female,49,33,14
|
37 |
+
36,Female,21,33,81
|
38 |
+
37,Female,42,34,17
|
39 |
+
38,Female,30,34,73
|
40 |
+
39,Female,36,37,26
|
41 |
+
40,Female,20,37,75
|
42 |
+
41,Female,65,38,35
|
43 |
+
42,Male,24,38,92
|
44 |
+
43,Male,48,39,36
|
45 |
+
44,Female,31,39,61
|
46 |
+
45,Female,49,39,28
|
47 |
+
46,Female,24,39,65
|
48 |
+
47,Female,50,40,55
|
49 |
+
48,Female,27,40,47
|
50 |
+
49,Female,29,40,42
|
51 |
+
50,Female,31,40,42
|
52 |
+
51,Female,49,42,52
|
53 |
+
52,Male,33,42,60
|
54 |
+
53,Female,31,43,54
|
55 |
+
54,Male,59,43,60
|
56 |
+
55,Female,50,43,45
|
57 |
+
56,Male,47,43,41
|
58 |
+
57,Female,51,44,50
|
59 |
+
58,Male,69,44,46
|
60 |
+
59,Female,27,46,51
|
61 |
+
60,Male,53,46,46
|
62 |
+
61,Male,70,46,56
|
63 |
+
62,Male,19,46,55
|
64 |
+
63,Female,67,47,52
|
65 |
+
64,Female,54,47,59
|
66 |
+
65,Male,63,48,51
|
67 |
+
66,Male,18,48,59
|
68 |
+
67,Female,43,48,50
|
69 |
+
68,Female,68,48,48
|
70 |
+
69,Male,19,48,59
|
71 |
+
70,Female,32,48,47
|
72 |
+
71,Male,70,49,55
|
73 |
+
72,Female,47,49,42
|
74 |
+
73,Female,60,50,49
|
75 |
+
74,Female,60,50,56
|
76 |
+
75,Male,59,54,47
|
77 |
+
76,Male,26,54,54
|
78 |
+
77,Female,45,54,53
|
79 |
+
78,Male,40,54,48
|
80 |
+
79,Female,23,54,52
|
81 |
+
80,Female,49,54,42
|
82 |
+
81,Male,57,54,51
|
83 |
+
82,Male,38,54,55
|
84 |
+
83,Male,67,54,41
|
85 |
+
84,Female,46,54,44
|
86 |
+
85,Female,21,54,57
|
87 |
+
86,Male,48,54,46
|
88 |
+
87,Female,55,57,58
|
89 |
+
88,Female,22,57,55
|
90 |
+
89,Female,34,58,60
|
91 |
+
90,Female,50,58,46
|
92 |
+
91,Female,68,59,55
|
93 |
+
92,Male,18,59,41
|
94 |
+
93,Male,48,60,49
|
95 |
+
94,Female,40,60,40
|
96 |
+
95,Female,32,60,42
|
97 |
+
96,Male,24,60,52
|
98 |
+
97,Female,47,60,47
|
99 |
+
98,Female,27,60,50
|
100 |
+
99,Male,48,61,42
|
101 |
+
100,Male,20,61,49
|
102 |
+
101,Female,23,62,41
|
103 |
+
102,Female,49,62,48
|
104 |
+
103,Male,67,62,59
|
105 |
+
104,Male,26,62,55
|
106 |
+
105,Male,49,62,56
|
107 |
+
106,Female,21,62,42
|
108 |
+
107,Female,66,63,50
|
109 |
+
108,Male,54,63,46
|
110 |
+
109,Male,68,63,43
|
111 |
+
110,Male,66,63,48
|
112 |
+
111,Male,65,63,52
|
113 |
+
112,Female,19,63,54
|
114 |
+
113,Female,38,64,42
|
115 |
+
114,Male,19,64,46
|
116 |
+
115,Female,18,65,48
|
117 |
+
116,Female,19,65,50
|
118 |
+
117,Female,63,65,43
|
119 |
+
118,Female,49,65,59
|
120 |
+
119,Female,51,67,43
|
121 |
+
120,Female,50,67,57
|
122 |
+
121,Male,27,67,56
|
123 |
+
122,Female,38,67,40
|
124 |
+
123,Female,40,69,58
|
125 |
+
124,Male,39,69,91
|
126 |
+
125,Female,23,70,29
|
127 |
+
126,Female,31,70,77
|
128 |
+
127,Male,43,71,35
|
129 |
+
128,Male,40,71,95
|
130 |
+
129,Male,59,71,11
|
131 |
+
130,Male,38,71,75
|
132 |
+
131,Male,47,71,9
|
133 |
+
132,Male,39,71,75
|
134 |
+
133,Female,25,72,34
|
135 |
+
134,Female,31,72,71
|
136 |
+
135,Male,20,73,5
|
137 |
+
136,Female,29,73,88
|
138 |
+
137,Female,44,73,7
|
139 |
+
138,Male,32,73,73
|
140 |
+
139,Male,19,74,10
|
141 |
+
140,Female,35,74,72
|
142 |
+
141,Female,57,75,5
|
143 |
+
142,Male,32,75,93
|
144 |
+
143,Female,28,76,40
|
145 |
+
144,Female,32,76,87
|
146 |
+
145,Male,25,77,12
|
147 |
+
146,Male,28,77,97
|
148 |
+
147,Male,48,77,36
|
149 |
+
148,Female,32,77,74
|
150 |
+
149,Female,34,78,22
|
151 |
+
150,Male,34,78,90
|
152 |
+
151,Male,43,78,17
|
153 |
+
152,Male,39,78,88
|
154 |
+
153,Female,44,78,20
|
155 |
+
154,Female,38,78,76
|
156 |
+
155,Female,47,78,16
|
157 |
+
156,Female,27,78,89
|
158 |
+
157,Male,37,78,1
|
159 |
+
158,Female,30,78,78
|
160 |
+
159,Male,34,78,1
|
161 |
+
160,Female,30,78,73
|
162 |
+
161,Female,56,79,35
|
163 |
+
162,Female,29,79,83
|
164 |
+
163,Male,19,81,5
|
165 |
+
164,Female,31,81,93
|
166 |
+
165,Male,50,85,26
|
167 |
+
166,Female,36,85,75
|
168 |
+
167,Male,42,86,20
|
169 |
+
168,Female,33,86,95
|
170 |
+
169,Female,36,87,27
|
171 |
+
170,Male,32,87,63
|
172 |
+
171,Male,40,87,13
|
173 |
+
172,Male,28,87,75
|
174 |
+
173,Male,36,87,10
|
175 |
+
174,Male,36,87,92
|
176 |
+
175,Female,52,88,13
|
177 |
+
176,Female,30,88,86
|
178 |
+
177,Male,58,88,15
|
179 |
+
178,Male,27,88,69
|
180 |
+
179,Male,59,93,14
|
181 |
+
180,Male,35,93,90
|
182 |
+
181,Female,37,97,32
|
183 |
+
182,Female,32,97,86
|
184 |
+
183,Male,46,98,15
|
185 |
+
184,Female,29,98,88
|
186 |
+
185,Female,41,99,39
|
187 |
+
186,Male,30,99,97
|
188 |
+
187,Female,54,101,24
|
189 |
+
188,Male,28,101,68
|
190 |
+
189,Female,41,103,17
|
191 |
+
190,Female,36,103,85
|
192 |
+
191,Female,34,103,23
|
193 |
+
192,Female,32,103,69
|
194 |
+
193,Male,33,113,8
|
195 |
+
194,Female,38,113,91
|
196 |
+
195,Female,47,120,16
|
197 |
+
196,Female,35,120,79
|
198 |
+
197,Female,45,126,28
|
199 |
+
198,Male,32,126,74
|
200 |
+
199,Male,32,137,18
|
201 |
+
200,Male,30,137,83
|
app.py
ADDED
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import matplotlib.pyplot as plt
|
4 |
+
import seaborn as sns
|
5 |
+
from sklearn.cluster import KMeans
|
6 |
+
from sklearn.preprocessing import StandardScaler
|
7 |
+
|
8 |
+
# Load dataset
|
9 |
+
@st.cache_data
def load_data():
    """Read the mall-customer CSV into a pandas DataFrame.

    The path is relative, so it is resolved against the process's current
    working directory. The function is wrapped in ``st.cache_data``.

    Returns:
        The DataFrame produced by ``pd.read_csv`` for ``Mall_Customers.csv``.
    """
    return pd.read_csv("Mall_Customers.csv")
|
14 |
+
|
15 |
+
# Customer table shared by every page below.
df = load_data()

# Sidebar navigation: the selected label decides which page is rendered.
st.sidebar.title("K-Means Clustering App")
section = st.sidebar.radio(
    "Go to",
    ["Introduction", "Data Exploration", "K-Means Model", "Test Model"],
)
|
20 |
+
|
21 |
+
# Render the page selected in the sidebar.
#
# Every plot is built on an explicit Matplotlib figure that is handed to
# st.pyplot() and then closed: calling st.pyplot() with no argument relies on
# the global pyplot figure (deprecated by Streamlit), and figures that are
# never closed accumulate each time the page is rendered.
if section == "Introduction":
    st.title("Introduction to K-Means Clustering")

    st.write("### About the Model")
    st.write("K-Means Clustering is an unsupervised machine learning algorithm used for customer segmentation. It helps identify different groups of customers based on their spending behavior and income.")

    st.write("### About the Dataset")
    st.write("The dataset consists of customer information, including:")
    st.markdown("- **CustomerID**: Unique identifier for each customer.")
    st.markdown("- **Gender**: Male or Female.")
    st.markdown("- **Age**: Age of the customer.")
    st.markdown("- **Annual Income (k$)**: Customer's yearly income.")
    st.markdown("- **Spending Score (1-100)**: A score assigned based on spending behavior.")

    st.write("### How to Use the App")
    st.markdown("1. **Go to 'Data Exploration'**: Understand the dataset using statistics and visualizations.")
    st.markdown("2. **Go to 'K-Means Model'**: Train the model and visualize clusters.")
    st.markdown("3. **Go to 'Test Model'**: Input values to predict customer cluster.")

    st.write("### Insights")
    st.markdown("- Customers can be grouped into different segments based on their income and spending habits.")
    st.markdown("- The Elbow Method helps determine the optimal number of clusters.")
    st.markdown("- Businesses can use these insights to tailor marketing strategies and improve customer engagement.")

elif section == "Data Exploration":
    st.title("Data Exploration")
    st.write("### First 5 rows of dataset")
    st.dataframe(df.head())

    st.write("### Summary Statistics")
    st.write(df.describe())

    # Both plots use the same frame with the ID and Gender columns removed.
    plot_df = df.drop(columns=["CustomerID", "Gender"])

    st.write("### Pairplot")
    pair_grid = sns.pairplot(plot_df, diag_kind="kde")
    st.pyplot(pair_grid.figure)
    plt.close(pair_grid.figure)

    st.write("### Correlation Heatmap")
    heatmap_fig, heatmap_ax = plt.subplots(figsize=(8, 6))
    sns.heatmap(plot_df.corr(), annot=True, cmap="coolwarm", ax=heatmap_ax)
    st.pyplot(heatmap_fig)
    plt.close(heatmap_fig)

elif section == "K-Means Model":
    st.title("K-Means Clustering")

    # Selecting features for clustering
    features = df[["Annual Income (k$)", "Spending Score (1-100)"]]
    scaler = StandardScaler()
    scaled_features = scaler.fit_transform(features)

    # Finding the optimal number of clusters using Elbow Method
    st.write("### Elbow Method")
    cluster_counts = range(1, 11)
    inertia = [
        KMeans(n_clusters=n, random_state=42, n_init=10)
        .fit(scaled_features)
        .inertia_
        for n in cluster_counts
    ]

    elbow_fig, elbow_ax = plt.subplots(figsize=(8, 5))
    elbow_ax.plot(cluster_counts, inertia, marker='o')
    elbow_ax.set_xlabel('Number of Clusters')
    elbow_ax.set_ylabel('Inertia')
    elbow_ax.set_title('Elbow Method for Optimal k')
    st.pyplot(elbow_fig)
    plt.close(elbow_fig)

    # Train K-Means Model
    k = st.slider("Select Number of Clusters", 2, 10, 5)
    kmeans = KMeans(n_clusters=k, random_state=42, n_init=10)
    df['Cluster'] = kmeans.fit_predict(scaled_features)

    st.write("### Clustered Data")
    st.dataframe(df)

    # Visualization of clusters
    cluster_fig, cluster_ax = plt.subplots(figsize=(8, 6))
    sns.scatterplot(
        x=df["Annual Income (k$)"],
        y=df["Spending Score (1-100)"],
        hue=df['Cluster'],
        palette='viridis',
        ax=cluster_ax,
    )
    cluster_ax.set_xlabel("Annual Income (k$)")
    cluster_ax.set_ylabel("Spending Score (1-100)")
    cluster_ax.set_title("Customer Segmentation using K-Means")
    st.pyplot(cluster_fig)
    plt.close(cluster_fig)

    # Keep the fitted scaler and model in session state so the "Test Model"
    # page can reuse them on a later rerun.
    st.session_state['scaler'] = scaler
    st.session_state['kmeans'] = kmeans

elif section == "Test Model":
    st.title("Test K-Means Model")

    income = st.number_input("Enter Annual Income (k$)", min_value=0, max_value=200, value=50)
    score = st.number_input("Enter Spending Score (1-100)", min_value=1, max_value=100, value=50)

    if 'scaler' in st.session_state and 'kmeans' in st.session_state:
        # The scaler was fitted on a DataFrame, so give it the same column
        # names; a bare nested list makes scikit-learn warn that the input
        # has no valid feature names.
        input_frame = pd.DataFrame(
            [[income, score]],
            columns=["Annual Income (k$)", "Spending Score (1-100)"],
        )
        input_data = st.session_state['scaler'].transform(input_frame)
        prediction = st.session_state['kmeans'].predict(input_data)
        st.write(f"### Predicted Cluster: {prediction[0]}")
    else:
        st.write("### Please run the K-Means Model section first.")
|
requirements.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
streamlit
|
2 |
+
pandas
|
3 |
+
matplotlib
|
4 |
+
seaborn
|
5 |
+
scikit-learn
|