mrciomnl committed on
Commit
a487e3e
·
1 Parent(s): c087424

initial commit

Browse files
Files changed (3) hide show
  1. Mall_Customers.csv +201 -0
  2. app.py +116 -0
  3. requirements.txt +5 -0
Mall_Customers.csv ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ CustomerID,Gender,Age,Annual Income (k$),Spending Score (1-100)
2
+ 1,Male,19,15,39
3
+ 2,Male,21,15,81
4
+ 3,Female,20,16,6
5
+ 4,Female,23,16,77
6
+ 5,Female,31,17,40
7
+ 6,Female,22,17,76
8
+ 7,Female,35,18,6
9
+ 8,Female,23,18,94
10
+ 9,Male,64,19,3
11
+ 10,Female,30,19,72
12
+ 11,Male,67,19,14
13
+ 12,Female,35,19,99
14
+ 13,Female,58,20,15
15
+ 14,Female,24,20,77
16
+ 15,Male,37,20,13
17
+ 16,Male,22,20,79
18
+ 17,Female,35,21,35
19
+ 18,Male,20,21,66
20
+ 19,Male,52,23,29
21
+ 20,Female,35,23,98
22
+ 21,Male,35,24,35
23
+ 22,Male,25,24,73
24
+ 23,Female,46,25,5
25
+ 24,Male,31,25,73
26
+ 25,Female,54,28,14
27
+ 26,Male,29,28,82
28
+ 27,Female,45,28,32
29
+ 28,Male,35,28,61
30
+ 29,Female,40,29,31
31
+ 30,Female,23,29,87
32
+ 31,Male,60,30,4
33
+ 32,Female,21,30,73
34
+ 33,Male,53,33,4
35
+ 34,Male,18,33,92
36
+ 35,Female,49,33,14
37
+ 36,Female,21,33,81
38
+ 37,Female,42,34,17
39
+ 38,Female,30,34,73
40
+ 39,Female,36,37,26
41
+ 40,Female,20,37,75
42
+ 41,Female,65,38,35
43
+ 42,Male,24,38,92
44
+ 43,Male,48,39,36
45
+ 44,Female,31,39,61
46
+ 45,Female,49,39,28
47
+ 46,Female,24,39,65
48
+ 47,Female,50,40,55
49
+ 48,Female,27,40,47
50
+ 49,Female,29,40,42
51
+ 50,Female,31,40,42
52
+ 51,Female,49,42,52
53
+ 52,Male,33,42,60
54
+ 53,Female,31,43,54
55
+ 54,Male,59,43,60
56
+ 55,Female,50,43,45
57
+ 56,Male,47,43,41
58
+ 57,Female,51,44,50
59
+ 58,Male,69,44,46
60
+ 59,Female,27,46,51
61
+ 60,Male,53,46,46
62
+ 61,Male,70,46,56
63
+ 62,Male,19,46,55
64
+ 63,Female,67,47,52
65
+ 64,Female,54,47,59
66
+ 65,Male,63,48,51
67
+ 66,Male,18,48,59
68
+ 67,Female,43,48,50
69
+ 68,Female,68,48,48
70
+ 69,Male,19,48,59
71
+ 70,Female,32,48,47
72
+ 71,Male,70,49,55
73
+ 72,Female,47,49,42
74
+ 73,Female,60,50,49
75
+ 74,Female,60,50,56
76
+ 75,Male,59,54,47
77
+ 76,Male,26,54,54
78
+ 77,Female,45,54,53
79
+ 78,Male,40,54,48
80
+ 79,Female,23,54,52
81
+ 80,Female,49,54,42
82
+ 81,Male,57,54,51
83
+ 82,Male,38,54,55
84
+ 83,Male,67,54,41
85
+ 84,Female,46,54,44
86
+ 85,Female,21,54,57
87
+ 86,Male,48,54,46
88
+ 87,Female,55,57,58
89
+ 88,Female,22,57,55
90
+ 89,Female,34,58,60
91
+ 90,Female,50,58,46
92
+ 91,Female,68,59,55
93
+ 92,Male,18,59,41
94
+ 93,Male,48,60,49
95
+ 94,Female,40,60,40
96
+ 95,Female,32,60,42
97
+ 96,Male,24,60,52
98
+ 97,Female,47,60,47
99
+ 98,Female,27,60,50
100
+ 99,Male,48,61,42
101
+ 100,Male,20,61,49
102
+ 101,Female,23,62,41
103
+ 102,Female,49,62,48
104
+ 103,Male,67,62,59
105
+ 104,Male,26,62,55
106
+ 105,Male,49,62,56
107
+ 106,Female,21,62,42
108
+ 107,Female,66,63,50
109
+ 108,Male,54,63,46
110
+ 109,Male,68,63,43
111
+ 110,Male,66,63,48
112
+ 111,Male,65,63,52
113
+ 112,Female,19,63,54
114
+ 113,Female,38,64,42
115
+ 114,Male,19,64,46
116
+ 115,Female,18,65,48
117
+ 116,Female,19,65,50
118
+ 117,Female,63,65,43
119
+ 118,Female,49,65,59
120
+ 119,Female,51,67,43
121
+ 120,Female,50,67,57
122
+ 121,Male,27,67,56
123
+ 122,Female,38,67,40
124
+ 123,Female,40,69,58
125
+ 124,Male,39,69,91
126
+ 125,Female,23,70,29
127
+ 126,Female,31,70,77
128
+ 127,Male,43,71,35
129
+ 128,Male,40,71,95
130
+ 129,Male,59,71,11
131
+ 130,Male,38,71,75
132
+ 131,Male,47,71,9
133
+ 132,Male,39,71,75
134
+ 133,Female,25,72,34
135
+ 134,Female,31,72,71
136
+ 135,Male,20,73,5
137
+ 136,Female,29,73,88
138
+ 137,Female,44,73,7
139
+ 138,Male,32,73,73
140
+ 139,Male,19,74,10
141
+ 140,Female,35,74,72
142
+ 141,Female,57,75,5
143
+ 142,Male,32,75,93
144
+ 143,Female,28,76,40
145
+ 144,Female,32,76,87
146
+ 145,Male,25,77,12
147
+ 146,Male,28,77,97
148
+ 147,Male,48,77,36
149
+ 148,Female,32,77,74
150
+ 149,Female,34,78,22
151
+ 150,Male,34,78,90
152
+ 151,Male,43,78,17
153
+ 152,Male,39,78,88
154
+ 153,Female,44,78,20
155
+ 154,Female,38,78,76
156
+ 155,Female,47,78,16
157
+ 156,Female,27,78,89
158
+ 157,Male,37,78,1
159
+ 158,Female,30,78,78
160
+ 159,Male,34,78,1
161
+ 160,Female,30,78,73
162
+ 161,Female,56,79,35
163
+ 162,Female,29,79,83
164
+ 163,Male,19,81,5
165
+ 164,Female,31,81,93
166
+ 165,Male,50,85,26
167
+ 166,Female,36,85,75
168
+ 167,Male,42,86,20
169
+ 168,Female,33,86,95
170
+ 169,Female,36,87,27
171
+ 170,Male,32,87,63
172
+ 171,Male,40,87,13
173
+ 172,Male,28,87,75
174
+ 173,Male,36,87,10
175
+ 174,Male,36,87,92
176
+ 175,Female,52,88,13
177
+ 176,Female,30,88,86
178
+ 177,Male,58,88,15
179
+ 178,Male,27,88,69
180
+ 179,Male,59,93,14
181
+ 180,Male,35,93,90
182
+ 181,Female,37,97,32
183
+ 182,Female,32,97,86
184
+ 183,Male,46,98,15
185
+ 184,Female,29,98,88
186
+ 185,Female,41,99,39
187
+ 186,Male,30,99,97
188
+ 187,Female,54,101,24
189
+ 188,Male,28,101,68
190
+ 189,Female,41,103,17
191
+ 190,Female,36,103,85
192
+ 191,Female,34,103,23
193
+ 192,Female,32,103,69
194
+ 193,Male,33,113,8
195
+ 194,Female,38,113,91
196
+ 195,Female,47,120,16
197
+ 196,Female,35,120,79
198
+ 197,Female,45,126,28
199
+ 198,Male,32,126,74
200
+ 199,Male,32,137,18
201
+ 200,Male,30,137,83
app.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import matplotlib.pyplot as plt
4
+ import seaborn as sns
5
+ from sklearn.cluster import KMeans
6
+ from sklearn.preprocessing import StandardScaler
7
+
8
+ # Load dataset
9
@st.cache_data
def load_data():
    """Read ``Mall_Customers.csv`` into a DataFrame.

    The path is relative, so the file is looked up in the process's current
    working directory. The result is wrapped in ``st.cache_data``.

    Returns:
        The parsed CSV as a ``pandas.DataFrame``.
    """
    return pd.read_csv("Mall_Customers.csv")
14
+
15
# ---------------------------------------------------------------------------
# Streamlit page script: a four-section K-Means customer-segmentation demo.
#
# The sidebar radio selects which section is rendered on this run:
#   * Introduction     - static description of the model, dataset and app.
#   * Data Exploration - head/describe tables, pairplot, correlation heatmap.
#   * K-Means Model    - elbow plot, cluster-count slider, fitted clusters;
#                        stores the fitted scaler and model in session state.
#   * Test Model       - predicts the cluster for a user-supplied
#                        (income, spending score) pair using the stored
#                        scaler/model.
#
# Every Matplotlib figure is passed to ``st.pyplot`` explicitly (calling it
# with no argument relies on the deprecated global-figure fallback) and is
# closed afterwards so figures do not pile up in pyplot's registry.
# ---------------------------------------------------------------------------

# Columns used both to fit the scaler/model and to build prediction input.
FEATURE_COLUMNS = ["Annual Income (k$)", "Spending Score (1-100)"]

df = load_data()

# Sidebar for navigation
st.sidebar.title("K-Means Clustering App")
section = st.sidebar.radio("Go to", ["Introduction", "Data Exploration", "K-Means Model", "Test Model"])

if section == "Introduction":
    st.title("Introduction to K-Means Clustering")

    st.write("### About the Model")
    st.write("K-Means Clustering is an unsupervised machine learning algorithm used for customer segmentation. It helps identify different groups of customers based on their spending behavior and income.")

    st.write("### About the Dataset")
    st.write("The dataset consists of customer information, including:")
    st.markdown("- **CustomerID**: Unique identifier for each customer.")
    st.markdown("- **Gender**: Male or Female.")
    st.markdown("- **Age**: Age of the customer.")
    st.markdown("- **Annual Income (k$)**: Customer's yearly income.")
    st.markdown("- **Spending Score (1-100)**: A score assigned based on spending behavior.")

    st.write("### How to Use the App")
    st.markdown("1. **Go to 'Data Exploration'**: Understand the dataset using statistics and visualizations.")
    st.markdown("2. **Go to 'K-Means Model'**: Train the model and visualize clusters.")
    st.markdown("3. **Go to 'Test Model'**: Input values to predict customer cluster.")

    st.write("### Insights")
    st.markdown("- Customers can be grouped into different segments based on their income and spending habits.")
    st.markdown("- The Elbow Method helps determine the optimal number of clusters.")
    st.markdown("- Businesses can use these insights to tailor marketing strategies and improve customer engagement.")

elif section == "Data Exploration":
    st.title("Data Exploration")
    st.write("### First 5 rows of dataset")
    st.dataframe(df.head())

    st.write("### Summary Statistics")
    st.write(df.describe())

    # Identifier and categorical columns are excluded from the numeric plots.
    numeric_df = df.drop(columns=["CustomerID", "Gender"])

    st.write("### Pairplot")
    # pairplot builds its own figure; hand that figure to Streamlit directly.
    pair_grid = sns.pairplot(numeric_df, diag_kind="kde")
    st.pyplot(pair_grid.figure)
    plt.close(pair_grid.figure)

    st.write("### Correlation Heatmap")
    heatmap_fig, heatmap_ax = plt.subplots(figsize=(8, 6))
    sns.heatmap(numeric_df.corr(), annot=True, cmap="coolwarm", ax=heatmap_ax)
    st.pyplot(heatmap_fig)
    plt.close(heatmap_fig)

elif section == "K-Means Model":
    st.title("K-Means Clustering")

    # Selecting features for clustering
    features = df[FEATURE_COLUMNS]
    scaler = StandardScaler()
    scaled_features = scaler.fit_transform(features)

    # Finding the optimal number of clusters using Elbow Method
    st.write("### Elbow Method")
    cluster_counts = range(1, 11)
    inertia = []
    for k in cluster_counts:
        kmeans = KMeans(n_clusters=k, random_state=42, n_init=10)
        kmeans.fit(scaled_features)
        inertia.append(kmeans.inertia_)

    elbow_fig, elbow_ax = plt.subplots(figsize=(8, 5))
    elbow_ax.plot(cluster_counts, inertia, marker='o')
    elbow_ax.set_xlabel('Number of Clusters')
    elbow_ax.set_ylabel('Inertia')
    elbow_ax.set_title('Elbow Method for Optimal k')
    st.pyplot(elbow_fig)
    plt.close(elbow_fig)

    # Train K-Means Model
    k = st.slider("Select Number of Clusters", 2, 10, 5)
    kmeans = KMeans(n_clusters=k, random_state=42, n_init=10)
    df['Cluster'] = kmeans.fit_predict(scaled_features)

    st.write("### Clustered Data")
    st.dataframe(df)

    # Visualization of clusters
    cluster_fig, cluster_ax = plt.subplots(figsize=(8, 6))
    sns.scatterplot(
        x=df["Annual Income (k$)"],
        y=df["Spending Score (1-100)"],
        hue=df['Cluster'],
        palette='viridis',
        ax=cluster_ax,
    )
    cluster_ax.set_xlabel("Annual Income (k$)")
    cluster_ax.set_ylabel("Spending Score (1-100)")
    cluster_ax.set_title("Customer Segmentation using K-Means")
    st.pyplot(cluster_fig)
    plt.close(cluster_fig)

    # Keep the fitted scaler and model in session state so the "Test Model"
    # section can reuse them on a later rerun.
    st.session_state['scaler'] = scaler
    st.session_state['kmeans'] = kmeans

elif section == "Test Model":
    st.title("Test K-Means Model")

    income = st.number_input("Enter Annual Income (k$)", min_value=0, max_value=200, value=50)
    score = st.number_input("Enter Spending Score (1-100)", min_value=1, max_value=100, value=50)

    if 'scaler' in st.session_state and 'kmeans' in st.session_state:
        # The scaler was fitted on a DataFrame with named columns; give it
        # the same column names here so scikit-learn does not warn about
        # missing feature names.
        input_frame = pd.DataFrame([[income, score]], columns=FEATURE_COLUMNS)
        input_data = st.session_state['scaler'].transform(input_frame)
        prediction = st.session_state['kmeans'].predict(input_data)
        st.write(f"### Predicted Cluster: {prediction[0]}")
    else:
        st.write("### Please run the K-Means Model section first.")
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ streamlit
2
+ pandas
3
+ matplotlib
4
+ seaborn
5
+ scikit-learn