Spaces:
Sleeping
Sleeping
Upload 16 files
Browse files- colelithiasis_dataset.xlsx +0 -0
- encoder.pkl +3 -0
- gda.pkl +3 -0
- health_status_classification.ipynb +569 -0
- installation.ipynb +0 -0
- lr_model.pkl +3 -0
- main.py +314 -0
- requirements.txt +0 -0
- rf_boosted.pkl +3 -0
- rf_model.pkl +3 -0
- scaler.pkl +3 -0
- svm_model_linear.pkl +3 -0
- svm_model_poly.pkl +3 -0
- svm_model_rbf.pkl +3 -0
- test_data.xlsx +0 -0
- training.ipynb +0 -0
colelithiasis_dataset.xlsx
ADDED
Binary file (16.7 kB). View file
|
|
encoder.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f931d3e82fc7ab6868eb41036e2c84c14dc0efb7920b70d0a0a547f7e4975155
|
3 |
+
size 1604
|
gda.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1062eb7b2283f57c001ad6546390ac969fec7aa91d55182947db1307887cc256
|
3 |
+
size 3224
|
health_status_classification.ipynb
ADDED
@@ -0,0 +1,569 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "markdown",
|
5 |
+
"id": "7abd29b8",
|
6 |
+
"metadata": {},
|
7 |
+
"source": [
|
8 |
+
"# Health Status Classification\n",
|
9 |
+
"\n",
|
10 |
+
"This notebook classifies individuals into \"Healthy\" or \"Patient\" categories using SVM and Random Forest classifiers. It includes:\n",
|
11 |
+
"- Data preprocessing\n",
|
12 |
+
"- Training of classifiers\n",
|
13 |
+
"- Comparison of performance metrics\n",
|
14 |
+
"- Visualization of results\n"
|
15 |
+
]
|
16 |
+
},
|
17 |
+
{
|
18 |
+
"cell_type": "markdown",
|
19 |
+
"id": "299604b4",
|
20 |
+
"metadata": {},
|
21 |
+
"source": [
|
22 |
+
"## Data Preprocessing"
|
23 |
+
]
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"cell_type": "markdown",
|
27 |
+
"id": "22ee7ce2",
|
28 |
+
"metadata": {},
|
29 |
+
"source": [
|
30 |
+
"### 1. Import Dependencies"
|
31 |
+
]
|
32 |
+
},
|
33 |
+
{
|
34 |
+
"cell_type": "code",
|
35 |
+
"execution_count": 252,
|
36 |
+
"id": "76a44a0d",
|
37 |
+
"metadata": {},
|
38 |
+
"outputs": [],
|
39 |
+
"source": [
|
40 |
+
"import pandas as pd\n",
|
41 |
+
"from sklearn.model_selection import train_test_split\n",
|
42 |
+
"from sklearn.preprocessing import StandardScaler, LabelEncoder\n",
|
43 |
+
"from sklearn.svm import SVC\n",
|
44 |
+
"from sklearn.ensemble import RandomForestClassifier\n",
|
45 |
+
"from sklearn.metrics import classification_report, confusion_matrix, accuracy_score\n",
|
46 |
+
"import matplotlib.pyplot as plt\n",
|
47 |
+
"import seaborn as sns"
|
48 |
+
]
|
49 |
+
},
|
50 |
+
{
|
51 |
+
"cell_type": "markdown",
|
52 |
+
"id": "a27c6dc7",
|
53 |
+
"metadata": {},
|
54 |
+
"source": [
|
55 |
+
"### 2. Load Dataset"
|
56 |
+
]
|
57 |
+
},
|
58 |
+
{
|
59 |
+
"cell_type": "code",
|
60 |
+
"execution_count": 253,
|
61 |
+
"id": "3772870e",
|
62 |
+
"metadata": {},
|
63 |
+
"outputs": [
|
64 |
+
{
|
65 |
+
"data": {
|
66 |
+
"text/html": [
|
67 |
+
"<div>\n",
|
68 |
+
"<style scoped>\n",
|
69 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
70 |
+
" vertical-align: middle;\n",
|
71 |
+
" }\n",
|
72 |
+
"\n",
|
73 |
+
" .dataframe tbody tr th {\n",
|
74 |
+
" vertical-align: top;\n",
|
75 |
+
" }\n",
|
76 |
+
"\n",
|
77 |
+
" .dataframe thead th {\n",
|
78 |
+
" text-align: right;\n",
|
79 |
+
" }\n",
|
80 |
+
"</style>\n",
|
81 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
82 |
+
" <thead>\n",
|
83 |
+
" <tr style=\"text-align: right;\">\n",
|
84 |
+
" <th></th>\n",
|
85 |
+
" <th>Patient No.</th>\n",
|
86 |
+
" <th>Gender</th>\n",
|
87 |
+
" <th>Age</th>\n",
|
88 |
+
" <th>Family history</th>\n",
|
89 |
+
" <th>Height</th>\n",
|
90 |
+
" <th>Weight</th>\n",
|
91 |
+
" <th>BMI</th>\n",
|
92 |
+
" <th>Obese/non obese</th>\n",
|
93 |
+
" <th>Cholesterol</th>\n",
|
94 |
+
" <th>Triglycerides level</th>\n",
|
95 |
+
" <th>HDL level</th>\n",
|
96 |
+
" <th>LDL level</th>\n",
|
97 |
+
" <th>VLDL level</th>\n",
|
98 |
+
" <th>Health_status</th>\n",
|
99 |
+
" </tr>\n",
|
100 |
+
" </thead>\n",
|
101 |
+
" <tbody>\n",
|
102 |
+
" <tr>\n",
|
103 |
+
" <th>0</th>\n",
|
104 |
+
" <td>1</td>\n",
|
105 |
+
" <td>Female</td>\n",
|
106 |
+
" <td>65</td>\n",
|
107 |
+
" <td>No</td>\n",
|
108 |
+
" <td>1.64</td>\n",
|
109 |
+
" <td>64</td>\n",
|
110 |
+
" <td>23.80</td>\n",
|
111 |
+
" <td>Non-obese</td>\n",
|
112 |
+
" <td>145</td>\n",
|
113 |
+
" <td>119</td>\n",
|
114 |
+
" <td>60</td>\n",
|
115 |
+
" <td>66.0</td>\n",
|
116 |
+
" <td>19.0</td>\n",
|
117 |
+
" <td>healthy</td>\n",
|
118 |
+
" </tr>\n",
|
119 |
+
" <tr>\n",
|
120 |
+
" <th>1</th>\n",
|
121 |
+
" <td>2</td>\n",
|
122 |
+
" <td>Female</td>\n",
|
123 |
+
" <td>50</td>\n",
|
124 |
+
" <td>Yes</td>\n",
|
125 |
+
" <td>1.70</td>\n",
|
126 |
+
" <td>70</td>\n",
|
127 |
+
" <td>24.22</td>\n",
|
128 |
+
" <td>Non-obese</td>\n",
|
129 |
+
" <td>220</td>\n",
|
130 |
+
" <td>107</td>\n",
|
131 |
+
" <td>69</td>\n",
|
132 |
+
" <td>134.0</td>\n",
|
133 |
+
" <td>17.0</td>\n",
|
134 |
+
" <td>healthy</td>\n",
|
135 |
+
" </tr>\n",
|
136 |
+
" <tr>\n",
|
137 |
+
" <th>2</th>\n",
|
138 |
+
" <td>3</td>\n",
|
139 |
+
" <td>Female</td>\n",
|
140 |
+
" <td>45</td>\n",
|
141 |
+
" <td>No</td>\n",
|
142 |
+
" <td>1.67</td>\n",
|
143 |
+
" <td>63</td>\n",
|
144 |
+
" <td>22.59</td>\n",
|
145 |
+
" <td>Non-obese</td>\n",
|
146 |
+
" <td>190</td>\n",
|
147 |
+
" <td>251</td>\n",
|
148 |
+
" <td>42</td>\n",
|
149 |
+
" <td>108.0</td>\n",
|
150 |
+
" <td>40.0</td>\n",
|
151 |
+
" <td>healthy</td>\n",
|
152 |
+
" </tr>\n",
|
153 |
+
" <tr>\n",
|
154 |
+
" <th>3</th>\n",
|
155 |
+
" <td>4</td>\n",
|
156 |
+
" <td>Female</td>\n",
|
157 |
+
" <td>48</td>\n",
|
158 |
+
" <td>No</td>\n",
|
159 |
+
" <td>1.61</td>\n",
|
160 |
+
" <td>79</td>\n",
|
161 |
+
" <td>30.48</td>\n",
|
162 |
+
" <td>Obese</td>\n",
|
163 |
+
" <td>228</td>\n",
|
164 |
+
" <td>185</td>\n",
|
165 |
+
" <td>65</td>\n",
|
166 |
+
" <td>134.0</td>\n",
|
167 |
+
" <td>29.0</td>\n",
|
168 |
+
" <td>healthy</td>\n",
|
169 |
+
" </tr>\n",
|
170 |
+
" <tr>\n",
|
171 |
+
" <th>4</th>\n",
|
172 |
+
" <td>5</td>\n",
|
173 |
+
" <td>Male</td>\n",
|
174 |
+
" <td>74</td>\n",
|
175 |
+
" <td>No</td>\n",
|
176 |
+
" <td>1.76</td>\n",
|
177 |
+
" <td>83</td>\n",
|
178 |
+
" <td>26.79</td>\n",
|
179 |
+
" <td>Non-obese</td>\n",
|
180 |
+
" <td>157</td>\n",
|
181 |
+
" <td>113</td>\n",
|
182 |
+
" <td>49</td>\n",
|
183 |
+
" <td>90.0</td>\n",
|
184 |
+
" <td>18.0</td>\n",
|
185 |
+
" <td>healthy</td>\n",
|
186 |
+
" </tr>\n",
|
187 |
+
" </tbody>\n",
|
188 |
+
"</table>\n",
|
189 |
+
"</div>"
|
190 |
+
],
|
191 |
+
"text/plain": [
|
192 |
+
" Patient No. Gender Age Family history Height Weight BMI \\\n",
|
193 |
+
"0 1 Female 65 No 1.64 64 23.80 \n",
|
194 |
+
"1 2 Female 50 Yes 1.70 70 24.22 \n",
|
195 |
+
"2 3 Female 45 No 1.67 63 22.59 \n",
|
196 |
+
"3 4 Female 48 No 1.61 79 30.48 \n",
|
197 |
+
"4 5 Male 74 No 1.76 83 26.79 \n",
|
198 |
+
"\n",
|
199 |
+
" Obese/non obese Cholesterol Triglycerides level HDL level LDL level \\\n",
|
200 |
+
"0 Non-obese 145 119 60 66.0 \n",
|
201 |
+
"1 Non-obese 220 107 69 134.0 \n",
|
202 |
+
"2 Non-obese 190 251 42 108.0 \n",
|
203 |
+
"3 Obese 228 185 65 134.0 \n",
|
204 |
+
"4 Non-obese 157 113 49 90.0 \n",
|
205 |
+
"\n",
|
206 |
+
" VLDL level Health_status \n",
|
207 |
+
"0 19.0 healthy \n",
|
208 |
+
"1 17.0 healthy \n",
|
209 |
+
"2 40.0 healthy \n",
|
210 |
+
"3 29.0 healthy \n",
|
211 |
+
"4 18.0 healthy "
|
212 |
+
]
|
213 |
+
},
|
214 |
+
"execution_count": 253,
|
215 |
+
"metadata": {},
|
216 |
+
"output_type": "execute_result"
|
217 |
+
}
|
218 |
+
],
|
219 |
+
"source": [
|
220 |
+
"# Load dataset\n",
|
221 |
+
"data = pd.read_excel(r'colelithiasis_dataset.xlsx')\n",
|
222 |
+
"data.head()"
|
223 |
+
]
|
224 |
+
},
|
225 |
+
{
|
226 |
+
"cell_type": "code",
|
227 |
+
"execution_count": 9,
|
228 |
+
"id": "7edf91ac",
|
229 |
+
"metadata": {},
|
230 |
+
"outputs": [
|
231 |
+
{
|
232 |
+
"name": "stdout",
|
233 |
+
"output_type": "stream",
|
234 |
+
"text": [
|
235 |
+
"<class 'pandas.core.frame.DataFrame'>\n",
|
236 |
+
"RangeIndex: 100 entries, 0 to 99\n",
|
237 |
+
"Data columns (total 14 columns):\n",
|
238 |
+
" # Column Non-Null Count Dtype \n",
|
239 |
+
"--- ------ -------------- ----- \n",
|
240 |
+
" 0 Patient No. 100 non-null int64 \n",
|
241 |
+
" 1 Gender 100 non-null object \n",
|
242 |
+
" 2 Age 100 non-null int64 \n",
|
243 |
+
" 3 Family history 100 non-null object \n",
|
244 |
+
" 4 Height 100 non-null float64\n",
|
245 |
+
" 5 Weight 100 non-null int64 \n",
|
246 |
+
" 6 BMI 100 non-null float64\n",
|
247 |
+
" 7 Obese/non obese 100 non-null object \n",
|
248 |
+
" 8 Cholesterol 100 non-null int64 \n",
|
249 |
+
" 9 Triglycerides level 100 non-null int64 \n",
|
250 |
+
" 10 HDL level 100 non-null int64 \n",
|
251 |
+
" 11 LDL level 100 non-null float64\n",
|
252 |
+
" 12 VLDL level 100 non-null float64\n",
|
253 |
+
" 13 Health_status 100 non-null object \n",
|
254 |
+
"dtypes: float64(4), int64(6), object(4)\n",
|
255 |
+
"memory usage: 11.1+ KB\n"
|
256 |
+
]
|
257 |
+
}
|
258 |
+
],
|
259 |
+
"source": [
|
260 |
+
"data.info()"
|
261 |
+
]
|
262 |
+
},
|
263 |
+
{
|
264 |
+
"cell_type": "code",
|
265 |
+
"execution_count": 10,
|
266 |
+
"id": "aae142a0",
|
267 |
+
"metadata": {},
|
268 |
+
"outputs": [
|
269 |
+
{
|
270 |
+
"data": {
|
271 |
+
"text/plain": [
|
272 |
+
"Health_status\n",
|
273 |
+
"patient 60\n",
|
274 |
+
"healthy 40\n",
|
275 |
+
"Name: count, dtype: int64"
|
276 |
+
]
|
277 |
+
},
|
278 |
+
"execution_count": 10,
|
279 |
+
"metadata": {},
|
280 |
+
"output_type": "execute_result"
|
281 |
+
}
|
282 |
+
],
|
283 |
+
"source": [
|
284 |
+
"data['Health_status'].value_counts()"
|
285 |
+
]
|
286 |
+
},
|
287 |
+
{
|
288 |
+
"cell_type": "code",
|
289 |
+
"execution_count": 11,
|
290 |
+
"id": "aafe0526",
|
291 |
+
"metadata": {},
|
292 |
+
"outputs": [],
|
293 |
+
"source": [
|
294 |
+
"# Drop unnecessary columns (e.g., Patient No.)\n",
|
295 |
+
"data = data.drop(columns=['Patient No.'])"
|
296 |
+
]
|
297 |
+
},
|
298 |
+
{
|
299 |
+
"cell_type": "markdown",
|
300 |
+
"id": "c7907326",
|
301 |
+
"metadata": {},
|
302 |
+
"source": [
|
303 |
+
"### 3. Feature Encoding"
|
304 |
+
]
|
305 |
+
},
|
306 |
+
{
|
307 |
+
"cell_type": "code",
|
308 |
+
"execution_count": 12,
|
309 |
+
"id": "7f22b9a6",
|
310 |
+
"metadata": {},
|
311 |
+
"outputs": [],
|
312 |
+
"source": [
|
313 |
+
"# Encode categorical variables\n",
|
314 |
+
"le_health_status = LabelEncoder()\n",
|
315 |
+
"data['Health_status'] = le_health_status.fit_transform(data['Health_status']) # 0 for healthy, 1 for patient\n",
|
316 |
+
"le_gender = LabelEncoder()\n",
|
317 |
+
"data['Gender'] = le_gender.fit_transform(data['Gender']) # 0 for Female, 1 for Male\n",
|
318 |
+
"le_family_history = LabelEncoder()\n",
|
319 |
+
"data['Family history'] = le_family_history.fit_transform(data['Family history']) # 0 for No, 1 for Yes\n",
|
320 |
+
"le_obese = LabelEncoder()\n",
|
321 |
+
"data['Obese/non obese'] = le_obese.fit_transform(data['Obese/non obese']) # 0 for Non-obese, 1 for Obese"
|
322 |
+
]
|
323 |
+
},
|
324 |
+
{
|
325 |
+
"cell_type": "markdown",
|
326 |
+
"id": "bcf93f5f",
|
327 |
+
"metadata": {},
|
328 |
+
"source": [
|
329 |
+
"### 4. Split features and target"
|
330 |
+
]
|
331 |
+
},
|
332 |
+
{
|
333 |
+
"cell_type": "code",
|
334 |
+
"execution_count": 13,
|
335 |
+
"id": "eab4be22",
|
336 |
+
"metadata": {},
|
337 |
+
"outputs": [],
|
338 |
+
"source": [
|
339 |
+
"# Features and target\n",
|
340 |
+
"X = data.drop(columns=['Health_status'])\n",
|
341 |
+
"y = data['Health_status']\n"
|
342 |
+
]
|
343 |
+
},
|
344 |
+
{
|
345 |
+
"cell_type": "markdown",
|
346 |
+
"id": "c5ed059c",
|
347 |
+
"metadata": {},
|
348 |
+
"source": [
|
349 |
+
"### 5. Split data into training and testing sets"
|
350 |
+
]
|
351 |
+
},
|
352 |
+
{
|
353 |
+
"cell_type": "code",
|
354 |
+
"execution_count": 244,
|
355 |
+
"id": "cdeca4f2",
|
356 |
+
"metadata": {},
|
357 |
+
"outputs": [],
|
358 |
+
"source": [
|
359 |
+
"# Split the data\n",
|
360 |
+
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)"
|
361 |
+
]
|
362 |
+
},
|
363 |
+
{
|
364 |
+
"cell_type": "markdown",
|
365 |
+
"id": "0558f2c8",
|
366 |
+
"metadata": {},
|
367 |
+
"source": [
|
368 |
+
"### 6. Scaling the features"
|
369 |
+
]
|
370 |
+
},
|
371 |
+
{
|
372 |
+
"cell_type": "code",
|
373 |
+
"execution_count": 245,
|
374 |
+
"id": "a037923f",
|
375 |
+
"metadata": {},
|
376 |
+
"outputs": [],
|
377 |
+
"source": [
|
378 |
+
"# Scale the features using StandardScaler\n",
|
379 |
+
"scaler = StandardScaler()\n",
|
380 |
+
"X_train = scaler.fit_transform(X_train)\n",
|
381 |
+
"X_test = scaler.transform(X_test)"
|
382 |
+
]
|
383 |
+
},
|
384 |
+
{
|
385 |
+
"cell_type": "markdown",
|
386 |
+
"id": "d934c22c",
|
387 |
+
"metadata": {},
|
388 |
+
"source": [
|
389 |
+
"## Training of classifiers"
|
390 |
+
]
|
391 |
+
},
|
392 |
+
{
|
393 |
+
"cell_type": "markdown",
|
394 |
+
"id": "c7fd1e71",
|
395 |
+
"metadata": {},
|
396 |
+
"source": [
|
397 |
+
"### 1. Support Vector Machine (SVM)"
|
398 |
+
]
|
399 |
+
},
|
400 |
+
{
|
401 |
+
"cell_type": "code",
|
402 |
+
"execution_count": 246,
|
403 |
+
"id": "aca26b71",
|
404 |
+
"metadata": {},
|
405 |
+
"outputs": [],
|
406 |
+
"source": [
|
407 |
+
"# SVM Classifier\n",
|
408 |
+
"svm_model = SVC(kernel='linear', C=0.9, random_state=42)\n",
|
409 |
+
"svm_model.fit(X_train, y_train)\n",
|
410 |
+
"svm_preds = svm_model.predict(X_test)"
|
411 |
+
]
|
412 |
+
},
|
413 |
+
{
|
414 |
+
"cell_type": "markdown",
|
415 |
+
"id": "e8bfefa5",
|
416 |
+
"metadata": {},
|
417 |
+
"source": [
|
418 |
+
"### 2. Random Forest Classifier"
|
419 |
+
]
|
420 |
+
},
|
421 |
+
{
|
422 |
+
"cell_type": "code",
|
423 |
+
"execution_count": 247,
|
424 |
+
"id": "a5ad9d40",
|
425 |
+
"metadata": {},
|
426 |
+
"outputs": [],
|
427 |
+
"source": [
|
428 |
+
"# Random Forest Classifier\n",
|
429 |
+
"rf_model = RandomForestClassifier(n_estimators=100, class_weight='balanced', random_state=42)\n",
|
430 |
+
"rf_model.fit(X_train, y_train)\n",
|
431 |
+
"rf_preds = rf_model.predict(X_test)"
|
432 |
+
]
|
433 |
+
},
|
434 |
+
{
|
435 |
+
"cell_type": "markdown",
|
436 |
+
"id": "e3fb4a5a",
|
437 |
+
"metadata": {},
|
438 |
+
"source": [
|
439 |
+
"## Comparison of performance metrics"
|
440 |
+
]
|
441 |
+
},
|
442 |
+
{
|
443 |
+
"cell_type": "code",
|
444 |
+
"execution_count": 249,
|
445 |
+
"id": "2ddc5b12",
|
446 |
+
"metadata": {},
|
447 |
+
"outputs": [
|
448 |
+
{
|
449 |
+
"name": "stdout",
|
450 |
+
"output_type": "stream",
|
451 |
+
"text": [
|
452 |
+
"\n",
|
453 |
+
"Performance Metrics for SVM\n",
|
454 |
+
"Accuracy: 0.65\n",
|
455 |
+
"\n",
|
456 |
+
"Classification Report:\n",
|
457 |
+
" precision recall f1-score support\n",
|
458 |
+
"\n",
|
459 |
+
" 0 0.55 0.75 0.63 8\n",
|
460 |
+
" 1 0.78 0.58 0.67 12\n",
|
461 |
+
"\n",
|
462 |
+
" accuracy 0.65 20\n",
|
463 |
+
" macro avg 0.66 0.67 0.65 20\n",
|
464 |
+
"weighted avg 0.68 0.65 0.65 20\n",
|
465 |
+
"\n",
|
466 |
+
"\n",
|
467 |
+
"Performance Metrics for Random Forest\n",
|
468 |
+
"Accuracy: 0.7\n",
|
469 |
+
"\n",
|
470 |
+
"Classification Report:\n",
|
471 |
+
" precision recall f1-score support\n",
|
472 |
+
"\n",
|
473 |
+
" 0 0.60 0.75 0.67 8\n",
|
474 |
+
" 1 0.80 0.67 0.73 12\n",
|
475 |
+
"\n",
|
476 |
+
" accuracy 0.70 20\n",
|
477 |
+
" macro avg 0.70 0.71 0.70 20\n",
|
478 |
+
"weighted avg 0.72 0.70 0.70 20\n",
|
479 |
+
"\n"
|
480 |
+
]
|
481 |
+
}
|
482 |
+
],
|
483 |
+
"source": [
|
484 |
+
"def print_metrics(y_true, y_pred, model_name):\n",
|
485 |
+
" print(f\"\\nPerformance Metrics for {model_name}\")\n",
|
486 |
+
" print(\"Accuracy:\", accuracy_score(y_true, y_pred))\n",
|
487 |
+
" print(\"\\nClassification Report:\")\n",
|
488 |
+
" print(classification_report(y_true, y_pred))\n",
|
489 |
+
"\n",
|
490 |
+
"print_metrics(y_test, svm_preds, \"SVM\")\n",
|
491 |
+
"print_metrics(y_test, rf_preds, \"Random Forest\")"
|
492 |
+
]
|
493 |
+
},
|
494 |
+
{
|
495 |
+
"cell_type": "markdown",
|
496 |
+
"id": "38eefbb6",
|
497 |
+
"metadata": {},
|
498 |
+
"source": [
|
499 |
+
"## Visualization of results"
|
500 |
+
]
|
501 |
+
},
|
502 |
+
{
|
503 |
+
"cell_type": "code",
|
504 |
+
"execution_count": 251,
|
505 |
+
"id": "f74a2f74",
|
506 |
+
"metadata": {},
|
507 |
+
"outputs": [
|
508 |
+
{
|
509 |
+
"data": {
|
510 |
+
"image/png": "",
|
511 |
+
"text/plain": [
|
512 |
+
"<Figure size 1200x500 with 4 Axes>"
|
513 |
+
]
|
514 |
+
},
|
515 |
+
"metadata": {},
|
516 |
+
"output_type": "display_data"
|
517 |
+
}
|
518 |
+
],
|
519 |
+
"source": [
|
520 |
+
"# Confusion Matrices\n",
|
521 |
+
"svm_cm = confusion_matrix(y_test, svm_preds)\n",
|
522 |
+
"rf_cm = confusion_matrix(y_test, rf_preds)\n",
|
523 |
+
"\n",
|
524 |
+
"fig, axes = plt.subplots(1, 2, figsize=(12, 5))\n",
|
525 |
+
"sns.heatmap(svm_cm, annot=True, fmt='d', cmap='Blues', ax=axes[0])\n",
|
526 |
+
"axes[0].set_title('SVM Confusion Matrix')\n",
|
527 |
+
"axes[0].set_xlabel('Predicted')\n",
|
528 |
+
"axes[0].set_ylabel('Actual')\n",
|
529 |
+
"\n",
|
530 |
+
"sns.heatmap(rf_cm, annot=True, fmt='d', cmap='Greens', ax=axes[1])\n",
|
531 |
+
"axes[1].set_title('Random Forest Confusion Matrix')\n",
|
532 |
+
"axes[1].set_xlabel('Predicted')\n",
|
533 |
+
"axes[1].set_ylabel('Actual')\n",
|
534 |
+
"\n",
|
535 |
+
"plt.tight_layout()\n",
|
536 |
+
"plt.show()\n"
|
537 |
+
]
|
538 |
+
},
|
539 |
+
{
|
540 |
+
"cell_type": "code",
|
541 |
+
"execution_count": null,
|
542 |
+
"id": "3a2c284f",
|
543 |
+
"metadata": {},
|
544 |
+
"outputs": [],
|
545 |
+
"source": []
|
546 |
+
}
|
547 |
+
],
|
548 |
+
"metadata": {
|
549 |
+
"kernelspec": {
|
550 |
+
"display_name": "ml_env",
|
551 |
+
"language": "python",
|
552 |
+
"name": "python3"
|
553 |
+
},
|
554 |
+
"language_info": {
|
555 |
+
"codemirror_mode": {
|
556 |
+
"name": "ipython",
|
557 |
+
"version": 3
|
558 |
+
},
|
559 |
+
"file_extension": ".py",
|
560 |
+
"mimetype": "text/x-python",
|
561 |
+
"name": "python",
|
562 |
+
"nbconvert_exporter": "python",
|
563 |
+
"pygments_lexer": "ipython3",
|
564 |
+
"version": "3.13.1"
|
565 |
+
}
|
566 |
+
},
|
567 |
+
"nbformat": 4,
|
568 |
+
"nbformat_minor": 5
|
569 |
+
}
|
installation.ipynb
ADDED
File without changes
|
lr_model.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3afb60ab51259121d4188b2e23fa705e3f6c100a4965e4769e0812d36677d3b6
|
3 |
+
size 1391
|
main.py
ADDED
@@ -0,0 +1,314 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import numpy as np
|
4 |
+
import matplotlib.pyplot as plt
|
5 |
+
import seaborn as sns
|
6 |
+
import joblib
|
7 |
+
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
|
8 |
+
from plotly import graph_objects as go
|
9 |
+
|
10 |
+
st.set_page_config(layout="wide")
|
11 |
+
|
12 |
+
# Load Dataset
|
13 |
+
def load_data():
    """Read the Colelithiasis workbook and return it without the 'Patient No.' column.

    Returns:
        pandas.DataFrame: the spreadsheet contents minus the 'Patient No.' column.
    """
    # NOTE(review): the path is relative to the working directory the app is
    # started from -- confirm the 'Model Training' folder exists in deployment.
    workbook = pd.read_excel(r'Model Training/colelithiasis_dataset.xlsx')  # Update with your dataset file path
    return workbook.drop(columns=['Patient No.'])
|
17 |
+
|
18 |
+
# Initialize Session State
|
19 |
+
# Load the dataset only when this session does not hold a "data" entry yet.
if "data" not in st.session_state:
    st.session_state["data"] = load_data()
|
21 |
+
|
22 |
+
def introduction_page():
    """Render the introduction page: a title followed by the project overview and objectives."""
    # Static markdown shown under the page title.
    overview_markdown = """
    ## Project Overview
    This project analyzes the Colelithiasis dataset to perform exploratory data analysis (EDA) and prediction using pre-trained machine learning models. The goal is to provide insights into the data and make predictions efficiently.

    ## Objectives
    - Perform EDA to uncover patterns and insights.
    - Use pre-trained machine learning models for predictions.
    - Create an interactive Streamlit application.
    """
    st.title("Introduction")
    st.markdown(overview_markdown)
|
33 |
+
|
34 |
+
def stats_page():
    """Render the statistics page: dataset preview, summary statistics and a correlation heatmap.

    The dataset is read from ``st.session_state.data``. The encoding needed for
    the correlation matrix is applied to a copy, so the frame kept in the
    session state is left untouched.
    """
    st.title("Exploratory Data Analysis")

    # Dataset Overview
    st.subheader("Dataset Overview")
    st.dataframe(st.session_state.data.head())

    # Summary Statistics
    st.subheader("Summary Statistics")
    st.write(st.session_state.data.describe())

    # Correlation Matrix
    st.subheader("Correlation Analysis")

    # Encode the target variable on a copy. The result is assigned back instead
    # of calling replace(inplace=True) on the selected column: the chained
    # in-place form does not update `data` when pandas Copy-on-Write is active.
    data = st.session_state.data.copy()
    data['Health_status'] = data['Health_status'].replace({'healthy': 0, 'patient': 1})

    # Apply the saved encoder to the categorical columns.
    categorical_columns = ['Gender', 'Family history', 'Obese/non obese']
    # Forward slash (as in load_data): a backslash here is an invalid escape
    # sequence and only works as a path separator on Windows.
    encoder = joblib.load('Model Training/encoder.pkl')
    data[categorical_columns] = encoder.transform(data[categorical_columns])

    correlation = data.corr()

    # Draw on an explicit figure/axes pair rather than the global pyplot state.
    fig = plt.figure(figsize=(5, 3))
    # reduce the font size of the heatmap (note: sns.set changes global settings)
    sns.set(font_scale=0.5)
    ax = fig.add_subplot(111)
    sns.heatmap(correlation, annot=True, cmap="coolwarm", fmt=".2f", ax=ax)
    st.pyplot(fig, use_container_width=False)
    # Release the figure so repeated reruns do not accumulate open figures.
    plt.close(fig)
|
63 |
+
|
64 |
+
def eda_page():
    """Render the interactive visualization page.

    The user chooses a chart type (histogram, scatter plot or box plot) and the
    column(s) of ``st.session_state.data`` to draw; the matching Plotly figure
    is then displayed with the "plotly_dark" template.
    """
    frame = st.session_state.data
    # Legend settings shared by all three chart types.
    legend_style = dict(title="Legend", orientation="h", x=0.5, xanchor="center")

    def heading(text):
        # Horizontally centered chart title with font size 22.
        return dict(text=text, x=0.5, font=dict(size=22))

    st.title("Exploratory Data Analysis")

    # Interactive Visualizations
    st.subheader("Visualizations")
    chart_type = st.selectbox("Choose Chart Type", ["Histogram", "Scatter Plot", "Box Plot"])

    if chart_type == "Histogram":
        column = st.selectbox("Choose Column for Visualization", frame.columns)
        trace = go.Histogram(x=frame[column], name=column, marker_color="indigo")
        layout = dict(
            title=heading("Histogram Analysis"),
            xaxis_title=column,
            yaxis_title="Count",
            legend=legend_style,
            bargap=0.2,
            hovermode="x unified",
            template="plotly_dark",
        )
    elif chart_type == "Scatter Plot":
        x_col = st.selectbox("Choose X-axis Column", frame.columns)
        y_col = st.selectbox("Choose Y-axis Column", frame.columns)
        trace = go.Scatter(
            x=frame[x_col],
            y=frame[y_col],
            mode="markers",
            marker=dict(size=10, color="purple", line=dict(width=1, color="white")),
            name=f"{y_col} vs {x_col}",
        )
        layout = dict(
            title=heading("Scatter Plot Analysis"),
            xaxis_title=x_col,
            yaxis_title=y_col,
            legend=legend_style,
            hovermode="closest",
            template="plotly_dark",
        )
    elif chart_type == "Box Plot":
        column = st.selectbox("Choose Column for Visualization", frame.columns)
        trace = go.Box(y=frame[column], name=column, boxmean="sd", marker_color="teal")
        layout = dict(
            title=heading("Boxplot Analysis"),
            yaxis_title=column,
            legend=legend_style,
            hovermode="y",
            template="plotly_dark",
        )
    else:
        # No known chart type selected: nothing to draw.
        return

    fig = go.Figure(data=[trace])
    fig.update_layout(**layout)
    st.plotly_chart(fig)
|
124 |
+
|
125 |
+
|
126 |
+
def model_page():
    """Render the model-evaluation page for the held-out test workbook.

    Steps:
      1. Read the test workbook and encode 'Health_status' (healthy -> 0, patient -> 1).
      2. Apply the saved encoder to the categorical columns and the saved scaler
         to every remaining feature column.
      3. Let the user pick one of the pre-trained models and load it.
      4. Show actual vs. predicted labels, the classification report and the
         confusion matrix.

    All artifacts are read from the 'Model Training' folder, relative to the
    working directory.
    """
    st.title("Model Evaluation")
    # Forward slashes work on every platform; the backslash form is Windows-only.
    test_data = pd.read_excel('Model Training/test_data.xlsx')

    # Encode the target variable. The result is assigned back instead of calling
    # replace(inplace=True) on the selected column, which does not update the
    # frame when pandas Copy-on-Write is active.
    test_data['Health_status'] = test_data['Health_status'].replace({'healthy': 0, 'patient': 1})

    # Apply the saved encoder to the categorical columns.
    # NOTE: joblib.load unpickles the file; only load artifacts produced by this project.
    categorical_columns = ['Gender', 'Family history', 'Obese/non obese']
    encoder = joblib.load('Model Training/encoder.pkl')

    X = test_data.drop(columns=['Health_status'])
    X[categorical_columns] = encoder.transform(X[categorical_columns])
    y = test_data['Health_status']

    # Apply standard scaling to the numerical features in X.
    numerical_columns = [col_name for col_name in X.columns if col_name not in categorical_columns]
    scaler = joblib.load('Model Training/scaler.pkl')
    X[numerical_columns] = scaler.transform(X[numerical_columns])

    # Model Selection
    st.text("Model Selection")
    # Display name -> pickle path. With backslashes, '\r' in the two Random
    # Forest paths was read as a carriage return and produced a wrong file name.
    model_files = {
        "SVM - Linear": 'Model Training/svm_model_linear.pkl',
        "SVM - Polynomial": 'Model Training/svm_model_poly.pkl',
        "SVM - RBF": 'Model Training/svm_model_rbf.pkl',
        "Random Forest": 'Model Training/rf_model.pkl',
        "Random Forest Boosted": 'Model Training/rf_boosted.pkl',
        "Logistic Regression": 'Model Training/lr_model.pkl',
        "GDA": 'Model Training/gda.pkl',
    }
    model_choice = st.selectbox("Choose a Pre-trained Model", list(model_files))

    # Load pre-trained model
    model = joblib.load(model_files[model_choice])

    # Make Predictions
    y_pred = model.predict(X)
    col1, col2 = st.columns(2)
    with col1:
        st.subheader("### Predictions on the Test Data:")
        st.dataframe(pd.DataFrame({"Actual": y, "Predicted": y_pred}))

    with col2:
        st.subheader("Classification Report")
        report = classification_report(y, y_pred, output_dict=True)
        report_df = (
            pd.DataFrame(report)
            .transpose()
            .drop(columns='support')
            .rename_axis('index')
        )
        # Class labels appear as '0'/'1' for integer targets and '0.0'/'1.0'
        # for float targets; cover both spellings.
        report_df = report_df.rename(index={
            '0': 'Negative', '1': 'Positive',
            '0.0': 'Negative', '1.0': 'Positive',
        })
        # Blank the first two columns (precision, recall) of the 'accuracy' row.
        # Cast them to object first so storing '' does not depend on an
        # implicit float -> object upcast.
        blank_cols = report_df.columns[:2]
        report_df[blank_cols] = report_df[blank_cols].astype(object)
        report_df.loc['accuracy', blank_cols] = ''
        st.table(report_df)

    st.subheader("Confusion Matrix")
    conf_matrix = confusion_matrix(y, y_pred)
    # Generate text annotations for the confusion matrix
    text_annotations = conf_matrix.astype(str)

    heatmap_col, _ = st.columns(2)
    with heatmap_col:
        # Draw on an explicit figure/axes pair rather than the global pyplot state.
        fig, ax = plt.subplots(figsize=(3, 3))
        sns.heatmap(conf_matrix, annot=text_annotations, fmt="", cmap="Blues",
                    cbar=False, square=True, ax=ax)
        ax.set_xlabel("Predicted")
        ax.set_ylabel("Actual")
        ax.set_title("Confusion Matrix")
        st.pyplot(fig)
        # Release the figure so repeated reruns do not accumulate open figures.
        plt.close(fig)
|
202 |
+
|
203 |
+
|
204 |
+
def prediction_page():
    """Render the "Get Your Diagnosis" page.

    Lets the user pick one of the pre-trained models, collects patient
    measurements through a form and, on submit, encodes the categorical
    fields, scales the numeric fields and displays the predicted diagnosis
    ("Positive" when the model predicts 1, otherwise "Negative").

    Returns:
        None. All output is written to the Streamlit page.
    """
    # Function-scope import keeps this block self-contained.
    from pathlib import Path

    # Build artifact paths with pathlib instead of backslash string literals:
    # in 'Model Training\rf_model.pkl' the '\r' is a carriage-return escape,
    # which corrupts the path, and a backslash separator only works on Windows.
    artifact_dir = Path("Model Training")

    # Selectbox label -> pickled estimator file name.
    model_files = {
        "SVM - Linear": "svm_model_linear.pkl",
        "SVM - Polynomial": "svm_model_poly.pkl",
        "SVM - RBF": "svm_model_rbf.pkl",
        "Random Forest": "rf_model.pkl",
        "Random Forest Boosted": "rf_boosted.pkl",
        "Logistic Regression": "lr_model.pkl",
        "GDA": "gda.pkl",
    }

    st.title("Get Your Diagnosis")
    st.subheader("Symptoms Entry Form")

    # Model Selection
    model_choice = st.selectbox("Choose a Pre-trained Model", list(model_files))

    # Load pre-trained model
    # NOTE(review): joblib.load unpickles its input; only load trusted artifacts.
    model_file = model_files.get(model_choice)
    model = joblib.load(artifact_dir / model_file) if model_file else None

    with st.form(key="health_data_form"):
        col1, col2, col3, col4 = st.columns(4)

        with col1:
            # Categorical features with dropdown selection
            gender = st.selectbox("Gender", ["Male", "Female"], key="gender")
            weight = st.number_input("Weight (kg)", min_value=0, step=1, key="weight")
            cholesterol = st.number_input("Cholesterol (mg/dL)", min_value=0, step=1, key="cholesterol")

        with col2:
            family_history = st.selectbox("Family History of Illness", ["Yes", "No"], key="family_history")
            bmi = st.number_input("BMI", min_value=0.0, step=0.1, key="bmi")
            triglycerides = st.number_input("Triglycerides Level (mg/dL)", min_value=0, step=1, key="triglycerides")

        with col3:
            height = st.number_input("Height (cm)", min_value=0.0, step=0.1, key="height")
            obese_status = st.selectbox("Obese/Non Obese", ["Obese", "Non-Obese"], key="obese_status")
            ldl = st.number_input("LDL Level (mg/dL)", min_value=0.0, step=0.1, key="ldl")

        with col4:
            vldl = st.number_input("VLDL Level (mg/dL)", min_value=0.0, step=0.1, key="vldl")

        # Submit button
        submit_button = st.form_submit_button(label="Submit")

    if submit_button:
        # Column order handed to the encoder, scaler and model.
        columns = ['Gender', 'Family history', 'Height', 'Weight', 'BMI', 'Obese/non obese',
                   'Cholesterol', 'Triglycerides level', 'LDL level', 'VLDL level']

        # The triglycerides value must be stored under 'Triglycerides level'.
        # It used to be stored as 'Triglycerides', so reindex(..., fill_value=0)
        # dropped the user's input and fed the model a constant 0 instead.
        data = pd.DataFrame({
            "Gender": [gender],
            "Family history": [family_history],
            "Height": [height],
            "Weight": [weight],
            "BMI": [bmi],
            "Obese/non obese": [obese_status],
            "Cholesterol": [cholesterol],
            "Triglycerides level": [triglycerides],
            "LDL level": [ldl],
            "VLDL level": [vldl],
        })
        # Plain column selection raises KeyError on a name mismatch instead of
        # silently zero-filling the missing feature.
        data = data[columns]

        categorical_columns = ['Gender', 'Family history', 'Obese/non obese']
        numerical_columns = [col_name for col_name in data.columns if col_name not in categorical_columns]

        # Encoding categorical data
        # NOTE(review): the option strings above presumably match the categories
        # the encoder was fitted on -- confirm against the training notebook.
        encoder = joblib.load(artifact_dir / "encoder.pkl")
        data[categorical_columns] = encoder.transform(data[categorical_columns])

        # Scaling the numeric features
        scaler = joblib.load(artifact_dir / "scaler.pkl")
        data[numerical_columns] = scaler.transform(data[numerical_columns])

        prediction = int(model.predict(data)[0])
        st.write(f"### Predicted Diagnosis: {'Positive' if prediction == 1 else 'Negative'}")
|
287 |
+
|
288 |
+
|
289 |
+
def conclusion_page():
    """Render the closing page: a title followed by the key-takeaways text."""
    # NOTE(review): the indentation inside this literal is not recoverable from
    # the scraped source; it is written at function-body indentation here.
    closing_notes = """
    ## Key Takeaways
    - Comprehensive EDA provides actionable insights into the data.
    - Pre-trained machine learning models allow efficient predictions.
    - The interactive app makes the analysis accessible and engaging.

    Thank you for exploring this project!
    """

    st.title("Conclusion")
    st.markdown(closing_notes)
|
299 |
+
|
300 |
+
# Route table: sidebar label -> function that renders that page.
_page_routes = {
    "Introduction": introduction_page,
    "Descriptive Statistics": stats_page,
    "Data Analytics": eda_page,
    "Model Evaluation": model_page,
    "Get Your Diagnosis": prediction_page,
    "Conclusion": conclusion_page,
}

# Sidebar radio buttons list the labels in the route table's insertion order.
page = st.sidebar.radio("Navigation Menu", list(_page_routes))

# Render whichever page is selected; an unknown label renders nothing.
_render_page = _page_routes.get(page)
if _render_page is not None:
    _render_page()
|
requirements.txt
ADDED
Binary file (2.01 kB). View file
|
|
rf_boosted.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5bcff453f23629ac8c235105320385407489a939858ec303db312ebb934f704e
|
3 |
+
size 179112
|
rf_model.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:994d9d2dbee5adf22ee6434a0a778a0dae2f37c4883e414948a9b3fed3f53482
|
3 |
+
size 562377
|
scaler.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:125d5da89197d1212608fa958179f4601f9e98f7efe03640a72cbbd33ecc84e1
|
3 |
+
size 1183
|
svm_model_linear.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7636d9a64fcdc959b7d3d614e7d2567c4da70539e6147558dbedf8159b0ef637
|
3 |
+
size 9611
|
svm_model_poly.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1dcb91962b964f4d63a0acefbe2842d3d72af04c54e2c8a9341bea9688b9e027
|
3 |
+
size 8203
|
svm_model_rbf.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ddd0af20c80ecad7f8770169f9c8ba98aecf7271b20eddd5ca5d838a04edfecc
|
3 |
+
size 9019
|
test_data.xlsx
ADDED
Binary file (6.5 kB). View file
|
|
training.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|