Gangsterbra123 committed on
Commit
11f76af
·
verified ·
1 Parent(s): e477fa5

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +64 -81
app.py CHANGED
@@ -3,6 +3,7 @@ import pickle
3
  import pandas as pd
4
  import ast
5
  import numpy as np
 
6
 
7
  # Set the option to opt into future behavior
8
  pd.set_option('future.no_silent_downcasting', True)
@@ -30,7 +31,7 @@ education_mapping = "{'Preschool': 1, '1st-4th': 2, '5th-6th': 3, '7th-8th': 4,
30
  education_dict = ast.literal_eval(education_mapping)
31
 
32
  # List of the columns present in dataframe used to train the model
33
- columns = ['age', 'education-num', 'sex', 'capital-gain', 'capital-loss',
34
  'hours-per-week', 'workclass_Local-gov', 'workclass_Private',
35
  'workclass_Self-emp-inc', 'workclass_Self-emp-not-inc',
36
  'workclass_State-gov', 'workclass_Without-pay',
@@ -45,17 +46,36 @@ columns = ['age', 'education-num', 'sex', 'capital-gain', 'capital-loss',
45
  'occupation_Sales', 'occupation_Tech-support',
46
  'occupation_Transport-moving', 'relationship_Not-in-family',
47
  'relationship_Other-relative', 'relationship_Own-child',
48
- 'relationship_Unmarried', 'relationship_Wife',
49
- 'race_Asian-Pac-Islander', 'race_Black', 'race_Other', 'race_White']
50
 
51
- # Code for SVM
52
- def SVM_Salary(workclass, education, marital_status, occupation, relationship, race, sex, age, capital_gain, capital_loss, hours_per_week):
53
- with open('../SVM/models/best_svm_OvM_Salary_Classification.pkl', 'rb') as f:
54
- loaded_model = pickle.load(f)
55
 
56
- # Loading the scaler and transform the data
57
- with open('../SVM/models/z-score_scaler_svm_salary_classification.pkl', 'rb') as f:
58
- scaler = pickle.load(f)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
 
60
  new_data = {
61
  'age': age,
@@ -75,7 +95,7 @@ def SVM_Salary(workclass, education, marital_status, occupation, relationship, r
75
  new_data = new_data.rename(columns={'education': 'education-num'})
76
 
77
  # Create an empty DataFrame with these columns
78
- formattedDF = pd.DataFrame(columns=columns)
79
 
80
  # Copying over the continuous columns
81
  formattedDF['age'] = new_data['age']
@@ -93,7 +113,7 @@ def SVM_Salary(workclass, education, marital_status, occupation, relationship, r
93
  # Fill remaining columns with 0
94
  formattedDF.fillna(0, inplace=True)
95
  formattedDF = formattedDF.astype(int)
96
- formattedDF = formattedDF[formattedDF.columns.intersection(columns)]
97
 
98
  # Assuming 'high_skew_columns' from training is a list of columns with high skewness
99
  for column in ['capital-gain', 'capital-loss']:
@@ -108,15 +128,32 @@ def SVM_Salary(workclass, education, marital_status, occupation, relationship, r
108
 
109
  salary_result = '<=50K' if prediction[0] == 0 else '>50K'
110
 
111
- return "Predicted Salary Class:", salary_result
 
 
112
 
113
- def SVM_Health(age, sex, bmi, children, smoker, region):
114
- with open('models/best_health_svm_OvM_Charges_Classification.pkl', 'rb') as f:
115
- loaded_model = pickle.load(f)
116
 
117
- # Loading the scaler and transform the data
118
- with open('models/z-score_scaler_svm_charges_classification.pkl', 'rb') as f:
119
- scaler = pickle.load(f)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
 
121
  #Inverting the dict to map the 'charges' values back to 'charges' labels
122
  inverse_mapping_charges = {
@@ -139,7 +176,7 @@ def SVM_Health(age, sex, bmi, children, smoker, region):
139
  new_data = pd.DataFrame([new_data])
140
 
141
  # Create an empty DataFrame with these columns
142
- formattedDF = pd.DataFrame(columns=columns)
143
 
144
  # Copying over the continuous columns
145
  formattedDF['age'] = new_data['age']
@@ -147,14 +184,12 @@ def SVM_Health(age, sex, bmi, children, smoker, region):
147
  formattedDF['bmi'] = new_data['bmi']
148
  formattedDF['children'] = new_data['children']
149
  formattedDF['smoker'] = new_data['smoker'].apply(lambda x: 1 if x == 'Yes' else 0)
150
- formattedDF['marital-status_'+new_data['marital-status']] = 1
151
  formattedDF['region_'+new_data['region']] = 1
152
 
153
-
154
  # Fill remaining columns with 0
155
  formattedDF.fillna(0, inplace=True)
156
  formattedDF = formattedDF.astype(int)
157
- formattedDF = formattedDF[formattedDF.columns.intersection(columns)]
158
 
159
  # Apply the scaler to the unseen data
160
  continuous_columns = ['age', 'bmi']
@@ -164,32 +199,13 @@ def SVM_Health(age, sex, bmi, children, smoker, region):
164
  prediction = loaded_model.predict(formattedDF)[0]
165
  prediction = inverse_mapping_charges[prediction]
166
 
167
- return "Predicted Charges Class:", prediction
168
-
169
- # Code for LogisticRegression
170
- def LogisticRegression_Salary(input_image):
171
- # Task 2 logic
172
- return "Task 2 Result"
173
-
174
- # Code for LogisticRegression
175
- def LogisticRegression_Health(input_image):
176
- # Task 2 logic
177
- return "Task 2 Result"
178
-
179
- # Code for
180
- def RandomForests_Salary(input_image):
181
- # Task 2 logic
182
- return "Task 2 Result"
183
-
184
- # Code for
185
- def RandomForests_Health(input_image):
186
- # Task 2 logic
187
- return "Task 2 Result"
188
 
189
  # interface one
190
  iface1 = gr.Interface(
191
- fn=SVM_Salary,
192
  inputs=[
 
193
  gr.Dropdown(choices=workclass_options, label="Workclass"),
194
  gr.Dropdown(choices=education_option, label="Education"),
195
  gr.Dropdown(choices=marital_status_option, label="Marital Status"),
@@ -208,8 +224,9 @@ iface1 = gr.Interface(
208
 
209
  # interface two
210
  iface2 = gr.Interface(
211
- fn=SVM_Health,
212
  inputs=[
 
213
  gr.Slider(minimum=age[0], maximum=age[1], step=1, label="Age"),
214
  gr.Dropdown(choices=sex_option, label="Sex"),
215
  gr.Slider(minimum=bmi[0], maximum=bmi[1], step=0.1, label="BMI"),
@@ -221,41 +238,7 @@ iface2 = gr.Interface(
221
  title="SVM - Health"
222
  )
223
 
224
- # interface three
225
- iface3 = gr.Interface(
226
- fn=LogisticRegression_Salary,
227
- inputs="image",
228
- outputs="text",
229
- title="Logistic Regression"
230
- )
231
-
232
- # interface four
233
- iface4 = gr.Interface(
234
- fn=LogisticRegression_Health,
235
- inputs="image",
236
- outputs="text",
237
- title="Logistic Regression"
238
- )
239
-
240
- # interface five
241
- iface5 = gr.Interface(
242
- fn=RandomForests_Salary,
243
- inputs="image",
244
- outputs="text",
245
- title="Random Forests"
246
- )
247
-
248
- # interface six
249
- iface6 = gr.Interface(
250
- fn=RandomForests_Health,
251
- inputs="image",
252
- outputs="text",
253
- title="Random Forests"
254
- )
255
-
256
- demo = gr.TabbedInterface([iface1, iface2, iface3, iface4, iface5, iface6], ["SVM - Jerome Agius", "SVM - Jerome Agius",
257
- "Logistic Regression - Isaac Muscat", "Logistic Regression - Isaac Muscat",
258
- "Random Forests - Kyle Demicoli", "Random Forests - Kyle Demicoli"])
259
 
260
  # Run the interface
261
  demo.launch(share=True)
 
3
  import pandas as pd
4
  import ast
5
  import numpy as np
6
+ import os
7
 
8
  # Set the option to opt into future behavior
9
  pd.set_option('future.no_silent_downcasting', True)
 
31
  education_dict = ast.literal_eval(education_mapping)
32
 
33
  # List of the columns present in dataframe used to train the model
34
+ salary_columns = ['age', 'education-num', 'sex', 'capital-gain', 'capital-loss',
35
  'hours-per-week', 'workclass_Local-gov', 'workclass_Private',
36
  'workclass_Self-emp-inc', 'workclass_Self-emp-not-inc',
37
  'workclass_State-gov', 'workclass_Without-pay',
 
46
  'occupation_Sales', 'occupation_Tech-support',
47
  'occupation_Transport-moving', 'relationship_Not-in-family',
48
  'relationship_Other-relative', 'relationship_Own-child',
49
+ 'relationship_Unmarried', 'relationship_Wife', 'race_Asian-Pac-Islander',
50
+ 'race_Black', 'race_Other', 'race_White']
51
 
52
+ health_columns = ['age', 'sex', 'bmi', 'children', 'smoker', 'region_northwest', 'region_southeast', 'region_southwest']
 
 
 
53
 
54
+ # Salary prediction using the selected model (0 = SVM, 1 = Logistic Regression, 2 = Random Forest)
55
+ def Salary(model, workclass, education, marital_status, occupation, relationship, race, sex, age, capital_gain, capital_loss, hours_per_week):
56
+
57
+ # Set the working directory to the script's directory
58
+ os.chdir(os.path.dirname(os.path.abspath(__file__)))
59
+
60
+ if model == 0:
61
+ model_used = "SVM"
62
+ with open('models/best_svm_OvM_Salary_Classification.pkl', 'rb') as f:
63
+ loaded_model = pickle.load(f)
64
+
65
+ # Loading the scaler and transform the data
66
+ with open('models/z-score_scaler_svm_salary_classification.pkl', 'rb') as f:
67
+ scaler = pickle.load(f)
68
+ elif model == 1:
69
+ model_used = "Logistic Regression"
70
+ with open('models/best_lr_Salary_Classification.pkl', 'rb') as f:
71
+ loaded_model = pickle.load(f)
72
+
73
+ # Loading the scaler and transform the data
74
+ with open('models/z-score_scaler_lr_salary_classification.pkl', 'rb') as f:
75
+ scaler = pickle.load(f)
76
+ elif model == 2:
77
+ model_used = "Random Forest"
78
+ # TODO: add Random Forest model; until then this branch leaves loaded_model and scaler unset
79
 
80
  new_data = {
81
  'age': age,
 
95
  new_data = new_data.rename(columns={'education': 'education-num'})
96
 
97
  # Create an empty DataFrame with these columns
98
+ formattedDF = pd.DataFrame(columns=salary_columns)
99
 
100
  # Copying over the continuous columns
101
  formattedDF['age'] = new_data['age']
 
113
  # Fill remaining columns with 0
114
  formattedDF.fillna(0, inplace=True)
115
  formattedDF = formattedDF.astype(int)
116
+ formattedDF = formattedDF[formattedDF.columns.intersection(salary_columns)]
117
 
118
  # Assuming 'high_skew_columns' from training is a list of columns with high skewness
119
  for column in ['capital-gain', 'capital-loss']:
 
128
 
129
  salary_result = '<=50K' if prediction[0] == 0 else '>50K'
130
 
131
+ return f"Predicted using {model_used} Salary Class: {salary_result}"
132
+
133
+ def Health(model, age, sex, bmi, children, smoker, region):
134
 
135
+ # Set the working directory to the script's directory
136
+ os.chdir(os.path.dirname(os.path.abspath(__file__)))
 
137
 
138
+ if model == 0:
139
+ model_used = "SVM"
140
+ with open('models/best_health_svm_OvM_Charges_Classification.pkl', 'rb') as f:
141
+ loaded_model = pickle.load(f)
142
+
143
+ # Loading the scaler and transform the data
144
+ with open('models/z-score_scaler_svm_charges_classification.pkl', 'rb') as f:
145
+ scaler = pickle.load(f)
146
+ elif model == 1:
147
+ model_used = "Logistic Regression"
148
+ with open('models/best_health_lr_Charges_Classification.pkl', 'rb') as f:
149
+ loaded_model = pickle.load(f)
150
+
151
+ # Loading the scaler and transform the data
152
+ with open('models/z-score_scaler_lr_charges_classification.pkl', 'rb') as f:
153
+ scaler = pickle.load(f)
154
+ elif model == 2:
155
+ model_used = "Random Forest"
156
+ # TODO: add Random Forest model; until then this branch leaves loaded_model and scaler unset
157
 
158
  #Inverting the dict to map the 'charges' values back to 'charges' labels
159
  inverse_mapping_charges = {
 
176
  new_data = pd.DataFrame([new_data])
177
 
178
  # Create an empty DataFrame with these columns
179
+ formattedDF = pd.DataFrame(columns=health_columns)
180
 
181
  # Copying over the continuous columns
182
  formattedDF['age'] = new_data['age']
 
184
  formattedDF['bmi'] = new_data['bmi']
185
  formattedDF['children'] = new_data['children']
186
  formattedDF['smoker'] = new_data['smoker'].apply(lambda x: 1 if x == 'Yes' else 0)
 
187
  formattedDF['region_'+new_data['region']] = 1
188
 
 
189
  # Fill remaining columns with 0
190
  formattedDF.fillna(0, inplace=True)
191
  formattedDF = formattedDF.astype(int)
192
+ formattedDF = formattedDF[formattedDF.columns.intersection(health_columns)]
193
 
194
  # Apply the scaler to the unseen data
195
  continuous_columns = ['age', 'bmi']
 
199
  prediction = loaded_model.predict(formattedDF)[0]
200
  prediction = inverse_mapping_charges[prediction]
201
 
202
+ return f"Predicted using {model_used} Charges Class: {prediction}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
203
 
204
  # interface one
205
  iface1 = gr.Interface(
206
+ fn=Salary,
207
  inputs=[
208
+ gr.Dropdown(choices=[("SVM - Jerome Agius", 0), ("Logistic Regression - Isaac Muscat", 1), ("Random Forest - Kyle Demicoli", 2)], label="Model", value=0),
209
  gr.Dropdown(choices=workclass_options, label="Workclass"),
210
  gr.Dropdown(choices=education_option, label="Education"),
211
  gr.Dropdown(choices=marital_status_option, label="Marital Status"),
 
224
 
225
  # interface two
226
  iface2 = gr.Interface(
227
+ fn=Health,
228
  inputs=[
229
+ gr.Dropdown(choices=[("SVM - Jerome Agius", 0), ("Logistic Regression - Isaac Muscat", 1), ("Random Forest - Kyle Demicoli", 2)], label="Model", value=0),
230
  gr.Slider(minimum=age[0], maximum=age[1], step=1, label="Age"),
231
  gr.Dropdown(choices=sex_option, label="Sex"),
232
  gr.Slider(minimum=bmi[0], maximum=bmi[1], step=0.1, label="BMI"),
 
238
  title="SVM - Health"
239
  )
240
 
241
+ demo = gr.TabbedInterface([iface1, iface2], ["Salary Prediction", "Health Charges Prediction"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
242
 
243
  # Run the interface
244
  demo.launch(share=True)