"""Gradio demo for two classifiers: salary class and health-insurance charges class.

Each tab collects the raw inputs, rebuilds the one-hot encoded feature row in
the column order listed in ``salary_columns`` / ``health_columns``, scales the
continuous columns with the pickled scaler that accompanies the chosen model,
and reports the predicted class as text.
"""
import ast
import functools
import os
import pickle

import gradio as gr
import numpy as np
import pandas as pd

# Set the option to opt into future behavior
pd.set_option('future.no_silent_downcasting', True)

# Pickled models/scalers live in ``models/`` next to this script. Resolving the
# path here avoids calling os.chdir() on every request, which mutates
# process-wide state.
MODELS_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'models')

# List of options for the dropdowns
workclass_options = sorted([
    'State-gov', 'Self-emp-not-inc', 'Private', 'Federal-gov', 'Local-gov',
    'Self-emp-inc', 'Without-pay',
])
education_option = [
    'Preschool', '1st-4th', '5th-6th', '7th-8th', '9th', '10th', '11th',
    '12th', 'HS-grad', 'Some-college', 'Assoc-voc', 'Assoc-acdm', 'Bachelors',
    'Masters', 'Prof-school', 'Doctorate',
]
marital_status_option = sorted([
    'Never-married', 'Married-civ-spouse', 'Divorced', 'Separated',
    'Married-AF-spouse', 'Widowed', 'Married-spouse-absent',
])
occupation_option = sorted([
    'Adm-clerical', 'Exec-managerial', 'Handlers-cleaners', 'Prof-specialty',
    'Sales', 'Farming-fishing', 'Machine-op-inspct', 'Other-service',
    'Transport-moving', 'Tech-support', 'Craft-repair', 'Protective-serv',
    'Armed-Forces', 'Priv-house-serv',
])
relationship_option = sorted([
    'Not-in-family', 'Husband', 'Wife', 'Own-child', 'Unmarried',
    'Other-relative',
])
race_option = sorted([
    'White', 'Black', 'Other', 'Asian-Pac-Islander', 'Amer-Indian-Eskimo',
])
sex_option = sorted(['Male', 'Female'])

# [minimum, maximum] bounds for the sliders
age = [0, 100]
capital_gain = [0, 99999]
capital_loss = [0, 4356]
hours_per_week = [20, 60]
children_count = [0, 15]
bmi = [10, 100]

region_option = ['southwest', 'southeast', 'northwest', 'northeast']
smoker_option = ['yes', 'no']

# Choices shared by the "Model" dropdown of both tabs: (label, model id).
model_choices = [
    ("SVM - Jerome Agius", 0),
    ("Logistic Regression - Isaac Muscat", 1),
    ("Random Forest - Kyle Demicoli", 2),
]

# Mapping for education (label -> ordinal 'education-num')
education_mapping = "{'Preschool': 1, '1st-4th': 2, '5th-6th': 3, '7th-8th': 4, '9th': 5, '10th': 6, '11th': 7, '12th': 8, 'HS-grad': 9, 'Some-college': 10, 'Assoc-voc': 11, 'Assoc-acdm': 12, 'Bachelors': 13, 'Masters': 14, 'Prof-school': 15, 'Doctorate': 16}"
education_dict = ast.literal_eval(education_mapping)

# List of the columns present in dataframe used to train the model
salary_columns = [
    'age', 'education-num', 'sex', 'capital-gain', 'capital-loss',
    'hours-per-week',
    'workclass_Local-gov', 'workclass_Private', 'workclass_Self-emp-inc',
    'workclass_Self-emp-not-inc', 'workclass_State-gov',
    'workclass_Without-pay',
    'marital-status_Married-AF-spouse', 'marital-status_Married-civ-spouse',
    'marital-status_Married-spouse-absent', 'marital-status_Never-married',
    'marital-status_Separated', 'marital-status_Widowed',
    'occupation_Armed-Forces', 'occupation_Craft-repair',
    'occupation_Exec-managerial', 'occupation_Farming-fishing',
    'occupation_Handlers-cleaners', 'occupation_Machine-op-inspct',
    'occupation_Other-service', 'occupation_Priv-house-serv',
    'occupation_Prof-specialty', 'occupation_Protective-serv',
    'occupation_Sales', 'occupation_Tech-support',
    'occupation_Transport-moving',
    'relationship_Not-in-family', 'relationship_Other-relative',
    'relationship_Own-child', 'relationship_Unmarried', 'relationship_Wife',
    'race_Asian-Pac-Islander', 'race_Black', 'race_Other', 'race_White',
]
health_columns = [
    'age', 'sex', 'bmi', 'children', 'smoker',
    'region_northwest', 'region_southeast', 'region_southwest',
]

# model id -> (display name, model pickle, scaler pickle).
# A ``None`` file name marks a model that has not been added yet.
SALARY_MODELS = {
    0: ("SVM",
        'best_svm_OvM_Salary_Classification.pkl',
        'z-score_scaler_svm_salary_classification.pkl'),
    1: ("Logistic Regression",
        'best_lr_Salary_Classification.pkl',
        'z-score_scaler_lr_salary_classification.pkl'),
    2: ("Random Forest", None, None),
}
HEALTH_MODELS = {
    0: ("SVM",
        'best_health_svm_OvM_Charges_Classification.pkl',
        'z-score_scaler_svm_charges_classification.pkl'),
    1: ("Logistic Regression",
        'best_health_lr_Charges_Classification.pkl',
        'z-score_scaler_lr_charges_classification.pkl'),
    2: ("Random Forest", None, None),
}

# Maps the predicted 'charges' class index back to its label
inverse_mapping_charges = {
    0: 'Very Low (<= 5000)',
    1: 'Low (5001 - 10000)',
    2: 'Moderate (10001 - 15000)',
    3: 'High (15001 - 20000)',
    4: 'Very High (> 20001)',
}


@functools.lru_cache(maxsize=None)
def _load_pickle(filename):
    """Unpickle ``filename`` from MODELS_DIR, caching the result per file name."""
    # SECURITY: pickle.load executes arbitrary code embedded in the file.
    # Only load artefacts that ship with this project.
    with open(os.path.join(MODELS_DIR, filename), 'rb') as f:
        return pickle.load(f)


def _resolve_model(registry, model):
    """Return ``(display name, model, scaler)`` for the selected model id.

    Raises:
        gr.Error: if the id is unknown or the model has no pickled files yet.
    """
    try:
        model_used, model_file, scaler_file = registry[model]
    except (KeyError, TypeError):
        raise gr.Error(f"Unknown model selection: {model!r}") from None
    if model_file is None or scaler_file is None:
        raise gr.Error(f"The {model_used} model is not available yet.")
    return model_used, _load_pickle(model_file), _load_pickle(scaler_file)


def _require_selected(fields):
    """Raise gr.Error naming every dropdown in ``fields`` left unselected."""
    missing = [label for label, value in fields.items() if value in (None, "")]
    if missing:
        raise gr.Error("Please select a value for: " + ", ".join(missing))


def _build_feature_row(columns, values, categories):
    """Build a one-row DataFrame laid out exactly as ``columns``.

    Args:
        columns: ordered feature names.
        values: mapping of column name -> value to copy over directly.
        categories: mapping of one-hot prefix -> selected category.

    Returns:
        A single-row DataFrame whose unset columns are 0.
    """
    row = dict.fromkeys(columns, 0)
    row.update(values)
    for prefix, choice in categories.items():
        indicator = f"{prefix}_{choice}"
        # A category with no indicator column in `columns` is encoded as
        # all zeros.
        if indicator in row:
            row[indicator] = 1
    return pd.DataFrame([row], columns=columns)


def Salary(model, workclass, education, marital_status, occupation,
           relationship, race, sex, age, capital_gain, capital_loss,
           hours_per_week):
    """Predict the salary class ('<=50K' or '>50K') for one person.

    Args:
        model: model id (0 = SVM, 1 = Logistic Regression, 2 = Random Forest).
        workclass, education, marital_status, occupation, relationship, race,
            sex: selected dropdown values.
        age, capital_gain, capital_loss, hours_per_week: slider values.

    Returns:
        A sentence naming the model used and the predicted salary class.

    Raises:
        gr.Error: if a dropdown is unselected, the education level is unknown,
            or the chosen model is not available.
    """
    _require_selected({
        "Workclass": workclass,
        "Education": education,
        "Marital Status": marital_status,
        "Occupation": occupation,
        "Relationship": relationship,
        "Race": race,
        "Sex": sex,
    })
    if education not in education_dict:
        raise gr.Error(f"Unknown education level: {education!r}")

    model_used, loaded_model, scaler = _resolve_model(SALARY_MODELS, model)

    formattedDF = _build_feature_row(
        salary_columns,
        values={
            'age': float(age),
            'education-num': float(education_dict[education]),
            # Encode from the submitted value (Male -> 1, otherwise 0).
            'sex': 1 if sex == 'Male' else 0,
            'capital-gain': float(capital_gain),
            'capital-loss': float(capital_loss),
            'hours-per-week': float(hours_per_week),
        },
        categories={
            'workclass': workclass,
            'marital-status': marital_status,
            'occupation': occupation,
            'relationship': relationship,
            'race': race,
        },
    )

    # log1p is applied to these two columns before scaling.
    # NOTE(review): presumably mirrors a skew correction done at training time.
    skewed_columns = ['capital-gain', 'capital-loss']
    formattedDF[skewed_columns] = np.log1p(formattedDF[skewed_columns])

    # Apply the scaler to the unseen data
    continuous_columns = ['age', 'education-num', 'capital-gain',
                          'capital-loss', 'hours-per-week']
    formattedDF[continuous_columns] = scaler.transform(
        formattedDF[continuous_columns])

    # Make predictions with the loaded model
    prediction = loaded_model.predict(formattedDF)
    salary_result = '<=50K' if prediction[0] == 0 else '>50K'
    return f"Predicted using {model_used} Salary Class: {salary_result}"


def Health(model, age, sex, bmi, children, smoker, region):
    """Predict the health-insurance charges class for one person.

    Args:
        model: model id (0 = SVM, 1 = Logistic Regression, 2 = Random Forest).
        age, bmi, children: slider values.
        sex, smoker, region: selected dropdown values.

    Returns:
        A sentence naming the model used and the predicted charges class.

    Raises:
        gr.Error: if a dropdown is unselected or the chosen model is not
            available.
    """
    _require_selected({"Sex": sex, "Smoker": smoker, "Region": region})

    model_used, loaded_model, scaler = _resolve_model(HEALTH_MODELS, model)

    formattedDF = _build_feature_row(
        health_columns,
        values={
            'age': float(age),
            'sex': 1 if sex == 'Male' else 0,
            # BMI keeps its fractional part (the slider step is 0.1).
            # NOTE(review): confirm the scaler was fit on un-truncated BMI.
            'bmi': float(bmi),
            'children': int(children),
            # The dropdown offers lowercase 'yes'/'no'; compare
            # case-insensitively so a smoker is actually encoded as 1.
            'smoker': 1 if str(smoker).lower() == 'yes' else 0,
        },
        categories={'region': region},
    )

    # Apply the scaler to the unseen data
    continuous_columns = ['age', 'bmi']
    formattedDF[continuous_columns] = scaler.transform(
        formattedDF[continuous_columns])

    # Make predictions with the loaded model
    prediction = inverse_mapping_charges[int(loaded_model.predict(formattedDF)[0])]
    return f"Predicted using {model_used} Charges Class: {prediction}"


# interface one
iface1 = gr.Interface(
    fn=Salary,
    inputs=[
        gr.Dropdown(choices=model_choices, label="Model", value=0),
        gr.Dropdown(choices=workclass_options, label="Workclass"),
        gr.Dropdown(choices=education_option, label="Education"),
        gr.Dropdown(choices=marital_status_option, label="Marital Status"),
        gr.Dropdown(choices=occupation_option, label="Occupation"),
        gr.Dropdown(choices=relationship_option, label="Relationship"),
        gr.Dropdown(choices=race_option, label="Race"),
        gr.Dropdown(choices=sex_option, label="Sex"),
        gr.Slider(minimum=age[0], maximum=age[1], step=1, label="Age"),
        gr.Slider(minimum=capital_gain[0], maximum=capital_gain[1], step=1,
                  label="Capital Gain"),
        gr.Slider(minimum=capital_loss[0], maximum=capital_loss[1], step=1,
                  label="Capital Loss"),
        gr.Slider(minimum=hours_per_week[0], maximum=hours_per_week[1],
                  step=1, label="Hours per Week"),
    ],
    outputs="text",
    title="SVM - Salary",
)

# interface two
iface2 = gr.Interface(
    fn=Health,
    inputs=[
        gr.Dropdown(choices=model_choices, label="Model", value=0),
        gr.Slider(minimum=age[0], maximum=age[1], step=1, label="Age"),
        gr.Dropdown(choices=sex_option, label="Sex"),
        gr.Slider(minimum=bmi[0], maximum=bmi[1], step=0.1, label="BMI"),
        gr.Slider(minimum=children_count[0], maximum=children_count[1],
                  step=1, label="Children"),
        gr.Dropdown(choices=smoker_option, label="Smoker"),
        gr.Dropdown(choices=region_option, label="Region"),
    ],
    outputs="text",
    title="SVM - Health",
)

demo = gr.TabbedInterface(
    [iface1, iface2],
    ["Salary Prediction", "Health Charges Prediction"],
)

# Run the interface only when executed as a script, so importing this module
# does not start a publicly shared server.
if __name__ == "__main__":
    demo.launch(share=True)