File size: 7,028 Bytes
bd3cc45 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 |
import os
import joblib
import pandas as pd
from imblearn.over_sampling import SMOTE
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
import gradio as gr
# Load models and preprocessor
model_dir = 'models'
data_dir = 'datasets'
preprocessor_path = os.path.join(model_dir, 'churn_preprocessor.joblib')
# NOTE: joblib.load unpickles the file, so only trusted artifacts belong in
# the models directory.
loaded_preprocessor = joblib.load(preprocessor_path)

# Display names of the classifiers; each one maps to a file in model_dir.
# Every entry needs its own trailing comma: two adjacent string literals are
# silently concatenated into a single (non-existent) model name.
model_names = [
    'Ada Boost Classifier',
    'Extra Trees Classifier',
    'Gradient Boosting Classifier',
    'LGBM Classifier',
    'LogisticRegression',
    'RandomForestClassifier',
    'XGBoost Classifier',
]

# File name = display name with the spaces removed, plus ".joblib".
model_paths = {
    name: os.path.join(model_dir, f"{name.replace(' ', '')}.joblib")
    for name in model_names
}

# Load models safely: a model that fails to load is reported and skipped so
# that the remaining models stay usable.
models = {}
for name, path in model_paths.items():
    try:
        models[name] = joblib.load(path)
    except Exception as e:
        print(f"Error loading model {name} from {path}: {str(e)}")
# Read the cleaned churn dataset from the datasets directory
data_path = os.path.join(data_dir, 'cleaned_IT_customer_churn.csv')
df = pd.read_csv(data_path)

# Target column on one side, every remaining column as features on the other
y = df['Churn']
X = df.drop(columns=['Churn'])

# Fixed option lists offered by the categorical form inputs
input_choices = {
    'gender': ['Female', 'Male'],
    'internet_service': ['DSL', 'Fiber optic', 'No'],
    'contract': ['Month-to-month', 'One year', 'Two year'],
    'payment_method': [
        'Electronic check',
        'Mailed check',
        'Bank transfer (automatic)',
        'Credit card (automatic)',
    ],
    'others': ['No', 'Yes'],
}

# Mean and max of the numeric columns, used for form defaults and limits.
# After reset_index the rows are labelled 0 (mean) and 1 (max), following the
# order of the aggregations requested.
_numeric_summary = df[['tenure', 'MonthlyCharges', 'TotalCharges']].agg(['mean', 'max'])
stats = _numeric_summary.reset_index()
means, maxs = stats.loc[0], stats.loc[1]
# Metrics calculation function
def calculate_metrics(y_true, y_pred):
    """Score predictions against the true labels, expressed as percentages.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels, aligned with ``y_true``.

    Returns:
        A dict with the keys 'Accuracy', 'Recall', 'F1 Score' and
        'Precision' (in that order), each holding the corresponding
        scikit-learn score multiplied by 100.
    """
    scorers = (
        ('Accuracy', accuracy_score),
        ('Recall', recall_score),
        ('F1 Score', f1_score),
        ('Precision', precision_score),
    )
    return {label: scorer(y_true, y_pred) * 100 for label, scorer in scorers}
# Prediction and metrics evaluation function

# Evaluation metrics per model name. They depend only on the dataset and the
# loaded models, never on the form inputs, so they are computed on the first
# request and reused afterwards.
_model_metrics_cache = {}


def _cached_model_metrics():
    """Return ``{model name: metrics dict}``, computing it on first use.

    The dataset is run through the preprocessor, balanced with SMOTE
    (``random_state=42``), and every loaded model is scored on the resampled
    data with ``calculate_metrics``.
    """
    if not _model_metrics_cache:
        X_trans = loaded_preprocessor.transform(X)
        # Using SMOTE to handle class imbalance
        X_resampled, y_resampled = SMOTE(random_state=42).fit_resample(X_trans, y)
        computed = {
            name: calculate_metrics(y_resampled, model.predict(X_resampled))
            for name, model in models.items()
        }
        # Publish only after every model has been scored, so a failure half-way
        # through never leaves a partially filled cache behind.
        _model_metrics_cache.update(computed)
    return _model_metrics_cache


def load_and_predict(
        gender, internet_service, contract, payment_method, tenure, monthly_charges, total_charges,
        senior_citizen, partner, dependents, phone_service, multiple_lines, online_security, online_backup,
        device_protection, tech_support, streaming_tv, streaming_movies, paperless_billing):
    """Predict churn for one customer with every loaded model.

    Args:
        gender: 'Male' is encoded as 1, any other value as 0.
        internet_service, contract, payment_method: Categorical values passed
            to the preprocessor as strings.
        tenure: Converted with ``int``.
        monthly_charges, total_charges: Converted with ``float``.
        senior_citizen, partner, dependents, phone_service, multiple_lines,
        online_security, online_backup, device_protection, tech_support,
        streaming_tv, streaming_movies, paperless_billing: 'Yes' is encoded
            as 1, any other value as 0.

    Returns:
        A DataFrame with one row per loaded model and the columns 'Model',
        'Predicted Churn' ('Yes'/'No'), 'Accuracy', 'Recall', 'F1 Score' and
        'Precision', sorted by 'Accuracy' in descending order.

    Raises:
        gr.Error: If no model is loaded, or if building the sample,
            preprocessing or prediction fails. The output component is a
            gr.DataFrame, so an error string returned as the result would not
            be shown as a message; gr.Error is Gradio's way of surfacing one.
    """
    if not models:
        raise gr.Error("An error occurred during model loading or prediction: no models could be loaded")

    try:
        # A None input makes int()/float() raise, which is reported below.
        sample = {
            'gender': int(gender == 'Male'),
            'SeniorCitizen': int(senior_citizen == 'Yes'),
            'Partner': int(partner == 'Yes'),
            'Dependents': int(dependents == 'Yes'),
            'tenure': int(tenure),
            'PhoneService': int(phone_service == 'Yes'),
            'MultipleLines': int(multiple_lines == 'Yes'),
            'InternetService': str(internet_service),
            'OnlineSecurity': int(online_security == 'Yes'),
            'OnlineBackup': int(online_backup == 'Yes'),
            'DeviceProtection': int(device_protection == 'Yes'),
            'TechSupport': int(tech_support == 'Yes'),
            'StreamingTV': int(streaming_tv == 'Yes'),
            'StreamingMovies': int(streaming_movies == 'Yes'),
            'Contract': str(contract),
            'PaperlessBilling': int(paperless_billing == 'Yes'),
            'PaymentMethod': str(payment_method),
            'MonthlyCharges': float(monthly_charges),
            'TotalCharges': float(total_charges)
        }
        sample_df = pd.DataFrame([sample])
        sample_trans = loaded_preprocessor.transform(sample_df)

        metrics_by_model = _cached_model_metrics()
        results = [
            {
                'Model': name,
                'Predicted Churn': 'Yes' if model.predict(sample_trans)[0] == 1 else 'No',
                **metrics_by_model[name],
            }
            for name, model in models.items()
        ]
        return pd.DataFrame(results).sort_values(by='Accuracy', ascending=False).reset_index(drop=True)
    except Exception as e:
        raise gr.Error(f"An error occurred during model loading or prediction: {str(e)}") from e
# Gradio Interface setup
# Labels of the Yes/No questions, in the order load_and_predict expects them
# after the first seven inputs.
_yes_no_labels = (
    "Senior Citizen",
    "Partner",
    "Dependents",
    "Phone Service",
    "Multiple Lines",
    "Online Security",
    "Online Backup",
    "Device Protection",
    "Tech Support",
    "Streaming TV",
    "Streaming Movies",
    "Paperless Billing",
)

# Every choice input defaults to the first entry of its option list. Numeric
# inputs default to the dataset mean and allow up to 1.5x the dataset maximum.
input_components = [
    gr.Radio(label="Gender", choices=input_choices['gender'], value=input_choices['gender'][0]),
    gr.Dropdown(
        label="Internet Service",
        choices=input_choices['internet_service'],
        value=input_choices['internet_service'][0],
    ),
    gr.Dropdown(
        label="Contract",
        choices=input_choices['contract'],
        value=input_choices['contract'][0],
    ),
    gr.Dropdown(
        label="Payment Method",
        choices=input_choices['payment_method'],
        value=input_choices['payment_method'][0],
    ),
    gr.Slider(
        label="Tenure (Months)",
        minimum=0,
        maximum=int(maxs['tenure'] * 1.5),
        value=int(means['tenure']),
    ),
    gr.Number(
        label="Monthly Charges",
        minimum=0.0,
        maximum=float(maxs['MonthlyCharges'] * 1.5),
        value=float(means['MonthlyCharges']),
    ),
    gr.Number(
        label="Total Charges",
        minimum=0.0,
        maximum=float(maxs['TotalCharges'] * 1.5),
        value=float(means['TotalCharges']),
    ),
    *(
        gr.Radio(label=question, choices=input_choices['others'], value=input_choices['others'][0])
        for question in _yes_no_labels
    ),
]

# The prediction table is rendered as a dataframe
output_component = gr.DataFrame()
# Launching the Gradio Interface
# Builds the form from input_components, routes submissions to
# load_and_predict and shows the result in output_component.
gr.Interface(
    fn=load_and_predict,
    inputs=input_components,
    outputs=output_component,
    title="♻️ Customer Churn Prediction",
    description="Enter the following information to predict customer churn.",
    flagging_mode="never",  # Replacing allow_flagging with flagging_mode
).launch()