File size: 6,782 Bytes
bdb54f3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bd3cc45
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
import os
import joblib
import pandas as pd
from imblearn.over_sampling import SMOTE
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
import gradio as gr

# Load models and preprocessor
model_dir = 'models'
data_dir = 'datasets'

# The preprocessor is loaded unguarded: if this file is missing or unreadable,
# the exception propagates and the script stops here.
preprocessor_path = os.path.join(model_dir, 'churn_preprocessor.joblib')
loaded_preprocessor = joblib.load(preprocessor_path)

model_names = [
    'Ada Boost Classifier',
    'LGBM Classifier',
    'LogisticRegression',
    'XGBoost Classifier',
]

# Each model file is named after its display name with the spaces removed.
model_paths = {}
for name in model_names:
    model_paths[name] = os.path.join(model_dir, f"{name.replace(' ', '')}.joblib")

# Load models safely: a model that fails to load is reported on stdout and
# left out of `models`; the remaining models are still loaded.
models = {}
for name, path in model_paths.items():
    try:
        estimator = joblib.load(path)
    except Exception as e:
        print(f"Error loading model {name} from {path}: {str(e)}")
        continue
    models[name] = estimator
# Load dataset
data_path = os.path.join(data_dir, 'cleaned_IT_customer_churn.csv')
df = pd.read_csv(data_path)

# Prepare features and target: 'Churn' is the target column, every other
# column is treated as a feature.
y = df['Churn']
X = df.drop('Churn', axis=1)

# Predefined input choices offered by the UI widgets. 'others' is the shared
# No/Yes choice list.
input_choices = {
    'gender': [
        'Female',
        'Male',
    ],
    'internet_service': [
        'DSL',
        'Fiber optic',
        'No',
    ],
    'contract': [
        'Month-to-month',
        'One year',
        'Two year',
    ],
    'payment_method': [
        'Electronic check',
        'Mailed check',
        'Bank transfer (automatic)',
        'Credit card (automatic)',
    ],
    'others': [
        'No',
        'Yes',
    ],
}

# Pre-computed statistics for default values.
# After reset_index() the rows are numbered in the order the aggregations
# were requested, so row 0 holds the means and row 1 holds the maxima.
numeric_columns = ['tenure', 'MonthlyCharges', 'TotalCharges']
stats = df[numeric_columns].agg(['mean', 'max']).reset_index()
means, maxs = stats.loc[0], stats.loc[1]

# Metrics calculation function
def calculate_metrics(y_true, y_pred):
    """Score predictions against the true labels, as percentages.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels, aligned with ``y_true``.

    Returns:
        A dict with the keys 'Accuracy', 'Recall', 'F1 Score' and 'Precision'
        (in that order). Each value is the corresponding scikit-learn score,
        called with its default settings, multiplied by 100.
    """
    scorers = (
        ('Accuracy', accuracy_score),
        ('Recall', recall_score),
        ('F1 Score', f1_score),
        ('Precision', precision_score),
    )
    return {label: scorer(y_true, y_pred) * 100 for label, scorer in scorers}

# Prediction and metrics evaluation function

# Per-model evaluation metrics, filled on first use. They depend only on the
# module-level dataset and models, never on the user's inputs, so recomputing
# them (preprocessing + SMOTE + a full predict per model) on every request
# would be wasted work.
_metrics_cache = {}


def _evaluation_metrics():
    """Return ``{model name: metrics dict}`` for the SMOTE-resampled dataset.

    The metrics are computed on the first call and reused afterwards. SMOTE is
    seeded (``random_state=42``), so the cached values are the same ones a
    fresh computation would produce.

    NOTE(review): the metrics are measured on the full CSV after resampling;
    whether the models were trained on this same data is not visible here, so
    treat the numbers as indicative until that is confirmed.
    """
    if not _metrics_cache:
        X_trans = loaded_preprocessor.transform(X)

        # Using SMOTE to handle class imbalance
        X_resampled, y_resampled = SMOTE(random_state=42).fit_resample(X_trans, y)

        # Build the complete result first so a failure part-way through never
        # leaves a partially filled cache behind.
        computed = {
            name: calculate_metrics(y_resampled, model.predict(X_resampled))
            for name, model in models.items()
        }
        _metrics_cache.update(computed)
    return _metrics_cache


def load_and_predict(
        gender, internet_service, contract, payment_method, tenure, monthly_charges, total_charges,
        senior_citizen, partner, dependents, phone_service, multiple_lines, online_security, online_backup,
        device_protection, tech_support, streaming_tv, streaming_movies, paperless_billing):
    """Predict churn for one customer with every loaded model.

    Args:
        gender: 'Male' or 'Female'; encoded as 1 for 'Male', 0 otherwise.
        internet_service, contract, payment_method: Categorical values passed
            to the preprocessor as strings.
        tenure: Converted with ``int()``.
        monthly_charges, total_charges: Converted with ``float()``.
        senior_citizen, partner, dependents, phone_service, multiple_lines,
        online_security, online_backup, device_protection, tech_support,
        streaming_tv, streaming_movies, paperless_billing: 'Yes'/'No' answers;
            encoded as 1 for 'Yes', 0 for anything else.

    Returns:
        A DataFrame with one row per loaded model containing the model name,
        its 'Yes'/'No' churn prediction for this customer and its evaluation
        metrics, sorted by 'Accuracy' in descending order.

    Raises:
        gr.Error: If no model is loaded, or if building the sample,
            preprocessing or prediction fails. The output component is a
            DataFrame, so a plain string return value would not be shown to
            the user as a message; raising ``gr.Error`` surfaces it in the UI.
    """
    # Without this guard an empty result set fails later with an unhelpful
    # KeyError on the 'Accuracy' sort column.
    if not models:
        raise gr.Error("No models are available: none of the model files could be loaded.")

    try:
        # A missing numeric input (None) makes int()/float() raise; that is
        # reported through the handler below.
        sample = {
            'gender': int(gender == 'Male'),
            'SeniorCitizen': int(senior_citizen == 'Yes'),
            'Partner': int(partner == 'Yes'),
            'Dependents': int(dependents == 'Yes'),
            'tenure': int(tenure),
            'PhoneService': int(phone_service == 'Yes'),
            'MultipleLines': int(multiple_lines == 'Yes'),
            'InternetService': str(internet_service),
            'OnlineSecurity': int(online_security == 'Yes'),
            'OnlineBackup': int(online_backup == 'Yes'),
            'DeviceProtection': int(device_protection == 'Yes'),
            'TechSupport': int(tech_support == 'Yes'),
            'StreamingTV': int(streaming_tv == 'Yes'),
            'StreamingMovies': int(streaming_movies == 'Yes'),
            'Contract': str(contract),
            'PaperlessBilling': int(paperless_billing == 'Yes'),
            'PaymentMethod': str(payment_method),
            'MonthlyCharges': float(monthly_charges),
            'TotalCharges': float(total_charges),
        }

        sample_df = pd.DataFrame([sample])
        sample_trans = loaded_preprocessor.transform(sample_df)
        metrics_by_model = _evaluation_metrics()

        results = []
        for name, model in models.items():
            churn_pred = model.predict(sample_trans)
            results.append({
                'Model': name,
                'Predicted Churn': 'Yes' if churn_pred[0] == 1 else 'No',
                **metrics_by_model[name],
            })

        return pd.DataFrame(results).sort_values(by='Accuracy', ascending=False).reset_index(drop=True)

    except Exception as e:
        # Top-level UI boundary: convert any failure into a visible error.
        raise gr.Error(f"An error occurred during model loading or prediction: {str(e)}") from e
    
# Gradio Interface setup
# The components are created in the same order as the parameters of
# load_and_predict, because the interface passes input values positionally.
yes_no = input_choices['others']

dropdown_specs = (
    ("Internet Service", 'internet_service'),
    ("Contract", 'contract'),
    ("Payment Method", 'payment_method'),
)
charge_specs = (
    ("Monthly Charges", 'MonthlyCharges'),
    ("Total Charges", 'TotalCharges'),
)
yes_no_labels = (
    "Senior Citizen",
    "Partner",
    "Dependents",
    "Phone Service",
    "Multiple Lines",
    "Online Security",
    "Online Backup",
    "Device Protection",
    "Tech Support",
    "Streaming TV",
    "Streaming Movies",
    "Paperless Billing",
)

input_components = [
    gr.Radio(label="Gender", choices=input_choices['gender'], value=input_choices['gender'][0]),
    # Categorical fields default to their first listed choice.
    *[
        gr.Dropdown(label=label, choices=input_choices[key], value=input_choices[key][0])
        for label, key in dropdown_specs
    ],
    # Numeric fields default to the dataset mean and allow up to 1.5x the
    # dataset maximum.
    gr.Slider(
        label="Tenure (Months)",
        minimum=0,
        maximum=int(maxs['tenure'] * 1.5),
        value=int(means['tenure']),
    ),
    *[
        gr.Number(
            label=label,
            minimum=0.0,
            maximum=float(maxs[column] * 1.5),
            value=float(means[column]),
        )
        for label, column in charge_specs
    ],
    # All remaining fields are No/Yes questions defaulting to the first choice.
    *[gr.Radio(label=label, choices=yes_no, value=yes_no[0]) for label in yes_no_labels],
]

output_component = gr.DataFrame()

# Launching the Gradio Interface
demo = gr.Interface(
    fn=load_and_predict,
    inputs=input_components,
    outputs=output_component,
    title="♻️ Customer Churn Prediction",
    description="Enter the following information to predict customer churn.",
    flagging_mode="never",  # Replacing allow_flagging with flagging_mode
)
demo.launch()