# ranimeree's picture
# Update app.py
# a9e59c6 verified
import gradio as gr
import pandas as pd
import numpy as np
import pickle
from sklearn.preprocessing import StandardScaler
import sklearn
import os
# Print the installed scikit-learn version at startup.
# NOTE(review): presumably here to help diagnose version mismatches between
# this environment and the one that pickled model.pkl — confirm.
print(f"Prediction environment scikit-learn version: {sklearn.__version__}")
def decode_file(file_path):
    """Load and return the Python object pickled at *file_path*.

    SECURITY: ``pickle.load`` can execute arbitrary code while
    deserializing. Only call this on files from a trusted source (for
    example a model artifact shipped with the app), never on data that a
    user can upload or otherwise control.

    Args:
        file_path: Path of the pickle file to read.

    Returns:
        The deserialized object.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
        pickle.UnpicklingError: If the content is not a valid pickle; other
            exception types may also surface from the unpickling process.
    """
    # The context manager guarantees the handle is closed even if
    # unpickling raises.
    with open(file_path, 'rb') as file:
        return pickle.load(file)
# Load the model once, at import time. There is intentionally no try/except:
# if ./model.pkl is missing or cannot be unpickled, the exception propagates
# and the app fails at startup. The path is relative to the process's current
# working directory, not to this file's location.
model = decode_file('./model.pkl')
def preprocess_input(data_dict, scaler=None):
    """Convert one raw record into the 16-column feature row fed to the model.

    The output columns, in order, are the three scaled numeric features
    (``num__age``, ``num__avg_glucose_level``, ``num__bmi``) followed by the
    0.0/1.0 indicator columns prefixed with ``cat__``. A categorical value
    that has no indicator column of its own (for example ``'Female'`` or
    ``'Govt_job'``) is encoded as all zeros within its group.

    Args:
        data_dict: Mapping with the keys ``gender``, ``age``,
            ``hypertension``, ``heart_disease``, ``ever_married``,
            ``work_type``, ``Residence_type``, ``avg_glucose_level``,
            ``bmi`` and ``smoking_status``. ``hypertension`` and
            ``heart_disease`` must be numeric flags (0 or 1).
        scaler: Optional *already fitted* scaler exposing
            ``transform(frame)`` for the three numeric columns. Pass the
            scaler that was fitted on the training data so the numeric
            features are standardized with the training mean and variance.

    Returns:
        A one-row ``pandas.DataFrame`` of floats with the 16 feature
        columns in the order described above.

    Warns:
        RuntimeWarning: When ``scaler`` is omitted (see the note below).
    """
    raw = pd.DataFrame([data_dict])
    numeric_features = ['age', 'avg_glucose_level', 'bmi']

    if scaler is None:
        # Legacy behaviour, kept so existing callers keep working. Fitting a
        # StandardScaler on a single row makes the mean equal to that row,
        # so every numeric feature becomes 0.0 regardless of the input.
        # Warn instead of staying silent about it.
        import warnings

        warnings.warn(
            "preprocess_input was called without a fitted scaler; a "
            "StandardScaler fitted on a single row maps age, "
            "avg_glucose_level and bmi to 0.0. Pass the scaler fitted at "
            "training time via the 'scaler' argument.",
            RuntimeWarning,
            stacklevel=2,
        )
        scaled = StandardScaler().fit_transform(raw[numeric_features])
    else:
        # np.asarray also accepts scalers configured to return DataFrames.
        scaled = np.asarray(
            scaler.transform(raw[numeric_features]), dtype=float
        )

    def is_level(column, level):
        """Return a float 0.0/1.0 indicator for ``raw[column] == level``."""
        return (raw[column] == level).astype(float)

    # Insertion order of this dict defines the column order of the result.
    features = {
        f'num__{name}': scaled[:, position]
        for position, name in enumerate(numeric_features)
    }
    features['cat__gender_Male'] = is_level('gender', 'Male')
    # Previously hard-coded to 0.0, which mis-encoded gender == 'Other'.
    features['cat__gender_Other'] = is_level('gender', 'Other')
    features['cat__hypertension_1'] = raw['hypertension'].astype(float)
    features['cat__heart_disease_1'] = raw['heart_disease'].astype(float)
    features['cat__ever_married_Yes'] = is_level('ever_married', 'Yes')
    for level in ('Never_worked', 'Private', 'Self-employed', 'children'):
        features[f'cat__work_type_{level}'] = is_level('work_type', level)
    features['cat__Residence_type_Urban'] = is_level('Residence_type', 'Urban')
    for level in ('formerly smoked', 'never smoked', 'smokes'):
        features[f'cat__smoking_status_{level}'] = is_level(
            'smoking_status', level
        )

    return pd.DataFrame(features, index=raw.index)
def predict(gender, age, hypertension, ever_married, work_type, heart_disease,
            avg_glucose_level, bmi, smoking_status, Residence_type):
    """Return a human-readable stroke-probability message for one person.

    The parameters arrive positionally from the UI, in the order listed in
    the signature. ``hypertension`` and ``heart_disease`` are the strings
    ``'Yes'``/``'No'``; ``age``, ``avg_glucose_level`` and ``bmi`` must be
    convertible with ``float()``; the remaining parameters are category
    labels passed through to ``preprocess_input``.

    Returns:
        A string: the formatted probability on success, or a message
        starting with ``"Error"`` when an input is missing, the model is
        not loaded, or the model call raises.
    """
    # Validate before doing anything else. A field the user left empty
    # arrives as None; float(None) would raise TypeError, and a None radio
    # value would silently be scored as "No" / the baseline category.
    labelled_inputs = {
        'Gender': gender,
        'Age': age,
        'Hypertension': hypertension,
        'Ever Married': ever_married,
        'Work Type': work_type,
        'Heart Disease': heart_disease,
        'Average Glucose Level': avg_glucose_level,
        'BMI': bmi,
        'Smoking Status': smoking_status,
        'Residence Type': Residence_type,
    }
    missing = [
        label for label, value in labelled_inputs.items() if value is None
    ]
    if missing:
        return f"Error: missing input for {', '.join(missing)}"

    if model is None:
        return "Error: Model not loaded"

    # Raw record using the column names expected by preprocess_input.
    input_data = {
        'gender': gender,
        'age': float(age),
        'hypertension': 1 if hypertension == 'Yes' else 0,
        'heart_disease': 1 if heart_disease == 'Yes' else 0,
        'ever_married': ever_married,
        'work_type': work_type,
        'Residence_type': Residence_type,
        'avg_glucose_level': float(avg_glucose_level),
        'bmi': float(bmi),
        'smoking_status': smoking_status,
    }
    processed_input = preprocess_input(input_data)

    # This is the UI boundary, so any failure from the model is reported as
    # text rather than propagated. Only the model call sits inside the try.
    try:
        # [0] selects the single input row, [1] the second class column.
        prediction = model.predict_proba(processed_input)[0][1]
    except Exception as e:
        return f"Error making prediction: {str(e)}, model is not valid"
    return f"The probability of stroke is {prediction:.2%}"
# Assemble the Gradio front end for ``predict``.
def _build_interface():
    """Create the ``gr.Interface`` that wraps ``predict``.

    The input components are listed in the same order as ``predict``'s
    positional parameters, since that is how their values are passed in.
    """

    def yes_no_radio(label):
        # Each call builds a fresh choices list for its own component.
        return gr.Radio(choices=['Yes', 'No'], label=label)

    work_types = ['Private', 'Self-employed', 'Govt_job', 'children', 'Never_worked']
    smoking_levels = ['formerly smoked', 'never smoked', 'smokes', 'Unknown']

    components = [
        gr.Radio(choices=['Female', 'Male'], label="Gender"),
        gr.Slider(minimum=0, maximum=100, label="Age"),
        yes_no_radio("Hypertension"),
        yes_no_radio("Ever Married"),
        gr.Radio(choices=work_types, label="Work Type"),
        yes_no_radio("Heart Disease"),
        gr.Number(label="Average Glucose Level"),
        gr.Slider(minimum=10, maximum=50, label="BMI"),
        gr.Radio(choices=smoking_levels, label="Smoking Status"),
        gr.Radio(choices=['Urban', 'Rural'], label="Residence Type"),
    ]

    return gr.Interface(
        fn=predict,
        inputs=components,
        outputs='text',
        title='Stroke Probability Predictor',
        description='Predicts the probability of having a stroke based on input features.',
    )


iface = _build_interface()

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()