|
import streamlit as st |
|
import pandas as pd |
|
import numpy as np |
|
import matplotlib.pyplot as plt |
|
import seaborn as sns |
|
from sklearn.model_selection import train_test_split |
|
from sklearn.preprocessing import StandardScaler, LabelEncoder |
|
from sklearn.ensemble import RandomForestClassifier |
|
from sklearn.metrics import accuracy_score, confusion_matrix, roc_curve, auc |
|
|
|
# Page heading. The stored emoji was mojibake ("๐ฉบ", which looks like the UTF-8
# bytes of the stethoscope emoji decoded through a Thai code page); restored here.
st.title("🩺 Diabetes Prediction App")
|
|
|
|
|
@st.cache_data
def load_data():
    """Read the diabetes dataset CSV into a DataFrame.

    The file name is relative to the process working directory. The function is
    wrapped in ``st.cache_data``, so Streamlit can reuse the parsed frame across
    script reruns instead of re-reading the file.

    Returns:
        pandas.DataFrame: Contents of ``diabetes_prediction_dataset.csv``.
    """
    return pd.read_csv("diabetes_prediction_dataset.csv")
|
|
|
# Load the raw table through the cached loader.
df = load_data()

# Integer-encode the text columns in place. Each fitted encoder is kept so the
# prediction form can list the original categories and encode the user's choice.
label_encoders = {}
for column_name in ("gender", "smoking_history"):
    encoder = LabelEncoder().fit(df[column_name])
    df[column_name] = encoder.transform(df[column_name])
    label_encoders[column_name] = encoder

# Separate copy for on-screen display: the 0/1 flag columns are shown as "No"/"Yes"
# while `df` itself stays numeric for modelling.
binary_columns = ["hypertension", "heart_disease", "diabetes"]
yes_no = {0: "No", 1: "Yes"}
df_display = df.assign(**{flag: df[flag].map(yes_no) for flag in binary_columns})
|
|
|
|
|
# Feature matrix (every column except the label) and the label vector.
X = df.drop(columns=["diabetes"])
y = df["diabetes"]

# Fixed seed so the 80/20 split is identical on every rerun.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


@st.cache_resource
def _fit_scaler_and_forest(train_features, train_labels):
    """Fit the feature scaler and the random forest once and reuse them.

    Streamlit re-executes the whole script on every widget interaction. Without
    caching, the 100-tree forest would be retrained each time the user touches
    the prediction form. ``st.cache_resource`` keys the result on the arguments,
    so the fit is repeated only when the training data changes.

    Args:
        train_features: Training feature frame; the scaler is fit on it.
        train_labels: Training labels aligned with ``train_features``.

    Returns:
        tuple: ``(fitted StandardScaler, fitted RandomForestClassifier)``. The
        forest is trained on the scaled features.
    """
    fitted_scaler = StandardScaler().fit(train_features)
    forest = RandomForestClassifier(n_estimators=100, random_state=42)
    forest.fit(fitted_scaler.transform(train_features), train_labels)
    return fitted_scaler, forest


scaler, rf = _fit_scaler_and_forest(X_train, y_train)

# Scaled views of both splits; the test split reuses the statistics learned on
# the training split only.
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)
|
|
|
|
|
# NOTE(review): the emoji in these labels were corrupted by an encoding round trip.
# The stethoscope could be recovered from the surviving bytes and is restored; a
# lone "๐" has lost the bytes that identified the emoji and is left exactly as
# stored - replace it with the intended emoji once known.
tab1, tab2, tab3 = st.tabs(["๐ Dataset Preview", "๐ Model Performance", "🩺 Prediction"])

with tab1:
    st.subheader("๐ Complete Dataset Preview")
    st.write(df_display)

    st.subheader("๐ Correlation Heatmap")
    # Draw on an explicit Figure/Axes and hand that Figure to Streamlit instead of
    # the pyplot module, then close it: the script reruns on every interaction and
    # unclosed figures would otherwise pile up in matplotlib's global registry.
    heatmap_fig, heatmap_ax = plt.subplots(figsize=(10, 6))
    sns.heatmap(df.corr(), annot=True, cmap="coolwarm", fmt=".2f", ax=heatmap_ax)
    st.pyplot(heatmap_fig)
    plt.close(heatmap_fig)
|
|
|
|
|
with tab2:
    # NOTE(review): a lone "๐" below is an emoji whose identifying bytes were lost
    # to an encoding round trip; it is left as stored. The recoverable one (the
    # lightning bolt) is restored.
    st.subheader("๐ Model Performance")

    # Hold-out accuracy of the forest on the scaled test split.
    y_pred = rf.predict(X_test_scaled)
    accuracy = accuracy_score(y_test, y_pred)
    st.write(f"### ⚡ Random Forest Accuracy: **{accuracy:.2f}**")

    st.write("### ๐ Confusion Matrix")
    cm = confusion_matrix(y_test, y_pred)
    class_names = ["No Diabetes", "Diabetes"]
    # Explicit Figure/Axes per chart, passed to st.pyplot and closed afterwards so
    # figures do not accumulate across Streamlit reruns.
    cm_fig, cm_ax = plt.subplots(figsize=(5, 4))
    sns.heatmap(
        cm,
        annot=True,
        fmt="d",
        cmap="Blues",
        xticklabels=class_names,
        yticklabels=class_names,
        ax=cm_ax,
    )
    cm_ax.set_xlabel("Predicted")
    cm_ax.set_ylabel("Actual")
    st.pyplot(cm_fig)
    plt.close(cm_fig)

    st.write("### ๐ ROC Curve")
    # ROC is computed from the predicted probability of the positive class (column 1).
    fpr, tpr, _ = roc_curve(y_test, rf.predict_proba(X_test_scaled)[:, 1])
    roc_auc = auc(fpr, tpr)
    roc_fig, roc_ax = plt.subplots(figsize=(6, 4))
    roc_ax.plot(fpr, tpr, color="darkorange", lw=2, label="ROC curve (area = {:.2f})".format(roc_auc))
    # Diagonal reference line from (0, 0) to (1, 1).
    roc_ax.plot([0, 1], [0, 1], color="navy", lw=2, linestyle="--")
    roc_ax.set_xlabel("False Positive Rate")
    roc_ax.set_ylabel("True Positive Rate")
    roc_ax.set_title("Receiver Operating Characteristic (ROC) Curve")
    roc_ax.legend(loc="lower right")
    st.pyplot(roc_fig)
    plt.close(roc_fig)
|
|
|
|
|
with tab3:
    # Emoji in this tab were stored as mojibake; each one here could be recovered
    # from its surviving bytes and is restored.
    st.subheader("🩺 Make a Prediction")

    user_name = st.text_input("Patient Name", value="John Doe")
    user_gender = st.selectbox("Gender", label_encoders["gender"].classes_, key="gender_input")
    user_smoking = st.selectbox(
        "Smoking History", label_encoders["smoking_history"].classes_, key="smoking_input"
    )

    # Encode the selections with the same encoders that transformed the training data.
    user_gender_encoded = label_encoders["gender"].transform([user_gender])[0]
    user_smoking_encoded = label_encoders["smoking_history"].transform([user_smoking])[0]

    # Collect the inputs by column NAME. The previous positional list
    # (gender, smoking_history, numerics, flags) was passed straight to a scaler and
    # model that were fit on X's column order, so any difference between the two
    # orders silently fed values into the wrong features.
    user_features = {
        "gender": user_gender_encoded,
        "smoking_history": user_smoking_encoded,
    }

    # Numeric inputs are bounded by the observed range and default to the column mean.
    for col in ["age", "bmi", "HbA1c_level", "blood_glucose_level"]:
        user_features[col] = st.number_input(
            f"Enter {col}", float(df[col].min()), float(df[col].max()), float(df[col].mean())
        )

    # Yes/No radios are converted to the 0/1 coding used in the dataset.
    user_binary_data = {}
    for col in ["hypertension", "heart_disease"]:
        user_binary_data[col] = st.radio(f"{col.replace('_', ' ').title()} (Yes/No)", ["No", "Yes"])
        user_features[col] = 1 if user_binary_data[col] == "Yes" else 0

    # One-row frame ordered exactly like the training features. Selecting by
    # X.columns raises KeyError if the form ever fails to supply a training column,
    # instead of producing a misaligned row.
    user_data = pd.DataFrame([user_features])[list(X.columns)]

    if st.button("🔮 Predict"):
        user_data_scaled = scaler.transform(user_data)

        prediction = rf.predict(user_data_scaled)
        # Probability of the positive (diabetes) class for the single input row.
        probability = rf.predict_proba(user_data_scaled)[:, 1][0]

        st.subheader(f"👤 Prediction for {user_name}")
        if prediction[0] == 1:
            st.error(f"🚨 **{user_name} is likely to have diabetes.** (Probability: {probability:.2f})")
        else:
            # This literal was previously split across two lines by a corrupted
            # emoji byte, leaving an unterminated f-string.
            st.success(f"✅ **{user_name} is not likely to have diabetes.** (Probability: {probability:.2f})")
|
|