# 3v324v23's picture
# changes
# b797e49
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, roc_curve, auc
# Page title. The leading emoji had been stored as mis-decoded UTF-8 bytes
# (rendering as Thai/Latin garbage); it is restored to the stethoscope emoji.
st.title("🩺 Diabetes Prediction App")
# Load dataset
@st.cache_data
def load_data():
    """Read the diabetes CSV and return it as a DataFrame.

    The file name is relative, so it is resolved against the process's
    current working directory. The function is decorated with
    ``st.cache_data`` (Streamlit's data cache).

    Returns:
        pandas.DataFrame: the parsed contents of the CSV file.
    """
    return pd.read_csv("diabetes_prediction_dataset.csv")
@st.cache_resource
def _build_pipeline():
    """Prepare the data and fit the scaler and Random Forest exactly once.

    Streamlit re-executes the whole script on every widget interaction.
    Without caching, the 100-tree forest would be retrained on each rerun;
    ``st.cache_resource`` keeps the fitted objects alive between reruns.
    The returned objects are shared, so callers must treat them as read-only.

    Returns:
        dict: fitted objects and intermediate frames, keyed by the
        module-level names the rest of the script refers to.
    """
    raw = load_data()

    # Build the display copy BEFORE label-encoding so that gender and
    # smoking history keep their original text in the preview; only the
    # 0/1 flag columns are rewritten as "No"/"Yes".
    binary_columns = ["hypertension", "heart_disease", "diabetes"]
    df_display = raw.copy()
    for col in binary_columns:
        df_display[col] = df_display[col].map({0: "No", 1: "Yes"})

    # Encode categorical features; the encoders are kept so user input can
    # be transformed with the same mapping at prediction time.
    df = raw.copy()
    label_encoders = {}
    for col in ["gender", "smoking_history"]:
        encoder = LabelEncoder()
        df[col] = encoder.fit_transform(df[col])
        label_encoders[col] = encoder

    # Splitting dataset (target stays in its original 0/1 format)
    X = df.drop(columns=["diabetes"])
    y = df["diabetes"]
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # Standardizing data: fit on the training split only, then reuse the
    # same statistics for the test split.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Train Random Forest model
    rf = RandomForestClassifier(n_estimators=100, random_state=42)
    rf.fit(X_train_scaled, y_train)

    return {
        "df": df,
        "label_encoders": label_encoders,
        "binary_columns": binary_columns,
        "df_display": df_display,
        "X": X,
        "y": y,
        "X_train": X_train,
        "X_test": X_test,
        "y_train": y_train,
        "y_test": y_test,
        "scaler": scaler,
        "X_train_scaled": X_train_scaled,
        "X_test_scaled": X_test_scaled,
        "rf": rf,
    }


# Re-expose every name the original top-level code defined, so the tabs
# below keep working unchanged.
_pipeline = _build_pipeline()
df = _pipeline["df"]  # label-encoded frame (used for correlation and input ranges)
label_encoders = _pipeline["label_encoders"]
binary_columns = _pipeline["binary_columns"]
df_display = _pipeline["df_display"]
X = _pipeline["X"]
y = _pipeline["y"]
X_train = _pipeline["X_train"]
X_test = _pipeline["X_test"]
y_train = _pipeline["y_train"]
y_test = _pipeline["y_test"]
scaler = _pipeline["scaler"]
X_train_scaled = _pipeline["X_train_scaled"]
X_test_scaled = _pipeline["X_test_scaled"]
rf = _pipeline["rf"]
# Tabs (labels had mis-decoded emoji bytes; restored to the intended emoji)
tab1, tab2, tab3 = st.tabs(["📄 Dataset Preview", "📈 Model Performance", "🩺 Prediction"])
# Tab 1: Dataset Preview
with tab1:
    st.subheader("📄 Complete Dataset Preview")
    st.write(df_display)  # Show dataset with Yes/No for better readability

    st.subheader("📊 Correlation Heatmap")
    # Draw on an explicit Figure/Axes instead of pyplot's global state, and
    # close the figure after rendering: the script reruns on every
    # interaction, so unclosed figures would otherwise pile up in memory.
    heatmap_fig, heatmap_ax = plt.subplots(figsize=(10, 6))
    sns.heatmap(df.corr(), annot=True, cmap="coolwarm", fmt=".2f", ax=heatmap_ax)
    st.pyplot(heatmap_fig)
    plt.close(heatmap_fig)
# Tab 2: Model Performance
with tab2:
    st.subheader("📈 Model Performance")

    # Evaluate model on the held-out (scaled) test split
    y_pred = rf.predict(X_test_scaled)
    accuracy = accuracy_score(y_test, y_pred)
    st.write(f"### ⚡ Random Forest Accuracy: **{accuracy:.2f}**")

    # Confusion Matrix
    st.write("### 📊 Confusion Matrix")
    cm = confusion_matrix(y_test, y_pred)
    class_names = ["No Diabetes", "Diabetes"]
    # Explicit Figure/Axes + close(): avoids pyplot's global state and stops
    # figures from accumulating across Streamlit reruns.
    cm_fig, cm_ax = plt.subplots(figsize=(5, 4))
    sns.heatmap(
        cm,
        annot=True,
        fmt="d",
        cmap="Blues",
        xticklabels=class_names,
        yticklabels=class_names,
        ax=cm_ax,
    )
    cm_ax.set_xlabel("Predicted")
    cm_ax.set_ylabel("Actual")
    st.pyplot(cm_fig)
    plt.close(cm_fig)

    # ROC Curve, built from the predicted probability of the positive class
    st.write("### 📉 ROC Curve")
    fpr, tpr, _ = roc_curve(y_test, rf.predict_proba(X_test_scaled)[:, 1])
    roc_auc = auc(fpr, tpr)
    roc_fig, roc_ax = plt.subplots(figsize=(6, 4))
    roc_ax.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve (area = {:.2f})'.format(roc_auc))
    # Diagonal reference line from (0, 0) to (1, 1)
    roc_ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
    roc_ax.set_xlabel("False Positive Rate")
    roc_ax.set_ylabel("True Positive Rate")
    roc_ax.set_title("Receiver Operating Characteristic (ROC) Curve")
    roc_ax.legend(loc="lower right")
    st.pyplot(roc_fig)
    plt.close(roc_fig)
# Tab 3: Prediction
with tab3:
    st.subheader("🩺 Make a Prediction")

    # User inputs
    user_name = st.text_input("Patient Name", value="John Doe")
    user_gender = st.selectbox("Gender", label_encoders["gender"].classes_, key="gender_input")
    user_smoking = st.selectbox("Smoking History", label_encoders["smoking_history"].classes_, key="smoking_input")

    # Collect every input under its column name. The widgets are created in
    # a UI-friendly order, which is not necessarily the column order the
    # scaler and model were fitted on, so values are keyed by name here and
    # re-ordered to match the training columns below.
    user_features = {
        # Convert categorical inputs using the encoders fitted on the dataset
        "gender": label_encoders["gender"].transform([user_gender])[0],
        "smoking_history": label_encoders["smoking_history"].transform([user_smoking])[0],
    }

    # Numerical features: bounded by the dataset's min/max, defaulting to the mean
    for col in ["age", "bmi", "HbA1c_level", "blood_glucose_level"]:
        user_features[col] = st.number_input(
            f"Enter {col}",
            float(df[col].min()),
            float(df[col].max()),
            float(df[col].mean()),
        )

    # Binary features: asked as Yes/No, stored as 1/0 for the model
    for col in ["hypertension", "heart_disease"]:
        answer = st.radio(f"{col.replace('_', ' ').title()} (Yes/No)", ["No", "Yes"])
        user_features[col] = 1 if answer == "Yes" else 0

    # Single-row frame with columns in exactly the training order. Selecting
    # by X.columns raises KeyError if a training feature was not collected,
    # rather than silently feeding values to the wrong feature.
    user_data = pd.DataFrame([user_features])[list(X.columns)]

    # Predict button
    if st.button("🔮 Predict"):
        user_data_scaled = scaler.transform(user_data)

        # Prediction: hard label plus probability of the positive class
        prediction = rf.predict(user_data_scaled)
        probability = rf.predict_proba(user_data_scaled)[:, 1][0]

        # Display result with patient name
        st.subheader(f"🤖 Prediction for {user_name}")
        if prediction[0] == 1:
            st.error(f"🚨 **{user_name} is likely to have diabetes.** (Probability: {probability:.2f})")
        else:
            st.success(f"✅ **{user_name} is not likely to have diabetes.** (Probability: {probability:.2f})")