# Source: uploaded to the Hugging Face Hub by feddernico using huggingface_hub
# (commit 79f6c4b, verified).
import gradio as gr
import io
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from config import csv_data
from io import StringIO
from sklearn.model_selection import train_test_split
from sklearn.metrics import auc
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import roc_curve
from sklearn.linear_model import LogisticRegression

# Parse the CSV text provided by the config module into a DataFrame.
df = pd.read_csv(StringIO(csv_data))

# Names of the categorical columns. The list is only declared here; no
# encoding is performed and the visible code does not reference it again.
categorical_columns = [
    'Gender', 'Smoking', 'Alcohol Intake', 'Family History',
    'Diabetes', 'Obesity', 'Exercise Induced Angina', 'Chest Pain Type',
]

# Keep only the two predictors and the target column.
_feature_columns = ['Age', 'Cholesterol']
_target_column = 'Heart Disease'
df = df[_feature_columns + [_target_column]]

# Separate the feature matrix from the target vector.
X = df[_feature_columns]
y = df[_target_column]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit a logistic regression classifier on the training split.
model = LogisticRegression().fit(X_train, y_train)
def compute_precision_recall(threshold):
    """Return the test-set precision and recall at a decision threshold.

    A test sample is predicted positive when the second column of
    ``model.predict_proba`` is greater than or equal to ``threshold``.

    Args:
        threshold: Probability cut-off applied to the predicted scores.

    Returns:
        A ``(precision, recall)`` tuple computed on ``X_test`` / ``y_test``.
    """
    y_prob = model.predict_proba(X_test)[:, 1]
    y_pred = (y_prob >= threshold).astype(int)
    # With a high threshold no sample may be predicted positive, which makes
    # precision undefined. zero_division=0 returns the same 0.0 as the default
    # but states the choice explicitly and avoids an UndefinedMetricWarning.
    precision = precision_score(y_test, y_pred, zero_division=0)
    recall = recall_score(y_test, y_pred, zero_division=0)
    return precision, recall
def plot_roc_curve(threshold):
    """Build the test-set ROC curve and mark the point for ``threshold``.

    Args:
        threshold: Probability cut-off whose operating point is highlighted
            with a vertical dotted line.

    Returns:
        A Plotly figure with the ROC curve (AUC in the title), the diagonal
        of a random model, and a vertical line at the false positive rate
        obtained when predicting positive for ``score >= threshold``.
    """
    y_prob = model.predict_proba(X_test)[:, 1]

    # Keep every threshold: with the default drop_intermediate=True some
    # operating points are removed, so the marker could not always be placed
    # on the exact point reached by the requested threshold. The plotted
    # curve and its AUC are unaffected by the extra collinear points.
    fpr, tpr, thresholds = roc_curve(y_test, y_prob, drop_intermediate=False)
    roc_auc = auc(fpr, tpr)

    # Tabular form of the curve for Plotly Express.
    df_roc = pd.DataFrame({
        'fpr': fpr,
        'tpr': tpr,
        'threshold': thresholds,
    })

    fig = px.line(
        df_roc,
        x='fpr',
        y='tpr',
        title=f'ROC Curve (AUC = {roc_auc:.2f})',
        labels={'fpr': 'False Positive Rate', 'tpr': 'True Positive Rate'},
    )
    # Reference diagonal: the expected curve of a random classifier.
    fig.add_scatter(x=[0, 1], y=[0, 1], mode='lines', line=dict(color='gray', dash='dash'), name='Random Model')

    # roc_curve returns thresholds in decreasing order, and point i describes
    # the predictions "score >= thresholds[i]". The operating point for the
    # requested cut-off is therefore the LAST threshold that is still
    # >= threshold; picking the numerically nearest one could select a lower
    # threshold and show the wrong false positive rate.
    threshold_index = max(int(np.count_nonzero(thresholds >= threshold)) - 1, 0)
    fig.add_vline(x=fpr[threshold_index], line=dict(color='black', dash='dot'), annotation_text=f'Threshold: {threshold}', annotation_position="bottom right")
    return fig
def plot_precision_recall(threshold):
    """Plot test-set precision and recall against the decision threshold.

    Args:
        threshold: Probability cut-off highlighted with a vertical dotted
            line.

    Returns:
        A Plotly figure with one line for precision, one for recall, and a
        vertical marker at ``threshold``.
    """
    y_prob = model.predict_proba(X_test)[:, 1]
    precisions, recalls, thresholds = precision_recall_curve(y_test, y_prob)

    # precision_recall_curve returns increasing thresholds, with
    # precisions[i] / recalls[i] belonging to thresholds[i]; the final
    # (precision=1, recall=0) pair has no threshold of its own. The padding
    # value therefore goes at the END. Padding at the front would shift every
    # point onto the previous threshold.
    df_pr = pd.DataFrame({
        'threshold': np.append(thresholds, 1.0),
        'precision': precisions,
        'recall': recalls,
    })

    # The y column is 'precision', so that is the label key to override; the
    # axis carries both metrics and is titled generically.
    fig = px.line(
        df_pr,
        x='threshold',
        y='precision',
        title='Precision and Recall at Different Thresholds',
        labels={'threshold': 'Threshold', 'precision': 'Score'},
    )
    # Give the precision line a legend entry so it can be told apart from
    # the recall line added below.
    fig.update_traces(name='Precision', showlegend=True)
    fig.add_scatter(x=df_pr['threshold'], y=df_pr['recall'], mode='lines', name='Recall')

    # Mark the requested threshold.
    fig.add_vline(x=threshold, line=dict(color='black', dash='dot'), annotation_text=f'Threshold: {threshold}', annotation_position="top right")
    return fig
def plot_performance(threshold):
    """Return the ROC figure and the precision/recall figure for ``threshold``.

    The figures are returned as a two-element list, ROC first, matching the
    order of the two plot outputs of the interface.
    """
    roc_figure = plot_roc_curve(threshold)
    pr_figure = plot_precision_recall(threshold)
    return [roc_figure, pr_figure]
# One slider input: the decision threshold, from 0 to 1 in steps of 0.01.
inputs = [gr.Slider(minimum=0, maximum=1, step=0.01, label='Threshold')]
# Two plot outputs, filled in order by the list plot_performance returns.
outputs = [gr.Plot(), gr.Plot()]

# Build the Gradio interface around plot_performance. live=True is Gradio's
# option for updating the outputs as the input changes.
iface = gr.Interface(fn=plot_performance, inputs=inputs, outputs=outputs, live=True)

# Start the app and request a public share link.
iface.launch(share=True)