import os
HF_TOKEN = os.getenv("HF_TOKEN")
import numpy as np
import pandas as pd
import sklearn
import sklearn.metrics
from sklearn.metrics import roc_auc_score, roc_curve, precision_recall_curve, auc, precision_score, recall_score, f1_score, classification_report, accuracy_score, confusion_matrix, ConfusionMatrixDisplay, matthews_corrcoef
from sklearn.model_selection import train_test_split
from sklearn.calibration import calibration_curve
from math import sqrt
from scipy import stats as st
from random import randrange
from matplotlib import pyplot as plt
import xgboost as xgb
import lightgbm as lgb
import catboost as cb
from catboost import Pool
from sklearn.ensemble import RandomForestClassifier
import optuna
from optuna.samplers import TPESampler
import shap
import gradio as gr
import random
import re
import textwrap
from datasets import load_dataset
#Read data.
# All five outcome files are read with the same column selection: the
# predictor columns followed by the 'OUTCOME' label. The list is written out
# once; variables2..variables5 are independent copies so the original
# module-level names keep working.
variables1 = [
    'Age',
    'Sex',
    'Ethnicity',
    'Weight',
    'Height',
    'Systolic_Blood_Pressure',
    'Pulse_Rate',
    'Supplemental_Oxygen',
    'Pulse_Oximetry',
    'Respiratory_Assistance',
    'Respiratory_Rate',
    'Temperature',
    'PreHospital_Cardiac_Arrest',
    'GCS__Eye',
    'GCS__Verbal',
    'GCS__Motor',
    'Total_GCS',
    'Pupillary_Response',
    'Midline_Shift',
    'Current_Smoker',
    'Comorbid_Condition__Alcohol_Use_Disorder',
    'Comorbid_Condition__Substance_Abuse_Disorder',
    'Comorbid_Condition__Diabetes_Mellitus',
    'Comorbid_Condition__Hypertension',
    'Comorbid_Condition__Congestive_Heart_Failure',
    'History_of_Myocardial_Infarction',
    'Comorbid_Condition__Angina_Pectoris',
    'History_of_Cerebrovascular_Accident',
    'Comorbid_Condition__Peripheral_Arterial_Disease',
    'Comorbid_Condition__Chronic_Obstructive_Pulmonary_Disease',
    'Comorbid_Condition__Chronic_Renal_Failure',
    'Comorbid_Condition__Cirrhosis',
    'Comorbid_Condition__Bleeding_Disorder',
    'Comorbid_Condition__Disseminated_Cancer',
    'Comorbid_Condition__Currently_Receiving_Chemotherapy_for_Cancer',
    'Comorbid_Condition__Dementia',
    'Comorbid_Condition__Attention_Deficit_Disorder_or_Attention_Deficit_Hyperactivity_Disorder',
    'Comorbid_Condition__Mental_or_Personality_Disorder',
    'Ability_to_Complete_AgeAppropriate_ADL',
    'Pregnancy',
    'Anticoagulant_Therapy',
    'Steroid_Use',
    'Advanced_Directive_Limiting_Care',
    'Days_from_Incident_to_ED_or_Hospital_Arrival',
    'Transport_Mode',
    'InterFacility_Transfer',
    'Trauma_Type',
    'Injury_Intent',
    'Mechanism_of_Injury',
    'WorkRelated',
    'AIS_Severity__Maximum_Severity_of_Injury_in_the_Head',
    'AIS_Severity__Maximum_Severity_of_Injury_in_the_Face',
    'AIS_Severity__Maximum_Severity_of_Injury_in_the_Neck',
    'AIS_Severity__Maximum_Severity_of_Injury_in_the_Thorax',
    'AIS_Severity__Maximum_Severity_of_Injury_in_the_Abdomen',
    'AIS_Severity__Maximum_Severity_of_Injury_in_the_Spine',
    'AIS_Severity__Maximum_Severity_of_Injury_in_the_Upper_Extremity',
    'AIS_Severity__Maximum_Severity_of_Injury_in_the_Lower_Extremity',
    'AIS_Severity__Maximum_Severity_of_Injury_in_Unspecified_Body_Regions',
    'AIS_derived_ISS',
    'Blood_Transfusion',
    'Neurosurgical_Intervention',
    'Alcohol_Screen',
    'Alcohol_Screen_Result',
    'Drug_Screen__Amphetamine',
    'Drug_Screen__Barbiturate',
    'Drug_Screen__Benzodiazepines',
    'Drug_Screen__Cannabinoid',
    'Drug_Screen__Cocaine',
    'Drug_Screen__MDMA_or_Ecstasy',
    'Drug_Screen__Methadone',
    'Drug_Screen__Methamphetamine',
    'Drug_Screen__Opioid',
    'Drug_Screen__Oxycodone',
    'Drug_Screen__Phencyclidine',
    'Drug_Screen__Tricyclic_Antidepressant',
    'ACS_Verification_Level',
    'Hospital_Type',
    'Facility_Bed_Size',
    'Primary_Method_of_Payment',
    'Race',
    'Cerebral_Monitoring',
    'Protective_Device',
    'OUTCOME',
]
variables2 = list(variables1)
variables3 = list(variables1)
variables4 = list(variables1)
variables5 = list(variables1)


def _load_outcome_frame(data_file, columns):
    """Load one CSV of the NTDB-Epidural dataset and keep the given columns.

    Args:
        data_file: File name inside the "mertkarabacak/NTDB-Epidural" dataset
            repository, e.g. "mortality_data.csv".
        columns: Column names to select, in the order they should appear.

    Returns:
        pandas.DataFrame built from the dataset's 'train' split, restricted to
        ``columns``.
    """
    # NOTE(review): recent `datasets` releases reportedly prefer `token=` over
    # `use_auth_token=`; kept as-is here - confirm against the pinned version.
    dataset = load_dataset("mertkarabacak/NTDB-Epidural", data_files=data_file, use_auth_token = HF_TOKEN)
    return pd.DataFrame(dataset['train'])[columns]


x1 = _load_outcome_frame("mortality_data.csv", variables1)
x2 = _load_outcome_frame("discharge_data.csv", variables2)
x3 = _load_outcome_frame("los_data.csv", variables3)
x4 = _load_outcome_frame("iculos_data.csv", variables4)
x5 = _load_outcome_frame("complications_data.csv", variables5)
#Define feature names.
# Human-readable labels derived from the column names of each frame:
# a double underscore becomes ' - ' first, then every remaining underscore
# becomes a space. The frames still contain 'OUTCOME' at this point, so the
# resulting lists include an 'OUTCOME' entry as well.
f1_names = [name.replace('__', ' - ').replace('_', ' ') for name in x1.columns]
f2_names = [name.replace('__', ' - ').replace('_', ' ') for name in x2.columns]
f3_names = [name.replace('__', ' - ').replace('_', ' ') for name in x3.columns]
f4_names = [name.replace('__', ' - ').replace('_', ' ') for name in x4.columns]
f5_names = [name.replace('__', ' - ').replace('_', ' ') for name in x5.columns]
#Assign unique values as answer options.
# Option sets shared by many fields are written once as tuples; every
# unique_* name still gets its own independent list built from them.
_NO_YES_UNKNOWN = ('No', 'Yes', 'Unknown')
_DRUG_SCREEN_RESULT = ('Not tested', 'No', 'Yes')

# Demographics.
unique_SEX = ['Male', 'Female', 'Unknown']
unique_RACE = [
    'White',
    'Black',
    'Asian',
    'American Indian',
    'Pacific Islander',
    'Other',
    'Unknown',
]
unique_ETHNICITY = ['Not Hispanic or Latino', 'Hispanic or Latino', 'Unknown']

# Presentation / examination findings.
unique_SUPPLEMENTALOXYGEN = ['No supplemental oxygen', 'Supplemental oxygen', 'Unknown']
unique_RESPIRATORYASSISTANCE = ['Unassisted respiratory rate', 'Assisted respiratory rate', 'Unknown']
unique_PREHOSPITALCARDIACARREST = list(_NO_YES_UNKNOWN)
unique_TBIMIDLINESHIFT = ['No', 'Yes', 'Not imaged/unknown']
unique_TBIPUPILLARYRESPONSE = ['Both reactive', 'One reactive', 'Neither reactive', 'Unknown']

# Comorbid conditions and related history flags.
unique_CC_ADHD = list(_NO_YES_UNKNOWN)
unique_CC_ADLC = list(_NO_YES_UNKNOWN)
unique_CC_ALCOHOLISM = list(_NO_YES_UNKNOWN)
unique_CC_ANGINAPECTORIS = list(_NO_YES_UNKNOWN)
unique_CC_ANTICOAGULANT = list(_NO_YES_UNKNOWN)
unique_CC_BLEEDING = list(_NO_YES_UNKNOWN)
unique_CC_CHEMO = list(_NO_YES_UNKNOWN)
unique_CC_CHF = list(_NO_YES_UNKNOWN)
unique_CC_CIRRHOSIS = list(_NO_YES_UNKNOWN)
unique_CC_COPD = list(_NO_YES_UNKNOWN)
unique_CC_CVA = list(_NO_YES_UNKNOWN)
unique_CC_DEMENTIA = list(_NO_YES_UNKNOWN)
unique_CC_DIABETES = list(_NO_YES_UNKNOWN)
unique_CC_DISCANCER = list(_NO_YES_UNKNOWN)
unique_CC_FUNCTIONAL = list(_NO_YES_UNKNOWN)
unique_CC_HYPERTENSION = list(_NO_YES_UNKNOWN)
unique_CC_MENTALPERSONALITY = list(_NO_YES_UNKNOWN)
unique_CC_MI = list(_NO_YES_UNKNOWN)
unique_CC_PAD = list(_NO_YES_UNKNOWN)
unique_CC_RENAL = list(_NO_YES_UNKNOWN)
unique_CC_SMOKING = list(_NO_YES_UNKNOWN)
unique_CC_STEROID = list(_NO_YES_UNKNOWN)
unique_CC_SUBSTANCEABUSE = list(_NO_YES_UNKNOWN)
unique_CC_PREGNANCY = ['No', 'Yes', 'Unknown', 'Not applicable (male patient)']

# Injury circumstances.
unique_TRANSPORTMODE = [
    'Ground ambulance',
    'Private vehicle/public vehicle/walk-in',
    'Air ambulance',
    'Police',
    'Other/unknown',
]
unique_INTERFACILITYTRANSFER = ['No', 'Yes']
unique_TRAUMATYPE = ['Blunt', 'Penetrating', 'Other/unknown']
unique_INTENT = ['Unintentional', 'Assault', 'Self-inflicted', 'Other/undetermined/unknown']
unique_MECHANISM = [
    'Fall',
    'Struck by or against',
    'MVT occupant',
    'MVT pedestrian',
    'MVT motorcyclist',
    'MVT pedal cyclist',
    'Other MVT',
    'Other transport',
    'Other pedestrian',
    'Other pedal cyclist',
    'Firearm',
    'Cut/pierce',
    'Natural/environmental',
    'Machinery',
    'Overexertion',
    'Other/unspecified/unknown',
]
unique_PROTDEV = [
    'None',
    'Airbag present',
    'Helmet',
    'Lap belt',
    'Shoulder Belt',
    'Protective clothing',
    'Protective non-clothing gear',
    'Eye protection',
    'Unknown',
]
unique_WORKRELATED = [
    'No',
    'Yes (Unknown)',
    'Yes (Construction and Extraction Occupations)',
    'Yes (Transportation and Material Moving Occupations)',
    'Yes (Installation, Maintenance, and Repair Occupations)',
    'Yes (Farming, Fishing, and Forestry Occupations)',
    'Yes (Building and Grounds Cleaning and Maintenance)',
    'Yes (Food Preparation and Serving Related)',
    'Yes (Production Occupations)',
    'Yes (Sales and Related Occupations)',
    'Yes (Arts, Design, Entertainment, Sports, and Media)',
    'Yes (Military Specific Occupations)',
    'Yes (Healthcare Practitioners and Technical Occupations)',
    'Yes (Management Occupations)',
    'Yes (Protective Service Occupations)',
    'Yes (Education, Training, and Library Occupations)',
    'Yes (Office and Administrative Support Occupations)',
    'Yes (Computer and Mathematical Occupations)',
    'Yes (Legal Occupations)',
    'Yes (Personal Care and Service Occupations)',
]

# Interventions and monitoring.
unique_INTERVENTION = ['No', 'Yes']
unique_ICP = [
    'None',
    'Intraventricular drain or catheter (e.g. ventriculostomy, external ventricular drain)',
    'Intraparenchymal pressure monitor (e.g. Camino bolt, subarachnoid bolt, intraparenchymal catheter)',
    'Jugular venous bulb',
    'Intraparenchymal oxygen monitor (e.g. Licox)',
    'Unknown',
]

# Screening. Note the 'Yes'-first ordering of these two lists.
unique_ALCOHOLSCREEN = ['Yes', 'No', 'Unknown']
unique_ANTIBIOTICTHERAPY = ['Yes', 'No', 'Unknown']
unique_DRGSCR_AMPHETAMINE = list(_DRUG_SCREEN_RESULT)
unique_DRGSCR_BARBITURATE = list(_DRUG_SCREEN_RESULT)
unique_DRGSCR_BENZODIAZEPINES = list(_DRUG_SCREEN_RESULT)
unique_DRGSCR_CANNABINOID = list(_DRUG_SCREEN_RESULT)
unique_DRGSCR_COCAINE = list(_DRUG_SCREEN_RESULT)
unique_DRGSCR_ECSTASY = list(_DRUG_SCREEN_RESULT)
unique_DRGSCR_METHADONE = list(_DRUG_SCREEN_RESULT)
unique_DRGSCR_METHAMPHETAMINE = list(_DRUG_SCREEN_RESULT)
unique_DRGSCR_OPIOID = list(_DRUG_SCREEN_RESULT)
unique_DRGSCR_OXYCODONE = list(_DRUG_SCREEN_RESULT)
unique_DRGSCR_PHENCYCLIDINE = list(_DRUG_SCREEN_RESULT)
unique_DRGSCR_TRICYCLICDEPRESS = list(_DRUG_SCREEN_RESULT)

# Facility and payment.
unique_VERIFICATIONLEVEL = [
    'Level I Trauma Center',
    'Level II Trauma Center',
    'Level III Trauma Center',
    'Unknown',
]
unique_HOSPITALTYPE = ['Non-profit', 'For profit', 'Government', 'Unknown']
unique_BEDSIZE = ['More than 600', '401 to 600', '201 to 400', '200 or fewer']
unique_PRIMARYMETHODPAYMENT = [
    'Private/commercial insurance',
    'Medicaid',
    'Medicare',
    'Other government',
    'Self-pay',
    'Other',
    'Not billed',
    'Unknown',
]
#Prepare data for the outcome 1 (mortality).
# This file does not import sklearn.preprocessing anywhere else; import it
# explicitly so the LabelEncoder lookup below does not depend on some other
# sklearn import having loaded the submodule as a side effect.
import sklearn.preprocessing

# Split the label off; x1 holds only predictor columns from here on.
y1 = x1.pop('OUTCOME')

# Object (text) columns become pandas categoricals for the boosted models.
categorical_columns1 = list(x1.select_dtypes('object').columns)
x1 = x1.astype({col: "category" for col in categorical_columns1})

# XGBoost training matrix.
y1_data_xgb = xgb.DMatrix(x1, label=y1, enable_categorical=True)

# LightGBM training set; column names are reduced to [A-Za-z0-9_].
x1_lgb = x1.rename(columns = lambda x:re.sub('[^A-Za-z0-9_]+', '', x))
y1_data_lgb = lgb.Dataset(x1_lgb, label=y1)

# CatBoost training pool.
y1_data_cb = Pool(data=x1, label=y1, cat_features=categorical_columns1)

# Random forest frame: astype() returns a new frame, so the encoding below
# does not touch x1. The categorical column list is recomputed from the
# 'category' dtype.
categorical_columns1 = list(x1.select_dtypes('category').columns)
x1_rf = x1.astype({col: "category" for col in categorical_columns1})
# The same encoder object is refit on every column, so after the loop `le`
# only holds the classes of the last column processed.
le = sklearn.preprocessing.LabelEncoder()
for col in categorical_columns1:
    x1_rf[col] = le.fit_transform(x1_rf[col].astype(str))
# Every int64 column of the random forest frame is stored as str.
d1 = dict.fromkeys(x1_rf.select_dtypes(np.int64).columns, str)
x1_rf = x1_rf.astype(d1)
#Prepare data for the outcome 2 (discharge).
# This file does not import sklearn.preprocessing anywhere else; import it
# explicitly so the LabelEncoder lookup below does not depend on some other
# sklearn import having loaded the submodule as a side effect.
import sklearn.preprocessing

# Split the label off; x2 holds only predictor columns from here on.
y2 = x2.pop('OUTCOME')

# Object (text) columns become pandas categoricals for the boosted models.
categorical_columns2 = list(x2.select_dtypes('object').columns)
x2 = x2.astype({col: "category" for col in categorical_columns2})

# XGBoost training matrix.
y2_data_xgb = xgb.DMatrix(x2, label=y2, enable_categorical=True)

# LightGBM training set; column names are reduced to [A-Za-z0-9_].
x2_lgb = x2.rename(columns = lambda x:re.sub('[^A-Za-z0-9_]+', '', x))
y2_data_lgb = lgb.Dataset(x2_lgb, label=y2)

# CatBoost training pool.
y2_data_cb = Pool(data=x2, label=y2, cat_features=categorical_columns2)

# Random forest frame: astype() returns a new frame, so the encoding below
# does not touch x2. The categorical column list is recomputed from the
# 'category' dtype.
categorical_columns2 = list(x2.select_dtypes('category').columns)
x2_rf = x2.astype({col: "category" for col in categorical_columns2})
# The same encoder object is refit on every column, so after the loop `le`
# only holds the classes of the last column processed.
le = sklearn.preprocessing.LabelEncoder()
for col in categorical_columns2:
    x2_rf[col] = le.fit_transform(x2_rf[col].astype(str))
# Every int64 column of the random forest frame is stored as str.
d2 = dict.fromkeys(x2_rf.select_dtypes(np.int64).columns, str)
x2_rf = x2_rf.astype(d2)
#Prepare data for the outcome 3 (LOS).
# This file does not import sklearn.preprocessing anywhere else; import it
# explicitly so the LabelEncoder lookup below does not depend on some other
# sklearn import having loaded the submodule as a side effect.
import sklearn.preprocessing

# Split the label off; x3 holds only predictor columns from here on.
y3 = x3.pop('OUTCOME')

# Object (text) columns become pandas categoricals for the boosted models.
categorical_columns3 = list(x3.select_dtypes('object').columns)
x3 = x3.astype({col: "category" for col in categorical_columns3})

# XGBoost training matrix.
y3_data_xgb = xgb.DMatrix(x3, label=y3, enable_categorical=True)

# LightGBM training set; column names are reduced to [A-Za-z0-9_].
x3_lgb = x3.rename(columns = lambda x:re.sub('[^A-Za-z0-9_]+', '', x))
y3_data_lgb = lgb.Dataset(x3_lgb, label=y3)

# CatBoost training pool.
y3_data_cb = Pool(data=x3, label=y3, cat_features=categorical_columns3)

# Random forest frame: astype() returns a new frame, so the encoding below
# does not touch x3. The categorical column list is recomputed from the
# 'category' dtype.
categorical_columns3 = list(x3.select_dtypes('category').columns)
x3_rf = x3.astype({col: "category" for col in categorical_columns3})
# The same encoder object is refit on every column, so after the loop `le`
# only holds the classes of the last column processed.
le = sklearn.preprocessing.LabelEncoder()
for col in categorical_columns3:
    x3_rf[col] = le.fit_transform(x3_rf[col].astype(str))
# Every int64 column of the random forest frame is stored as str.
d3 = dict.fromkeys(x3_rf.select_dtypes(np.int64).columns, str)
x3_rf = x3_rf.astype(d3)
#Prepare data for the outcome 4 (ICU LOS).
# This file does not import sklearn.preprocessing anywhere else; import it
# explicitly so the LabelEncoder lookup below does not depend on some other
# sklearn import having loaded the submodule as a side effect.
import sklearn.preprocessing

# Split the label off; x4 holds only predictor columns from here on.
y4 = x4.pop('OUTCOME')

# Object (text) columns become pandas categoricals for the boosted models.
categorical_columns4 = list(x4.select_dtypes('object').columns)
x4 = x4.astype({col: "category" for col in categorical_columns4})

# XGBoost training matrix.
y4_data_xgb = xgb.DMatrix(x4, label=y4, enable_categorical=True)

# LightGBM training set; column names are reduced to [A-Za-z0-9_].
x4_lgb = x4.rename(columns = lambda x:re.sub('[^A-Za-z0-9_]+', '', x))
y4_data_lgb = lgb.Dataset(x4_lgb, label=y4)

# CatBoost training pool.
y4_data_cb = Pool(data=x4, label=y4, cat_features=categorical_columns4)

# Random forest frame: astype() returns a new frame, so the encoding below
# does not touch x4. The categorical column list is recomputed from the
# 'category' dtype.
categorical_columns4 = list(x4.select_dtypes('category').columns)
x4_rf = x4.astype({col: "category" for col in categorical_columns4})
# The same encoder object is refit on every column, so after the loop `le`
# only holds the classes of the last column processed.
le = sklearn.preprocessing.LabelEncoder()
for col in categorical_columns4:
    x4_rf[col] = le.fit_transform(x4_rf[col].astype(str))
# Every int64 column of the random forest frame is stored as str.
d4 = dict.fromkeys(x4_rf.select_dtypes(np.int64).columns, str)
x4_rf = x4_rf.astype(d4)
#Prepare data for the outcome 5 (complications).
# This file does not import sklearn.preprocessing anywhere else; import it
# explicitly so the LabelEncoder lookup below does not depend on some other
# sklearn import having loaded the submodule as a side effect.
import sklearn.preprocessing

# Split the label off; x5 holds only predictor columns from here on.
y5 = x5.pop('OUTCOME')

# Object (text) columns become pandas categoricals for the boosted models.
categorical_columns5 = list(x5.select_dtypes('object').columns)
x5 = x5.astype({col: "category" for col in categorical_columns5})

# XGBoost training matrix.
y5_data_xgb = xgb.DMatrix(x5, label=y5, enable_categorical=True)

# LightGBM training set; column names are reduced to [A-Za-z0-9_].
x5_lgb = x5.rename(columns = lambda x:re.sub('[^A-Za-z0-9_]+', '', x))
y5_data_lgb = lgb.Dataset(x5_lgb, label=y5)

# CatBoost training pool.
y5_data_cb = Pool(data=x5, label=y5, cat_features=categorical_columns5)

# Random forest frame: astype() returns a new frame, so the encoding below
# does not touch x5. The categorical column list is recomputed from the
# 'category' dtype.
categorical_columns5 = list(x5.select_dtypes('category').columns)
x5_rf = x5.astype({col: "category" for col in categorical_columns5})
# The same encoder object is refit on every column, so after the loop `le`
# only holds the classes of the last column processed.
le = sklearn.preprocessing.LabelEncoder()
for col in categorical_columns5:
    x5_rf[col] = le.fit_transform(x5_rf[col].astype(str))
# Every int64 column of the random forest frame is stored as str.
d5 = dict.fromkeys(x5_rf.select_dtypes(np.int64).columns, str)
x5_rf = x5_rf.astype(d5)
#Assign hyperparameters.
# One fixed parameter set per outcome; the variable name carries the model
# family it is meant for (lgb, rf, cb, xgb).

# Outcome 1: LightGBM.
y1_lgb_params = {
    'objective': 'binary',
    'boosting_type': 'gbdt',
    'lambda_l1': 0.5873889067286373,
    'lambda_l2': 0.0043364331356120405,
    'num_leaves': 221,
    'feature_fraction': 0.8036023624154648,
    'bagging_fraction': 0.43028542431491096,
    'bagging_freq': 5,
    'min_child_samples': 7,
}

# Outcome 2: random forest.
y2_rf_params = {
    'criterion': 'gini',
    'max_features': None,
    'max_depth': 98,
    'n_estimators': 1500,
    'min_samples_leaf': 3,
    'min_samples_split': 4,
}

# Outcome 3: CatBoost.
y3_cb_params = {
    'objective': 'Logloss',
    'colsample_bylevel': 0.025539888924058358,
    'depth': 10,
    'boosting_type': 'Ordered',
    'bootstrap_type': 'Bernoulli',
    'subsample': 0.1746916603674696,
}

# Outcome 4: XGBoost.
y4_xgb_params = {
    'objective': 'binary:logistic',
    'booster': 'gbtree',
    'lambda': 1.9428110874325094e-06,
    'alpha': 0.4622155288125666,
    'max_depth': 7,
    'eta': 0.7851633597929335,
    'gamma': 4.627257790053709e-07,
    'grow_policy': 'lossguide',
}

# Outcome 5: XGBoost.
y5_xgb_params = {
    'objective': 'binary:logistic',
    'booster': 'gbtree',
    'lambda': 2.3932298953962467e-07,
    'alpha': 0.028092948114954518,
    'max_depth': 4,
    'eta': 0.9081862563796025,
    'gamma': 2.6030807393900514e-07,
    'grow_policy': 'lossguide',
}
#Training models.
# One model is fitted per outcome and each is wrapped in a SHAP TreeExplainer.
# The fits run in outcome order 1..5.
from sklearn.ensemble import RandomForestClassifier as rf

# Outcome 1 (mortality): LightGBM.
y1_model_lgb = lgb.train(train_set=y1_data_lgb, params=y1_lgb_params)
y1_explainer_lgb = shap.TreeExplainer(y1_model_lgb)

# Outcome 2 (discharge): random forest. fit() hands back the estimator it
# was called on, so y2_rf and y2_model_rf name the same fitted object.
y2_rf = rf(**y2_rf_params)
y2_model_rf = y2_rf.fit(x2_rf, y2)
y2_explainer_rf = shap.TreeExplainer(y2_model_rf)

# Outcome 3 (LOS): CatBoost.
y3_model_cb = cb.train(params=y3_cb_params, pool=y3_data_cb)
y3_explainer_cb = shap.TreeExplainer(y3_model_cb)

# Outcome 4 (ICU LOS): XGBoost.
y4_model_xgb = xgb.train(dtrain=y4_data_xgb, params=y4_xgb_params)
y4_explainer_xgb = shap.TreeExplainer(y4_model_xgb)

# Outcome 5 (complications): XGBoost.
y5_model_xgb = xgb.train(dtrain=y5_data_xgb, params=y5_xgb_params)
y5_explainer_xgb = shap.TreeExplainer(y5_model_xgb)
#Define predict for y1 (mortality).
def y1_predict_xgb(*args):
    """Score one patient with the XGBoost mortality model.

    Args:
        *args: One value per column of ``x1``, in column order.

    Returns:
        dict: ``"Mortality"`` mapped to the first model output as a float and
        ``"No Mortality"`` mapped to one minus that value.
    """
    # NOTE(review): no assignment to y1_model_xgb appears in the visible part
    # of this file; confirm it is defined before this function is called.
    df1 = pd.DataFrame([args], columns=x1.columns)
    # Cast with the training frame's categorical dtypes. A plain "category"
    # cast on a one-row frame gives each column a single category (code 0),
    # so the integer codes passed to XGBoost would not line up with the codes
    # the training matrix was built from.
    df1 = df1.astype({col: x1[col].dtype for col in categorical_columns1})
    pos_pred = y1_model_xgb.predict(xgb.DMatrix(df1, enable_categorical=True))
    positive = float(pos_pred[0])
    return {"Mortality": positive, "No Mortality": 1 - positive}
def y1_predict_lgb(*args):
    """Score one patient with the LightGBM mortality model.

    Args:
        *args: One value per column of ``x1_lgb``, in column order.

    Returns:
        dict: ``"Mortality"`` mapped to the first model output as a float and
        ``"No Mortality"`` mapped to one minus that value.
    """
    category_casts = dict.fromkeys(categorical_columns1, "category")
    row = pd.DataFrame([args], columns=x1_lgb.columns).astype(category_casts)
    positive = float(y1_model_lgb.predict(row)[0])
    return {"Mortality": positive, "No Mortality": 1 - positive}
def y1_predict_cb(*args):
    """Score one patient with the CatBoost mortality model.

    Args:
        *args: One value per column of ``x1``, in column order.

    Returns:
        dict: ``"Mortality"`` mapped to element 1 of the first probability row
        and ``"No Mortality"`` mapped to element 0.
    """
    # NOTE(review): no assignment to y1_model_cb appears in the visible part
    # of this file; confirm it is defined before this function is called.
    row = pd.DataFrame([args], columns=x1.columns)
    row = row.astype(dict.fromkeys(categorical_columns1, "category"))
    pool = Pool(row, cat_features = categorical_columns1)
    probabilities = y1_model_cb.predict(pool, prediction_type='Probability')
    first_row = probabilities[0]
    return {"Mortality": float(first_row[1]), "No Mortality": float(first_row[0])}
def y1_predict_rf(*args):
    """Score one patient with the random forest mortality model.

    Args:
        *args: One value per column of ``x1_rf``, in column order.

    Returns:
        dict: ``"Mortality"`` mapped to element 1 of the first probability row
        and ``"No Mortality"`` mapped to element 0.
    """
    # NOTE(review): no assignment to y1_model_rf appears in the visible part
    # of this file; confirm it is defined before this function is called.
    # NOTE(review): the random forest training frame is label-encoded during
    # data preparation and no encoding is applied here - presumably callers
    # pass already-encoded values; confirm.
    df1 = pd.DataFrame([args], columns=x1_rf.columns)
    # Fixed: previously read from an undefined name `df`.
    df1 = df1.astype({col: "category" for col in categorical_columns1})
    d1 = dict.fromkeys(df1.select_dtypes(np.int64).columns, np.int32)
    # Fixed: the int32 cast was assigned to an unused name (`d1f`) and lost.
    df1 = df1.astype(d1)
    pos_pred = y1_model_rf.predict_proba(df1)
    return {"Mortality": float(pos_pred[0][1]), "No Mortality": float(pos_pred[0][0])}
#Define predict for y2 (discharge).
def y2_predict_xgb(*args):
    """Score one patient with the XGBoost discharge model.

    Args:
        *args: One value per column of ``x2``, in column order.

    Returns:
        dict: ``"Facility Discharge"`` mapped to the first model output as a
        float and ``"Home Discharge"`` mapped to one minus that value.
    """
    # NOTE(review): no assignment to y2_model_xgb appears in the visible part
    # of this file; confirm it is defined before this function is called.
    df2 = pd.DataFrame([args], columns=x2.columns)
    # Cast with the training frame's categorical dtypes. A plain "category"
    # cast on a one-row frame gives each column a single category (code 0),
    # so the integer codes passed to XGBoost would not line up with the codes
    # the training matrix was built from.
    df2 = df2.astype({col: x2[col].dtype for col in categorical_columns2})
    pos_pred = y2_model_xgb.predict(xgb.DMatrix(df2, enable_categorical=True))
    positive = float(pos_pred[0])
    return {"Facility Discharge": positive, "Home Discharge": 1 - positive}
def y2_predict_lgb(*args):
    """Score one patient with the LightGBM discharge model.

    Args:
        *args: One value per column of ``x2_lgb``, in column order.

    Returns:
        dict: ``"Facility Discharge"`` mapped to the first model output as a
        float and ``"Home Discharge"`` mapped to one minus that value.
    """
    # NOTE(review): no assignment to y2_model_lgb appears in the visible part
    # of this file; confirm it is defined before this function is called.
    category_casts = dict.fromkeys(categorical_columns2, "category")
    row = pd.DataFrame([args], columns=x2_lgb.columns).astype(category_casts)
    positive = float(y2_model_lgb.predict(row)[0])
    return {"Facility Discharge": positive, "Home Discharge": 1 - positive}
def y2_predict_cb(*args):
    """Score one patient with the CatBoost discharge model.

    Args:
        *args: One value per column of ``x2``, in column order.

    Returns:
        dict: ``"Facility Discharge"`` mapped to element 1 of the first
        probability row and ``"Home Discharge"`` mapped to element 0.
    """
    # NOTE(review): no assignment to y2_model_cb appears in the visible part
    # of this file; confirm it is defined before this function is called.
    row = pd.DataFrame([args], columns=x2.columns)
    row = row.astype(dict.fromkeys(categorical_columns2, "category"))
    pool = Pool(row, cat_features = categorical_columns2)
    probabilities = y2_model_cb.predict(pool, prediction_type='Probability')
    first_row = probabilities[0]
    return {"Facility Discharge": float(first_row[1]), "Home Discharge": float(first_row[0])}
def y2_predict_rf(*args):
    """Score one patient with the random forest discharge model.

    Args:
        *args: One value per column of ``x2_rf``, in column order.

    Returns:
        dict: ``"Facility Discharge"`` mapped to element 1 of the first
        probability row and ``"Home Discharge"`` mapped to element 0.
    """
    # NOTE(review): the random forest training frame is label-encoded during
    # data preparation and no encoding is applied here - presumably callers
    # pass already-encoded values; confirm.
    row = pd.DataFrame([args], columns=x2_rf.columns)
    row = row.astype(dict.fromkeys(categorical_columns2, "category"))
    # Narrow whatever is still int64 after the category cast to int32.
    int64_columns = row.select_dtypes(np.int64).columns
    row = row.astype({name: np.int32 for name in int64_columns})
    first_row = y2_model_rf.predict_proba(row)[0]
    return {"Facility Discharge": float(first_row[1]), "Home Discharge": float(first_row[0])}
#Define predict for y3 (LOS).
def y3_predict_xgb(*args):
    """Score one patient with the XGBoost length-of-stay model.

    Args:
        *args: One value per column of ``x3``, in column order.

    Returns:
        dict: ``"Prolonged LOS"`` mapped to the first model output as a float
        and ``"No Prolonged LOS"`` mapped to one minus that value.
    """
    # NOTE(review): no assignment to y3_model_xgb appears in the visible part
    # of this file; confirm it is defined before this function is called.
    df3 = pd.DataFrame([args], columns=x3.columns)
    # Cast with the training frame's categorical dtypes. A plain "category"
    # cast on a one-row frame gives each column a single category (code 0),
    # so the integer codes passed to XGBoost would not line up with the codes
    # the training matrix was built from.
    df3 = df3.astype({col: x3[col].dtype for col in categorical_columns3})
    pos_pred = y3_model_xgb.predict(xgb.DMatrix(df3, enable_categorical=True))
    positive = float(pos_pred[0])
    return {"Prolonged LOS": positive, "No Prolonged LOS": 1 - positive}
def y3_predict_lgb(*args):
    """Score one patient with the LightGBM length-of-stay model.

    Args:
        *args: One value per column of ``x3_lgb``, in column order.

    Returns:
        dict: ``"Prolonged LOS"`` mapped to the first model output as a float
        and ``"No Prolonged LOS"`` mapped to one minus that value.
    """
    # NOTE(review): no assignment to y3_model_lgb appears in the visible part
    # of this file; confirm it is defined before this function is called.
    category_casts = dict.fromkeys(categorical_columns3, "category")
    row = pd.DataFrame([args], columns=x3_lgb.columns).astype(category_casts)
    positive = float(y3_model_lgb.predict(row)[0])
    return {"Prolonged LOS": positive, "No Prolonged LOS": 1 - positive}
def y3_predict_cb(*args):
    """Score one patient with the CatBoost length-of-stay model.

    Args:
        *args: One value per column of ``x3``, in column order.

    Returns:
        dict: ``"Prolonged LOS"`` mapped to element 1 of the first probability
        row and ``"No Prolonged LOS"`` mapped to element 0.
    """
    row = pd.DataFrame([args], columns=x3.columns)
    row = row.astype(dict.fromkeys(categorical_columns3, "category"))
    pool = Pool(row, cat_features = categorical_columns3)
    probabilities = y3_model_cb.predict(pool, prediction_type='Probability')
    first_row = probabilities[0]
    return {"Prolonged LOS": float(first_row[1]), "No Prolonged LOS": float(first_row[0])}
def y3_predict_rf(*args):
    """Score one patient with the random forest length-of-stay model.

    Args:
        *args: One value per column of ``x3_rf``, in column order.

    Returns:
        dict: ``"Prolonged LOS"`` mapped to element 1 of the first probability
        row and ``"No Prolonged LOS"`` mapped to element 0.
    """
    # NOTE(review): no assignment to y3_model_rf appears in the visible part
    # of this file; confirm it is defined before this function is called.
    # NOTE(review): the random forest training frame is label-encoded during
    # data preparation and no encoding is applied here - presumably callers
    # pass already-encoded values; confirm.
    df3 = pd.DataFrame([args], columns=x3_rf.columns)
    df3 = df3.astype({col: "category" for col in categorical_columns3})
    d3 = dict.fromkeys(df3.select_dtypes(np.int64).columns, np.int32)
    # Fixed: previously read from an undefined name `df`.
    df3 = df3.astype(d3)
    pos_pred = y3_model_rf.predict_proba(df3)
    return {"Prolonged LOS": float(pos_pred[0][1]), "No Prolonged LOS": float(pos_pred[0][0])}
#Define predict for y4 (ICU LOS).
def y4_predict_xgb(*args):
    """Score one patient with the XGBoost ICU length-of-stay model.

    Args:
        *args: One value per column of ``x4``, in column order.

    Returns:
        dict: ``"Prolonged ICU LOS"`` mapped to the first model output as a
        float and ``"No Prolonged ICU LOS"`` mapped to one minus that value.
    """
    df4 = pd.DataFrame([args], columns=x4.columns)
    # Cast with the training frame's categorical dtypes. A plain "category"
    # cast on a one-row frame gives each column a single category (code 0),
    # so the integer codes passed to XGBoost would not line up with the codes
    # the training matrix was built from.
    df4 = df4.astype({col: x4[col].dtype for col in categorical_columns4})
    pos_pred = y4_model_xgb.predict(xgb.DMatrix(df4, enable_categorical=True))
    positive = float(pos_pred[0])
    return {"Prolonged ICU LOS": positive, "No Prolonged ICU LOS": 1 - positive}
def y4_predict_lgb(*args):
    """Score one patient with the LightGBM ICU length-of-stay model.

    Args:
        *args: One value per column of ``x4_lgb``, in column order.

    Returns:
        dict: ``"Prolonged ICU LOS"`` mapped to the first model output as a
        float and ``"No Prolonged ICU LOS"`` mapped to one minus that value.
    """
    # NOTE(review): no assignment to y4_model_lgb appears in the visible part
    # of this file; confirm it is defined before this function is called.
    df4 = pd.DataFrame([args], columns=x4_lgb.columns)
    # Fixed: previously read from an undefined name `df`.
    df4 = df4.astype({col: "category" for col in categorical_columns4})
    pos_pred = y4_model_lgb.predict(df4)
    positive = float(pos_pred[0])
    return {"Prolonged ICU LOS": positive, "No Prolonged ICU LOS": 1 - positive}
def y4_predict_cb(*args):
    """Score one patient with the CatBoost ICU length-of-stay model.

    Args:
        *args: One value per column of ``x4``, in column order.

    Returns:
        dict: ``"Prolonged ICU LOS"`` mapped to element 1 of the first
        probability row and ``"No Prolonged ICU LOS"`` mapped to element 0.
    """
    # NOTE(review): no assignment to y4_model_cb appears in the visible part
    # of this file; confirm it is defined before this function is called.
    # Fixed: the frame was built via `df4.DataFrame`, i.e. from the local
    # being assigned, instead of `pd.DataFrame`.
    df4 = pd.DataFrame([args], columns=x4.columns)
    df4 = df4.astype({col: "category" for col in categorical_columns4})
    pos_pred = y4_model_cb.predict(Pool(df4, cat_features = categorical_columns4), prediction_type='Probability')
    return {"Prolonged ICU LOS": float(pos_pred[0][1]), "No Prolonged ICU LOS": float(pos_pred[0][0])}
def y4_predict_rf(*args):
    """Score one patient with the random forest ICU length-of-stay model.

    Args:
        *args: One value per column of ``x4_rf``, in column order.

    Returns:
        dict: ``"Prolonged ICU LOS"`` mapped to element 1 of the first
        probability row and ``"No Prolonged ICU LOS"`` mapped to element 0.
    """
    # NOTE(review): no assignment to y4_model_rf appears in the visible part
    # of this file; confirm it is defined before this function is called.
    # NOTE(review): the random forest training frame is label-encoded during
    # data preparation and no encoding is applied here - presumably callers
    # pass already-encoded values; confirm.
    row = pd.DataFrame([args], columns=x4_rf.columns)
    row = row.astype(dict.fromkeys(categorical_columns4, "category"))
    # Narrow whatever is still int64 after the category cast to int32.
    int64_columns = row.select_dtypes(np.int64).columns
    row = row.astype({name: np.int32 for name in int64_columns})
    first_row = y4_model_rf.predict_proba(row)[0]
    return {"Prolonged ICU LOS": float(first_row[1]), "No Prolonged ICU LOS": float(first_row[0])}
#Define predict for y5 (complications).
def y5_predict_xgb(*args):
    """Score one patient with the XGBoost complications model.

    Args:
        *args: One value per column of ``x5``, in column order.

    Returns:
        dict: ``"Major Complications"`` mapped to the first model output as a
        float and ``"No Major Complications"`` mapped to one minus that value.
    """
    df5 = pd.DataFrame([args], columns=x5.columns)
    # Cast with the training frame's categorical dtypes. A plain "category"
    # cast on a one-row frame gives each column a single category (code 0),
    # so the integer codes passed to XGBoost would not line up with the codes
    # the training matrix was built from.
    df5 = df5.astype({col: x5[col].dtype for col in categorical_columns5})
    pos_pred = y5_model_xgb.predict(xgb.DMatrix(df5, enable_categorical=True))
    positive = float(pos_pred[0])
    return {"Major Complications": positive, "No Major Complications": 1 - positive}
def y5_predict_lgb(*args):
    """Score one input row with the y5 (complications) LightGBM model.

    Args:
        *args: Feature values, one per column of ``x5_lgb``, in column order.

    Returns:
        A dict with the model output for the first row under
        "Major Complications" and its complement (1 - output) under
        "No Major Complications".
    """
    # One-row frame; the columns in categorical_columns5 are cast to "category".
    row = pd.DataFrame([args], columns=x5_lgb.columns).astype(
        {name: "category" for name in categorical_columns5}
    )
    scores = y5_model_lgb.predict(row)
    return {
        "Major Complications": float(scores[0]),
        "No Major Complications": 1 - float(scores[0]),
    }
def y5_predict_cb(*args):
    """Score one input row with the y5 (complications) CatBoost model.

    Args:
        *args: Feature values, one per column of ``x5``, in column order.

    Returns:
        A dict with the second entry of the first prediction row under
        "Major Complications" and the first entry under
        "No Major Complications".
    """
    # One-row frame; the columns in categorical_columns5 are cast to "category".
    row = pd.DataFrame([args], columns=x5.columns).astype(
        {name: "category" for name in categorical_columns5}
    )
    pool = Pool(row, cat_features=categorical_columns5)
    proba = y5_model_cb.predict(pool, prediction_type='Probability')
    return {
        "Major Complications": float(proba[0][1]),
        "No Major Complications": float(proba[0][0]),
    }
def y5_predict_rf(*args):
    """Score one input row with the y5 (complications) random forest model.

    Args:
        *args: Feature values, one per column of ``x5_rf``, in column order.

    Returns:
        A dict with the second entry of the first ``predict_proba`` row under
        "Major Complications" and the first entry under
        "No Major Complications".
    """
    df5 = pd.DataFrame([args], columns=x5_rf.columns)
    # Cast the frame that was just built (df5); the columns listed in
    # categorical_columns5 become pandas "category" dtype.
    df5 = df5.astype({col: "category" for col in categorical_columns5})
    # Every remaining int64 column is narrowed to int32.
    d5 = dict.fromkeys(df5.select_dtypes(np.int64).columns, np.int32)
    df5 = df5.astype(d5)
    pos_pred = y5_model_rf.predict_proba(df5)
    return {"Major Complications": float(pos_pred[0][1]), "No Major Complications": float(pos_pred[0][0])}
#Define function for wrapping feature labels.
def wrap_labels(ax, width, break_long_words=False):
    """Re-set the y tick labels of ``ax`` with their text wrapped by ``textwrap.fill``.

    Args:
        ax: Object providing ``get_yticklabels()`` and ``set_yticklabels()``.
        width: Maximum line width passed to ``textwrap.fill``.
        break_long_words: Forwarded to ``textwrap.fill``.
    """
    wrapped = [
        textwrap.fill(
            tick.get_text(), width=width, break_long_words=break_long_words
        )
        for tick in ax.get_yticklabels()
    ]
    ax.set_yticklabels(wrapped, rotation=0)
#Define interpret for y1 (mortality).
def y1_interpret_xgb(*args):
    """Draw a SHAP bar plot from the y1 (mortality) XGBoost explainer for one input row.

    Args:
        *args: Feature values, one per column of ``x1``, in column order.

    Returns:
        The matplotlib figure (from ``plt.gcf()``) that ``shap.bar_plot`` drew on.
    """
    # One-row frame; the columns in categorical_columns1 are cast to "category".
    row = pd.DataFrame([args], columns=x1.columns).astype(
        {name: "category" for name in categorical_columns1}
    )
    # Only magnitudes are plotted, so the sign of every SHAP value is dropped.
    magnitudes = np.abs(
        y1_explainer_xgb.shap_values(xgb.DMatrix(row, enable_categorical=True))
    )
    shap.bar_plot(magnitudes[0], max_display=10, show=False, feature_names=f1_names)
    fig, ax = plt.gcf(), plt.gca()
    # Wrap the y tick labels at a width of 20.
    wrap_labels(ax, 20)
    plt.tight_layout()
    fig.set_figheight(7)
    fig.set_figwidth(9)
    plt.xlabel(
        "SHAP value (impact on model output)",
        fontsize=12,
        fontweight="heavy",
        labelpad=8,
    )
    for axis_name in ("y", "x"):
        plt.tick_params(axis=axis_name, direction="out", labelsize=12)
    return fig
def y1_interpret_lgb(*args):
    """Draw a SHAP bar plot from the y1 (mortality) LightGBM explainer for one input row.

    Args:
        *args: Feature values, one per column of ``x1_lgb``, in column order.

    Returns:
        The matplotlib figure (from ``plt.gcf()``) that ``shap.bar_plot`` drew on.
    """
    # One-row frame; the columns in categorical_columns1 are cast to "category".
    row = pd.DataFrame([args], columns=x1_lgb.columns).astype(
        {name: "category" for name in categorical_columns1}
    )
    # Only magnitudes are plotted, so the sign of every SHAP value is dropped.
    magnitudes = np.abs(y1_explainer_lgb.shap_values(row))
    # The first entry of the first element is plotted.
    shap.bar_plot(magnitudes[0][0], max_display=10, show=False, feature_names=f1_names)
    fig, ax = plt.gcf(), plt.gca()
    # Wrap the y tick labels at a width of 20.
    wrap_labels(ax, 20)
    plt.tight_layout()
    fig.set_figheight(7)
    fig.set_figwidth(9)
    plt.xlabel(
        "SHAP value (impact on model output)",
        fontsize=12,
        fontweight="heavy",
        labelpad=8,
    )
    for axis_name in ("y", "x"):
        plt.tick_params(axis=axis_name, direction="out", labelsize=12)
    return fig
def y1_interpret_cb(*args):
    """Draw a SHAP bar plot from the y1 (mortality) CatBoost explainer for one input row.

    Args:
        *args: Feature values, one per column of ``x1``, in column order.

    Returns:
        The matplotlib figure (from ``plt.gcf()``) that ``shap.bar_plot`` drew on.
    """
    df1 = pd.DataFrame([args], columns=x1.columns)
    df1 = df1.astype({col: "category" for col in categorical_columns1})
    shap_values1 = y1_explainer_cb.shap_values(Pool(df1, cat_features=categorical_columns1))
    # Only magnitudes are plotted, so the sign of every SHAP value is dropped.
    shap_values1 = np.abs(shap_values1)
    shap.bar_plot(shap_values1[0], max_display=10, show=False, feature_names=f1_names)
    fig = plt.gcf()
    ax = plt.gca()
    # Wrap the y tick labels at a width of 20.
    wrap_labels(ax, 20)
    plt.tight_layout()
    fig.set_figheight(7)
    fig.set_figwidth(9)
    plt.xlabel("SHAP value (impact on model output)", fontsize=12, fontweight='heavy', labelpad=8)
    plt.tick_params(axis="y", direction="out", labelsize=12)
    plt.tick_params(axis="x", direction="out", labelsize=12)
    return fig
def y1_interpret_rf(*args):
    """Draw a SHAP bar plot from the y1 (mortality) random forest explainer for one input row.

    Args:
        *args: Feature values, one per column of ``x1_rf``, in column order.

    Returns:
        The matplotlib figure (from ``plt.gcf()``) that ``shap.bar_plot`` drew on.
    """
    # One-row frame; the columns in categorical_columns1 are cast to "category".
    row = pd.DataFrame([args], columns=x1_rf.columns).astype(
        {name: "category" for name in categorical_columns1}
    )
    # Only magnitudes are plotted, so the sign of every SHAP value is dropped.
    magnitudes = np.abs(y1_explainer_rf.shap_values(row))
    # The first entry of the first element is plotted.
    shap.bar_plot(magnitudes[0][0], max_display=10, show=False, feature_names=f1_names)
    fig, ax = plt.gcf(), plt.gca()
    # Wrap the y tick labels at a width of 20.
    wrap_labels(ax, 20)
    plt.tight_layout()
    fig.set_figheight(7)
    fig.set_figwidth(9)
    plt.xlabel(
        "SHAP value (impact on model output)",
        fontsize=12,
        fontweight="heavy",
        labelpad=8,
    )
    for axis_name in ("y", "x"):
        plt.tick_params(axis=axis_name, direction="out", labelsize=12)
    return fig
#Define interpret for y2 (discharge).
def y2_interpret_xgb(*args):
    """Draw a SHAP bar plot from the y2 (discharge) XGBoost explainer for one input row.

    Args:
        *args: Feature values, one per column of ``x2``, in column order.

    Returns:
        The matplotlib figure (from ``plt.gcf()``) that ``shap.bar_plot`` drew on.
    """
    # One-row frame; the columns in categorical_columns2 are cast to "category".
    row = pd.DataFrame([args], columns=x2.columns).astype(
        {name: "category" for name in categorical_columns2}
    )
    # Only magnitudes are plotted, so the sign of every SHAP value is dropped.
    magnitudes = np.abs(
        y2_explainer_xgb.shap_values(xgb.DMatrix(row, enable_categorical=True))
    )
    shap.bar_plot(magnitudes[0], max_display=10, show=False, feature_names=f2_names)
    fig, ax = plt.gcf(), plt.gca()
    # Wrap the y tick labels at a width of 20.
    wrap_labels(ax, 20)
    plt.tight_layout()
    fig.set_figheight(7)
    fig.set_figwidth(9)
    plt.xlabel(
        "SHAP value (impact on model output)",
        fontsize=12,
        fontweight="heavy",
        labelpad=8,
    )
    for axis_name in ("y", "x"):
        plt.tick_params(axis=axis_name, direction="out", labelsize=12)
    return fig
def y2_interpret_lgb(*args):
    """Draw a SHAP bar plot from the y2 (discharge) LightGBM explainer for one input row.

    Args:
        *args: Feature values, one per column of ``x2_lgb``, in column order.

    Returns:
        The matplotlib figure (from ``plt.gcf()``) that ``shap.bar_plot`` drew on.
    """
    # One-row frame; the columns in categorical_columns2 are cast to "category".
    row = pd.DataFrame([args], columns=x2_lgb.columns).astype(
        {name: "category" for name in categorical_columns2}
    )
    # Only magnitudes are plotted, so the sign of every SHAP value is dropped.
    magnitudes = np.abs(y2_explainer_lgb.shap_values(row))
    # The first entry of the first element is plotted.
    shap.bar_plot(magnitudes[0][0], max_display=10, show=False, feature_names=f2_names)
    fig, ax = plt.gcf(), plt.gca()
    # Wrap the y tick labels at a width of 20.
    wrap_labels(ax, 20)
    plt.tight_layout()
    fig.set_figheight(7)
    fig.set_figwidth(9)
    plt.xlabel(
        "SHAP value (impact on model output)",
        fontsize=12,
        fontweight="heavy",
        labelpad=8,
    )
    for axis_name in ("y", "x"):
        plt.tick_params(axis=axis_name, direction="out", labelsize=12)
    return fig
def y2_interpret_cb(*args):
    """Draw a SHAP bar plot from the y2 (discharge) CatBoost explainer for one input row.

    Args:
        *args: Feature values, one per column of ``x2``, in column order.

    Returns:
        The matplotlib figure (from ``plt.gcf()``) that ``shap.bar_plot`` drew on.
    """
    # One-row frame; the columns in categorical_columns2 are cast to "category".
    row = pd.DataFrame([args], columns=x2.columns).astype(
        {name: "category" for name in categorical_columns2}
    )
    # Only magnitudes are plotted, so the sign of every SHAP value is dropped.
    magnitudes = np.abs(
        y2_explainer_cb.shap_values(Pool(row, cat_features=categorical_columns2))
    )
    shap.bar_plot(magnitudes[0], max_display=10, show=False, feature_names=f2_names)
    fig, ax = plt.gcf(), plt.gca()
    # Wrap the y tick labels at a width of 20.
    wrap_labels(ax, 20)
    plt.tight_layout()
    fig.set_figheight(7)
    fig.set_figwidth(9)
    plt.xlabel(
        "SHAP value (impact on model output)",
        fontsize=12,
        fontweight="heavy",
        labelpad=8,
    )
    for axis_name in ("y", "x"):
        plt.tick_params(axis=axis_name, direction="out", labelsize=12)
    return fig
def y2_interpret_rf(*args):
    """Draw a SHAP bar plot from the y2 (discharge) random forest explainer for one input row.

    Args:
        *args: Feature values, one per column of ``x2_rf``, in column order.

    Returns:
        The matplotlib figure (from ``plt.gcf()``) that ``shap.bar_plot`` drew on.
    """
    # One-row frame; the columns in categorical_columns2 are cast to "category".
    row = pd.DataFrame([args], columns=x2_rf.columns).astype(
        {name: "category" for name in categorical_columns2}
    )
    # Only magnitudes are plotted, so the sign of every SHAP value is dropped.
    magnitudes = np.abs(y2_explainer_rf.shap_values(row))
    # The first entry of the first element is plotted.
    shap.bar_plot(magnitudes[0][0], max_display=10, show=False, feature_names=f2_names)
    fig, ax = plt.gcf(), plt.gca()
    # Wrap the y tick labels at a width of 20.
    wrap_labels(ax, 20)
    plt.tight_layout()
    fig.set_figheight(7)
    fig.set_figwidth(9)
    plt.xlabel(
        "SHAP value (impact on model output)",
        fontsize=12,
        fontweight="heavy",
        labelpad=8,
    )
    for axis_name in ("y", "x"):
        plt.tick_params(axis=axis_name, direction="out", labelsize=12)
    return fig
#Define interpret for y3 (LOS).
def y3_interpret_xgb(*args):
    """Draw a SHAP bar plot from the y3 (LOS) XGBoost explainer for one input row.

    Args:
        *args: Feature values, one per column of ``x3``, in column order.

    Returns:
        The matplotlib figure (from ``plt.gcf()``) that ``shap.bar_plot`` drew on.
    """
    # One-row frame; the columns in categorical_columns3 are cast to "category".
    row = pd.DataFrame([args], columns=x3.columns).astype(
        {name: "category" for name in categorical_columns3}
    )
    # Only magnitudes are plotted, so the sign of every SHAP value is dropped.
    magnitudes = np.abs(
        y3_explainer_xgb.shap_values(xgb.DMatrix(row, enable_categorical=True))
    )
    shap.bar_plot(magnitudes[0], max_display=10, show=False, feature_names=f3_names)
    fig, ax = plt.gcf(), plt.gca()
    # Wrap the y tick labels at a width of 20.
    wrap_labels(ax, 20)
    plt.tight_layout()
    fig.set_figheight(7)
    fig.set_figwidth(9)
    plt.xlabel(
        "SHAP value (impact on model output)",
        fontsize=12,
        fontweight="heavy",
        labelpad=8,
    )
    for axis_name in ("y", "x"):
        plt.tick_params(axis=axis_name, direction="out", labelsize=12)
    return fig
def y3_interpret_lgb(*args):
    """Draw a SHAP bar plot from the y3 (LOS) LightGBM explainer for one input row.

    Args:
        *args: Feature values, one per column of ``x3_lgb``, in column order.

    Returns:
        The matplotlib figure (from ``plt.gcf()``) that ``shap.bar_plot`` drew on.
    """
    # One-row frame; the columns in categorical_columns3 are cast to "category".
    row = pd.DataFrame([args], columns=x3_lgb.columns).astype(
        {name: "category" for name in categorical_columns3}
    )
    # Only magnitudes are plotted, so the sign of every SHAP value is dropped.
    magnitudes = np.abs(y3_explainer_lgb.shap_values(row))
    # The first entry of the first element is plotted.
    shap.bar_plot(magnitudes[0][0], max_display=10, show=False, feature_names=f3_names)
    fig, ax = plt.gcf(), plt.gca()
    # Wrap the y tick labels at a width of 20.
    wrap_labels(ax, 20)
    plt.tight_layout()
    fig.set_figheight(7)
    fig.set_figwidth(9)
    plt.xlabel(
        "SHAP value (impact on model output)",
        fontsize=12,
        fontweight="heavy",
        labelpad=8,
    )
    for axis_name in ("y", "x"):
        plt.tick_params(axis=axis_name, direction="out", labelsize=12)
    return fig
def y3_interpret_cb(*args):
    """Draw a SHAP bar plot from the y3 (LOS) CatBoost explainer for one input row.

    Args:
        *args: Feature values, one per column of ``x3``, in column order.

    Returns:
        The matplotlib figure (from ``plt.gcf()``) that ``shap.bar_plot`` drew on.
    """
    # One-row frame; the columns in categorical_columns3 are cast to "category".
    row = pd.DataFrame([args], columns=x3.columns).astype(
        {name: "category" for name in categorical_columns3}
    )
    # Only magnitudes are plotted, so the sign of every SHAP value is dropped.
    magnitudes = np.abs(
        y3_explainer_cb.shap_values(Pool(row, cat_features=categorical_columns3))
    )
    shap.bar_plot(magnitudes[0], max_display=10, show=False, feature_names=f3_names)
    fig, ax = plt.gcf(), plt.gca()
    # Wrap the y tick labels at a width of 20.
    wrap_labels(ax, 20)
    plt.tight_layout()
    fig.set_figheight(7)
    fig.set_figwidth(9)
    plt.xlabel(
        "SHAP value (impact on model output)",
        fontsize=12,
        fontweight="heavy",
        labelpad=8,
    )
    for axis_name in ("y", "x"):
        plt.tick_params(axis=axis_name, direction="out", labelsize=12)
    return fig
def y3_interpret_rf(*args):
    """Draw a SHAP bar plot from the y3 (LOS) random forest explainer for one input row.

    Args:
        *args: Feature values, one per column of ``x3_rf``, in column order.

    Returns:
        The matplotlib figure (from ``plt.gcf()``) that ``shap.bar_plot`` drew on.
    """
    # One-row frame; the columns in categorical_columns3 are cast to "category".
    row = pd.DataFrame([args], columns=x3_rf.columns).astype(
        {name: "category" for name in categorical_columns3}
    )
    # Only magnitudes are plotted, so the sign of every SHAP value is dropped.
    magnitudes = np.abs(y3_explainer_rf.shap_values(row))
    # The first entry of the first element is plotted.
    shap.bar_plot(magnitudes[0][0], max_display=10, show=False, feature_names=f3_names)
    fig, ax = plt.gcf(), plt.gca()
    # Wrap the y tick labels at a width of 20.
    wrap_labels(ax, 20)
    plt.tight_layout()
    fig.set_figheight(7)
    fig.set_figwidth(9)
    plt.xlabel(
        "SHAP value (impact on model output)",
        fontsize=12,
        fontweight="heavy",
        labelpad=8,
    )
    for axis_name in ("y", "x"):
        plt.tick_params(axis=axis_name, direction="out", labelsize=12)
    return fig
#Define interpret for y4 (ICU LOS).
def y4_interpret_xgb(*args):
    """Draw a SHAP bar plot from the y4 (ICU LOS) XGBoost explainer for one input row.

    Args:
        *args: Feature values, one per column of ``x4``, in column order.

    Returns:
        The matplotlib figure (from ``plt.gcf()``) that ``shap.bar_plot`` drew on.
    """
    # One-row frame; the columns in categorical_columns4 are cast to "category".
    row = pd.DataFrame([args], columns=x4.columns).astype(
        {name: "category" for name in categorical_columns4}
    )
    # Only magnitudes are plotted, so the sign of every SHAP value is dropped.
    magnitudes = np.abs(
        y4_explainer_xgb.shap_values(xgb.DMatrix(row, enable_categorical=True))
    )
    shap.bar_plot(magnitudes[0], max_display=10, show=False, feature_names=f4_names)
    fig, ax = plt.gcf(), plt.gca()
    # Wrap the y tick labels at a width of 20.
    wrap_labels(ax, 20)
    plt.tight_layout()
    fig.set_figheight(7)
    fig.set_figwidth(9)
    plt.xlabel(
        "SHAP value (impact on model output)",
        fontsize=12,
        fontweight="heavy",
        labelpad=8,
    )
    for axis_name in ("y", "x"):
        plt.tick_params(axis=axis_name, direction="out", labelsize=12)
    return fig
def y4_interpret_lgb(*args):
    """Draw a SHAP bar plot from the y4 (ICU LOS) LightGBM explainer for one input row.

    Args:
        *args: Feature values, one per column of ``x4_lgb``, in column order.

    Returns:
        The matplotlib figure (from ``plt.gcf()``) that ``shap.bar_plot`` drew on.
    """
    # One-row frame; the columns in categorical_columns4 are cast to "category".
    row = pd.DataFrame([args], columns=x4_lgb.columns).astype(
        {name: "category" for name in categorical_columns4}
    )
    # Only magnitudes are plotted, so the sign of every SHAP value is dropped.
    magnitudes = np.abs(y4_explainer_lgb.shap_values(row))
    # The first entry of the first element is plotted.
    shap.bar_plot(magnitudes[0][0], max_display=10, show=False, feature_names=f4_names)
    fig, ax = plt.gcf(), plt.gca()
    # Wrap the y tick labels at a width of 20.
    wrap_labels(ax, 20)
    plt.tight_layout()
    fig.set_figheight(7)
    fig.set_figwidth(9)
    plt.xlabel(
        "SHAP value (impact on model output)",
        fontsize=12,
        fontweight="heavy",
        labelpad=8,
    )
    for axis_name in ("y", "x"):
        plt.tick_params(axis=axis_name, direction="out", labelsize=12)
    return fig
def y4_interpret_cb(*args):
    """Draw a SHAP bar plot from the y4 (ICU LOS) CatBoost explainer for one input row.

    Args:
        *args: Feature values, one per column of ``x4``, in column order.

    Returns:
        The matplotlib figure (from ``plt.gcf()``) that ``shap.bar_plot`` drew on.
    """
    # One-row frame; the columns in categorical_columns4 are cast to "category".
    row = pd.DataFrame([args], columns=x4.columns).astype(
        {name: "category" for name in categorical_columns4}
    )
    # Only magnitudes are plotted, so the sign of every SHAP value is dropped.
    magnitudes = np.abs(
        y4_explainer_cb.shap_values(Pool(row, cat_features=categorical_columns4))
    )
    shap.bar_plot(magnitudes[0], max_display=10, show=False, feature_names=f4_names)
    fig, ax = plt.gcf(), plt.gca()
    # Wrap the y tick labels at a width of 20.
    wrap_labels(ax, 20)
    plt.tight_layout()
    fig.set_figheight(7)
    fig.set_figwidth(9)
    plt.xlabel(
        "SHAP value (impact on model output)",
        fontsize=12,
        fontweight="heavy",
        labelpad=8,
    )
    for axis_name in ("y", "x"):
        plt.tick_params(axis=axis_name, direction="out", labelsize=12)
    return fig
def y4_interpret_rf(*args):
    """Draw a SHAP bar plot from the y4 (ICU LOS) random forest explainer for one input row.

    Args:
        *args: Feature values, one per column of ``x4_rf``, in column order.

    Returns:
        The matplotlib figure (from ``plt.gcf()``) that ``shap.bar_plot`` drew on.
    """
    # One-row frame; the columns in categorical_columns4 are cast to "category".
    row = pd.DataFrame([args], columns=x4_rf.columns).astype(
        {name: "category" for name in categorical_columns4}
    )
    # Only magnitudes are plotted, so the sign of every SHAP value is dropped.
    magnitudes = np.abs(y4_explainer_rf.shap_values(row))
    # The first entry of the first element is plotted.
    shap.bar_plot(magnitudes[0][0], max_display=10, show=False, feature_names=f4_names)
    fig, ax = plt.gcf(), plt.gca()
    # Wrap the y tick labels at a width of 20.
    wrap_labels(ax, 20)
    plt.tight_layout()
    fig.set_figheight(7)
    fig.set_figwidth(9)
    plt.xlabel(
        "SHAP value (impact on model output)",
        fontsize=12,
        fontweight="heavy",
        labelpad=8,
    )
    for axis_name in ("y", "x"):
        plt.tick_params(axis=axis_name, direction="out", labelsize=12)
    return fig
#Define interpret for y5 (complications).
def y5_interpret_xgb(*args):
    """Draw a SHAP bar plot from the y5 (complications) XGBoost explainer for one input row.

    Args:
        *args: Feature values, one per column of ``x5``, in column order.

    Returns:
        The matplotlib figure (from ``plt.gcf()``) that ``shap.bar_plot`` drew on.
    """
    # One-row frame; the columns in categorical_columns5 are cast to "category".
    row = pd.DataFrame([args], columns=x5.columns).astype(
        {name: "category" for name in categorical_columns5}
    )
    # Only magnitudes are plotted, so the sign of every SHAP value is dropped.
    magnitudes = np.abs(
        y5_explainer_xgb.shap_values(xgb.DMatrix(row, enable_categorical=True))
    )
    shap.bar_plot(magnitudes[0], max_display=10, show=False, feature_names=f5_names)
    fig, ax = plt.gcf(), plt.gca()
    # Wrap the y tick labels at a width of 20.
    wrap_labels(ax, 20)
    plt.tight_layout()
    fig.set_figheight(7)
    fig.set_figwidth(9)
    plt.xlabel(
        "SHAP value (impact on model output)",
        fontsize=12,
        fontweight="heavy",
        labelpad=8,
    )
    for axis_name in ("y", "x"):
        plt.tick_params(axis=axis_name, direction="out", labelsize=12)
    return fig
def y5_interpret_lgb(*args):
    """Draw a SHAP bar plot from the y5 (complications) LightGBM explainer for one input row.

    Args:
        *args: Feature values, one per column of ``x5_lgb``, in column order.

    Returns:
        The matplotlib figure (from ``plt.gcf()``) that ``shap.bar_plot`` drew on.
    """
    # One-row frame; the columns in categorical_columns5 are cast to "category".
    row = pd.DataFrame([args], columns=x5_lgb.columns).astype(
        {name: "category" for name in categorical_columns5}
    )
    # Only magnitudes are plotted, so the sign of every SHAP value is dropped.
    magnitudes = np.abs(y5_explainer_lgb.shap_values(row))
    # The first entry of the first element is plotted.
    shap.bar_plot(magnitudes[0][0], max_display=10, show=False, feature_names=f5_names)
    fig, ax = plt.gcf(), plt.gca()
    # Wrap the y tick labels at a width of 20.
    wrap_labels(ax, 20)
    plt.tight_layout()
    fig.set_figheight(7)
    fig.set_figwidth(9)
    plt.xlabel(
        "SHAP value (impact on model output)",
        fontsize=12,
        fontweight="heavy",
        labelpad=8,
    )
    for axis_name in ("y", "x"):
        plt.tick_params(axis=axis_name, direction="out", labelsize=12)
    return fig
def y5_interpret_cb(*args):
    """Draw a SHAP bar plot from the y5 (complications) CatBoost explainer for one input row.

    Args:
        *args: Feature values, one per column of ``x5``, in column order.

    Returns:
        The matplotlib figure (from ``plt.gcf()``) that ``shap.bar_plot`` drew on.
    """
    # One-row frame; the columns in categorical_columns5 are cast to "category".
    row = pd.DataFrame([args], columns=x5.columns).astype(
        {name: "category" for name in categorical_columns5}
    )
    # Only magnitudes are plotted, so the sign of every SHAP value is dropped.
    magnitudes = np.abs(
        y5_explainer_cb.shap_values(Pool(row, cat_features=categorical_columns5))
    )
    shap.bar_plot(magnitudes[0], max_display=10, show=False, feature_names=f5_names)
    fig, ax = plt.gcf(), plt.gca()
    # Wrap the y tick labels at a width of 20.
    wrap_labels(ax, 20)
    plt.tight_layout()
    fig.set_figheight(7)
    fig.set_figwidth(9)
    plt.xlabel(
        "SHAP value (impact on model output)",
        fontsize=12,
        fontweight="heavy",
        labelpad=8,
    )
    for axis_name in ("y", "x"):
        plt.tick_params(axis=axis_name, direction="out", labelsize=12)
    return fig
def y5_interpret_rf(*args):
    """Draw a SHAP bar plot from the y5 (complications) random forest explainer for one input row.

    Args:
        *args: Feature values, one per column of ``x5_rf``, in column order.

    Returns:
        The matplotlib figure (from ``plt.gcf()``) that ``shap.bar_plot`` drew on.
    """
    df5 = pd.DataFrame([args], columns=x5_rf.columns)
    df5 = df5.astype({col: "category" for col in categorical_columns5})
    # A single name (shap_values5) is used from explainer output through to
    # the plot call, matching the other y5 interpret functions.
    shap_values5 = y5_explainer_rf.shap_values(df5)
    # Only magnitudes are plotted, so the sign of every SHAP value is dropped.
    shap_values5 = np.abs(shap_values5)
    # The first entry of the first element is plotted.
    shap.bar_plot(shap_values5[0][0], max_display=10, show=False, feature_names=f5_names)
    fig = plt.gcf()
    ax = plt.gca()
    # Wrap the y tick labels at a width of 20.
    wrap_labels(ax, 20)
    plt.tight_layout()
    fig.set_figheight(7)
    fig.set_figwidth(9)
    plt.xlabel("SHAP value (impact on model output)", fontsize=12, fontweight='heavy', labelpad=8)
    plt.tick_params(axis="y", direction="out", labelsize=12)
    plt.tick_params(axis="x", direction="out", labelsize=12)
    return fig
with gr.Blocks(title = "NTDB-Epidural") as demo:
gr.Markdown(
"""