BiswajitPadhi99 committed on
Commit
7c3768c
·
1 Parent(s): d870da0

Add app.py

Browse files
Files changed (2) hide show
  1. app.py +555 -0
  2. requirements.txt +8 -0
app.py ADDED
@@ -0,0 +1,555 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # app.py
3
+
4
+ import streamlit as st
5
+ import pandas as pd
6
+ import matplotlib.pyplot as plt
7
+ import seaborn as sns
8
+ import plotly.express as px
9
+ import plotly.graph_objects as go
10
+
11
+ # ---------------------------
12
+ # Function Definitions
13
+ # ---------------------------
14
+
15
def create_histogram(df):
    """Render a histogram of patient age in the Streamlit app.

    Args:
        df: DataFrame with an ``anchor_age`` column.
    """
    fig, ax = plt.subplots(figsize=(5, 3.5))
    sns.histplot(df['anchor_age'], bins=30, kde=True, color='skyblue', ax=ax)
    ax.set_xlabel("Age")
    ax.set_ylabel("Number of Admissions")
    ax.set_title("Age Distribution")
    fig.tight_layout()
    st.pyplot(fig)
    # pyplot keeps every figure it created alive until it is closed; release
    # it once rendered so repeated renders do not pile up open figures.
    plt.close(fig)
24
+
25
def create_gender_bar_chart(df):
    """Render a count plot of admissions per gender in the Streamlit app.

    Args:
        df: DataFrame with a ``gender`` column.
    """
    fig, ax = plt.subplots(figsize=(5, 3.5))
    sns.countplot(data=df, x='gender', palette='pastel', ax=ax)
    ax.set_title("Gender Distribution")
    ax.set_xlabel("Gender")
    ax.set_ylabel("Number of Admissions")
    fig.tight_layout()
    st.pyplot(fig)
    # Close the figure after rendering; pyplot otherwise retains it.
    plt.close(fig)
34
+
35
def create_stacked_bar_admission_race(df):
    """Render a 100%-stacked bar chart of admission types within each race.

    Args:
        df: DataFrame with ``race`` and ``admission_type`` columns.
    """
    # Rows: race, columns: admission type, values: admission counts.
    admission_race = df.groupby(['race', 'admission_type']).size().unstack(fill_value=0)
    # Normalise each race row so its admission types sum to 100.
    admission_race_percent = admission_race.div(admission_race.sum(axis=1), axis=0) * 100

    # Draw on an explicit figure/axes instead of relying on whatever figure
    # happens to be "current" after DataFrame.plot.
    fig, ax = plt.subplots(figsize=(8, 6))
    admission_race_percent.plot(kind='bar', stacked=True, colormap='tab20', ax=ax)
    ax.set_title("Admission Types by Race (%)")
    ax.set_xlabel("Race")
    ax.set_ylabel("Percentage of Admission Types")
    # Anchor the legend outside the axes so it does not cover the bars.
    ax.legend(title='Admission Type', bbox_to_anchor=(1.05, 1), loc='upper left')
    fig.tight_layout()
    st.pyplot(fig)
    # Close the figure after rendering; pyplot otherwise retains it.
    plt.close(fig)
47
+
48
def create_los_by_race(df):
    """Render a box plot of length of stay grouped by race.

    Args:
        df: DataFrame with ``race`` and ``los`` columns.
    """
    fig, ax = plt.subplots(figsize=(6, 4))
    sns.boxplot(data=df, x='race', y='los', palette='Pastel1', ax=ax)
    ax.set_title("Length of Stay by Race")
    ax.set_xlabel("Race")
    ax.set_ylabel("Length of Stay (Days)")
    # Rotate the existing tick labels rather than re-assigning them.
    ax.tick_params(axis='x', labelrotation=45)
    fig.tight_layout()
    st.pyplot(fig)
    # Close the figure after rendering; pyplot otherwise retains it.
    plt.close(fig)
58
+
59
def create_correlation_heatmap(df):
    """Render an annotated correlation heatmap of age and length of stay.

    Args:
        df: DataFrame with ``anchor_age`` and ``los`` columns.
    """
    corr_matrix = df[['anchor_age', 'los']].corr()

    fig, ax = plt.subplots(figsize=(3.5, 3))
    sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', fmt=".2f", ax=ax)
    ax.set_title("Correlation Heatmap")
    fig.tight_layout()
    st.pyplot(fig)
    # Close the figure after rendering; pyplot otherwise retains it.
    plt.close(fig)
69
+
70
def create_time_series_heatmap(df):
    """Render a year-by-month heatmap of admission counts.

    The input frame is left untouched; the month column is converted to an
    ordered categorical on a copy.

    Args:
        df: DataFrame with ``admission_year`` and ``admission_month`` columns,
            the latter holding English month names.
    """
    month_order = ['January', 'February', 'March', 'April', 'May', 'June',
                   'July', 'August', 'September', 'October', 'November', 'December']
    ordered_months = pd.Categorical(df['admission_month'], categories=month_order, ordered=True)

    # assign() returns a new frame, so the caller's DataFrame is not modified.
    # observed=False keeps year/month combinations with zero admissions.
    heatmap_df = (
        df.assign(admission_month=ordered_months)
        .groupby(['admission_year', 'admission_month'], observed=False)
        .size()
        .reset_index(name='counts')
    )

    fig = px.density_heatmap(
        heatmap_df,
        x='admission_month',
        y='admission_year',
        z='counts',
        histfunc='sum',
        title='Admissions Over Time',
        labels={'counts': 'Number of Admissions'},
        color_continuous_scale='Blues'
    )

    # Force calendar order on the x axis instead of alphabetical order.
    fig.update_xaxes(categoryorder='array', categoryarray=month_order)
    fig.update_layout(yaxis=dict(autorange='reversed'))
    fig.update_traces(colorbar=dict(title='Admissions'))
    st.plotly_chart(fig, use_container_width=True)
93
+
94
def create_mortality_by_race(df):
    """Render a bar chart of in-hospital mortality rate (%) per race.

    Args:
        df: DataFrame with ``race`` and ``hospital_expire_flag`` columns.
    """
    # The mean of the expire flag per race is scaled to a percentage.
    mortality_race = df.groupby('race')['hospital_expire_flag'].mean().reset_index()
    mortality_race['mortality_rate'] = mortality_race['hospital_expire_flag'] * 100

    fig, ax = plt.subplots(figsize=(6, 4))
    sns.barplot(data=mortality_race, x='race', y='mortality_rate', palette='Set2', ax=ax)
    ax.set_title("Mortality Rate by Race")
    ax.set_xlabel("Race")
    ax.set_ylabel("Mortality Rate (%)")
    # Rotate the existing tick labels rather than re-assigning them.
    ax.tick_params(axis='x', labelrotation=45)
    fig.tight_layout()
    st.pyplot(fig)
    # Close the figure after rendering; pyplot otherwise retains it.
    plt.close(fig)
107
+
108
def create_mortality_by_gender(df):
    """Render a bar chart of in-hospital mortality rate (%) per gender.

    Args:
        df: DataFrame with ``gender`` and ``hospital_expire_flag`` columns.
    """
    # The mean of the expire flag per gender is scaled to a percentage.
    mortality_gender = df.groupby('gender')['hospital_expire_flag'].mean().reset_index()
    mortality_gender['mortality_rate'] = mortality_gender['hospital_expire_flag'] * 100

    fig, ax = plt.subplots(figsize=(6, 4))
    sns.barplot(data=mortality_gender, x='gender', y='mortality_rate', palette='Set3', ax=ax)
    ax.set_title("Mortality Rate by Gender")
    ax.set_xlabel("Gender")
    ax.set_ylabel("Mortality Rate (%)")
    fig.tight_layout()
    st.pyplot(fig)
    # Close the figure after rendering; pyplot otherwise retains it.
    plt.close(fig)
120
+
121
def create_mortality_by_age_group(df):
    """Render a bar chart of in-hospital mortality rate (%) per age group.

    Ages are bucketed into right-closed intervals so that each bucket matches
    its label: 0-30, 31-50, 51-70, 71-90, 91-120. The input frame is not
    modified.

    Args:
        df: DataFrame with ``anchor_age`` and ``hospital_expire_flag`` columns.
    """
    bins = [0, 30, 50, 70, 90, 120]
    labels = ['0-30', '31-50', '51-70', '71-90', '91-120']
    # right=True puts a boundary age (e.g. 30) in the bucket whose label ends
    # with it; include_lowest=True keeps age 0 inside the first bucket.
    age_group = pd.cut(
        df['anchor_age'],
        bins=bins,
        labels=labels,
        right=True,
        include_lowest=True,
    ).rename('age_group')

    # Group by the derived Series instead of writing a new column into df.
    # observed=False keeps age groups that have no admissions on the axis.
    mortality_age = (
        df.groupby(age_group, observed=False)['hospital_expire_flag']
        .mean()
        .reset_index()
    )
    mortality_age['mortality_rate'] = mortality_age['hospital_expire_flag'] * 100

    fig, ax = plt.subplots(figsize=(6, 4))
    sns.barplot(data=mortality_age, x='age_group', y='mortality_rate', palette='coolwarm', ax=ax)
    ax.set_title("Mortality Rate by Age Group")
    ax.set_xlabel("Age Group")
    ax.set_ylabel("Mortality Rate (%)")
    fig.tight_layout()
    st.pyplot(fig)
    # Close the figure after rendering; pyplot otherwise retains it.
    plt.close(fig)
138
+
139
def create_violin_age_race_mortality(df):
    """Render violin plots of age per race, coloured by mortality outcome.

    Args:
        df: DataFrame with ``race``, ``anchor_age`` and
            ``hospital_expire_flag`` columns.
    """
    # Split violins show one half per hue level, which only works with two
    # outcomes; older seaborn releases reject split=True otherwise. Fall back
    # to whole violins when the filtered data holds a single outcome.
    has_two_outcomes = df['hospital_expire_flag'].nunique() == 2

    fig, ax = plt.subplots(figsize=(8, 6))
    sns.violinplot(
        data=df,
        x='race',
        y='anchor_age',
        hue='hospital_expire_flag',
        split=has_two_outcomes,
        palette='Set2',
        ax=ax
    )
    ax.set_title("Age Distribution by Race and Mortality")
    ax.set_xlabel("Race")
    ax.set_ylabel("Age")
    ax.legend(title='Mortality', loc='upper right')
    fig.tight_layout()
    st.pyplot(fig)
    # Close the figure after rendering; pyplot otherwise retains it.
    plt.close(fig)
157
+
158
def create_heatmap_race_gender_mortality(df):
    """Render a race-by-gender heatmap of in-hospital mortality rate (%).

    Args:
        df: DataFrame with ``race``, ``gender`` and ``hospital_expire_flag``
            columns.
    """
    # Mean expire flag per (race, gender) cell, scaled to a percentage.
    pivot_table = df.pivot_table(
        index='race',
        columns='gender',
        values='hospital_expire_flag',
        aggfunc='mean'
    ) * 100

    fig, ax = plt.subplots(figsize=(8, 6))
    sns.heatmap(pivot_table, annot=True, fmt=".1f", cmap='YlOrRd', ax=ax)
    ax.set_title("Mortality Rate by Race and Gender (%)")
    ax.set_xlabel("Gender")
    ax.set_ylabel("Race")
    fig.tight_layout()
    st.pyplot(fig)
    # Close the figure after rendering; pyplot otherwise retains it.
    plt.close(fig)
174
+
175
def create_parallel_coordinates(df):
    """Render a parallel-coordinates plot of demographics and outcomes.

    Age, length of stay and the mortality flag are plotted together with
    integer category codes for race and gender; lines are coloured by the
    mortality flag.
    """
    axis_labels = {
        'anchor_age': 'Age',
        'los': 'Length of Stay',
        'hospital_expire_flag': 'Mortality',
        'race_code': 'Race',
        'gender_code': 'Gender'
    }

    # Start from a copy of the numeric columns, then append one integer-coded
    # column per categorical attribute.
    plot_data = df[['anchor_age', 'los', 'hospital_expire_flag']].copy()
    for source_col, code_col in (('race', 'race_code'), ('gender', 'gender_code')):
        plot_data[code_col] = df[source_col].astype('category').cat.codes

    fig = px.parallel_coordinates(
        plot_data,
        color='hospital_expire_flag',
        labels=axis_labels,
        color_continuous_scale=px.colors.diverging.Tealrose,
        color_continuous_midpoint=0.5
    )
    fig.update_layout(title='Parallel Coordinates Plot of Demographics and Outcomes')

    st.plotly_chart(fig, use_container_width=True)
201
+
202
def create_treemap_race_mortality(df):
    """Render a treemap of admission counts nested by race, then outcome."""
    outcome_names = {0: 'Survived', 1: 'Died'}
    outcome_colors = {'Survived': '#66b3ff', 'Died': '#ff6666'}

    # One row per (race, outcome) pair with its admission count.
    counts = df.groupby(['race', 'hospital_expire_flag']).size().reset_index(name='counts')
    counts['Mortality'] = counts['hospital_expire_flag'].map(outcome_names)

    fig = px.treemap(
        counts,
        path=['race', 'Mortality'],
        values='counts',
        color='Mortality',
        color_discrete_map=outcome_colors,
        title='Treemap of Race and Mortality'
    )
    fig.update_layout(margin=dict(t=30, l=0, r=0, b=0))

    st.plotly_chart(fig, use_container_width=True)
217
+
218
def create_sankey_race_mortality(df):
    """Render a Sankey diagram with flows from each race to each outcome.

    Link widths are the number of admissions for each (race, outcome) pair.
    """
    flows = df.groupby(['race', 'hospital_expire_flag']).size().reset_index(name='counts')
    flows['Mortality'] = flows['hospital_expire_flag'].map({0: 'Survived', 1: 'Died'})

    # Node list: all race nodes first, then the outcome nodes.
    race_nodes = flows['race'].unique().tolist()
    outcome_nodes = flows['Mortality'].unique().tolist()
    labels = race_nodes + outcome_nodes

    # Sankey links refer to nodes by their position in the label list.
    position = dict(zip(labels, range(len(labels))))
    source_indices = [position[name] for name in flows['race'].tolist()]
    target_indices = [position[name] for name in flows['Mortality'].tolist()]

    # One colour for race nodes, another for outcome nodes.
    race_color = "#FFA07A"  # Light Salmon
    mortality_color = "#20B2AA"  # Light Sea Green
    node_colors = [race_color] * len(race_nodes) + [mortality_color] * len(outcome_nodes)

    node_spec = dict(
        pad=15,
        thickness=20,
        line=dict(color="black", width=0.5),
        label=labels,
        color=node_colors
    )
    link_spec = dict(
        source=source_indices,
        target=target_indices,
        value=flows['counts'].tolist()
    )

    fig = go.Figure(data=[go.Sankey(node=node_spec, link=link_spec)])
    fig.update_layout(
        title_text="Sankey Diagram of Race and Mortality Outcomes",
        font_size=10
    )

    st.plotly_chart(fig, use_container_width=True)
272
+
273
+ # ---------------------------
274
+ # Streamlit Application
275
+ # ---------------------------
276
+
277
# Page configuration; the same text is used for the browser tab and heading.
APP_TITLE = "MIMIC-IV ICU Patient Data Dashboard"
st.set_page_config(page_title=APP_TITLE, layout="wide", initial_sidebar_state="expanded")

# Heading and introductory description
st.title(APP_TITLE)
st.markdown("""
Explore the general feature distribution and outcome metrics of ICU patients from the MIMIC-IV dataset. Utilize the sidebar filters to customize the data view and interact with various visualizations to uncover patterns and insights.
""")

# Heading for the sidebar filter widgets
st.sidebar.header("Filter Data")
292
+
293
@st.cache_data
def load_data():
    """Load, merge and clean the admissions and patients tables.

    Reads ``data/admissions.csv`` and ``data/patients.csv``, collapses the
    detailed race labels into broad categories, left-joins patients onto
    admissions by ``subject_id``, drops rows missing age, gender, race or the
    mortality flag, parses the timestamp columns and derives ``los`` (whole
    days between admission and discharge), ``admission_year``,
    ``admission_month`` (month name) and ``admittime_date``.

    Returns:
        The merged and cleaned DataFrame, one row per retained admission.
    """
    # Load the dataframes (update the paths as necessary)
    admissions_df = pd.read_csv('data/admissions.csv')
    patients_df = pd.read_csv('data/patients.csv')

    race_map = {
        "WHITE": "WHITE",
        "BLACK/AFRICAN AMERICAN": "BLACK",
        "OTHER": "OTHER",
        "UNKNOWN": "UNKNOWN",
        "HISPANIC/LATINO - PUERTO RICAN": "HISPANIC",
        "WHITE - OTHER EUROPEAN": "WHITE",
        "HISPANIC OR LATINO": "HISPANIC",
        "ASIAN": "ASIAN",
        "ASIAN - CHINESE": "ASIAN",
        "WHITE - RUSSIAN": "WHITE",
        "BLACK/CAPE VERDEAN": "BLACK",
        "HISPANIC/LATINO - DOMINICAN": "HISPANIC",
        "BLACK/CARIBBEAN ISLAND": "BLACK",
        "BLACK/AFRICAN": "BLACK",
        "PATIENT DECLINED TO ANSWER": "UNKNOWN",
        "UNABLE TO OBTAIN": "UNKNOWN",
        "PORTUGUESE": "WHITE",
        "ASIAN - SOUTH EAST ASIAN": "ASIAN",
        "HISPANIC/LATINO - GUATEMALAN": "HISPANIC",
        "ASIAN - ASIAN INDIAN": "ASIAN",
        "WHITE - EASTERN EUROPEAN": "WHITE",
        "WHITE - BRAZILIAN": "WHITE",
        "AMERICAN INDIAN/ALASKA NATIVE": "NATIVES",
        "HISPANIC/LATINO - SALVADORAN": "HISPANIC",
        "HISPANIC/LATINO - MEXICAN": "HISPANIC",
        "HISPANIC/LATINO - COLUMBIAN": "HISPANIC",
        "MULTIPLE RACE/ETHNICITY": "MULTI-ETHNIC",
        "HISPANIC/LATINO - HONDURAN": "HISPANIC",
        "ASIAN - KOREAN": "ASIAN",
        "SOUTH AMERICAN": "HISPANIC",
        "HISPANIC/LATINO - CUBAN": "HISPANIC",
        "HISPANIC/LATINO - CENTRAL AMERICAN": "HISPANIC",
        "NATIVE HAWAIIAN OR OTHER PACIFIC ISLANDER": "NATIVES",
    }

    # Collapse detailed labels into broad groups. A label that is present but
    # missing from race_map is bucketed as "OTHER" so the admission is not
    # silently dropped below; genuinely missing values stay NaN.
    raw_race = admissions_df['race']
    grouped_race = raw_race.map(race_map)
    admissions_df['race'] = grouped_race.mask(grouped_race.isna() & raw_race.notna(), 'OTHER')

    # Merge admissions and patients data on 'subject_id'
    merged_df = pd.merge(admissions_df, patients_df, on='subject_id', how='left')

    # Handle missing values by dropping rows with critical missing data
    merged_df = merged_df.dropna(subset=['anchor_age', 'gender', 'race', 'hospital_expire_flag'])

    # Convert datetime columns
    merged_df['admittime'] = pd.to_datetime(merged_df['admittime'])
    merged_df['dischtime'] = pd.to_datetime(merged_df['dischtime'])
    # Unparseable death times become NaT instead of raising.
    merged_df['deathtime'] = pd.to_datetime(merged_df['deathtime'], errors='coerce')

    # Create derived features
    merged_df['los'] = (merged_df['dischtime'] - merged_df['admittime']).dt.days
    merged_df['admission_year'] = merged_df['admittime'].dt.year
    merged_df['admission_month'] = merged_df['admittime'].dt.month_name()
    merged_df['admittime_date'] = merged_df['admittime'].dt.date

    return merged_df


merged_df = load_data()
360
+
361
+ # Sidebar Filters Function
362
def add_sidebar_filters(df):
    """Render the sidebar filter widgets and return the matching rows.

    Adds multiselects for admission type, insurance, gender and race (all
    options selected by default) and a range slider over admission year.
    Missing values in a filtered column are shown as an "Unknown" option so
    that building the option list cannot fail on NaN and those rows remain
    selectable.

    Args:
        df: DataFrame with ``admission_type``, ``insurance``, ``gender``,
            ``race`` and ``admission_year`` columns.

    Returns:
        The rows of ``df`` that satisfy every selected filter.
    """
    def category_mask(label, column):
        """Render one multiselect and return the boolean mask it selects."""
        # sorted() cannot order NaN against strings, so label missing values.
        values = df[column].fillna("Unknown")
        options = sorted(values.unique())
        chosen = st.sidebar.multiselect(label, options=options, default=options)
        return values.isin(chosen)

    # Widgets are created in display order: type, insurance, gender, race.
    keep = (
        category_mask("Select Admission Type(s):", 'admission_type')
        & category_mask("Select Insurance Type(s):", 'insurance')
        & category_mask("Select Gender(s):", 'gender')
        & category_mask("Select Race(s):", 'race')
    )

    # Year Range
    min_year = int(df['admission_year'].min())
    max_year = int(df['admission_year'].max())
    first_year, last_year = st.sidebar.slider(
        "Select Admission Year Range:",
        min_value=min_year,
        max_value=max_year,
        value=(min_year, max_year)
    )
    # between() is inclusive on both ends.
    keep &= df['admission_year'].between(first_year, last_year)

    return df[keep]


filtered_df = add_sidebar_filters(merged_df)
418
+
419
# Display Summary Statistics for Q1
st.header("Summary Statistics")

# With no matching rows the averages below would be NaN and the plots have
# nothing to draw, so tell the user and halt this run instead.
if filtered_df.empty:
    st.warning("No admissions match the selected filters. Adjust the filters in the sidebar.")
    st.stop()

# Create four columns for metrics
col1, col2, col3, col4 = st.columns(4)

with col1:
    total_admissions = filtered_df.shape[0]
    st.metric("Total Admissions", f"{total_admissions:,}")

with col2:
    average_age = filtered_df['anchor_age'].mean()
    st.metric("Average Age", f"{average_age:.2f} years")

with col3:
    # Counts are per row of the filtered frame; a gender absent from the
    # selection is reported as 0.
    gender_counts = filtered_df['gender'].value_counts()
    male_count = gender_counts.get('M', 0)
    female_count = gender_counts.get('F', 0)
    st.metric("Male Patients", f"{male_count:,}")
    st.metric("Female Patients", f"{female_count:,}")

with col4:
    # Mean of the expire flag expressed as a percentage.
    mortality_rate = filtered_df['hospital_expire_flag'].mean() * 100
    st.metric("Mortality Rate", f"{mortality_rate:.2f}%")

st.markdown("---")
445
+
446
# Two tabs: general overview (Q1) and potential biases (Q2)
tabs = st.tabs(["General Overview", "Potential Biases"])

# ---------------------------
# Q1: General Overview
# ---------------------------
with tabs[0]:
    st.subheader("General Feature Distribution and Outcome Metrics")

    # Plots per grid row
    num_cols = 2

    # Each entry pairs a heading with a zero-argument callable that draws the
    # plot into whichever Streamlit container is active when it is called.
    q1_plots = [
        {"title": title, "plot": plot}
        for title, plot in (
            ("Age Distribution of ICU Patients",
             lambda: create_histogram(filtered_df)),
            ("Gender Distribution of ICU Patients",
             lambda: create_gender_bar_chart(filtered_df)),
            ("Admission Types by Race",
             lambda: create_stacked_bar_admission_race(filtered_df)),
            ("Length of Stay by Race",
             lambda: create_los_by_race(filtered_df)),
            ("Correlation Heatmap of Age and LOS",
             lambda: create_correlation_heatmap(filtered_df)),
            ("Admissions Over Time",
             lambda: create_time_series_heatmap(filtered_df)),
        )
    ]

    # Lay the plots out row by row; a short last row leaves columns empty.
    for row_start in range(0, len(q1_plots), num_cols):
        cols = st.columns(num_cols)
        row_plots = q1_plots[row_start:row_start + num_cols]
        for col, spec in zip(cols, row_plots):
            with col:
                st.subheader(spec["title"])
                spec["plot"]()
494
+
495
# ---------------------------
# Q2: Potential Biases
# ---------------------------
with tabs[1]:
    st.subheader("Analyzing Potential Biases Across Demographics")

    # Plots per grid row
    num_cols = 2

    # Heading plus a zero-argument drawing callable for every Q2 plot
    q2_plots = [
        {"title": "Mortality Rate by Race",
         "plot": lambda: create_mortality_by_race(filtered_df)},
        {"title": "Mortality Rate by Gender",
         "plot": lambda: create_mortality_by_gender(filtered_df)},
        {"title": "Mortality Rate by Age Group",
         "plot": lambda: create_mortality_by_age_group(filtered_df)},
        {"title": "Age Distribution by Race and Mortality",
         "plot": lambda: create_violin_age_race_mortality(filtered_df)},
        {"title": "Heatmap: Race & Gender vs. Mortality",
         "plot": lambda: create_heatmap_race_gender_mortality(filtered_df)},
        {"title": "Parallel Coordinates Plot of Demographics and Outcomes",
         "plot": lambda: create_parallel_coordinates(filtered_df)},
        {"title": "Treemap of Race and Mortality",
         "plot": lambda: create_treemap_race_mortality(filtered_df)},
        {"title": "Sankey Diagram: Race to Mortality Outcomes",
         "plot": lambda: create_sankey_race_mortality(filtered_df)},
    ]

    # Walk the plots in order, opening a fresh row of columns whenever the
    # previous row is full.
    for position, entry in enumerate(q2_plots):
        slot = position % num_cols
        if slot == 0:
            cols = st.columns(num_cols)
        with cols[slot]:
            st.subheader(entry["title"])
            entry["plot"]()
548
+
549
# Footer: horizontal rule followed by data source and project credits
footer_markdown = """
---
**Data Source:** MIMIC-IV Dataset
**Project:** Investigating Biases in ICU Patient Data
**Developed with:** Streamlit, Python
"""
st.markdown(footer_markdown)
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ streamlit
2
+ pandas
3
+ numpy
4
+ matplotlib
5
+ seaborn
6
+ plotly
7
+ lifelines
8
+ scikit-learn