BiswajitPadhi99 committed on
Commit
4380ba4
·
1 Parent(s): 9c9b1b7

Add visuals in app.py

Browse files
Files changed (1) hide show
  1. app.py +188 -188
app.py CHANGED
@@ -1,6 +1,3 @@
1
-
2
- # streamlit_app.py
3
-
4
  import streamlit as st
5
  import pandas as pd
6
  import matplotlib.pyplot as plt
@@ -8,64 +5,74 @@ import seaborn as sns
8
  import plotly.express as px
9
  import plotly.graph_objects as go
10
 
11
- # ---------------------------
12
- # Function Definitions
13
- # ---------------------------
14
-
15
- def create_histogram(df):
16
- """Creates a histogram for Age Distribution."""
17
- fig, ax = plt.subplots(figsize=(5, 3.5))
18
- sns.histplot(df['anchor_age'], bins=30, kde=True, color='skyblue', ax=ax)
19
- ax.set_xlabel("Age")
20
- ax.set_ylabel("Number of Admissions")
21
- ax.set_title("Age Distribution")
22
- plt.tight_layout()
23
- st.pyplot(fig)
24
 
25
- def create_gender_bar_chart(df):
26
- """Creates a bar chart for Gender Distribution."""
27
- fig, ax = plt.subplots(figsize=(5, 3.5))
28
- sns.countplot(data=df, x='gender', palette='pastel', ax=ax)
29
- ax.set_title("Gender Distribution")
30
- ax.set_xlabel("Gender")
31
- ax.set_ylabel("Number of Admissions")
32
- plt.tight_layout()
33
- st.pyplot(fig)
34
-
35
- def create_stacked_bar_admission_race(df):
36
- """Creates a stacked bar chart for Admission Types by Race."""
37
- admission_race = df.groupby(['race', 'admission_type']).size().unstack(fill_value=0)
38
- admission_race_percent = admission_race.div(admission_race.sum(axis=1), axis=0) * 100
39
 
40
- admission_race_percent.plot(kind='bar', stacked=True, figsize=(8, 6), colormap='tab20')
41
- plt.title("Admission Types by Race (%)")
42
- plt.xlabel("Race")
43
- plt.ylabel("Percentage of Admission Types")
44
- plt.legend(title='Admission Type', bbox_to_anchor=(1.05, 1), loc='upper left')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  plt.tight_layout()
46
  st.pyplot(plt.gcf())
47
-
48
- def create_los_by_race(df):
49
- """Creates a box plot for Length of Stay by Race."""
50
- fig, ax = plt.subplots(figsize=(6, 4))
51
- sns.boxplot(data=df, x='race', y='los', palette='Pastel1', ax=ax)
52
- ax.set_title("Length of Stay by Race")
53
- ax.set_xlabel("Race")
54
- ax.set_ylabel("Length of Stay (Days)")
55
- ax.set_xticklabels(ax.get_xticklabels(), rotation=45)
56
- plt.tight_layout()
57
- st.pyplot(fig)
58
-
59
- def create_correlation_heatmap(df):
60
- """Creates a correlation heatmap for numerical features."""
61
- numerical_features = df[['anchor_age', 'los']]
62
- corr_matrix = numerical_features.corr()
63
-
64
- fig, ax = plt.subplots(figsize=(3.5, 3))
65
- sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', fmt=".2f", ax=ax)
66
- ax.set_title("Correlation Heatmap")
67
- plt.tight_layout()
68
- st.pyplot(fig)
69
 
70
  def create_time_series_heatmap(df):
71
  """Creates an admissions over time heatmap."""
@@ -81,16 +88,76 @@ def create_time_series_heatmap(df):
81
  y='admission_year',
82
  z='counts',
83
  histfunc='sum',
84
- title='Admissions Over Time',
85
- labels={'counts': 'Number of Admissions'},
86
- color_continuous_scale='Blues'
87
  )
88
-
89
  fig.update_xaxes(categoryorder='array', categoryarray=month_order)
90
  fig.update_layout(yaxis=dict(autorange='reversed'))
91
  fig.update_traces(colorbar=dict(title='Admissions'))
92
  st.plotly_chart(fig, use_container_width=True)
93
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
  def create_mortality_by_race(df):
95
  """Creates a bar chart for Mortality Rate by Race."""
96
  mortality_race = df.groupby('race')['hospital_expire_flag'].mean().reset_index()
@@ -98,7 +165,6 @@ def create_mortality_by_race(df):
98
 
99
  fig, ax = plt.subplots(figsize=(6, 4))
100
  sns.barplot(data=mortality_race, x='race', y='mortality_rate', palette='Set2', ax=ax)
101
- ax.set_title("Mortality Rate by Race")
102
  ax.set_xlabel("Race")
103
  ax.set_ylabel("Mortality Rate (%)")
104
  ax.set_xticklabels(ax.get_xticklabels(), rotation=45)
@@ -112,7 +178,6 @@ def create_mortality_by_gender(df):
112
 
113
  fig, ax = plt.subplots(figsize=(6, 4))
114
  sns.barplot(data=mortality_gender, x='gender', y='mortality_rate', palette='Set3', ax=ax)
115
- ax.set_title("Mortality Rate by Gender")
116
  ax.set_xlabel("Gender")
117
  ax.set_ylabel("Mortality Rate (%)")
118
  plt.tight_layout()
@@ -120,7 +185,6 @@ def create_mortality_by_gender(df):
120
 
121
  def create_mortality_by_age_group(df):
122
  """Creates a bar chart for Mortality Rate by Age Group."""
123
- # Define age bins and labels
124
  bins = [0, 30, 50, 70, 90, 120]
125
  labels = ['0-30', '31-50', '51-70', '71-90', '91-120']
126
  df['age_group'] = pd.cut(df['anchor_age'], bins=bins, labels=labels, right=False)
@@ -130,7 +194,6 @@ def create_mortality_by_age_group(df):
130
 
131
  fig, ax = plt.subplots(figsize=(6, 4))
132
  sns.barplot(data=mortality_age, x='age_group', y='mortality_rate', palette='coolwarm', ax=ax)
133
- ax.set_title("Mortality Rate by Age Group")
134
  ax.set_xlabel("Age Group")
135
  ax.set_ylabel("Mortality Rate (%)")
136
  plt.tight_layout()
@@ -148,7 +211,6 @@ def create_violin_age_race_mortality(df):
148
  palette='Set2',
149
  ax=ax
150
  )
151
- ax.set_title("Age Distribution by Race and Mortality")
152
  ax.set_xlabel("Race")
153
  ax.set_ylabel("Age")
154
  ax.legend(title='Mortality', loc='upper right')
@@ -162,42 +224,15 @@ def create_heatmap_race_gender_mortality(df):
162
  columns='gender',
163
  values='hospital_expire_flag',
164
  aggfunc='mean'
165
- ) * 100 # Convert to percentage
166
-
167
  fig, ax = plt.subplots(figsize=(8, 6))
168
  sns.heatmap(pivot_table, annot=True, fmt=".1f", cmap='YlOrRd', ax=ax)
169
- ax.set_title("Mortality Rate by Race and Gender (%)")
170
  ax.set_xlabel("Gender")
171
  ax.set_ylabel("Race")
172
  plt.tight_layout()
173
  st.pyplot(fig)
174
 
175
- def create_parallel_coordinates(df):
176
- """Creates a parallel coordinates plot for Demographics and Outcomes."""
177
- # Select relevant numerical features
178
- parallel_df = df[['anchor_age', 'los', 'hospital_expire_flag']].copy()
179
-
180
- # Encode categorical variables numerically
181
- parallel_df['race_code'] = df['race'].astype('category').cat.codes
182
- parallel_df['gender_code'] = df['gender'].astype('category').cat.codes
183
-
184
- # Create the parallel coordinates plot
185
- fig = px.parallel_coordinates(
186
- parallel_df,
187
- color='hospital_expire_flag',
188
- labels={
189
- 'anchor_age': 'Age',
190
- 'los': 'Length of Stay',
191
- 'hospital_expire_flag': 'Mortality',
192
- 'race_code': 'Race',
193
- 'gender_code': 'Gender'
194
- },
195
- color_continuous_scale=px.colors.diverging.Tealrose,
196
- color_continuous_midpoint=0.5
197
- )
198
-
199
- fig.update_layout(title='Parallel Coordinates Plot of Demographics and Outcomes')
200
- st.plotly_chart(fig, use_container_width=True)
201
 
202
  def create_treemap_race_mortality(df):
203
  """Creates a treemap for Race and Mortality."""
@@ -209,70 +244,12 @@ def create_treemap_race_mortality(df):
209
  path=['race', 'Mortality'],
210
  values='counts',
211
  color='Mortality',
212
- color_discrete_map={'Survived':'#66b3ff','Died':'#ff6666'},
213
- title='Treemap of Race and Mortality'
214
  )
215
  fig.update_layout(margin = dict(t=30, l=0, r=0, b=0))
216
  st.plotly_chart(fig, use_container_width=True)
217
 
218
- def create_sankey_race_mortality(df):
219
- """Creates a Sankey diagram for Race to Mortality Outcomes."""
220
- sankey_df = df.groupby(['race', 'hospital_expire_flag']).size().reset_index(name='counts')
221
-
222
- # Map 'hospital_expire_flag' to 'Mortality' status
223
- sankey_df['Mortality'] = sankey_df['hospital_expire_flag'].map({0: 'Survived', 1: 'Died'})
224
-
225
- # Create source and target labels
226
- source = sankey_df['race'].tolist()
227
- target = sankey_df['Mortality'].tolist()
228
- values = sankey_df['counts'].tolist()
229
-
230
- # Create a list of unique labels ensuring no duplicates
231
- unique_races = sankey_df['race'].unique().tolist()
232
- unique_mortality = sankey_df['Mortality'].unique().tolist()
233
- labels = unique_races + unique_mortality
234
-
235
-
236
- # Create a mapping from label to index for efficient lookup
237
- label_to_index = {label: idx for idx, label in enumerate(labels)}
238
-
239
- # Map source and target labels to their corresponding indices
240
- source_indices = [label_to_index[s] for s in source]
241
- target_indices = [label_to_index[t] for t in target]
242
-
243
- # Optionally, define colors for different node types
244
- # For example, races could have one color and mortality outcomes another
245
- race_color = "#FFA07A" # Light Salmon
246
- mortality_color = "#20B2AA" # Light Sea Green
247
- node_colors = [race_color] * len(unique_races) + [mortality_color] * len(unique_mortality)
248
-
249
- # Create the Sankey diagram
250
- fig = go.Figure(data=[go.Sankey(
251
- node=dict(
252
- pad=15,
253
- thickness=20,
254
- line=dict(color="black", width=0.5),
255
- label=labels,
256
- color=node_colors
257
- ),
258
- link=dict(
259
- source=source_indices,
260
- target=target_indices,
261
- value=values
262
- )
263
- )])
264
-
265
- # Add title to the layout
266
- fig.update_layout(
267
- title_text="Sankey Diagram of Race and Mortality Outcomes",
268
- font_size=10
269
- )
270
-
271
- st.plotly_chart(fig, use_container_width=True)
272
-
273
- # ---------------------------
274
  # Streamlit Application
275
- # ---------------------------
276
 
277
  # Set Streamlit page configuration
278
  st.set_page_config(
@@ -281,11 +258,10 @@ st.set_page_config(
281
  initial_sidebar_state="expanded",
282
  )
283
 
284
- # Title and Description
285
  st.title("MIMIC-IV ICU Patient Data Dashboard")
286
- st.markdown("""
287
- Explore the general feature distribution and outcome metrics of ICU patients from the MIMIC-IV dataset. Utilize the sidebar filters to customize the data view and interact with various visualizations to uncover patterns and insights.
288
- """)
289
 
290
  # Sidebar Filters
291
  st.sidebar.header("Filter Data")
@@ -293,10 +269,10 @@ st.sidebar.header("Filter Data")
293
  @st.cache_data
294
  def load_data():
295
 
296
- admissions_df = pd.read_feather('data/admissions.feather')
297
- patients_df = pd.read_feather('data/patients.feather')
298
  # diagnoses_icd_df = pd.read_csv('data/diagnoses_icd.csv')
299
- pharmacy_df = pd.read_feather('data/pharmacy.feather')
300
  # prescriptions_df = pd.read_csv('data/prescriptions.csv')
301
  # d_hcpcs_df = pd.read_csv('data/d_hcpcs.csv')
302
  # poe_detail_df = pd.read_csv('data/poe_detail.csv')
@@ -337,16 +313,14 @@ def load_data():
337
  "NATIVE HAWAIIAN OR OTHER PACIFIC ISLANDER":"NATIVES"}
338
 
339
  admissions_df['race'] = admissions_df['race'].map(race_map)
340
- # Merge admissions and patients data on 'subject_id'
341
  merged_df = pd.merge(admissions_df, patients_df, on='subject_id', how='left')
342
 
343
- # Handle missing values by dropping rows with critical missing data
344
  merged_df = merged_df.dropna(subset=['anchor_age', 'gender', 'race', 'hospital_expire_flag'])
345
 
346
- # Convert datetime columns
347
  merged_df['admittime'] = pd.to_datetime(merged_df['admittime'])
348
  merged_df['dischtime'] = pd.to_datetime(merged_df['dischtime'])
349
- merged_df['deathtime'] = pd.to_datetime(merged_df['deathtime'], errors='coerce')
350
 
351
  # Create derived features
352
  merged_df['los'] = (merged_df['dischtime'] - merged_df['admittime']).dt.days
@@ -419,6 +393,7 @@ filtered_df = add_sidebar_filters(merged_df)
419
  # Display Summary Statistics for Q1
420
  st.header("Summary Statistics")
421
 
 
422
  col1, col2, col3, col4 = st.columns(4)
423
 
424
  with col1:
@@ -446,54 +421,82 @@ st.markdown("---")
446
  tabs = st.tabs(["General Overview", "Potential Biases"])
447
 
448
  # Q1: General Overview
 
449
  with tabs[0]:
450
  st.subheader("General Feature Distribution and Outcome Metrics")
451
 
 
452
  num_cols = 2
453
 
454
- q1_plots = [
 
455
  {
456
- "title": "Age Distribution of ICU Patients",
457
- "plot": lambda: create_histogram(filtered_df)
458
  },
459
  {
460
- "title": "Gender Distribution of ICU Patients",
461
- "plot": lambda: create_gender_bar_chart(filtered_df)
462
  },
463
  {
464
- "title": "Admission Types by Race",
465
- "plot": lambda: create_stacked_bar_admission_race(filtered_df)
466
  },
467
  {
468
- "title": "Length of Stay by Race",
469
- "plot": lambda: create_los_by_race(filtered_df)
470
- },
 
 
 
 
 
 
 
 
 
 
 
 
 
471
  {
472
- "title": "Correlation Heatmap of Age and LOS",
473
- "plot": lambda: create_correlation_heatmap(filtered_df)
474
  },
475
  {
476
  "title": "Admissions Over Time",
477
  "plot": lambda: create_time_series_heatmap(filtered_df)
478
  }
479
  ]
480
-
481
- for i in range(0, len(q1_plots), num_cols):
 
482
  cols = st.columns(num_cols)
483
  for j in range(num_cols):
484
- if i + j < len(q1_plots):
485
  with cols[j]:
486
- st.subheader(q1_plots[i + j]["title"])
487
- q1_plots[i + j]["plot"]()
488
 
489
 
490
- # Q2: Potential Biases from patient side
491
  with tabs[1]:
492
  st.subheader("Analyzing Potential Biases Across Demographics")
493
 
 
494
  num_cols = 2
495
 
 
496
  q2_plots = [
 
 
 
 
 
 
 
 
 
497
  {
498
  "title": "Mortality Rate by Race",
499
  "plot": lambda: create_mortality_by_race(filtered_df)
@@ -517,13 +520,10 @@ with tabs[1]:
517
  {
518
  "title": "Treemap of Race and Mortality",
519
  "plot": lambda: create_treemap_race_mortality(filtered_df)
520
- },
521
- {
522
- "title": "Sankey Diagram: Race to Mortality Outcomes",
523
- "plot": lambda: create_sankey_race_mortality(filtered_df)
524
  }
525
  ]
526
 
 
527
  for i in range(0, len(q2_plots), num_cols):
528
  cols = st.columns(num_cols)
529
  for j in range(num_cols):
@@ -536,6 +536,6 @@ with tabs[1]:
536
  st.markdown("""
537
  ---
538
  **Data Source:** MIMIC-IV Dataset
539
- **Project:** Investigating Biases in ICU Patient Data
540
  **Developed with:** Streamlit, Python
541
  """)
 
 
 
 
1
  import streamlit as st
2
  import pandas as pd
3
  import matplotlib.pyplot as plt
 
5
  import plotly.express as px
6
  import plotly.graph_objects as go
7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
+ # Plot Function Definitions
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
def create_gender_pie_chart(df):
    """Render a pie chart of the gender distribution.

    Counts the rows of ``df`` per value of the 'gender' column and draws
    the result as a Plotly pie chart with a 0.3 hole, stretched to the
    width of the enclosing Streamlit container.

    Args:
        df: DataFrame containing a 'gender' column.
    """
    # Name the index and the counts explicitly instead of overwriting
    # ``.columns`` afterwards, so the result does not depend on the default
    # column names that ``value_counts().reset_index()`` produces.
    gender_counts = (
        df['gender']
        .value_counts()
        .rename_axis('Gender')
        .reset_index(name='Count')
    )
    fig_gender = px.pie(
        gender_counts,
        names='Gender',
        values='Count',
        hover_data=['Count'],
        hole=0.3,
    )
    st.plotly_chart(fig_gender, use_container_width=True)
23
+
24
+
25
def create_race_pie_chart(df):
    """Render a pie chart of the race distribution.

    Counts the rows of ``df`` per value of the 'race' column and draws the
    result as a Plotly pie chart with a 0.3 hole.

    Args:
        df: DataFrame containing a 'race' column.
    """
    tally = df['race'].value_counts().reset_index()
    race_table = tally.set_axis(['Race Type', 'Count'], axis=1)
    st.plotly_chart(
        px.pie(
            race_table,
            names='Race Type',
            values='Count',
            hover_data=['Count'],
            hole=0.3,
        ),
        use_container_width=True,
    )
36
+
37
def create_insurance_pie_chart(df):
    """Render a pie chart of the insurance-type distribution.

    Counts the rows of ``df`` per value of the 'insurance' column and draws
    the result as a Plotly pie chart with a 0.3 hole.

    Args:
        df: DataFrame containing an 'insurance' column.
    """
    tally = df['insurance'].value_counts().reset_index()
    insurance_table = tally.set_axis(['Insurance Type', 'Count'], axis=1)
    st.plotly_chart(
        px.pie(
            insurance_table,
            names='Insurance Type',
            values='Count',
            hover_data=['Count'],
            hole=0.3,
        ),
        use_container_width=True,
    )
48
+
49
def create_mortality_pie_chart(df):
    """Render a pie chart of survived vs. died admissions.

    The number of deaths is the sum of the 'hospital_expire_flag' column;
    every remaining row of ``df`` is counted as survived.

    Args:
        df: DataFrame containing a 'hospital_expire_flag' column.
    """
    total_admissions = df.shape[0]
    if total_admissions == 0:
        # Both wedge sizes would be zero, which a pie chart cannot
        # represent; show a notice instead of attempting to draw.
        st.info("No admissions match the current filters.")
        return

    deaths = df['hospital_expire_flag'].sum()
    labels = ['Survived', 'Died']
    sizes = [total_admissions - deaths, deaths]
    colors = ['#66b3ff', '#ff6666']
    explode = (0.1, 0)

    # Draw on a dedicated figure rather than pyplot's implicit "current"
    # figure, so this chart never lands on a figure left over from another
    # plot function.
    fig, ax = plt.subplots()
    ax.pie(sizes, explode=explode, labels=labels, colors=colors,
           autopct='%1.1f%%', startangle=140, textprops={'fontsize': 14})
    ax.axis('equal')
    fig.tight_layout()
    st.pyplot(fig)
    # Release the figure so reruns of the script do not accumulate
    # open matplotlib figures.
    plt.close(fig)
63
+
64
def create_admission_type_bar_chart(df):
    """Render a bar chart of admission counts per admission type.

    Counts the rows of ``df`` per value of the 'admission_type' column and
    plots the counts on the x-axis against the admission types on the
    y-axis, one color per admission type.

    Args:
        df: DataFrame containing an 'admission_type' column.
    """
    counts_by_type = (
        df['admission_type']
        .value_counts()
        .reset_index()
        .set_axis(['Admission Type', 'Count'], axis=1)
    )
    axis_titles = {'Count': 'Number of Admissions', 'Admission Type': 'Admission Type'}
    bar_figure = px.bar(
        counts_by_type,
        x='Count',
        y='Admission Type',
        color='Admission Type',
        labels=axis_titles,
        hover_data=['Count'],
    )
    st.plotly_chart(bar_figure, use_container_width=True)
 
 
 
 
 
 
 
 
 
76
 
77
  def create_time_series_heatmap(df):
78
  """Creates an admissions over time heatmap."""
 
88
  y='admission_year',
89
  z='counts',
90
  histfunc='sum',
91
+ labels={'counts': 'Number of Admissions', 'admission_month': 'Admission Month', 'admission_year': 'Admission Year'},
92
+ color_continuous_scale='rdbu'
 
93
  )
 
94
  fig.update_xaxes(categoryorder='array', categoryarray=month_order)
95
  fig.update_layout(yaxis=dict(autorange='reversed'))
96
  fig.update_traces(colorbar=dict(title='Admissions'))
97
  st.plotly_chart(fig, use_container_width=True)
98
 
99
+
100
+
101
+
102
+
103
+
104
+ # def create_stacked_bar_admission_race(df):
105
+ # """Creates a stacked bar chart for Admission Types by Race."""
106
+ # admission_race = df.groupby(['race', 'admission_type']).size().unstack(fill_value=0)
107
+ # admission_race_percent = admission_race.div(admission_race.sum(axis=1), axis=0) * 100
108
+
109
+ # admission_race_percent.plot(kind='bar', stacked=True, figsize=(8, 6), colormap='tab20')
110
+ # plt.xlabel("Race")
111
+ # plt.ylabel("Percentage of Admission Types")
112
+ # plt.legend(title='Admission Type', bbox_to_anchor=(1.05, 1), loc='upper left')
113
+ # plt.tight_layout()
114
+ # st.pyplot(plt.gcf())
115
+
116
+ # def create_los_by_race(df):
117
+ # """Creates a box plot for Length of Stay by Race."""
118
+ # fig, ax = plt.subplots(figsize=(6, 4))
119
+ # sns.boxplot(data=df, x='race', y='los', palette='Pastel1', ax=ax)
120
+ # ax.set_xlabel("Race")
121
+ # ax.set_ylabel("Length of Stay (Days)")
122
+ # ax.set_xticklabels(ax.get_xticklabels(), rotation=45)
123
+ # plt.tight_layout()
124
+ # st.pyplot(fig)
125
+
126
+ # def create_correlation_heatmap(df):
127
+ # """Creates a correlation heatmap for numerical features."""
128
+ # numerical_features = df[['anchor_age', 'los']]
129
+ # corr_matrix = numerical_features.corr()
130
+
131
+ # fig, ax = plt.subplots(figsize=(3.5, 3))
132
+ # sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', fmt=".2f", ax=ax)
133
+ # plt.tight_layout()
134
+ # st.pyplot(fig)
135
+
136
+
137
def create_age_distribution_by_gender(df):
    """Render a 30-bin age histogram with KDE curves, colored by gender.

    Args:
        df: DataFrame containing 'anchor_age' and 'gender' columns.
    """
    # Use an explicit figure/axes pair instead of pyplot's global state so
    # the plot cannot be drawn onto, or read back from, another figure.
    fig, ax = plt.subplots(figsize=(12, 8))
    sns.histplot(data=df, x='anchor_age', bins=30,
                 kde=True, palette='bright', hue='gender', ax=ax)
    ax.set_xlabel('Age', fontsize=16)
    ax.set_ylabel('Number of Admissions', fontsize=16)
    ax.tick_params(axis='both', labelsize=16)
    fig.tight_layout()
    st.pyplot(fig)
    # Release the figure so reruns of the script do not accumulate
    # open matplotlib figures.
    plt.close(fig)
147
+
148
+
149
def create_age_distribution_by_admission_type(df):
    """Render a boxen plot of patient age for each admission type.

    Args:
        df: DataFrame containing 'admission_type' and 'anchor_age' columns.
    """
    # Use an explicit figure/axes pair instead of pyplot's global state so
    # the plot cannot be drawn onto, or read back from, another figure.
    fig, ax = plt.subplots(figsize=(12, 8))
    sns.boxenplot(data=df, x='admission_type',
                  y='anchor_age', palette='Set3', ax=ax)
    ax.set_xlabel('Admission Type', fontsize=16)
    ax.set_ylabel('Age', fontsize=16)
    # Admission-type labels are rotated to 45 degrees, as before.
    ax.tick_params(axis='x', labelsize=16, labelrotation=45)
    ax.tick_params(axis='y', labelsize=16)
    fig.tight_layout()
    st.pyplot(fig)
    # Release the figure so reruns of the script do not accumulate
    # open matplotlib figures.
    plt.close(fig)
159
+
160
+
161
  def create_mortality_by_race(df):
162
  """Creates a bar chart for Mortality Rate by Race."""
163
  mortality_race = df.groupby('race')['hospital_expire_flag'].mean().reset_index()
 
165
 
166
  fig, ax = plt.subplots(figsize=(6, 4))
167
  sns.barplot(data=mortality_race, x='race', y='mortality_rate', palette='Set2', ax=ax)
 
168
  ax.set_xlabel("Race")
169
  ax.set_ylabel("Mortality Rate (%)")
170
  ax.set_xticklabels(ax.get_xticklabels(), rotation=45)
 
178
 
179
  fig, ax = plt.subplots(figsize=(6, 4))
180
  sns.barplot(data=mortality_gender, x='gender', y='mortality_rate', palette='Set3', ax=ax)
 
181
  ax.set_xlabel("Gender")
182
  ax.set_ylabel("Mortality Rate (%)")
183
  plt.tight_layout()
 
185
 
186
  def create_mortality_by_age_group(df):
187
  """Creates a bar chart for Mortality Rate by Age Group."""
 
188
  bins = [0, 30, 50, 70, 90, 120]
189
  labels = ['0-30', '31-50', '51-70', '71-90', '91-120']
190
  df['age_group'] = pd.cut(df['anchor_age'], bins=bins, labels=labels, right=False)
 
194
 
195
  fig, ax = plt.subplots(figsize=(6, 4))
196
  sns.barplot(data=mortality_age, x='age_group', y='mortality_rate', palette='coolwarm', ax=ax)
 
197
  ax.set_xlabel("Age Group")
198
  ax.set_ylabel("Mortality Rate (%)")
199
  plt.tight_layout()
 
211
  palette='Set2',
212
  ax=ax
213
  )
 
214
  ax.set_xlabel("Race")
215
  ax.set_ylabel("Age")
216
  ax.legend(title='Mortality', loc='upper right')
 
224
  columns='gender',
225
  values='hospital_expire_flag',
226
  aggfunc='mean'
227
+ ) * 100
228
+
229
  fig, ax = plt.subplots(figsize=(8, 6))
230
  sns.heatmap(pivot_table, annot=True, fmt=".1f", cmap='YlOrRd', ax=ax)
 
231
  ax.set_xlabel("Gender")
232
  ax.set_ylabel("Race")
233
  plt.tight_layout()
234
  st.pyplot(fig)
235
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
236
 
237
  def create_treemap_race_mortality(df):
238
  """Creates a treemap for Race and Mortality."""
 
244
  path=['race', 'Mortality'],
245
  values='counts',
246
  color='Mortality',
247
+ color_discrete_map={'Survived':'#66b3ff','Died':'#ff6666'}
 
248
  )
249
  fig.update_layout(margin = dict(t=30, l=0, r=0, b=0))
250
  st.plotly_chart(fig, use_container_width=True)
251
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
252
  # Streamlit Application
 
253
 
254
  # Set Streamlit page configuration
255
  st.set_page_config(
 
258
  initial_sidebar_state="expanded",
259
  )
260
 
 
261
  st.title("MIMIC-IV ICU Patient Data Dashboard")
262
+ st.markdown('''
263
+ Explore the general feature distribution and demographics related bias in ICU patients from the MIMIC-IV dataset. Utilize the sidebar filters to customize the data view'''
264
+ )
265
 
266
  # Sidebar Filters
267
  st.sidebar.header("Filter Data")
 
269
  @st.cache_data
270
  def load_data():
271
 
272
+ admissions_df = pd.read_csv('data/admissions.csv')
273
+ patients_df = pd.read_csv('data/patients.csv')
274
  # diagnoses_icd_df = pd.read_csv('data/diagnoses_icd.csv')
275
+ # pharmacy_df = pd.read_csv('data/pharmacy.csv')
276
  # prescriptions_df = pd.read_csv('data/prescriptions.csv')
277
  # d_hcpcs_df = pd.read_csv('data/d_hcpcs.csv')
278
  # poe_detail_df = pd.read_csv('data/poe_detail.csv')
 
313
  "NATIVE HAWAIIAN OR OTHER PACIFIC ISLANDER":"NATIVES"}
314
 
315
  admissions_df['race'] = admissions_df['race'].map(race_map)
316
+
317
  merged_df = pd.merge(admissions_df, patients_df, on='subject_id', how='left')
318
 
 
319
  merged_df = merged_df.dropna(subset=['anchor_age', 'gender', 'race', 'hospital_expire_flag'])
320
 
 
321
  merged_df['admittime'] = pd.to_datetime(merged_df['admittime'])
322
  merged_df['dischtime'] = pd.to_datetime(merged_df['dischtime'])
323
+ merged_df['deathtime'] = pd.to_datetime(merged_df['deathtime'], errors='coerce')
324
 
325
  # Create derived features
326
  merged_df['los'] = (merged_df['dischtime'] - merged_df['admittime']).dt.days
 
393
  # Display Summary Statistics for Q1
394
  st.header("Summary Statistics")
395
 
396
+ # Create four columns for metrics
397
  col1, col2, col3, col4 = st.columns(4)
398
 
399
  with col1:
 
421
  tabs = st.tabs(["General Overview", "Potential Biases"])
422
 
423
# Q1: General Overview

def _render_plot_grid(plots, per_row):
    """Lay out titled plots in rows of ``per_row`` Streamlit columns.

    Each entry of ``plots`` is a dict with a "title" string and a
    zero-argument "plot" callable that draws into the active container.
    """
    for start in range(0, len(plots), per_row):
        row = st.columns(per_row)
        for column, spec in zip(row, plots[start:start + per_row]):
            with column:
                st.subheader(spec["title"])
                spec["plot"]()


with tabs[0]:
    st.subheader("General Feature Distribution and Outcome Metrics")

    # Distribution charts, two per row
    num_cols = 2
    q1_plots_2_col = [
        {
            "title": "Gender Distribution",
            "plot": lambda: create_gender_pie_chart(filtered_df),
        },
        {
            "title": "Race Distribution",
            "plot": lambda: create_race_pie_chart(filtered_df),
        },
        {
            "title": "Insurance Type Distribution",
            "plot": lambda: create_insurance_pie_chart(filtered_df),
        },
        {
            "title": "Mortality Rate of ICU Patients",
            "plot": lambda: create_mortality_pie_chart(filtered_df),
        },
    ]
    _render_plot_grid(q1_plots_2_col, num_cols)

    # Wider charts, one per row
    num_cols = 1
    q1_plots_1_col = [
        {
            "title": "Admission Type Count",
            "plot": lambda: create_admission_type_bar_chart(filtered_df),
        },
        {
            "title": "Admissions Over Time",
            "plot": lambda: create_time_series_heatmap(filtered_df),
        },
    ]
    _render_plot_grid(q1_plots_1_col, num_cols)
480
 
481
 
482
+ # Q2: Potential Biases
483
  with tabs[1]:
484
  st.subheader("Analyzing Potential Biases Across Demographics")
485
 
486
+ # Define the number of columns per row
487
  num_cols = 2
488
 
489
+ # Define all Q2 plots in a list with titles and plot-generating functions
490
  q2_plots = [
491
+
492
+ {
493
+ "title": "Age Distribution of ICU Patients",
494
+ "plot": lambda: create_age_distribution_by_gender(filtered_df)
495
+ },
496
+ {
497
+ "title": "Boxen Plot of Age Distribution by Admission Type",
498
+ "plot": lambda: create_age_distribution_by_admission_type(filtered_df)
499
+ },
500
  {
501
  "title": "Mortality Rate by Race",
502
  "plot": lambda: create_mortality_by_race(filtered_df)
 
520
  {
521
  "title": "Treemap of Race and Mortality",
522
  "plot": lambda: create_treemap_race_mortality(filtered_df)
 
 
 
 
523
  }
524
  ]
525
 
526
+ # Arrange Q2 plots in a grid layout
527
  for i in range(0, len(q2_plots), num_cols):
528
  cols = st.columns(num_cols)
529
  for j in range(num_cols):
 
536
  st.markdown("""
537
  ---
538
  **Data Source:** MIMIC-IV Dataset
539
+ **Project:** Fairness in ICU Patient Data
540
  **Developed with:** Streamlit, Python
541
  """)