Roberta2024 committed on
Commit
bbb0a94
·
verified ·
1 Parent(s): 7cb0623

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -4
app.py CHANGED
@@ -8,7 +8,7 @@ from sklearn.tree import DecisionTreeClassifier
8
  from sklearn.model_selection import train_test_split
9
  import numpy as np
10
 
11
- # Function to process data and return feature importances
12
  def calculate_importances(file):
13
  # Read uploaded file
14
  heart_df = pd.read_csv(file)
@@ -50,11 +50,14 @@ def calculate_importances(file):
50
  # Merge DataFrames
51
  importance_df = rf_df.merge(xgb_df, on='Feature').merge(cart_df, on='Feature')
52
 
 
 
 
53
  # Save to Excel
54
  file_name = 'feature_importances.xlsx'
55
  importance_df.to_excel(file_name, index=False)
56
 
57
- return file_name, importance_df.head()
58
 
59
  # Streamlit interface
60
  st.title("Feature Importance Calculation")
@@ -64,11 +67,11 @@ uploaded_file = st.file_uploader("Upload heart.csv file", type=['csv'])
64
 
65
  if uploaded_file is not None:
66
  # Process the file and get results
67
- excel_file, preview_df = calculate_importances(uploaded_file)
68
 
69
  # Display a preview of the DataFrame
70
  st.write("Feature Importances (Preview):")
71
- st.dataframe(preview_df)
72
 
73
  # Provide a link to download the Excel file
74
  with open(excel_file, "rb") as file:
@@ -78,3 +81,16 @@ if uploaded_file is not None:
78
  file_name=excel_file,
79
  mime="application/vnd.ms-excel"
80
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  from sklearn.model_selection import train_test_split
9
  import numpy as np
10
 
11
+ # Function to process data and return feature importances and correlation matrix
12
  def calculate_importances(file):
13
  # Read uploaded file
14
  heart_df = pd.read_csv(file)
 
50
  # Merge DataFrames
51
  importance_df = rf_df.merge(xgb_df, on='Feature').merge(cart_df, on='Feature')
52
 
53
+ # Correlation Matrix
54
+ corr_matrix = heart_df.corr()
55
+
56
  # Save to Excel
57
  file_name = 'feature_importances.xlsx'
58
  importance_df.to_excel(file_name, index=False)
59
 
60
+ return file_name, importance_df, corr_matrix, rf_importances, feature_names
61
 
62
  # Streamlit interface
63
  st.title("Feature Importance Calculation")
 
67
 
68
  if uploaded_file is not None:
69
  # Process the file and get results
70
+ excel_file, importance_df, corr_matrix, rf_importances, feature_names = calculate_importances(uploaded_file)
71
 
72
  # Display a preview of the DataFrame
73
  st.write("Feature Importances (Preview):")
74
+ st.dataframe(importance_df.head())
75
 
76
  # Provide a link to download the Excel file
77
  with open(excel_file, "rb") as file:
 
81
  file_name=excel_file,
82
  mime="application/vnd.ms-excel"
83
  )
84
+
85
+ # Plot and display the Correlation Matrix
86
+ st.write("Correlation Matrix:")
87
+ plt.figure(figsize=(10, 8))
88
+ sns.heatmap(corr_matrix, annot=True, fmt=".2f", cmap="coolwarm", cbar=True)
89
+ st.pyplot(plt)
90
+
91
+ # Plot and display the Feature Importance (Random Forest)
92
+ st.write("Random Forest Feature Importance:")
93
+ fig, ax = plt.subplots()
94
+ sns.barplot(x=rf_importances, y=feature_names, ax=ax)
95
+ ax.set_title('Random Forest Feature Importances')
96
+ st.pyplot(fig)