Spaces:

Roberta2024
/

TESTTT

Sleeping

App Files Files Community

Roberta2024 committed on Sep 13, 2024

Commit

bbb0a94

verified ·

1 Parent(s): 7cb0623

Update app.py

Browse files

Files changed (1) hide show

app.py +20 -4

app.py CHANGED Viewed

@@ -8,7 +8,7 @@ from sklearn.tree import DecisionTreeClassifier
 from sklearn.model_selection import train_test_split
 import numpy as np
-# Function to process data and return feature importances
 def calculate_importances(file):
     # Read uploaded file
     heart_df = pd.read_csv(file)
@@ -50,11 +50,14 @@ def calculate_importances(file):
     # Merge DataFrames
     importance_df = rf_df.merge(xgb_df, on='Feature').merge(cart_df, on='Feature')
     # Save to Excel
     file_name = 'feature_importances.xlsx'
     importance_df.to_excel(file_name, index=False)
-    return file_name, importance_df.head()
 # Streamlit interface
 st.title("Feature Importance Calculation")
@@ -64,11 +67,11 @@ uploaded_file = st.file_uploader("Upload heart.csv file", type=['csv'])
 if uploaded_file is not None:
     # Process the file and get results
-    excel_file, preview_df = calculate_importances(uploaded_file)
     # Display a preview of the DataFrame
     st.write("Feature Importances (Preview):")
-    st.dataframe(preview_df)
     # Provide a link to download the Excel file
     with open(excel_file, "rb") as file:
@@ -78,3 +81,16 @@ if uploaded_file is not None:
             file_name=excel_file,
             mime="application/vnd.ms-excel"
         )

 from sklearn.model_selection import train_test_split
 import numpy as np
+# Function to process data and return feature importances and correlation matrix
 def calculate_importances(file):
     # Read uploaded file
     heart_df = pd.read_csv(file)
     # Merge DataFrames
     importance_df = rf_df.merge(xgb_df, on='Feature').merge(cart_df, on='Feature')
+    # Correlation Matrix
+    corr_matrix = heart_df.corr()
     # Save to Excel
     file_name = 'feature_importances.xlsx'
     importance_df.to_excel(file_name, index=False)
+    return file_name, importance_df, corr_matrix, rf_importances, feature_names
 # Streamlit interface
 st.title("Feature Importance Calculation")
 if uploaded_file is not None:
     # Process the file and get results
+    excel_file, importance_df, corr_matrix, rf_importances, feature_names = calculate_importances(uploaded_file)
     # Display a preview of the DataFrame
     st.write("Feature Importances (Preview):")
+    st.dataframe(importance_df.head())
     # Provide a link to download the Excel file
     with open(excel_file, "rb") as file:
             file_name=excel_file,
             mime="application/vnd.ms-excel"
         )
+    # Plot and display the Correlation Matrix
+    st.write("Correlation Matrix:")
+    plt.figure(figsize=(10, 8))
+    sns.heatmap(corr_matrix, annot=True, fmt=".2f", cmap="coolwarm", cbar=True)
+    st.pyplot(plt)
+    # Plot and display the Feature Importance (Random Forest)
+    st.write("Random Forest Feature Importance:")
+    fig, ax = plt.subplots()
+    sns.barplot(x=rf_importances, y=feature_names, ax=ax)
+    ax.set_title('Random Forest Feature Importances')
+    st.pyplot(fig)