Spaces:

allantacuelwvsu
/

titanic_survival

Sleeping

App Files Community

allantacuelwvsu committed 22 days ago

Commit

08bca51

1 Parent(s): ba7d08e

add datasets section

Browse files

Files changed (1) hide show

app.py +44 -1

app.py CHANGED Viewed

@@ -30,7 +30,7 @@ y_pred = model.predict(X_val)
 st.title("Classification: Titanic Survival Prediction")
 st.caption("dataset: https://www.kaggle.com/code/mrisdal/exploring-survival-on-the-titanic/input -> test.csv || train.csv")
-tab1, tab2 = st.tabs(["Model Performance", "Survival Chance  Predictor"])
 with tab1:
     # Model Assessment
@@ -70,6 +70,49 @@ with tab1:
     st.divider()
 with tab2:
     # Predictor
     st.header("Survival Chance Prediction")

 st.title("Classification: Titanic Survival Prediction")
 st.caption("dataset: https://www.kaggle.com/code/mrisdal/exploring-survival-on-the-titanic/input -> test.csv || train.csv")
+tab1, tab2, tab3 = st.tabs(["Model Performance", "Dataset", "Survival Chance  Predictor"])
 with tab1:
     # Model Assessment
     st.divider()
 with tab2:
+    # Dataset
+    st.header("Dataset")
+    @st.cache_data()
+    def load():
+        return pd.read_csv("datasets/train.csv")
+    dataset = load()
+    dataset_processed = train_df
+    # Quick preprocess, just for display
+    def preprocess(data):
+        data["Sex"] = data["Sex"].map({"male": 0, "female": 1})
+        data["FamilySize"] = data["SibSp"] + data["Parch"] + 1
+        return data
+    def corr(data, title):
+        data = data.select_dtypes(include=["number"])
+        fig, ax = plt.subplots(figsize=(8, 6))
+        sns.heatmap(data.corr(), annot=True, fmt=".2f", cmap="coolwarm", linewidths=0.5, ax=ax)
+        ax.set_title(title)
+        st.pyplot(fig)
+    corr(preprocess(dataset), "Correlation Matrix")
+    st.caption("'tis all the correlatable data, minus the other irrelevant, non-correlatable data. [Embarked] feature is missing cos' it is string data, and was one-hotted.")
+    # Toggle order
+    view_type = st.radio("Order:", ["Top -> Bottom", "Bottom -> Top"])
+    # Display head() or tail()
+    if view_type == "Top -> Bottom":
+        st.caption("datasets/train.csv")
+        st.dataframe(dataset.head(len(dataset)))
+        st.caption("df")
+        st.dataframe(dataset_processed.head(len(dataset_processed)))
+    elif view_type == "Bottom -> Top":
+        st.caption("datasets/train.csv")
+        st.dataframe(dataset.tail(len(dataset)).iloc[::-1])
+        st.caption("df")
+        st.dataframe(dataset_processed.tail(len(dataset_processed)).iloc[::-1])
+    st.caption("There's a test.csv file in my datasets/ folder, but it's just an artifact from my Kaggle download. Can't be bothered to organize.")
+    st.divider()
+with tab3:
     # Predictor
     st.header("Survival Chance Prediction")