allantacuelwvsu committed on
Commit
08bca51
·
1 Parent(s): ba7d08e

add datasets section

Browse files
Files changed (1) hide show
  1. app.py +44 -1
app.py CHANGED
@@ -30,7 +30,7 @@ y_pred = model.predict(X_val)
30
  st.title("Classification: Titanic Survival Prediction")
31
  st.caption("dataset: https://www.kaggle.com/code/mrisdal/exploring-survival-on-the-titanic/input -> test.csv || train.csv")
32
 
33
- tab1, tab2 = st.tabs(["Model Performance", "Survival Chance Predictor"])
34
 
35
  with tab1:
36
  # Model Assessment
@@ -70,6 +70,49 @@ with tab1:
70
  st.divider()
71
 
72
  with tab2:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  # Predictor
74
  st.header("Survival Chance Prediction")
75
 
 
30
  st.title("Classification: Titanic Survival Prediction")
31
  st.caption("dataset: https://www.kaggle.com/code/mrisdal/exploring-survival-on-the-titanic/input -> test.csv || train.csv")
32
 
33
+ tab1, tab2, tab3 = st.tabs(["Model Performance", "Dataset", "Survival Chance Predictor"])
34
 
35
  with tab1:
36
  # Model Assessment
 
70
  st.divider()
71
 
72
  with tab2:
73
+ # Dataset
74
+ st.header("Dataset")
75
+
76
@st.cache_data()
def load() -> pd.DataFrame:
    """Read the training CSV at ``datasets/train.csv`` into a DataFrame.

    The function is wrapped in ``st.cache_data`` so Streamlit can reuse the
    parsed result instead of re-reading the file on each call.
    """
    raw_frame = pd.read_csv("datasets/train.csv")
    return raw_frame
79
+ dataset = load()
80
+ dataset_processed = train_df
81
+
82
+ # Quick preprocess, just for display
83
def preprocess(data):
    """Return a display-ready copy of ``data`` with numeric helper columns.

    The input frame is left untouched so callers can still show the raw
    data afterwards; previously the columns were rewritten in place, which
    leaked the encoded values into every later use of the same frame.

    Args:
        data: DataFrame with ``Sex`` (``"male"``/``"female"``), ``SibSp``
            and ``Parch`` columns.

    Returns:
        A new DataFrame where ``Sex`` is mapped to 0 (male) / 1 (female)
        and ``FamilySize`` equals ``SibSp + Parch + 1``.
    """
    # Work on a copy: mutating the argument would alter the caller's frame.
    data = data.copy()
    data["Sex"] = data["Sex"].map({"male": 0, "female": 1})
    data["FamilySize"] = data["SibSp"] + data["Parch"] + 1
    return data
87
+
88
def corr(data, title):
    """Draw a correlation heatmap of the numeric columns of ``data``.

    Args:
        data: DataFrame to analyse; non-numeric columns are dropped before
            the pairwise correlations are computed.
        title: Text used as the heatmap title.
    """
    # Only numeric columns can take part in the correlation matrix.
    data = data.select_dtypes(include=["number"])
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.heatmap(data.corr(), annot=True, fmt=".2f", cmap="coolwarm", linewidths=0.5, ax=ax)
    ax.set_title(title)
    st.pyplot(fig)
    # Figures created through pyplot stay registered until explicitly closed;
    # release this one once it has been rendered so they do not pile up.
    plt.close(fig)
94
+ corr(preprocess(dataset), "Correlation Matrix")
95
+ st.caption("'tis all the correlatable data, minus the other irrelevant, non-correlatable data. [Embarked] feature is missing cos' it is string data, and was one-hotted.")
96
+
97
+ # Toggle order
98
view_type = st.radio("Order:", ["Top -> Bottom", "Bottom -> Top"])

# Show the raw CSV and the processed frame, either in file order or reversed.
if view_type in ("Top -> Bottom", "Bottom -> Top"):
    newest_first = view_type == "Bottom -> Top"
    for label, frame in (("datasets/train.csv", dataset), ("df", dataset_processed)):
        st.caption(label)
        # Every row is shown in both modes; only the row order differs.
        st.dataframe(frame.iloc[::-1] if newest_first else frame)
112
+ st.caption("There's a test.csv file in my datasets/ folder, but it's just an artifact from my Kaggle download. Can't be bothered to organize.")
113
+ st.divider()
114
+
115
+ with tab3:
116
  # Predictor
117
  st.header("Survival Chance Prediction")
118