allantacuelwvsu committed on
Commit
cc17b16
·
1 Parent(s): ff85e80

add dataset section

Browse files
Files changed (1) hide show
  1. app.py +44 -2
app.py CHANGED
@@ -31,7 +31,7 @@ y_pred = model.predict(X_val)
31
  # App
32
  st.title("Regression: Delhi Housing Price Prediction")
33
  st.caption("dataset: https://www.kaggle.com/code/ruchi798/housing-prices-eda-and-prediction/input -> Housing Prices in Metropolitan Areas of India/Delhi.csv")
34
- tab1, tab2 = st.tabs(["Model Performance", "Price Predictor"])
35
 
36
  with tab1:
37
  # Model Assessment
@@ -68,6 +68,48 @@ with tab1:
68
  st.caption("Well, I tried to make a practical predictor based on amenities but it turns out that location is the most important feature, amenities are irrelevant.")
69
 
70
  with tab2:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  # User Input
72
  st.header("Price Prediction")
73
  col1, col2 = st.columns(2)
@@ -139,5 +181,5 @@ with tab2:
139
  st.write(f"₹{predicted_price:,.2f}")
140
  st.markdown(f'<h3 style="color:{color};">{category}</h3>', unsafe_allow_html=True)
141
  st.write(description)
142
- st.caption("Dataset is small so expect anomalous output (negative prices).")
143
  st.divider()
 
31
  # App
32
  st.title("Regression: Delhi Housing Price Prediction")
33
  st.caption("dataset: https://www.kaggle.com/code/ruchi798/housing-prices-eda-and-prediction/input -> Housing Prices in Metropolitan Areas of India/Delhi.csv")
34
+ tab1, tab2, tab3 = st.tabs(["Model Performance", "Dataset", "Price Predictor"])
35
 
36
  with tab1:
37
  # Model Assessment
 
68
  st.caption("Well, I tried to make a practical predictor based on amenities but it turns out that location is the most important feature, amenities are irrelevant.")
69
 
70
  with tab2:
71
+ # Dataset
72
+ st.header("Dataset")
73
+
74
+ @st.cache_data()
75
+ def load():
76
+ return pd.read_csv("datasets/Delhi.csv")
77
+ dataset = load()
78
+ dataset_processed = df
79
+
80
+ # Quick preprocess, just for display
81
+ def preprocess(data):
82
+ data = data.drop(columns=["Location"], errors="ignore")
83
+ return data
84
+
85
+ def corr(data, title):
86
+ data = data.select_dtypes(include=["number"])
87
+ fig, ax = plt.subplots(figsize=(8, 6))
88
+ sns.heatmap(data.corr(), annot=True, fmt=".2f", cmap="coolwarm", linewidths=0.5, ax=ax, annot_kws={"size": 3}, cbar_kws={"shrink": .8}) # Reduce size, lots of features.
89
+ ax.set_title(title)
90
+ st.pyplot(fig)
91
+ corr(preprocess(dataset), "Correlation Matrix")
92
+ st.caption("'tis all the correlatable data, minus the other irrelevant, non-correlatable data. [Location] feature is missing cos' it is string data, and was one-hotted.")
93
+
94
+ # Toggle order
95
+ view_type = st.radio("Order:", ["Top -> Bottom", "Bottom -> Top"])
96
+
97
+ # Display head() or tail()
98
+ if view_type == "Top -> Bottom":
99
+ st.caption("datasets/Delhi.csv")
100
+ st.dataframe(dataset.head(len(dataset)))
101
+ st.caption("df")
102
+ st.dataframe(dataset_processed.head(len(dataset_processed)))
103
+ st.caption("Lots of unique values = omega long onehot encoded feature list.")
104
+
105
+ elif view_type == "Bottom -> Top":
106
+ st.caption("datasets/Delhi.csv")
107
+ st.dataframe(dataset.tail(len(dataset)).iloc[::-1])
108
+ st.caption("df")
109
+ st.dataframe(dataset_processed.tail(len(dataset_processed)).iloc[::-1])
110
+ st.caption("Lots of unique values = omega long onehot encoded feature list.")
111
+ st.divider()
112
+ with tab3:
113
  # User Input
114
  st.header("Price Prediction")
115
  col1, col2 = st.columns(2)
 
181
  st.write(f"₹{predicted_price:,.2f}")
182
  st.markdown(f'<h3 style="color:{color};">{category}</h3>', unsafe_allow_html=True)
183
  st.write(description)
184
+ st.caption("Dataset is weird so expect anomalous output (negative prices, omega-high prices, etc.).")
185
  st.divider()