Spaces:

allantacuelwvsu
/

delhi_housing_price

Sleeping

App Files Files Community

allantacuelwvsu committed 29 days ago

Commit

cc17b16

1 Parent(s): ff85e80

add dataset section

Browse files

Files changed (1) hide show

app.py +44 -2

app.py CHANGED Viewed

@@ -31,7 +31,7 @@ y_pred = model.predict(X_val)
 # App
 st.title("Regression: Delhi Housing Price Prediction")
 st.caption("dataset: https://www.kaggle.com/code/ruchi798/housing-prices-eda-and-prediction/input -> Housing Prices in Metropolitan Areas of India/Delhi.csv")
-tab1, tab2 = st.tabs(["Model Performance", "Price Predictor"])
 with tab1:
     # Model Assessment
@@ -68,6 +68,48 @@ with tab1:
     st.caption("Well, I tried to make a practical predictor based on amenities but it turns out that location is the most important feature, amenities are irrelevant.")
 with tab2:
     # User Input
     st.header("Price Prediction")
     col1, col2 = st.columns(2)
@@ -139,5 +181,5 @@ with tab2:
     st.write(f"₹{predicted_price:,.2f}")
     st.markdown(f'<h3 style="color:{color};">{category}</h3>', unsafe_allow_html=True)
     st.write(description)
-    st.caption("Dataset is small so expect anomalous output (negative prices).")
     st.divider()

 # App
 st.title("Regression: Delhi Housing Price Prediction")
 st.caption("dataset: https://www.kaggle.com/code/ruchi798/housing-prices-eda-and-prediction/input -> Housing Prices in Metropolitan Areas of India/Delhi.csv")
+tab1, tab2, tab3 = st.tabs(["Model Performance", "Dataset", "Price Predictor"])
 with tab1:
     # Model Assessment
     st.caption("Well, I tried to make a practical predictor based on amenities but it turns out that location is the most important feature, amenities are irrelevant.")
 with tab2:
+    # Dataset tab: a correlation heatmap of the raw CSV, followed by the raw
+    # and processed tables in a user-selectable row order.
+    st.header("Dataset")
+    @st.cache_data()
+    def load():
+        """Read the raw Delhi housing CSV (wrapped in st.cache_data)."""
+        return pd.read_csv("datasets/Delhi.csv")
+    dataset = load()
+    # NOTE(review): `df` is not defined in this hunk; presumably the
+    # one-hot-encoded frame built earlier in app.py -- confirm.
+    dataset_processed = df
+    # Quick preprocess, just for display
+    def preprocess(data):
+        """Return `data` without the "Location" column (no-op if absent)."""
+        return data.drop(columns=["Location"], errors="ignore")
+    def corr(data, title):
+        """Render a correlation heatmap of the numeric columns of `data`.
+
+        `title` is used as the axes title of the plot.
+        """
+        numeric = data.select_dtypes(include=["number"])
+        fig, ax = plt.subplots(figsize=(8, 6))
+        # Reduce annotation size, lots of features.
+        sns.heatmap(numeric.corr(), annot=True, fmt=".2f", cmap="coolwarm", linewidths=0.5, ax=ax, annot_kws={"size": 3}, cbar_kws={"shrink": .8})
+        ax.set_title(title)
+        st.pyplot(fig)
+        # Figures created through pyplot stay registered until closed;
+        # release this one once it has been rendered.
+        plt.close(fig)
+    corr(preprocess(dataset), "Correlation Matrix")
+    st.caption("'tis all the correlatable data, minus the other irrelevant, non-correlatable data. [Location] feature is missing cos' it is string data, and was one-hotted.")
+    # Toggle order
+    view_type = st.radio("Order:", ["Top -> Bottom", "Bottom -> Top"])
+    # Show every row of both frames, reversed when "Bottom -> Top" is chosen.
+    reverse = view_type == "Bottom -> Top"
+    for label, frame in (("datasets/Delhi.csv", dataset), ("df", dataset_processed)):
+        st.caption(label)
+        st.dataframe(frame.iloc[::-1] if reverse else frame)
+    st.caption("Lots of unique values = omega long onehot encoded feature list.")
+    st.divider()
+with tab3:
     # User Input
     st.header("Price Prediction")
     col1, col2 = st.columns(2)
     st.write(f"₹{predicted_price:,.2f}")
     st.markdown(f'<h3 style="color:{color};">{category}</h3>', unsafe_allow_html=True)
     st.write(description)
+    st.caption("Dataset is weird so expect anomalous output (negative prices, omega-high prices, etc.).")
     st.divider()