Upload 4 files
Browse files- Data/kc_house_data.csv +0 -0
- app.py +203 -0
- house_data.csv +0 -0
- requirements.txt +6 -0
Data/kc_house_data.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
app.py
ADDED
@@ -0,0 +1,203 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import numpy as np
|
4 |
+
from sklearn.preprocessing import StandardScaler
|
5 |
+
from sklearn.ensemble import RandomForestRegressor
|
6 |
+
import matplotlib.pyplot as plt
|
7 |
+
import seaborn as sns
|
8 |
+
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
|
9 |
+
from sklearn.model_selection import train_test_split
|
10 |
+
|
11 |
+
|
12 |
+
# Page header: title plus a short description of what the app does.
intro_markdown = """
# Simple House price Prediction App

This app **Predicts the price of House** according to features!

It is using **Random Forest Regression** algorithm.
"""
st.write(intro_markdown)

# Heading shown above the input widgets in the sidebar.
st.sidebar.header("User Input Parameters")
|
21 |
+
|
22 |
+
def user_input_features():
    """Collect the house attributes from the sidebar widgets.

    Returns:
        A single-row ``pandas.DataFrame`` (index ``0``) holding the numeric
        inputs followed by one boolean ``zipcode_<code>`` column per entry of
        the zipcode list, in list order. Only the column matching the
        selected zipcode is ``True``.
    """
    sidebar = st.sidebar

    def legend(text):
        # Compact HTML paragraph used as a heading / scale legend above a widget.
        sidebar.write(f"<p style='margin-bottom: 2px;'>{text}</p>", unsafe_allow_html=True)

    bedrooms = sidebar.slider('Bedrooms', 1, 15, 2, 1, key=0)
    bathrooms = sidebar.slider('Bathrooms', 1, 12, 2, 1, key=1)
    sqft_living = sidebar.slider('Area of Living (Sqft)', 300, 10000, 560, 50, key=2)
    sqft_land = sidebar.slider('Area of Land (Sqft)', 400, 20000, 700, 100, key=3)
    floors = sidebar.slider('Floors', 1, 6, 1, 1, key=4)

    # Yes/No radios are encoded as 1/0.
    legend('Waterfront')
    waterfront_input = sidebar.radio("Select Yes or No", ["Yes", "No"], index=1)
    waterfront = 1 if waterfront_input == 'Yes' else 0

    legend('View')
    # Same label as the waterfront radio, so an explicit key keeps the two widgets distinct.
    view_input = sidebar.radio("Select Yes or No", ["Yes", "No"], index=1, key='Yes')
    view = 1 if view_input == 'Yes' else 0

    legend('Condition')
    legend('1: Poor')
    legend('2: Fair')
    legend('3: Average')
    legend('4: Good')
    legend('5: Excellent')
    condition = sidebar.slider('Select from above', 1, 5, 3, 1, key=5)

    legend('Grades')
    legend('(1-3): Poor Construction')
    legend('(4-7): Standard Construction')
    legend('(8-9): Good to Very Good Quality')
    legend('(10-12): High Quality with Luxury Features')
    grade = sidebar.slider('Select from above', 1, 12, 6, 1, key=6)

    sqft_above = sidebar.slider('Sqft Above', 300, 10000, 560, 50, key=7)
    sqft_basement = sidebar.slider('Sqft Basement', 0, 5000, 0, 50, key=8)
    yr_built = sidebar.slider('Year Built', 1900, 2022, 1970, 1, key=9)
    # NOTE(review): this range cannot express "never renovated"; confirm how
    # the training data encodes that case.
    yr_renovated = sidebar.slider('Year Renovated', 1900, 2022, 1970, 1, key=10)
    sqft_living15 = sidebar.slider(
        "Average interior square footage of the 15 nearest neighbors' living spaces",
        300, 10000, 560, 50, key=11,
    )
    sqft_lot15 = sidebar.slider(
        'The average square footage of the land lots of the 15 nearest neighbors',
        400, 20000, 700, 100, key=12,
    )

    zipcode_options = [
        '98002', '98003', '98004', '98005', '98006', '98007', '98008', '98010', '98011', '98014',
        '98019', '98022', '98023', '98024', '98027', '98028', '98029', '98030', '98031', '98032',
        '98033', '98034', '98038', '98039', '98040', '98042', '98045', '98052', '98053', '98055',
        '98056', '98058', '98059', '98065', '98070', '98072', '98074', '98075', '98077', '98092',
        '98102', '98103', '98105', '98106', '98107', '98108', '98109', '98112', '98115', '98116',
        '98117', '98118', '98119', '98122', '98125', '98126', '98133', '98136', '98144', '98146',
        '98148', '98155', '98166', '98168', '98177', '98178', '98188', '98198', '98199',
    ]
    selected_zipcode = sidebar.selectbox('Zipcode', zipcode_options)

    data = {
        'bedrooms': bedrooms,
        'bathrooms': bathrooms,
        'sqft_living': sqft_living,
        'sqft_lot': sqft_land,
        'floors': floors,
        'waterfront': waterfront,
        'view': view,
        'condition': condition,
        'grade': grade,
        'sqft_above': sqft_above,
        'sqft_basement': sqft_basement,
        'yr_built': yr_built,
        'yr_renovated': yr_renovated,
        'sqft_living15': sqft_living15,
        'sqft_lot15': sqft_lot15,
    }
    # One-hot encode the zipcode in a single pass so each flag is written once
    # and the column order does not depend on which zipcode was picked.
    data.update({f'zipcode_{code}': code == selected_zipcode for code in zipcode_options})

    return pd.DataFrame(data, index=[0])
|
105 |
+
|
106 |
+
# Render the sidebar widgets and keep the resulting single-row frame.
user_features = user_input_features()

# Echo the collected inputs in the main pane.
st.subheader("User Input Parameters")
st.write(user_features)
|
110 |
+
|
111 |
+
#loading dataset
|
112 |
+
|
113 |
+
@st.cache_data
def load_dataset():
    """Read ``house_data.csv`` (relative path) into a DataFrame.

    Decorated with ``st.cache_data``.
    """
    frame = pd.read_csv('house_data.csv')
    return frame
|
116 |
+
|
117 |
+
df = load_dataset()

# Separate the regression target ("price") from the remaining feature columns.
X, y = df.drop(columns="price"), df["price"]
|
120 |
+
|
121 |
+
#splitting dataset
|
122 |
+
@st.cache_data
def split_dataset():
    """Split the module-level ``X`` / ``y`` into train and test partitions.

    Holds out 20% of the rows for testing with a fixed ``random_state`` of 0.

    Returns:
        The ``train_test_split`` result, in the order
        ``X_train, X_test, y_train, y_test``.
    """
    partitions = train_test_split(X, y, test_size=0.2, random_state=0)
    return partitions
|
125 |
+
|
126 |
+
# Unpack the train/test partitions returned by split_dataset() (20% held out, random_state=0).
X_train, X_test, y_train, y_test = split_dataset()
|
127 |
+
|
128 |
+
|
129 |
+
#model Training
|
130 |
+
# cache_resource keeps the fitted estimator itself and returns that same object
# on every rerun. cache_data would serialize the 500-tree forest and
# deserialize a fresh copy on each call.
@st.cache_resource
def train_model():
    """Fit and return the random-forest regressor used by the app.

    Trains on the module-level ``X_train`` / ``y_train``.

    Returns:
        The fitted ``RandomForestRegressor``. The instance is shared across
        reruns, so callers should treat it as read-only.
    """
    rfregressor = RandomForestRegressor(
        n_estimators=500,
        random_state=0,
        n_jobs=4,
        max_depth=30,
        max_features='sqrt',
        min_samples_leaf=1,
        min_samples_split=10,
    )
    rfregressor.fit(X_train, y_train)
    return rfregressor
|
136 |
+
|
137 |
+
# Fitted regressor; passed to analyze_model() further down for the evaluation metrics.
model = train_model()
|
138 |
+
|
139 |
+
#Prediction
|
140 |
+
|
141 |
+
@st.cache_data
def make_prediction(features):
    """Predict prices for the rows of ``features``.

    Args:
        features: DataFrame containing every column of the module-level
            training frame ``X`` (extra columns are dropped by the selection).

    Returns:
        The array produced by the model's ``predict``.
    """
    # The model is fetched here rather than passed in as an argument, which
    # avoids Streamlit's UnhashableParamError.
    regressor = train_model()

    # Select the columns in training order so they line up with the fitted model.
    ordered = features[list(X.columns)]
    return regressor.predict(ordered)
|
153 |
+
|
154 |
+
# Only the user's row is predicted here. The test-set predictions needed for
# the metrics are computed inside analyze_model(), so no separate module-level
# test-set prediction is made.
prediction = make_prediction(user_features)

st.write('## Prediction')
st.write(f'Predicted House Price: **${int(prediction[0]):,}**')

st.write("## Data and Result Analysis")
st.subheader('Dataframe Sample')
st.write(df.head(10))
|
164 |
+
|
165 |
+
#Model Analysis
|
166 |
+
|
167 |
+
def analyze_model(X_test, y_test, model):
    """Score ``model`` on a held-out set.

    Args:
        X_test: Feature rows to predict on.
        y_test: True target values for ``X_test``.
        model: Fitted estimator exposing ``predict``.

    Returns:
        Tuple ``(mse, rmse, mae, r2)``: mean squared error, its square root,
        mean absolute error and the R-squared score.
    """
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    return mse, rmse, mae, r2
|
175 |
+
|
176 |
+
mse, rmse, mae, r2 = analyze_model(X_test, y_test, model)

# Show each evaluation metric on its own line, rounded to two decimals.
st.subheader('Mathematical Result Analysis')
for metric_label, metric_value in (
    ('R-squared (R2)', r2),
    ('Mean Squared Error (MSE)', mse),
    ('Root Mean Squared Error (RMSE)', rmse),
    ('Mean Absolute Error (MAE)', mae),
):
    st.write(f'{metric_label}: **{metric_value:.2f}**')
|
184 |
+
|
185 |
+
|
186 |
+
# Add space to push the footer to the bottom
|
187 |
+
# Vertical spacer first, then the Font Awesome stylesheet that supplies the
# icon classes used in the footer links.
for head_snippet in (
    "<br><br>",
    '<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.2/css/all.min.css">',
):
    st.markdown(head_snippet, unsafe_allow_html=True)

# NOTE(review): the contact address below appears redacted in this copy of the
# file; restore the real address before deploying.
footer_html = """
---
### Contact Me
For inquiries or support, please contact me at [[email protected]](mailto:[email protected]).

[<i class="fab fa-github" style="color: white; font-size: 25px;"></i>](https://github.com/vigneshmaradiya)

[<i class="fab fa-linkedin" style="color: white; font-size: 25px;"></i>](https://www.linkedin.com/in/vignesh-maradiya/)

© 2024 Vignesh Maradiya. All rights reserved. Developed by Vignesh Maradiya.
"""

st.markdown(footer_html, unsafe_allow_html=True)
|
house_data.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
requirements.txt
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
matplotlib==3.8.2
|
2 |
+
numpy==1.26.3
|
3 |
+
pandas==2.1.4
|
4 |
+
scikit_learn==1.3.2
|
5 |
+
seaborn==0.13.1
|
6 |
+
streamlit==1.29.0
|