|
import numpy as np |
|
import pandas as pd |
|
import matplotlib.pyplot as plt |
|
from mpl_toolkits.mplot3d import Axes3D |
|
from sklearn.model_selection import train_test_split |
|
import gradio as gr |
|
import plotly.graph_objects as go |
|
|
|
class LinearRegression:
    """Linear regression on min-max scaled features, trained by gradient descent.

    The optimiser keeps an exponential moving average of the gradient
    (factor 0.9) and of the squared gradient (factor 0.99) and divides the
    former by the square root of the latter, with a stepwise learning-rate
    decay every 50 iterations.

    Attributes:
        eta: Base learning rate.
        n_iter: Number of gradient steps performed by ``fit``.
        w: Weight column vector of shape (n_features, 1); ``None`` before fit.
        b: Scalar bias; ``None`` before fit.
        Lambda: Regularisation strength forwarded to ``Grad_de`` (see the
            note there: the gradient currently does not use it).
        Min, Max: Per-feature minimum / maximum learned by ``standardize``.
        cost_history: Cost after every gradient step of the last ``fit``.
    """

    def __init__(self, eta=0.01, n_iter=1000):
        self.eta = eta
        self.n_iter = n_iter
        self.w = None
        self.b = None
        self.Lambda = 0.7
        self.Min = None
        self.Max = None
        self.cost_history = None

    def _span(self):
        """Return ``Max - Min`` with zero ranges replaced by 1.

        A constant feature has a zero range; dividing by 1 instead maps it
        to 0 rather than producing NaN/inf.
        """
        span = np.asarray(self.Max - self.Min, dtype=np.float64)
        return np.where(span == 0, 1.0, span)

    def ScaleData(self, X):
        """Min-max scale ``X`` with the ``Min``/``Max`` learned by ``standardize``."""
        return (X - self.Min) / self._span()

    def compute_cost(self, X, y, w, b):
        """Return the halved mean squared error of ``X @ w + b`` against ``y``."""
        m = len(y)
        residual = (np.dot(X, w) + b) - y
        return np.sum(residual ** 2) / (2 * m)

    @staticmethod
    def SubGrad(X, y, w, b):
        """Return the cost gradient with respect to ``w`` as an (n_features, 1) array.

        Declared as a static method so that both ``LinearRegression.SubGrad(...)``
        and ``instance.SubGrad(...)`` accept the same four arguments.
        """
        m = X.shape[0]
        residual = ((np.dot(X, w) + b) - y).reshape(m, 1)
        return (np.dot(X.T, residual) / m).reshape((X.shape[1], 1))

    def Grad_de(self, X, y, X_test, y_test, w, eta, b, lamda,
                decay_rate=0.045, n_iter=1000):
        """Run the gradient-descent loop and return ``(w, b, cost_history)``.

        Args:
            X, y: Training features (m, n) and targets (m,) or (m, 1).
            X_test, y_test: Data on which the cost is recorded after each step.
            w: Initial weights, reshaped to (n, 1).
            eta: Base learning rate.
            b: Initial bias.
            lamda: Accepted for interface compatibility; the gradient does
                not apply any regularisation term.
            decay_rate: Learning-rate decay applied once per 50 iterations.
            n_iter: Number of gradient steps (previously hard-coded to 1000).
        """
        X = np.asarray(X, dtype=np.float64)
        # Force column vectors: a 1-D y would broadcast the residual to (m, m).
        y = np.asarray(y, dtype=np.float64).reshape(-1, 1)
        y_test = np.asarray(y_test, dtype=np.float64).reshape(-1, 1)
        w = np.asarray(w, dtype=np.float64).reshape(-1, 1)

        m, n_features = X.shape
        cost_history = []
        grad_avg_w = np.zeros((n_features, 1))
        sq_avg_w = np.zeros((n_features, 1))
        grad_avg_b = 0.0
        sq_avg_b = 0.0
        step = eta

        for count in range(1, int(n_iter) + 1):
            grad_w = self.SubGrad(X, y, w, b)
            grad_b = np.sum(((np.dot(X, w) + b) - y) / m)

            # Moving averages of the gradient and of its square.
            grad_avg_w = 0.9 * grad_avg_w + 0.1 * grad_w
            grad_avg_b = 0.9 * grad_avg_b + 0.1 * grad_b
            sq_avg_w = 0.99 * sq_avg_w + 0.01 * (grad_w ** 2)
            sq_avg_b = 0.99 * sq_avg_b + 0.01 * (grad_b ** 2)

            # 1e-8 keeps the division finite when the squared average is 0.
            w = w - (step / (np.sqrt(sq_avg_w) + 1e-8)) * grad_avg_w
            b = b - (step / (np.sqrt(sq_avg_b) + 1e-8)) * grad_avg_b

            cost_history.append(self.compute_cost(X_test, y_test, w, b))
            step = eta / (1 + np.floor(count / 50) * decay_rate)

        return (w, b, cost_history)

    def standardize(self, X):
        """Learn per-feature ``Min``/``Max`` from ``X`` and return ``X`` scaled."""
        X = np.asarray(X, dtype=np.float64)
        self.Min = X.min(axis=0)
        self.Max = X.max(axis=0)
        return self.ScaleData(X)

    def fit(self, X, y):
        """Fit weights and bias on ``X`` (m, n) and ``y``; return ``self``.

        The cost history is recorded on the (scaled) training data itself.
        """
        X = np.asarray(X, dtype=np.float64)
        y = np.asarray(y, dtype=np.float64).reshape(-1, 1)
        self.w = (np.random.rand(X.shape[1]) * 0.01).reshape(X.shape[1], 1)
        self.b = 0
        # Kept for compatibility: the original fit also reset this attribute.
        self.cost = []
        X = self.standardize(X)
        (self.w, self.b, self.cost_history) = self.Grad_de(
            X, y, X, y, self.w, self.eta, self.b, self.Lambda,
            n_iter=self.n_iter,
        )
        return self

    def predict(self, X):
        """Return the predicted value for the first row of ``X`` as a scalar.

        Raises:
            RuntimeError: If the model has not been fitted yet.
        """
        if self.w is None or self.Min is None or self.Max is None:
            raise RuntimeError("LinearRegression.predict called before fit")
        X = np.atleast_2d(np.asarray(X, dtype=np.float64))
        scaled = self.ScaleData(X)
        predictions = np.dot(scaled, self.w) + self.b
        return predictions[0][0]
|
|
|
# Location of the housing dataset. A raw string keeps the Windows backslashes
# literal instead of relying on unrecognised escape sequences (\P, \K, \C, \h).
DATA_PATH = r"C:\Project\Kaggle\Cali_housing_Price\housing_price_dataset.csv"

# `df` keeps the untouched rows (string neighbourhood names) for the
# similar-houses plot; `Data` is the working copy used for training.
df = pd.read_csv(DATA_PATH)
Data = df.copy()

y = Data['Price']
X = Data.drop(columns=['Price'])

# Encode the neighbourhood as a number: Suburb -> 2, Urban -> 3, and every
# other value -> 1, in one vectorised pass instead of a row-by-row loop.
X['Neighborhood'] = X['Neighborhood'].map({'Suburb': 2, 'Urban': 3}).fillna(1)

Dx = X.to_numpy(dtype=np.float64)
Dy = y.to_numpy(dtype=np.float64)

X_train, X_test, y_train, y_test = train_test_split(Dx, Dy, test_size=0.02, random_state=42)

# NOTE(review): this module-level `w` is not read by anything visible in this
# file (the model draws its own initial weights in fit); kept in case other
# code refers to it.
w = (np.random.rand(X.shape[1]) * 0.01).reshape(X.shape[1], 1)

# Targets as column vectors of shape (m, 1).
y_train = y_train.reshape(len(y_train), 1)
y_test = y_test.reshape(len(y_test), 1)

HousePriceModel = LinearRegression()
HousePriceModel.fit(X_train, y_train)
|
def HousePrice(SquareFeet, Bedrooms, Bathrooms, Neighborhood, YearBuilt):
    """Predict a house price and plot it next to similar houses in the dataset.

    Args:
        SquareFeet: Living area; must be non-negative.
        Bedrooms: Number of bedrooms; must be non-negative.
        Bathrooms: Number of bathrooms; must be non-negative.
        Neighborhood: One of 'Suburb', 'Urban' or 'Rural'.
        YearBuilt: Construction year; must lie in 1900..2024.

    Returns:
        A tuple ``(predicted_price, figure)`` where ``figure`` is a Plotly
        scatter of actual prices of similar houses (blue) plus the
        prediction (red).

    Raises:
        gr.Error: If any input is missing or outside its accepted range.
    """
    # Same numeric encoding the training data uses for the neighbourhood.
    neighborhood_codes = {'Suburb': 2, 'Urban': 3, 'Rural': 1}
    NumNeighborhood = neighborhood_codes.get(Neighborhood)
    if NumNeighborhood is None:
        raise gr.Error("Invalid Neighborhood")

    # A cleared gr.Number field arrives as None; report it as invalid input
    # instead of letting the comparison raise a TypeError.
    if YearBuilt is None or YearBuilt > 2024 or YearBuilt < 1900:
        raise gr.Error("Invalid Year Built")
    if SquareFeet is None or SquareFeet < 0:
        raise gr.Error("Invalid Square Feet")
    if Bedrooms is None or Bedrooms < 0:
        raise gr.Error("Invalid Bedrooms")
    if Bathrooms is None or Bathrooms < 0:
        raise gr.Error("Invalid Bathrooms")

    X = np.array([SquareFeet, Bedrooms, Bathrooms, NumNeighborhood, YearBuilt]).reshape(1, 5)
    YPredict = HousePriceModel.predict(X)

    # Similar houses: same bedroom/bathroom count, area within 20 sq ft and
    # construction year within 5 years of the request.
    similar = (
        df['SquareFeet'].between(SquareFeet - 20, SquareFeet + 20)
        & (df['Bedrooms'] == Bedrooms)
        & (df['Bathrooms'] == Bathrooms)
        & df['YearBuilt'].between(YearBuilt - 5, YearBuilt + 5)
    )
    filter_df = df[similar]

    # Select the hover columns by name so customdata[1..4] in the template
    # below do not depend on the column order of the CSV file.
    hover_columns = ['SquareFeet', 'Bedrooms', 'Bathrooms', 'Neighborhood', 'YearBuilt']
    df_list = filter_df[hover_columns].values.tolist()

    fig = go.Figure()
    fig.add_trace(go.Scatter(
        customdata=df_list,
        x=filter_df['SquareFeet'].tolist(),
        y=filter_df['Price'].tolist(),
        mode='markers',
        marker=dict(color='blue'),
        hoverinfo="text",
        hovertemplate='<b>Square Feet</b>: %{x}<br><b>Bedrooms</b>: %{customdata[1]}<br><b>Bathrooms</b>: %{customdata[2]}<br><b>Neighborhood</b>: %{customdata[3]}<br><b>Year Built</b>: %{customdata[4]}<br><b>Price</b>: %{y}<extra></extra>',
        name='House Price Actual',
    ))
    fig.add_trace(go.Scatter(
        x=[SquareFeet],
        y=[YPredict],
        mode='markers',
        marker=dict(color='red'),
        hovertext='Predicted Price',
        name='House Price Prediction',
    ))

    return YPredict, fig
|
|
|
# Gradio front end: five inputs, one button, and two outputs (predicted price
# and the similar-houses plot) that are both filled by HousePrice.
# NOTE(review): the Row/Column nesting below assumes all five inputs share one
# row with the button underneath — confirm against the intended layout.
with gr.Blocks() as demo:
    gr.Markdown("""
    # House Price Prediction
    This is a simple model that predicts the price of a house from its features. Houses in the dataset range from about 1000 to 3000 square feet.
    """)
    gr.Markdown("""
    Enter the features of the house and click 'Predict Price' to see the predicted price.
    The plot next to the prediction shows the actual prices of houses with similar features.
    """)
    with gr.Column():
        with gr.Row():
            SquareFeet = gr.Number(value=250, label="Square Feet")
            Bedrooms = gr.Number(value=3, label="Bedrooms")
            Bathrooms = gr.Number(value=1, label="Bathrooms")
            Neighborhood = gr.Radio(["Suburb", "Urban", "Rural"], label="Neighborhood")
            YearBuilt = gr.Number(value=2020, label="Year Built")
        # The output components are created inside the click() call, so they
        # render directly after the button.
        gr.Button("Predict Price").click(
            fn=HousePrice,
            inputs=[SquareFeet, Bedrooms, Bathrooms, Neighborhood, YearBuilt],
            outputs=[gr.Textbox(label="Predicted Price"), gr.Plot(label="Similar Houses")],
        )

demo.launch()
|
|