Spaces:
Runtime error
Runtime error
# -*- coding: utf-8 -*- | |
"""dynamic pricing.ipynb | |
Automatically generated by Colaboratory. | |
Original file is located at | |
https://colab.research.google.com/drive/1pMuvzwELNm1DsTdL5dfBdA2HCjB6uwgh | |
""" | |
# Commented out IPython magic to ensure Python compatibility. | |
import datetime | |
import joblib | |
import numpy as np | |
import pandas as pd | |
# import matplotlib.pyplot as plt | |
# import seaborn as sns | |
# %matplotlib inline | |
from sklearn.model_selection import train_test_split | |
from sklearn.linear_model import LinearRegression | |
from sklearn.ensemble import RandomForestRegressor | |
from sklearn.preprocessing import LabelEncoder | |
from sklearn.preprocessing import StandardScaler | |
from sklearn.metrics import r2_score | |
# Load the raw data. The last column is used as the regression target and the
# remaining columns as candidate features.
dataset = pd.read_csv("Pop_Data.csv")
# Outside a notebook a bare `dataset.head(5)` shows nothing, so print it.
print(dataset.head(5))

# Encode the weekday labels as integers. The fitted encoder stays at module
# level because predict() needs the same mapping for incoming data.
label_encoder = LabelEncoder()
dataset['day_of_week'] = label_encoder.fit_transform(dataset['day_of_week'])

# 70/30 train/test split with a fixed seed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    dataset.iloc[:, :-1],
    dataset.iloc[:, -1],
    test_size=0.3,
    random_state=42,
)
X_train.info()

# EDA
# Keep only the columns from position 3 onwards as model features.
# `.copy()` detaches the frames from the split result so later column
# updates modify real data rather than a slice of another frame.
X_train = X_train.iloc[:, 3:].copy()
X_test = X_test.iloc[:, 3:].copy()
# The original referenced `X_train.info` without calling it, which is a no-op.
X_train.info()
# plt.figure(figsize = (12, 8)) | |
# plot = sns.countplot(x = 'day_of_week', data = X_train) | |
# plt.xticks(rotation = 90) | |
# for p in plot.patches: | |
# plot.annotate(p.get_height(), | |
# (p.get_x() + p.get_width() / 2.0, | |
# p.get_height()), | |
# ha = 'center', | |
# va = 'center', | |
# xytext = (0, 5), | |
# textcoords = 'offset points') | |
# plt.title("Price changes based on day") | |
# plt.xlabel("Day") | |
# plt.ylabel("Price") | |
# Report the number of missing values per feature: train first, then test.
for column in ("day_of_week", "hour_of_day", "popularity_percent_normal"):
    print(X_train[column].isnull().sum())
    print(X_test[column].isnull().sum())

# Impute missing popularity values with the mean of the *training* column.
# The same training mean is applied to the test split so no test information
# leaks into preprocessing and no NaN reaches the models at prediction time.
# Reassigning the frame (instead of `fillna(..., inplace=True)` on a selected
# column) guarantees the update is applied regardless of pandas copy/view
# semantics.
popularity_mean = X_train["popularity_percent_normal"].astype("float64").mean()
X_train = X_train.fillna({"popularity_percent_normal": popularity_mean})
X_test = X_test.fillna({"popularity_percent_normal": popularity_mean})
# X_train = pd.get_dummies(X_train, | |
# columns = ["day_of_week"], | |
# drop_first = True) | |
# X_test = pd.get_dummies(X_test, | |
# columns = ["day_of_week"], | |
# drop_first = True) | |
# missing_cols = set(X_train.columns) - set(X_test.columns) | |
# for col in missing_cols: | |
# X_test[col] = 0 | |
# X_test = X_test[X_train.columns] | |
# Standardise the features. The scaler is fitted on the training split only
# and then applied unchanged to both splits.
standardScaler = StandardScaler()
standardScaler.fit(X_train)
X_train = standardScaler.transform(X_train)
X_test = standardScaler.transform(X_test)

# Baseline model: ordinary least squares. The R^2 score is printed because a
# bare `r2_score(...)` expression is silently discarded outside a notebook.
linearRegression = LinearRegression()
linearRegression.fit(X_train, y_train)
y_pred = linearRegression.predict(X_test)
print("LinearRegression R2:", r2_score(y_test, y_pred))

# Main model: random forest. Seeded like the train/test split so the model
# that gets saved is reproducible from run to run.
rf = RandomForestRegressor(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)
y_pred = rf.predict(X_test)
print("RandomForestRegressor R2:", r2_score(y_test, y_pred))
def save_model(model, filename):
    """Serialize ``model`` to ``filename`` with joblib.

    Args:
        model: Object to persist; the call in this file passes the fitted
            random forest.
        filename: Destination path handed to ``joblib.dump``.
    """
    joblib.dump(value=model, filename=filename)
# Persist the fitted random forest to disk.
save_model(model=rf, filename="random_forest_model.pkl")
def predict(data):
    """Predict the target for one observation with the trained random forest.

    The observation is preprocessed with the encoder and scaler that were
    fitted on the training data. They must only be *applied* here: re-fitting
    them on a single sample would map every weekday to 0 and scale every
    feature to 0, making all predictions identical.

    Args:
        data: Iterable whose first three items are, in order, the
            ``day_of_week`` label followed by two numeric feature values
            (presumably ``hour_of_day`` and ``popularity_percent_normal``,
            in the column order used for training -- confirm against the
            data set).

    Returns:
        The array returned by ``rf.predict`` for the single-row input.

    Raises:
        IndexError: If ``data`` holds fewer than three items.
        ValueError: If the ``day_of_week`` label was not seen when
            ``label_encoder`` was fitted, or a feature value is not numeric.
    """
    values = list(data)
    # Look the label up in the mapping learned during training.
    day_of_week_encoded = label_encoder.transform([values[0]])[0]
    # One row, three columns; force a float array so numeric strings work too.
    features = np.asarray(
        [day_of_week_encoded, values[1], values[2]], dtype=float
    ).reshape(1, -1)
    # Apply the training-time mean/variance instead of re-fitting the scaler.
    scaled_features = standardScaler.transform(features)
    return rf.predict(scaled_features)