|
|
|
|
|
import pandas as pd |
|
import numpy as np |
|
|
|
from prophet import Prophet |
|
from sklearn.preprocessing import LabelEncoder |
|
from sklearn.preprocessing import OrdinalEncoder |
|
from category_encoders import BinaryEncoder |
|
|
|
from sklearn.metrics import mean_absolute_error as MAE |
|
from sklearn.metrics import mean_squared_log_error as MSLE |
|
from sklearn.metrics import mean_squared_error as MSE |
|
from statsmodels.tools.eval_measures import rmse |
|
|
|
import plotly.express as ex |
|
import plotly.offline as po |
|
from prophet.plot import plot_plotly, plot_components_plotly |
|
import streamlit as st |
|
import matplotlib.pyplot as plt |
|
import seaborn as sns |
|
|
|
|
|
import joblib |
|
|
|
|
|
|
|
|
|
# Load the training data. The "Unnamed: 0" column is discarded straight away
# and the target column "y" is kept aside as a one-column frame.
train = pd.read_csv("dataframes/train.csv").drop(columns="Unnamed: 0")
train_target = train[["y"]]

# Test features, plus a second test file whose "sales" column serves as the
# ground truth when the forecasts are scored.
test = pd.read_csv("dataframes/test.csv")
t_w_target = pd.read_csv("dataframes/test_with_y.csv")
test_target = t_w_target[["sales"]]

# Snapshot of the un-encoded training frame (still containing "y"); the
# second model is built from this copy.
train_copy = train.copy()
|
|
|
|
|
|
|
|
|
|
|
# Encode the categorical holiday descriptors of the training features.
# The target "y" is removed first so the encoders only ever see features.
train = train.drop(columns="y")

# "holiday": binary encoding (fit, then transform the same frame).
BE = BinaryEncoder(cols="holiday").fit(train)
train = BE.transform(train)

# "locale": ordinal encoding with an explicit category order, so that
# "National" maps to the lowest code and "Not Holiday" to the highest.
rank = ["National", "Regional", "Local", "Not Holiday"]
OE = OrdinalEncoder(categories=[rank]).fit(train[["locale"]])
train[["locale"]] = OE.transform(train[["locale"]])

# "transferred": plain integer labels.
LE = LabelEncoder().fit(train["transferred"])
train["transferred"] = LE.transform(train["transferred"])

# Notebook-style preview; has no effect when run as a script.
train.head()
|
|
|
|
|
|
|
|
|
# First model: Prophet with yearly seasonality switched on, multiplicative
# seasonality and a seasonality prior scale of 25.
model = Prophet(
    yearly_seasonality=True,
    seasonality_mode="multiplicative",
    seasonality_prior_scale=25,
)

# Exogenous regressors fed to the model. The holiday_* names are presumably
# the columns the binary encoder derives from "holiday" -- verify against the
# encoded training frame if the set of holiday categories changes.
exo_cols = [
    "holiday_0",
    "holiday_1",
    "holiday_2",
    "locale",
    "transferred",
    "onpromotion",
    "transactions",
]
for regressor_name in exo_cols:
    model.add_regressor(regressor_name, standardize=True)

# Re-attach the target to the encoded features by aligning on the row index.
full_train = train.merge(train_target, left_index=True, right_index=True)

# Notebook-style preview; has no effect when run as a script.
full_train

model.fit(full_train)
|
|
|
|
|
|
|
|
|
|
|
|
|
# Build the feature frame for the first model from the raw test data:
# drop the "Unnamed: 0" column and rename "date" to "ds". `drop` already
# returns a new frame, so `test` itself is left untouched.
test_features = (
    test
    .drop(columns="Unnamed: 0")
    .rename(columns={"date": "ds"})
)

# Re-use the encoders fitted on the training data (transform only, no refit).
test_features[["locale"]] = OE.transform(test_features[["locale"]])
test_features["transferred"] = LE.transform(test_features["transferred"])
test_features = BE.transform(test_features)

# Notebook-style preview; has no effect when run as a script.
test_features
|
|
|
|
|
# Forecast the test period with the first model and keep only the point
# forecast column.
eval_fbp = model.predict(test_features)

# Named `eval_1` rather than `eval` so the Python builtin is not shadowed.
eval_1 = eval_fbp[["yhat"]]

# Error metrics, with arguments in scikit-learn's (y_true, y_pred) order to
# match the second model's evaluation.
# `mean_abs_err` is the MAE expressed as a percentage of the mean of the
# actual values; because `test_target.mean()` is a Series, so is the result.
mean_abs_err = (MAE(test_target, eval_1) / test_target.mean()) * 100
rmsle = np.sqrt(MSLE(test_target, eval_1))
# Named `rmse_1` so the `rmse` function imported from statsmodels stays usable.
rmse_1 = np.sqrt(MSE(test_target, eval_1))

# One-row summary table. Note the "MAE" column holds the percentage value
# computed above, not the raw mean absolute error.
final_results = pd.DataFrame(
    {"MAE": mean_abs_err, "RMSLE": rmsle, "RMSE": rmse_1}
)

# Notebook-style preview; has no effect when run as a script.
final_results

# Plot the forecast of the first model.
model.plot(eval_fbp)
plt.show()
|
|
|
|
|
|
|
|
|
"""In this section, I drop the hand-encoded holiday columns ("holiday", "locale" and
"transferred") and use Facebook Prophet's built-in country-holiday feature instead.
"""
|
|
|
|
|
|
|
# Frames for the second model: the hand-encoded holiday descriptors are
# removed because holiday effects come from Prophet's built-in calendar.
train_2 = train_copy.drop(columns=["holiday", "locale", "transferred"])
test_2 = test.drop(columns=["Unnamed: 0", "holiday", "locale", "transferred"])

# Prophet requires the date column to be called "ds". The raw test frame
# appears to carry it as "date" (it is renamed when the first model's test
# features are built), so apply the same rename here; without it, predicting
# on `test_2` fails for lack of a "ds" column. The guard keeps this a no-op
# if the frame already uses "ds".
if "ds" not in test_2.columns and "date" in test_2.columns:
    test_2 = test_2.rename(columns={"date": "ds"})

# Notebook-style previews; they have no effect when run as a script.
test_2
train_2
|
|
|
|
|
|
|
# Second model: same seasonality configuration as the first one, but holiday
# effects come from Prophet's built-in country calendar ("ECU").
model_2 = Prophet(
    seasonality_prior_scale=25,
    seasonality_mode="multiplicative",
    yearly_seasonality=True,
)
model_2.add_country_holidays(country_name="ECU")

# Every training column other than the date ("ds") and the target ("y") is
# registered as an extra regressor.
regressor_names = train_2.drop(columns=["ds", "y"]).columns
for regressor_name in regressor_names:
    model_2.add_regressor(regressor_name, standardize=True, prior_scale=20)

# Fit on the training frame and forecast the test period.
model_2.fit(train_2)
eval_2_fbp = model_2.predict(test_2)
|
|
|
|
|
|
|
# Point forecasts of the second model.
eval_2 = eval_2_fbp[["yhat"]]

# Same three metrics as for the first model. `mae_2` is the MAE expressed as
# a percentage of the mean of the actual values.
mae_2 = 100 * (MAE(test_target, eval_2) / test_target.mean())
rmsle_2 = np.sqrt(MSLE(test_target, eval_2))
rmse_2 = np.sqrt(MSE(test_target, eval_2))

# One-row summary table for the second model.
final_error_2 = pd.DataFrame(
    {
        "MAE": mae_2,
        "RMSLE": rmsle_2,
        "RMSE": rmse_2,
    }
)

# Notebook-style previews for comparing both models side by side; they have
# no effect when run as a script.
final_error_2
final_results

# Component breakdown and overall forecast plot of the second model.
model_2.plot_components(eval_2_fbp)
model_2.plot(eval_2_fbp)
plt.show()
|
|
|
"""Our second model, with Prophet's embedded holiday effects, did pretty well: it gave us a much
lower RMSLE than our initial model, where we had to hand-encode the holiday effect ourselves.
"""
|
|
|
|
|
|
|
"""
Note: Since we used Facebook Prophet's built-in holiday effect, we do not need to save any of the encoders.
"""
|
|