File size: 5,340 Bytes
f1e0278 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 |
## libraries importation
##data handling
import pandas as pd
import numpy as np
###encoders and model
from prophet import Prophet
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OrdinalEncoder
from category_encoders import BinaryEncoder
##error metrics
from sklearn.metrics import mean_absolute_error as MAE
from sklearn.metrics import mean_squared_log_error as MSLE
from sklearn.metrics import mean_squared_error as MSE
from statsmodels.tools.eval_measures import rmse
##visualization and webapp
import plotly.express as ex
import plotly.offline as po
from prophet.plot import plot_plotly, plot_components_plotly
import streamlit as st
import matplotlib.pyplot as plt
import seaborn as sns
##model loading
import joblib
### Load the datasets
## Training data; the "Unnamed: 0" column is discarded right after reading
## (presumably a saved index column -- confirm against the CSV)
train = pd.read_csv("dataframes/train.csv").drop(columns="Unnamed: 0")
## Untouched copy of the training frame, taken before `train` is modified
train_copy = train.copy()
## Training target, kept as a one-column DataFrame
train_target = train[["y"]]
## Test set that still carries its target column
t_w_target = pd.read_csv("dataframes/test_with_y.csv")
## Test features
test = pd.read_csv("dataframes/test.csv")
## Observed sales for the test period, kept as a one-column DataFrame
test_target = t_w_target[["sales"]]
## Data encoding
## Category order handed to the ordinal encoder for the "locale" column
rank = ["National", "Regional", "Local", "Not Holiday"]
## One encoder per categorical column
BE = BinaryEncoder(cols= "holiday")
OE = OrdinalEncoder(categories=[rank])
LE = LabelEncoder()
## Binary-encode "holiday" on the features only (the target "y" is dropped first)
train = BE.fit_transform(train.drop(columns="y"))
## Ordinal-encode "locale" following the order given in `rank`
train[["locale"]] = OE.fit_transform(train[["locale"]])
## Label-encode "transferred"
train["transferred"] = LE.fit_transform(train["transferred"])
## Quick look at the encoded training frame
train.head()
## First model: Prophet with the encoded holiday columns as extra regressors
model = Prophet(
    yearly_seasonality=True,
    seasonality_mode="multiplicative",
    seasonality_prior_scale=25,
)
## Exogenous variables (extra regressors) registered with the model
exo_cols = [
    "holiday_0",
    "holiday_1",
    "holiday_2",
    "locale",
    "transferred",
    "onpromotion",
    "transactions",
]
for regressor in exo_cols:
    model.add_regressor(regressor, standardize=True)
## Re-attach the target to the encoded features (index-aligned) so the model can be fitted
full_train = train.merge(train_target, left_index=True, right_index=True)
full_train
## Fit on the full training frame
model.fit(full_train)
## Predict on the test set
## Work on a separate frame so `test` itself stays untouched; drop() already
## returns a new DataFrame
test_features = test.drop(columns="Unnamed: 0")
test_features
## Rename the date column to "ds"
test_features = test_features.rename(columns={"date": "ds"})
## Apply the encoders fitted on the training data (transform only, no refit)
test_features[["locale"]] = OE.transform(test_features[["locale"]])
test_features["transferred"] = LE.transform(test_features["transferred"])
test_features = BE.transform(test_features)
test_features
## Forecast for the test period
eval_fbp = model.predict(test_features)
## Error metrics for the first model
## Keep only the point forecast. It is named eval_1 so that the builtin
## eval() is not shadowed.
eval_1 = eval_fbp[["yhat"]]
## The model is scored with MAE (expressed as a percentage of mean sales),
## RMSLE and RMSE. Arguments follow scikit-learn's (y_true, y_pred) order,
## matching the evaluation of the second model; these metrics are symmetric,
## so the values are unchanged.
## NOTE(review): MSLE raises if any forecast is negative -- confirm that
## yhat is non-negative for this data.
mean_abs_err = (MAE(test_target, eval_1) / test_target.mean()) * 100
rmsle = np.sqrt(MSLE(test_target, eval_1))
## Stored as rmse_1 so the `rmse` function imported from statsmodels is not
## overwritten by a number.
rmse_1 = np.sqrt(MSE(test_target, eval_1))
rmsle
mean_abs_err
rmse_1
### Collect the metrics of the first model in a single frame
final_results = pd.DataFrame({"MAE": mean_abs_err, "RMSLE": rmsle, "RMSE": rmse_1})
final_results
## Plot the forecast
model.plot(eval_fbp)
plt.show()
### Second approach: drop the hand-encoded holiday columns and rely on Prophet's built-in holidays
"""In this section, I drop the holiday columns and use Facebook Prophet's built-in holiday
feature instead.
"""
## Build the frames for the second model
## Training frame without the hand-supplied holiday columns
train_2 = train_copy.drop(columns=["holiday", "locale", "transferred"])
train_2
## Test frame without the holiday columns. The date column is renamed to "ds"
## because Prophet's predict() looks up the "ds" column; the rename is a
## no-op when the column is already called "ds".
test_2 = test.drop(
    columns=["Unnamed: 0", "holiday", "locale", "transferred"]
).rename(columns={"date": "ds"})
test_2
train_2
## Second model: Prophet with its built-in country holidays
model_2 = Prophet(
    yearly_seasonality=True,
    seasonality_mode="multiplicative",
    seasonality_prior_scale=25,
)
## Holiday effect from Prophet's country holiday calendar
model_2.add_country_holidays(country_name="ECU")
## Every column other than "ds" and "y" becomes an extra regressor
extra_regressors = train_2.drop(columns=["ds", "y"]).columns
for regressor_name in extra_regressors:
    model_2.add_regressor(regressor_name, standardize=True, prior_scale=20)
## Fit on the training frame, then forecast the test period
model_2.fit(train_2)
eval_2_fbp = model_2.predict(test_2)
## Point forecasts of the second model
eval_2 = eval_2_fbp[["yhat"]]
## Score the second model: MAE as a percentage of mean sales, RMSLE and RMSE
mae_2 = MAE(test_target, eval_2) / test_target.mean() * 100
rmsle_2 = np.sqrt(MSLE(test_target, eval_2))
rmse_2 = np.sqrt(MSE(test_target, eval_2))
metrics_2 = {"MAE": mae_2, "RMSLE": rmsle_2, "RMSE": rmse_2}
final_error_2 = pd.DataFrame(metrics_2)
## Show the second model's metrics followed by the first model's
final_error_2
final_results
## Component plot and forecast plot for the second model
model_2.plot_components(eval_2_fbp)
model_2.plot(eval_2_fbp)
plt.show()
""" Our second model, which uses Prophet's built-in holiday effects, did pretty well: it gave us a much lower RMSLE than our
initial model, where we had to hand-encode the holiday effect.
"""
##saving my Facebook Prophet model
#joblib.dump(model_2,"Z:/Users/Sonny Otchi/Desktop/models/fbpmodel.joblib")
"""
Note: Since we used facebook prophet's inbuilt holiday effect, we will not need to save any of the encoders
"""
|