File size: 5,340 Bytes
f1e0278
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
## libraries importation
##data handling
import pandas as pd
import numpy as np
###encoders and model
from prophet import Prophet
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OrdinalEncoder
from category_encoders import BinaryEncoder
##error metrics
from sklearn.metrics import mean_absolute_error as MAE
from sklearn.metrics import mean_squared_log_error as MSLE
from sklearn.metrics import mean_squared_error as MSE
from statsmodels.tools.eval_measures import rmse
##visualization and webapp
import plotly.express as ex
import plotly.offline as po
from prophet.plot import plot_plotly, plot_components_plotly
import streamlit as st
import matplotlib.pyplot as plt
import seaborn as sns

##model loading
import joblib

## ---- dataset loading ----

## training data: read the CSV and drop the "Unnamed: 0" column
## (presumably a saved index column -- confirm against the CSV)
train = pd.read_csv("dataframes/train.csv").drop(columns="Unnamed: 0")

## the training target is kept as a one-column DataFrame (column "y")
train_target = train[["y"]]

## test data that still carries the target column
t_w_target = pd.read_csv("dataframes/test_with_y.csv")

## test features are read from their own file
test = pd.read_csv("dataframes/test.csv")

## the test target is the "sales" column of the frame that includes it
test_target = t_w_target[["sales"]]

## snapshot of the training frame, taken before the encoders below modify it
train_copy = train.copy()


## ---- feature encoding for the training frame ----

## binary encoder for the "holiday" column; it is fitted on the training
## features, i.e. the frame with the target column "y" removed
BE = BinaryEncoder(cols="holiday")
train = BE.fit_transform(train.drop(columns="y"))

## ordinal encoder for "locale", using an explicit category order
rank = [
    "National",
    "Regional",
    "Local",
    "Not Holiday",
]
OE = OrdinalEncoder(categories=[rank])
train[["locale"]] = OE.fit_transform(train[["locale"]])

## label encoder for the "transferred" column
LE = LabelEncoder()
train["transferred"] = LE.fit_transform(train["transferred"])

## notebook-style preview of the encoded frame (the result is not used)
train.head()


## ---- first model: holiday information supplied as extra regressors ----

model = Prophet(
    yearly_seasonality=True,
    seasonality_mode="multiplicative",
    seasonality_prior_scale=25,
)

## exogenous variables (extra regressors) the model is told about
exo_cols = [
    "holiday_0",
    "holiday_1",
    "holiday_2",
    "locale",
    "transferred",
    "onpromotion",
    "transactions",
]

## register every regressor with standardisation switched on
for regressor_name in exo_cols:
    model.add_regressor(regressor_name, standardize=True)

## the model is fitted on a single frame, so the target is re-attached to
## the encoded features by aligning both frames on their index
full_train = train.merge(train_target, left_index=True, right_index=True)

full_train

## fit the model on the combined frame
model.fit(full_train)


## ---- predictions on the test set with the first model ----

## work on a copy of the test sample: drop the "Unnamed: 0" column and
## rename the date column to "ds"
test_features = test.copy()
test_features = test_features.drop(columns="Unnamed: 0").rename(
    columns={"date": "ds"}
)

test_features

## reuse the encoders fitted on the training frame, in this order:
## ordinal ("locale"), label ("transferred"), then binary ("holiday")
test_features[["locale"]] = OE.transform(test_features[["locale"]])
test_features["transferred"] = LE.transform(test_features["transferred"])
test_features = BE.transform(test_features)

test_features

## forecast for the encoded test frame
eval_fbp = model.predict(test_features)

## ---- error metrics for the first model ----

## keep only the point forecast; the name is `eval_1` (not `eval`) so the
## built-in eval() is not shadowed
eval_1 = eval_fbp[["yhat"]]

## the model is scored with MAE (expressed as a percentage of the mean of
## the test target), RMSLE and RMSE; arguments are passed in scikit-learn's
## (y_true, y_pred) order, the same order used for the second model
mean_abs_err = (MAE(test_target, eval_1) / test_target.mean()) * 100

rmsle = np.sqrt(MSLE(test_target, eval_1))

## stored as `rmse_1` so the `rmse` function imported from statsmodels at
## the top of the file is not overwritten
rmse_1 = np.sqrt(MSE(test_target, eval_1))

## notebook-style displays (no effect when run as a plain script)
rmsle
mean_abs_err
rmse_1

## collect the three scores in one frame; the "MAE" column holds the
## percentage computed above, not the raw mean absolute error
final_results = pd.DataFrame(
    {"MAE": mean_abs_err, "RMSLE": rmsle, "RMSE": rmse_1}
)
final_results

## plotting my outcome
model.plot(eval_fbp)
plt.show()

## ---- second model: data preparation ----
## In this section the hand-encoded holiday columns are dropped so that
## Facebook Prophet's built-in holiday feature can be used instead.

## new training frame without the holiday columns, built from the copy of
## the training data taken before encoding
train_2 = train_copy.drop(columns=["holiday", "locale", "transferred"])

train_2

## new test frame without the holiday columns. `test` itself was never
## renamed (only the `test_features` copy was), so "date" is renamed to
## "ds" here -- Prophet's predict() needs a "ds" column. The rename is a
## no-op if the column is already called "ds".
test_2 = test.drop(
    columns=["Unnamed: 0", "holiday", "locale", "transferred"]
).rename(columns={"date": "ds"})

test_2

train_2

## ---- second model: build, fit and predict ----

model_2 = Prophet(
    yearly_seasonality=True,
    seasonality_mode="multiplicative",
    seasonality_prior_scale=25,
)

## let Prophet add the holiday effect for country code "ECU"
model_2.add_country_holidays(country_name="ECU")

## every training column other than "ds" and "y" becomes an extra regressor
for regressor_name in train_2.drop(columns=["ds", "y"]).columns:
    model_2.add_regressor(regressor_name, standardize=True, prior_scale=20)

## fit on the training frame
model_2.fit(train_2)

## forecast for the test frame
eval_2_fbp = model_2.predict(test_2)

## ---- error metrics and plots for the second model ----

## keep only the point forecast
eval_2 = eval_2_fbp[["yhat"]]

## same three scores as for the first model: MAE as a percentage of the
## mean of the test target, RMSLE and RMSE
mae_2 = (MAE(y_true=test_target, y_pred=eval_2) / test_target.mean()) * 100
rmsle_2 = np.sqrt(MSLE(y_true=test_target, y_pred=eval_2))
rmse_2 = np.sqrt(MSE(y_true=test_target, y_pred=eval_2))

## collect the scores in one frame
final_error_2 = pd.DataFrame(dict(MAE=mae_2, RMSLE=rmsle_2, RMSE=rmse_2))

## notebook-style displays of both result frames for comparison
final_error_2

final_results

## component plot and forecast plot for the second model
model_2.plot_components(eval_2_fbp)
model_2.plot(eval_2_fbp)
plt.show()

""" our second model with embedded holiday effects did pretty well by giving us a much lower rmsle than our
initial model, where we had to hard-encode our holiday effect
"""
##saving my Facebook Prophet model
#joblib.dump(model_2,"Z:/Users/Sonny Otchi/Desktop/models/fbpmodel.joblib")

"""
Note: Since we used facebook prophet's inbuilt holiday effect, we will not need to save any of the encoders
"""