Runtime error
Runtime error
File size: 3,491 Bytes
4cd6925 3e870fa 4cd6925 3e870fa 4cd6925 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 |
# -*- coding: utf-8 -*-
"""dynamic pricing.ipynb
Automatically generated by Colaboratory.
Original file is located at: (URL not preserved in this copy)
"""
# Commented out IPython magic to ensure Python compatibility.
import datetime
import joblib
import numpy as np
import pandas as pd
# import matplotlib.pyplot as plt
# import seaborn as sns
# %matplotlib inline
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score
# Load the raw data and integer-encode the categorical weekday column.
dataset = pd.read_csv("Pop_Data.csv")

label_encoder = LabelEncoder()
label_encoder.fit(dataset["day_of_week"])
dataset["day_of_week"] = label_encoder.transform(dataset["day_of_week"])

# Every column except the last is a candidate feature; the last column is the
# target. 30% of the rows are held out, with a fixed seed for a repeatable split.
X_train, X_test, y_train, y_test = train_test_split(
    dataset.iloc[:, :-1],
    dataset.iloc[:, -1],
    test_size=0.3,
    random_state=42,
)

"""# EDA"""
# Drop the first three columns from both feature frames.
X_train, X_test = X_train.iloc[:, 3:], X_test.iloc[:, 3:]
# plt.figure(figsize = (12, 8))
# plot = sns.countplot(x = 'day_of_week', data = X_train)
# plt.xticks(rotation = 90)
# for p in plot.patches:
# plot.annotate(p.get_height(),
# (p.get_x() + p.get_width() / 2.0,
# p.get_height()),
# ha = 'center',
# va = 'center',
# xytext = (0, 5),
# textcoords = 'offset points')
# plt.title("Price changes based on day")
# plt.xlabel("Day")
# plt.ylabel("Price")
# Impute missing popularity values with the mean of the TRAINING split only.
# The same value is applied to the test split so that no test statistics leak
# into training and the models never receive NaN at prediction time.
# `assign` returns a new frame, which avoids chained in-place assignment on a
# slice (deprecated in pandas and a no-op under copy-on-write).
popularity_mean = X_train["popularity_percent_normal"].astype("float64").mean()
X_train = X_train.assign(
    popularity_percent_normal=X_train["popularity_percent_normal"].fillna(popularity_mean)
)
X_test = X_test.assign(
    popularity_percent_normal=X_test["popularity_percent_normal"].fillna(popularity_mean)
)
# X_train = pd.get_dummies(X_train,
# columns = ["day_of_week"],
# drop_first = True)
# X_test = pd.get_dummies(X_test,
# columns = ["day_of_week"],
# drop_first = True)
# missing_cols = set(X_train.columns) - set(X_test.columns)
# for col in missing_cols:
# X_test[col] = 0
# X_test = X_test[X_train.columns]
# Standardise the features. The scaler learns its mean/variance from the
# training split only (fit_transform) and re-uses them unchanged for the test
# split (transform); calling transform on an unfitted scaler raises
# NotFittedError.
standardScaler = StandardScaler()
X_train = standardScaler.fit_transform(X_train)
X_test = standardScaler.transform(X_test)

# Baseline model: ordinary least squares.
linearRegression = LinearRegression().fit(X_train, y_train)
y_pred = linearRegression.predict(X_test)
linear_r2 = r2_score(y_test, y_pred)
# A bare expression only displays in a notebook; print so the score is visible
# when this file runs as a script.
print("LinearRegression R^2 on the test split:", linear_r2)

# Main model: random forest with 100 trees. `rf` is the estimator that is
# saved to disk and used by predict() further down in this file.
rf = RandomForestRegressor(n_estimators=100).fit(X_train, y_train)
y_pred = rf.predict(X_test)
rf_r2 = r2_score(y_test, y_pred)
print("RandomForestRegressor R^2 on the test split:", rf_r2)
def save_model(model, filename):
    """Serialize ``model`` to ``filename`` using ``joblib.dump``.

    Args:
        model: The object to persist (for example a fitted estimator).
        filename: Destination path, passed unchanged to ``joblib.dump``.

    Returns:
        None. The return value of ``joblib.dump`` is discarded.
    """
    joblib.dump(model, filename)
# Write the random-forest regressor `rf` to disk.
# NOTE(review): only the model is saved here; the label encoder and scaler
# used by predict() are not persisted by this call.
save_model(model=rf, filename="random_forest_model.pkl")
def predict(data):
    """Predict the target for a single observation with the random forest.

    The input is encoded and scaled with the module-level ``label_encoder``
    and ``standardScaler`` exactly as they were fitted on the training data,
    then passed to the module-level ``rf`` model.

    Args:
        data: Iterable whose first three items are, in order, the raw
            ``day_of_week`` label and two numeric feature values.
            NOTE(review): the order must match the feature columns the
            scaler and model were fitted on -- confirm against the dataset.

    Returns:
        The array returned by ``rf.predict`` for the one-row input.

    Raises:
        ValueError: If the ``day_of_week`` label was not seen when
            ``label_encoder`` was fitted, or if a feature value cannot be
            converted to ``float``.
        IndexError: If ``data`` yields fewer than three items.
    """
    values = list(data)

    # Use transform, not fit_transform: re-fitting on a single label would map
    # every weekday to 0 and overwrite the mapping learned from the dataset.
    day_of_week_encoded = label_encoder.transform([values[0]])[0]

    # Build a (1, n_features) float row; dtype=float also accepts numeric
    # strings, e.g. values arriving from a form field.
    row = np.asarray(
        [day_of_week_encoded, values[1], values[2]], dtype=float
    ).reshape(1, -1)

    # Use transform, not fit_transform: re-fitting the scaler on one row would
    # centre that row on itself and turn every input into all zeros.
    scaled_row = standardScaler.transform(row)
    return rf.predict(scaled_row)