dynamic-pricing / dynamic_pricing.py
shollercoaster's picture
first commit
25a7261
# -*- coding: utf-8 -*-
"""dynamic pricing.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1pMuvzwELNm1DsTdL5dfBdA2HCjB6uwgh
"""
# Commented out IPython magic to ensure Python compatibility.
import datetime
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# %matplotlib inline
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score
# Load the raw data. The last column is used as the regression target and
# every other column as a candidate feature.
dataset = pd.read_csv("Pop_Data.csv")
# In the notebook the preview was rendered implicitly; a script has to print it.
print(dataset.head(5))

# 70/30 train/test split with a fixed seed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    dataset.iloc[:, :-1],
    dataset.iloc[:, -1],
    test_size=0.3,
    random_state=42,
)
X_train.info()
"""# EDA"""
# Discard the first three feature columns in both frames; only the remaining
# columns are explored and modelled below.
X_train = X_train.iloc[:, 3:]
X_test = X_test.iloc[:, 3:]
# Must be called: the bare attribute reference printed nothing.
X_train.info()

# Bar chart of how many training rows fall on each day of the week.
plt.figure(figsize=(12, 8))
plot = sns.countplot(x="day_of_week", data=X_train)
plt.xticks(rotation=90)
for p in plot.patches:
    # Write each bar's height just above the centre of its top edge.
    plot.annotate(
        p.get_height(),
        (p.get_x() + p.get_width() / 2.0, p.get_height()),
        ha="center",
        va="center",
        xytext=(0, 5),
        textcoords="offset points",
    )
# countplot draws row counts (the target column is not even in X_train), so
# the chart is labelled as counts rather than prices.
plt.title("Observations per day of week")
plt.xlabel("Day")
plt.ylabel("Count")

# Missing-value counts, train first and then test, for each column in turn.
for _column in ("day_of_week", "hour_of_day", "popularity_percent_normal"):
    for _frame in (X_train, X_test):
        print(_frame[_column].isnull().sum())
# Impute missing popularity values with the TRAINING mean in both frames.
# The test set previously kept its NaNs, which then flowed into the models.
# DataFrame.fillna returns a new frame, so the result no longer depends on
# mutating a column selection in place (unreliable under pandas copy-on-write).
popularity_mean = X_train["popularity_percent_normal"].astype("float64").mean()
X_train = X_train.fillna({"popularity_percent_normal": popularity_mean})
X_test = X_test.fillna({"popularity_percent_normal": popularity_mean})

# One-hot encode the day of week; the training frame defines the reference
# layout, with its first category dropped as the baseline.
X_train = pd.get_dummies(X_train, columns=["day_of_week"], drop_first=True)

# Encode every day present in the test frame and then conform it to the
# training columns. Applying drop_first to the test frame on its own could
# drop a different day than training did and mis-encode those rows.
# reindex drops columns unknown to training, adds absent ones filled with 0,
# and puts the columns in the training order.
X_test = pd.get_dummies(X_test, columns=["day_of_week"]).reindex(
    columns=X_train.columns, fill_value=0
)

# Standardise features using statistics learned from the training data only.
standardScaler = StandardScaler()
X_train = standardScaler.fit_transform(X_train)
X_test = standardScaler.transform(X_test)
# Baseline: ordinary least squares on the standardised features.
linearRegression = LinearRegression()
linearRegression.fit(X_train, y_train)
y_pred = linearRegression.predict(X_test)
# The score used to be computed and thrown away; report it instead.
print("Linear regression R^2:", r2_score(y_test, y_pred))

# Random forest, seeded like the train/test split so runs are reproducible.
rf = RandomForestRegressor(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)
# y_pred is deliberately overwritten: it ends up holding the forest's output.
y_pred = rf.predict(X_test)
print("Random forest R^2:", r2_score(y_test, y_pred))
def predict(x_test):
    """Return the fitted random forest's predictions for ``x_test``.

    ``x_test`` is handed to ``rf.predict`` unchanged; no encoding or scaling
    is applied here, so it must already be in the same preprocessed form as
    the matrix the forest was fitted on.
    """
    predictions = rf.predict(x_test)
    return predictions