# -*- coding: utf-8 -*-
"""dynamic pricing.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1pMuvzwELNm1DsTdL5dfBdA2HCjB6uwgh

Loads ``Pop_Data.csv``, treats the last column as the regression target,
drops the first three feature columns, one-hot encodes ``day_of_week``,
standardises the features and fits a linear regression and a random forest.
The fitted forest is exposed through :func:`predict`.
"""

# Commented out IPython magic to ensure Python compatibility.
import datetime
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# %matplotlib inline

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score

dataset = pd.read_csv("Pop_Data.csv")
dataset.head(5)

# Last column is the target; everything before it is a candidate feature.
X_train, X_test, y_train, y_test = train_test_split(
    dataset.iloc[:, :-1],
    dataset.iloc[:, -1],
    test_size=0.3,
    random_state=42,
)

X_train.info()

"""# EDA"""

# Drop the first three feature columns. Take explicit copies so the column
# assignments below modify these frames rather than views of ``dataset``.
X_train = X_train.iloc[:, 3:].copy()
X_test = X_test.iloc[:, 3:].copy()

X_train.info()

plt.figure(figsize=(12, 8))
plot = sns.countplot(x='day_of_week', data=X_train)
plt.xticks(rotation=90)
# Write each bar's height just above the bar.
for p in plot.patches:
    plot.annotate(
        p.get_height(),
        (p.get_x() + p.get_width() / 2.0, p.get_height()),
        ha='center',
        va='center',
        xytext=(0, 5),
        textcoords='offset points',
    )

# NOTE(review): this is a count plot (rows per day), yet the title and y-label
# talk about price -- confirm the intended wording.
plt.title("Price changes based on day")
plt.xlabel("Day")
plt.ylabel("Price")

# Report the number of missing values per feature, train first, then test.
for column in ("day_of_week", "hour_of_day", "popularity_percent_normal"):
    print(int(X_train[column].isnull().sum()))
    print(int(X_test[column].isnull().sum()))

# Impute missing popularity with the TRAINING mean in both splits, so no test
# information leaks into preprocessing and no NaN reaches the models.
popularity_mean = X_train["popularity_percent_normal"].astype("float64").mean()
X_train["popularity_percent_normal"] = (
    X_train["popularity_percent_normal"].astype("float64").fillna(popularity_mean)
)
X_test["popularity_percent_normal"] = (
    X_test["popularity_percent_normal"].astype("float64").fillna(popularity_mean)
)

# Encode both splits against the category levels seen in training so that
# ``drop_first`` removes the same level in each and the columns line up.
day_categories = pd.Categorical(X_train["day_of_week"]).categories
X_train["day_of_week"] = pd.Categorical(
    X_train["day_of_week"], categories=day_categories
)
X_test["day_of_week"] = pd.Categorical(
    X_test["day_of_week"], categories=day_categories
)

X_train = pd.get_dummies(X_train, columns=["day_of_week"], drop_first=True)
X_test = pd.get_dummies(X_test, columns=["day_of_week"], drop_first=True)

# Safety net: force the test frame to the training layout (missing -> 0).
X_test = X_test.reindex(columns=X_train.columns, fill_value=0)

# Fit the scaler on training data only, then apply it to both splits.
standardScaler = StandardScaler()
standardScaler.fit(X_train)
X_train = standardScaler.transform(X_train)
X_test = standardScaler.transform(X_test)

linearRegression = LinearRegression()
linearRegression.fit(X_train, y_train)

y_pred = linearRegression.predict(X_test)

print("Linear regression R^2:", r2_score(y_test, y_pred))

# Seeded like the train/test split so results are reproducible across runs.
rf = RandomForestRegressor(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)

y_pred = rf.predict(X_test)

print("Random forest R^2:", r2_score(y_test, y_pred))


def predict(x_test):
    """Return the random forest's predictions for *x_test*.

    The forest was fitted on features that had already been one-hot encoded
    and transformed by ``standardScaler``; this function applies no
    preprocessing itself, so *x_test* must already be in that form.
    """
    return rf.predict(x_test)