Spaces:

shollercoaster
/

dynamic-pricing

Build error

App Files Files Community

dynamic-pricing / dynamic_pricing.py

shollercoaster

first commit

25a7261 over 1 year ago

raw

history blame contribute delete

2.89 kB

	# -*- coding: utf-8 -*-
	"""dynamic pricing.ipynb

	Automatically generated by Colaboratory.

	Original file is located at
	https://colab.research.google.com/drive/1pMuvzwELNm1DsTdL5dfBdA2HCjB6uwgh
	"""

	# Commented out IPython magic to ensure Python compatibility.
	import datetime

	import numpy as np
	import pandas as pd

	import matplotlib.pyplot as plt
	import seaborn as sns
	# %matplotlib inline

	from sklearn.model_selection import train_test_split
	from sklearn.linear_model import LinearRegression
	from sklearn.ensemble import RandomForestRegressor
	from sklearn.preprocessing import StandardScaler
	from sklearn.metrics import r2_score

	# Load the raw data; the last column is used as the regression target and
	# every other column as a candidate feature.
	dataset = pd.read_csv("Pop_Data.csv")
	# Notebook leftover: the preview is computed but not displayed when this
	# file runs as a plain script.
	dataset.head(5)

	# Hold out 30% of the rows for evaluation; the fixed seed keeps the split
	# identical between runs.
	features = dataset.iloc[:, :-1]
	target = dataset.iloc[:, -1]
	X_train, X_test, y_train, y_test = train_test_split(
	    features, target, test_size=0.3, random_state=42
	)

	# Print a summary of the training features.
	X_train.info()

	"""# EDA"""

	# Drop the first three columns from both splits; only the remaining
	# columns are used from here on.
	X_train = X_train.iloc[:, 3:]
	X_test = X_test.iloc[:, 3:]

	# Fixed: the method was referenced without being called, so nothing was
	# printed. Call it to actually show the summary of the reduced frame.
	X_train.info()

	# Bar chart of how many training rows fall on each day of the week.
	plt.figure(figsize=(12, 8))
	plot = sns.countplot(x="day_of_week", data=X_train)
	plt.xticks(rotation=90)
	# Write each bar's height just above the top centre of the bar.
	for p in plot.patches:
	    plot.annotate(
	        p.get_height(),
	        (p.get_x() + p.get_width() / 2.0, p.get_height()),
	        ha="center",
	        va="center",
	        xytext=(0, 5),
	        textcoords="offset points",
	    )

	# NOTE(review): a count plot shows row counts per day, so the "Price"
	# wording in the title and y-label looks mislabeled -- confirm intent.
	plt.title("Price changes based on day")
	plt.xlabel("Day")
	plt.ylabel("Price")

	# Report the number of missing values in each feature column, one number
	# per line: training split first, then test split, column by column.
	for column in ("day_of_week", "hour_of_day", "popularity_percent_normal"):
	    for frame in (X_train, X_test):
	        print(frame[column].isnull().sum())

	# Impute missing popularity values with the mean of the training split
	# only, so no statistic is taken from the test split.
	# Fixed: (1) the value is assigned back instead of using a chained
	# ``fillna(..., inplace=True)`` on a column selection, which pandas warns
	# about and which no longer updates the frame under Copy-on-Write;
	# (2) the test split is imputed too -- it used to keep its NaNs.
	popularity_mean = X_train["popularity_percent_normal"].astype("float64").mean()
	X_train = X_train.assign(
	    popularity_percent_normal=X_train["popularity_percent_normal"].fillna(popularity_mean)
	)
	X_test = X_test.assign(
	    popularity_percent_normal=X_test["popularity_percent_normal"].fillna(popularity_mean)
	)

	# One-hot encode the weekday, dropping the first level of each split.
	# NOTE(review): with drop_first=True each split drops its own first level;
	# if a weekday is absent from one split the encodings can disagree --
	# confirm every weekday occurs in both splits.
	X_train = pd.get_dummies(X_train, columns=["day_of_week"], drop_first=True)
	X_test = pd.get_dummies(X_test, columns=["day_of_week"], drop_first=True)

	# Give the test split exactly the training columns, in training order;
	# dummy columns that the test split lacks are added as zeros.
	X_test = X_test.reindex(columns=X_train.columns, fill_value=0)

	# Standardize the features with statistics fitted on the training split
	# only, then apply the same transform to the test split.
	standardScaler = StandardScaler()
	X_train = standardScaler.fit_transform(X_train)
	X_test = standardScaler.transform(X_test)

	# Baseline: ordinary least squares on the standardized features.
	linearRegression = LinearRegression()
	linearRegression.fit(X_train, y_train)
	y_pred = linearRegression.predict(X_test)
	# Fixed: the score used to be computed and discarded, so running the file
	# as a script gave no indication of model quality.
	linear_r2 = r2_score(y_test, y_pred)
	print("LinearRegression R^2:", linear_r2)

	# Random forest with 100 trees; this is the model that predict() uses.
	# Seeded with the same value as the train/test split so repeated runs
	# build the same forest.
	rf = RandomForestRegressor(n_estimators=100, random_state=42)
	rf.fit(X_train, y_train)
	y_pred = rf.predict(X_test)
	rf_r2 = r2_score(y_test, y_pred)
	print("RandomForestRegressor R^2:", rf_r2)

	def predict(x_test):
	    """Return the fitted random forest's predictions for ``x_test``.

	    ``x_test`` is handed to ``rf.predict`` unchanged.
	    NOTE(review): ``rf`` is fitted on one-hot encoded, standardized
	    features, so callers presumably must preprocess ``x_test`` the same
	    way -- confirm against the caller.
	    """
	    predictions = rf.predict(x_test)
	    return predictions