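"""Model trainer component.

Fits several candidate regressors with grid-searched hyperparameters, selects
the best by R^2 on the held-out test split, and persists it to
artifacts/model.pkl.
"""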
import os
import sys
from dataclasses import dataclass

from catboost import CatBoostRegressor
from sklearn.ensemble import (
    AdaBoostRegressor,
    GradientBoostingRegressor,
    RandomForestRegressor,
)
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from xgboost import XGBRegressor

from src.exception import CustomException
from src.logger import logging
from src.utils import save_object, evaluate_models

@dataclass
class ModelTrainerConfig:
    """Configuration for the model trainer component."""

    trained_model_file_path: str = os.path.join("artifacts", "model.pkl")


class ModelTrainer:
    """Trains candidate regressors and persists the best-performing one."""

    def __init__(self) -> None:
        self.model_trainer_config = ModelTrainerConfig()
    def initiate_model_trainer(self, train_array, test_array):
        """Evaluate candidate models, save the best one, and return its test R^2."""
        try:
            logging.info("Splitting training and testing input data")
            # The target is the last column of each array; the rest are features.
            X_train, y_train, X_test, y_test = (
                train_array[:, :-1],
                train_array[:, -1],
                test_array[:, :-1],
                test_array[:, -1],
            )
            models = {
                "Linear Regression": LinearRegression(),
                "K-Neighbors Regressor": KNeighborsRegressor(),
                "Decision Tree Regressor": DecisionTreeRegressor(),
                "Random Forest Regressor": RandomForestRegressor(),
                "AdaBoost Regressor": AdaBoostRegressor(),
                "Gradient Boosting Regressor": GradientBoostingRegressor(),
                "XGBRegressor": XGBRegressor(),
                "CatBoost Regressor": CatBoostRegressor(verbose=False),
            }
            params_grid = {
                "Linear Regression": {},
                "K-Neighbors Regressor": {},
                "Decision Tree Regressor": {
                    "criterion": [
                        "squared_error",
                        "friedman_mse",
                        "absolute_error",
                        "poisson",
                    ],
                    # "splitter": ["best", "random"],
                    # "max_features": ["sqrt", "log2"],
                },
                "Random Forest Regressor": {
                    # "criterion": ["squared_error", "friedman_mse", "absolute_error", "poisson"],
                    # "max_features": ["sqrt", "log2", None],
                    "n_estimators": [8, 16, 32, 64, 128, 256],
                },
                "AdaBoost Regressor": {
                    "learning_rate": [0.1, 0.01, 0.5, 0.001],
                    # "loss": ["linear", "square", "exponential"],
                    "n_estimators": [8, 16, 32, 64, 128, 256],
                },
                "Gradient Boosting Regressor": {
                    # "loss": ["squared_error", "huber", "absolute_error", "quantile"],
                    "learning_rate": [0.1, 0.01, 0.05, 0.001],
                    "subsample": [0.6, 0.7, 0.75, 0.8, 0.85, 0.9],
                    # "criterion": ["squared_error", "friedman_mse"],
                    # "max_features": ["sqrt", "log2"],
                    "n_estimators": [8, 16, 32, 64, 128, 256],
                },
                "XGBRegressor": {
                    "learning_rate": [0.1, 0.01, 0.05, 0.001],
                    "n_estimators": [8, 16, 32, 64, 128, 256],
                },
                "CatBoost Regressor": {
                    "depth": [6, 8, 10],
                    "learning_rate": [0.01, 0.05, 0.1],
                    "iterations": [30, 50, 100],
                },
            }
            # evaluate_models is assumed to grid-search each model over its
            # parameter grid, refit it, and return a {model name: test R^2} dict.
            model_report: dict = evaluate_models(
                X_train=X_train,
                y_train=y_train,
                X_test=X_test,
                y_test=y_test,
                models=models,
                params_grid=params_grid,
            )
            # Pick the model with the highest test R^2.
            best_model_score = max(model_report.values())
            best_model_name = max(model_report, key=model_report.get)
            # evaluate_models is assumed to have fitted the models in place.
            best_model = models[best_model_name]

            if best_model_score < 0.6:
                raise CustomException("No best model found", sys)
logging.info(f"Best found model on both training and testing dataset")
save_object(
file_path=self.model_trainer_config.trained_model_file_path,
obj=best_model,
)
print(best_model_name)
predicted = best_model.predict(X_test)
r2_square = r2_score(y_test, predicted)
return r2_square
        except Exception as e:
            raise CustomException(e, sys)
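

# A minimal usage sketch, not part of the original pipeline: it assumes
# `artifacts/train.npy` and `artifacts/test.npy` exist (hypothetical paths;
# in this project the arrays would normally come from the data transformation
# component) and follow the layout above, with the target in the last column.
if __name__ == "__main__":
    import numpy as np

    train_array = np.load(os.path.join("artifacts", "train.npy"))
    test_array = np.load(os.path.join("artifacts", "test.npy"))

    trainer = ModelTrainer()
    score = trainer.initiate_model_trainer(train_array, test_array)
    print(f"Best model test R^2: {score:.4f}")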