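"""Model trainer component.

Fits several candidate regressors with grid-searched hyperparameters, selects
the best by R^2 on the held-out test split, and persists it to
artifacts/model.pkl.
"""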
import os
import sys
from dataclasses import dataclass

from catboost import CatBoostRegressor
from sklearn.ensemble import (
    AdaBoostRegressor,
    GradientBoostingRegressor,
    RandomForestRegressor,
)
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from xgboost import XGBRegressor

from src.exception import CustomException
from src.logger import logging
from src.utils import save_object, evaluate_models

@dataclass
class ModelTrainerConfig:
    """Configuration for the model trainer component."""

    trained_model_file_path: str = os.path.join("artifacts", "model.pkl")


class ModelTrainer:
    """Trains candidate regressors and persists the best-performing one."""

    def __init__(self) -> None:
        self.model_trainer_config = ModelTrainerConfig()
    def initiate_model_trainer(self, train_array, test_array):
        """Evaluate candidate models, save the best one, and return its test R^2."""
        try:
            logging.info("Splitting training and testing input data")
            # The target is the last column of each array; the rest are features.
            X_train, y_train, X_test, y_test = (
                train_array[:, :-1],
                train_array[:, -1],
                test_array[:, :-1],
                test_array[:, -1],
            )
            models = {
                "Linear Regression": LinearRegression(),
                "K-Neighbors Regressor": KNeighborsRegressor(),
                "Decision Tree Regressor": DecisionTreeRegressor(),
                "Random Forest Regressor": RandomForestRegressor(),
                "AdaBoost Regressor": AdaBoostRegressor(),
                "Gradient Boosting Regressor": GradientBoostingRegressor(),
                "XGBRegressor": XGBRegressor(),
                "CatBoost Regressor": CatBoostRegressor(verbose=False),
            }
            params_grid = {
                "Linear Regression": {},
                "K-Neighbors Regressor": {},
                "Decision Tree Regressor": {
                    "criterion": [
                        "squared_error",
                        "friedman_mse",
                        "absolute_error",
                        "poisson",
                    ],
                    # "splitter": ["best", "random"],
                    # "max_features": ["sqrt", "log2"],
                },
                "Random Forest Regressor": {
                    # "criterion": ["squared_error", "friedman_mse", "absolute_error", "poisson"],
                    # "max_features": ["sqrt", "log2", None],
                    "n_estimators": [8, 16, 32, 64, 128, 256],
                },
                "AdaBoost Regressor": {
                    "learning_rate": [0.1, 0.01, 0.5, 0.001],
                    # "loss": ["linear", "square", "exponential"],
                    "n_estimators": [8, 16, 32, 64, 128, 256],
                },
                "Gradient Boosting Regressor": {
                    # "loss": ["squared_error", "huber", "absolute_error", "quantile"],
                    "learning_rate": [0.1, 0.01, 0.05, 0.001],
                    "subsample": [0.6, 0.7, 0.75, 0.8, 0.85, 0.9],
                    # "criterion": ["squared_error", "friedman_mse"],
                    # "max_features": ["sqrt", "log2"],
                    "n_estimators": [8, 16, 32, 64, 128, 256],
                },
                "XGBRegressor": {
                    "learning_rate": [0.1, 0.01, 0.05, 0.001],
                    "n_estimators": [8, 16, 32, 64, 128, 256],
                },
                "CatBoost Regressor": {
                    "depth": [6, 8, 10],
                    "learning_rate": [0.01, 0.05, 0.1],
                    "iterations": [30, 50, 100],
                },
            }
            # evaluate_models is assumed to grid-search each model over its
            # parameter grid, refit it, and return a {model name: test R^2} dict.
            model_report: dict = evaluate_models(
                X_train=X_train,
                y_train=y_train,
                X_test=X_test,
                y_test=y_test,
                models=models,
                params_grid=params_grid,
            )
            # Pick the model with the highest test R^2.
            best_model_score = max(model_report.values())
            best_model_name = max(model_report, key=model_report.get)
            # evaluate_models is assumed to have fitted the models in place.
            best_model = models[best_model_name]

            if best_model_score < 0.6:
                raise CustomException("No best model found", sys)
logging.info(f"Best found model on both training and testing dataset")
save_object(
file_path=self.model_trainer_config.trained_model_file_path,
obj=best_model,
)
print(best_model_name)
predicted = best_model.predict(X_test)
r2_square = r2_score(y_test, predicted)
return r2_square
        except Exception as e:
            raise CustomException(e, sys)
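

# A minimal usage sketch, not part of the original pipeline: it assumes
# `artifacts/train.npy` and `artifacts/test.npy` exist (hypothetical paths;
# in this project the arrays would normally come from the data transformation
# component) and follow the layout above, with the target in the last column.
if __name__ == "__main__":
    import numpy as np

    train_array = np.load(os.path.join("artifacts", "train.npy"))
    test_array = np.load(os.path.join("artifacts", "test.npy"))

    trainer = ModelTrainer()
    score = trainer.initiate_model_trainer(train_array, test_array)
    print(f"Best model test R^2: {score:.4f}")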