# Abs6187's picture
# Upload 12 files
# c5ec08c verified
# utils/model_trainer.py (updated)
import pandas as pd
import numpy as np
import pickle
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from sklearn.metrics import (
accuracy_score, precision_score, recall_score, f1_score,
roc_auc_score, confusion_matrix, classification_report
)
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
# Suppress warnings
warnings.filterwarnings('ignore')
class ModelTrainer:
    """Train, evaluate, save and load a small set of binary classifiers.

    The instance holds three pre-configured estimators in ``self.models``
    (logistic regression, random forest, XGBoost) and, once
    ``train_neural_network`` has been called, the fitted Keras network in
    ``self.neural_net``.
    """

    def __init__(self):
        """Create the unfitted estimators keyed by their display name."""
        self.models = {
            'Logistic Regression': LogisticRegression(
                max_iter=1000, class_weight='balanced'
            ),
            'Random Forest': RandomForestClassifier(
                n_estimators=100, class_weight='balanced', random_state=42
            ),
            'XGBoost': XGBClassifier(
                scale_pos_weight=10,
                n_estimators=100,
                random_state=42,
                use_label_encoder=False,
                eval_metric='logloss',
            ),
        }
        # Populated by train_neural_network().
        self.neural_net = None

    def train_models(self, X_train, y_train):
        """Fit every estimator in ``self.models`` on the training data.

        Args:
            X_train: Training features, passed unchanged to each ``fit``.
            y_train: Training labels, passed unchanged to each ``fit``.

        Returns:
            A new dict mapping each display name to its estimator. The
            estimators are the same objects stored in ``self.models``; they
            are fitted in place, not copied.
        """
        trained_models = {}
        for name, model in self.models.items():
            print(f"Training {name}...")
            model.fit(X_train, y_train)
            trained_models[name] = model
        return trained_models

    def train_neural_network(self, X_train, y_train, input_dim):
        """Build and fit a dense feed-forward network with a sigmoid output.

        The network has hidden layers of 64, 32 and 16 ReLU units, with
        dropout of 0.3 after the first two, and a single sigmoid output unit.
        It is compiled with the Adam optimizer and binary cross-entropy loss,
        then fitted for 20 epochs with batch size 64 and a validation split
        of 0.2.

        Args:
            X_train: Training features.
            y_train: Training labels.
            input_dim: Number of input features fed to the first layer.

        Returns:
            A ``(model, history)`` tuple: the fitted model (also stored in
            ``self.neural_net``) and the object returned by ``model.fit``.
        """
        model = Sequential([
            Dense(64, activation='relu', input_dim=input_dim),
            Dropout(0.3),
            Dense(32, activation='relu'),
            Dropout(0.3),
            Dense(16, activation='relu'),
            Dense(1, activation='sigmoid'),
        ])
        model.compile(
            optimizer='adam',
            loss='binary_crossentropy',
            metrics=['accuracy'],
        )
        history = model.fit(
            X_train, y_train,
            epochs=20,
            batch_size=64,
            validation_split=0.2,
            verbose=1,
        )
        self.neural_net = model
        return model, history

    @staticmethod
    def _predict_labels_and_scores(model, X_test, model_name):
        """Return ``(hard labels, positive-class scores)`` for ``model``.

        Models exposing ``predict_proba`` use ``predict`` for the labels and
        column 1 of ``predict_proba`` for the scores. Everything else,
        including any model evaluated under the name "Neural Network", is
        assumed to return positive-class scores from ``predict``; those are
        flattened to 1-D and thresholded at 0.5 to obtain the labels.
        """
        if model_name != "Neural Network" and hasattr(model, "predict_proba"):
            return model.predict(X_test), model.predict_proba(X_test)[:, 1]
        # Dispatching on the missing attribute (not only on the display name)
        # keeps a network evaluated under another name from crashing on
        # predict_proba. ravel() makes the scores 1-D like the other branch.
        scores = np.asarray(model.predict(X_test)).ravel()
        return (scores > 0.5).astype(int), scores

    def evaluate_model(self, model, X_test, y_test, model_name="Model"):
        """Evaluate a fitted binary classifier on held-out data.

        Args:
            model: Fitted model. It must provide ``predict``; if it also
                provides ``predict_proba`` (and is not evaluated under the
                name "Neural Network") that is used for the scores.
            X_test: Test features.
            y_test: True test labels.
            model_name: Display name stored in the results. The value
                "Neural Network" forces the score-thresholding path.

        Returns:
            A dict with keys ``model_name``, ``accuracy``, ``precision``,
            ``recall``, ``f1_score``, ``auc``, ``confusion_matrix``,
            ``classification_report``, ``y_test`` and ``y_pred_proba``
            (the 1-D positive-class scores).
        """
        y_pred, y_pred_proba = self._predict_labels_and_scores(
            model, X_test, model_name
        )

        # Threshold-based metrics use the hard labels; AUC uses the scores.
        accuracy = accuracy_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred)
        recall = recall_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred)
        auc = roc_auc_score(y_test, y_pred_proba)

        cm = confusion_matrix(y_test, y_pred)
        report = classification_report(y_test, y_pred)

        results = {
            'model_name': model_name,
            'accuracy': accuracy,
            'precision': precision,
            'recall': recall,
            'f1_score': f1,
            'auc': auc,
            'confusion_matrix': cm,
            'classification_report': report,
            'y_test': y_test,
            'y_pred_proba': y_pred_proba,
        }
        return results

    def save_model(self, model, file_path):
        """Persist ``model`` to ``file_path``.

        ``Sequential`` models are written with their own ``save`` method;
        every other object is serialized with ``pickle``.

        Args:
            model: The fitted model to store.
            file_path: Destination path.
        """
        if isinstance(model, Sequential):
            model.save(file_path)
        else:
            with open(file_path, 'wb') as f:
                pickle.dump(model, f)

    def load_model(self, file_path, model_type='sklearn'):
        """Load a model previously written by ``save_model``.

        Args:
            file_path: Path of the stored model.
            model_type: ``'keras'`` to load with Keras' ``load_model``; any
                other value unpickles the file.

        Returns:
            The deserialized model object.
        """
        if model_type == 'keras':
            from tensorflow.keras.models import load_model
            return load_model(file_path)
        # SECURITY: unpickling executes arbitrary code embedded in the file.
        # Only load model files that come from a trusted source.
        with open(file_path, 'rb') as f:
            return pickle.load(f)