|
|
|
import pandas as pd
|
|
import numpy as np
|
|
import pickle
|
|
from sklearn.linear_model import LogisticRegression
|
|
from sklearn.ensemble import RandomForestClassifier
|
|
from xgboost import XGBClassifier
|
|
from tensorflow.keras.models import Sequential
|
|
from tensorflow.keras.layers import Dense, Dropout
|
|
from sklearn.metrics import (
|
|
accuracy_score, precision_score, recall_score, f1_score,
|
|
roc_auc_score, confusion_matrix, classification_report
|
|
)
|
|
import matplotlib.pyplot as plt
|
|
import seaborn as sns
|
|
import warnings
|
|
|
|
|
|
warnings.filterwarnings('ignore')
|
|
|
|
class ModelTrainer:
|
|
def __init__(self):
|
|
self.models = {
|
|
'Logistic Regression': LogisticRegression(max_iter=1000, class_weight='balanced'),
|
|
'Random Forest': RandomForestClassifier(n_estimators=100, class_weight='balanced', random_state=42),
|
|
'XGBoost': XGBClassifier(scale_pos_weight=10, n_estimators=100, random_state=42, use_label_encoder=False, eval_metric='logloss')
|
|
}
|
|
self.neural_net = None
|
|
|
|
def train_models(self, X_train, y_train):
|
|
"""Train multiple machine learning models"""
|
|
trained_models = {}
|
|
|
|
for name, model in self.models.items():
|
|
print(f"Training {name}...")
|
|
model.fit(X_train, y_train)
|
|
trained_models[name] = model
|
|
|
|
return trained_models
|
|
|
|
def train_neural_network(self, X_train, y_train, input_dim):
|
|
"""Train a neural network model"""
|
|
model = Sequential([
|
|
Dense(64, activation='relu', input_dim=input_dim),
|
|
Dropout(0.3),
|
|
Dense(32, activation='relu'),
|
|
Dropout(0.3),
|
|
Dense(16, activation='relu'),
|
|
Dense(1, activation='sigmoid')
|
|
])
|
|
|
|
model.compile(
|
|
optimizer='adam',
|
|
loss='binary_crossentropy',
|
|
metrics=['accuracy']
|
|
)
|
|
|
|
history = model.fit(
|
|
X_train, y_train,
|
|
epochs=20,
|
|
batch_size=64,
|
|
validation_split=0.2,
|
|
verbose=1
|
|
)
|
|
|
|
self.neural_net = model
|
|
return model, history
|
|
|
|
def evaluate_model(self, model, X_test, y_test, model_name="Model"):
|
|
"""Evaluate model performance with various metrics"""
|
|
if model_name == "Neural Network":
|
|
y_pred_proba = model.predict(X_test)
|
|
y_pred = (y_pred_proba > 0.5).astype(int)
|
|
else:
|
|
y_pred = model.predict(X_test)
|
|
y_pred_proba = model.predict_proba(X_test)[:, 1]
|
|
|
|
|
|
accuracy = accuracy_score(y_test, y_pred)
|
|
precision = precision_score(y_test, y_pred)
|
|
recall = recall_score(y_test, y_pred)
|
|
f1 = f1_score(y_test, y_pred)
|
|
auc = roc_auc_score(y_test, y_pred_proba)
|
|
|
|
|
|
cm = confusion_matrix(y_test, y_pred)
|
|
|
|
|
|
report = classification_report(y_test, y_pred)
|
|
|
|
results = {
|
|
'model_name': model_name,
|
|
'accuracy': accuracy,
|
|
'precision': precision,
|
|
'recall': recall,
|
|
'f1_score': f1,
|
|
'auc': auc,
|
|
'confusion_matrix': cm,
|
|
'classification_report': report,
|
|
'y_test': y_test,
|
|
'y_pred_proba': y_pred_proba
|
|
}
|
|
|
|
return results
|
|
|
|
def save_model(self, model, file_path):
|
|
"""Save the trained model to a file"""
|
|
if isinstance(model, Sequential):
|
|
model.save(file_path)
|
|
else:
|
|
with open(file_path, 'wb') as f:
|
|
pickle.dump(model, f)
|
|
|
|
def load_model(self, file_path, model_type='sklearn'):
|
|
"""Load a trained model from a file"""
|
|
if model_type == 'keras':
|
|
from tensorflow.keras.models import load_model
|
|
return load_model(file_path)
|
|
else:
|
|
with open(file_path, 'rb') as f:
|
|
return pickle.load(f) |