Commit 3075c38
Parent(s): cafbe14
update: fix GA feature selection code
features_ravdess.json
CHANGED

@@ -12,5 +12,5 @@
     "mel": 0
   },
   "n_samples": 1440,
-  "extraction_date": "2025-10-
+  "extraction_date": "2025-10-05T12:24:04.258750"
 }
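Note: the new value matches the output of Python's datetime.isoformat(). A minimal sketch of how such a field is typically written (the actual extraction script is not part of this commit, so the writer below is illustrative):

# Illustrative writer, not from this commit: shows where an isoformat
# timestamp such as "2025-10-05T12:24:04.258750" comes from.
import json
from datetime import datetime

metadata = {"n_samples": 1440, "extraction_date": datetime.now().isoformat()}
with open("features_ravdess.json", "w") as f:
    json.dump(metadata, f, indent=2)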
src/__pycache__/genetic_algorithm.cpython-311.pyc
CHANGED

Binary files a/src/__pycache__/genetic_algorithm.cpython-311.pyc and b/src/__pycache__/genetic_algorithm.cpython-311.pyc differ

src/__pycache__/training.cpython-311.pyc
CHANGED

Binary files a/src/__pycache__/training.cpython-311.pyc and b/src/__pycache__/training.cpython-311.pyc differ
src/genetic_algorithm.py
CHANGED

@@ -1,6 +1,5 @@
 """
 Genetic Algorithm for feature selection and hyperparameter optimization
-Supports AdaBoost algorithm selection and variable MFCC counts
 """
 
 import numpy as np
@@ -17,7 +16,6 @@ from sklearn.metrics import accuracy_score
 
 import config
 
-# Suppress LightGBM warnings
 warnings.filterwarnings(
     'ignore', message='X does not have valid feature names')
 warnings.filterwarnings('ignore', category=UserWarning, module='sklearn')
@@ -26,18 +24,38 @@ warnings.filterwarnings('ignore', category=UserWarning, module='sklearn')
 class GeneticAlgorithm:
     """GA for optimizing features + hyperparameters + ensemble weights"""
 
-    def __init__(self, X: np.ndarray, y: np.ndarray,
+    def __init__(self, X: np.ndarray, y: np.ndarray,
+                 n_features_to_select: int = 80,
+                 skip_feature_selection: bool = False):
+        """
+        Initialize GA
+
+        Args:
+            X: Training data
+            y: Training labels
+            n_features_to_select: Number of features to select
+            skip_feature_selection: If True, use all features (only optimize hyperparams)
+        """
         self.X = X
         self.y = y
         self.n_features = X.shape[1]
+        self.skip_feature_selection = skip_feature_selection
 
-        print(
-            f"⚠️ Adjusted: {n_features_to_select} → {self.n_features} features")
+        if skip_feature_selection:
+            # Use ALL features, no selection
             self.n_select = self.n_features
+            print(
+                f"✅ GA will optimize: HYPERPARAMETERS ONLY (using all {self.n_features} features)")
         else:
+            # GA selects features
+            if n_features_to_select > self.n_features:
+                print(
+                    f"⚠️ Adjusted: {n_features_to_select} → {self.n_features} features")
+                self.n_select = self.n_features
+            else:
+                self.n_select = n_features_to_select
+                print(
+                    f"✅ GA will optimize: FEATURES ({self.n_select}/{self.n_features}) + HYPERPARAMETERS")
 
         self.n_classes = len(np.unique(y))
 
@@ -63,14 +81,22 @@
         print(log_entry)
 
     def create_chromosome(self) -> Dict:
-        """Create random chromosome
+        """Create random chromosome"""
+
+        chromosome = {}
+
+        # Feature selection (skip if not optimizing features)
+        if self.skip_feature_selection:
+            # Use ALL features
+            chromosome['feature_indices'] = np.arange(self.n_features)
+        else:
+            # Select random subset
+            n_to_select = min(self.n_select, self.n_features)
+            chromosome['feature_indices'] = np.sort(np.random.choice(
+                self.n_features, n_to_select, replace=False
             ))
-        }
 
-        # Add
+        # Add hyperparameters for each model
         for model_prefix, params in config.MODEL_HYPERPARAMS.items():
             for param_name, param_values in params.items():
                 key = f"{model_prefix}_{param_name}"
@@ -87,23 +113,16 @@
 
     def fitness(self, chromosome: Dict, X_train: np.ndarray, y_train: np.ndarray,
                 X_val: np.ndarray, y_val: np.ndarray) -> float:
-        """
-        Calculate fitness using validation accuracy
-
-        Now optimizes AdaBoost algorithm ('SAMME' vs 'SAMME.R')
-        """
+        """Calculate fitness using validation accuracy"""
         try:
             feature_indices = chromosome['feature_indices']
 
-            # Keep as NumPy arrays - FAST and efficient
             X_train_selected = X_train[:, feature_indices]
             X_val_selected = X_val[:, feature_indices]
 
             models = []
 
-            # ================================================================
             # XGBoost
-            # ================================================================
             xgb = XGBClassifier(
                 n_estimators=chromosome.get('xgb_n_estimators', 100),
                 max_depth=chromosome.get('xgb_max_depth', 6),
@@ -121,9 +140,7 @@
             xgb.fit(X_train_selected, y_train)
             models.append(xgb)
 
-            # ================================================================
             # LightGBM
-            # ================================================================
             lgbm = LGBMClassifier(
                 n_estimators=chromosome.get('lgbm_n_estimators', 100),
                 num_leaves=chromosome.get('lgbm_num_leaves', 31),
@@ -143,9 +160,7 @@
             lgbm.fit(X_train_selected, y_train)
             models.append(lgbm)
 
-            # ================================================================
             # Gradient Boosting
-            # ================================================================
             gb = GradientBoostingClassifier(
                 n_estimators=chromosome.get('gb_n_estimators', 100),
                 max_depth=chromosome.get('gb_max_depth', 5),
@@ -158,23 +173,17 @@
             gb.fit(X_train_selected, y_train)
             models.append(gb)
 
-            # ================================================================
-            # AdaBoost - NOW WITH ALGORITHM OPTIMIZATION
-            # ================================================================
-            ada_algorithm = chromosome.get(
-                'ada_algorithm', 'SAMME')  # ← GA optimizes this!
-
+            # AdaBoost
             ada = AdaBoostClassifier(
                 n_estimators=chromosome.get('ada_n_estimators', 100),
                 learning_rate=chromosome.get('ada_learning_rate', 1.0),
+                # algorithm=config.ADABOOST_ALGORITHM,
                 random_state=config.RANDOM_STATE
             )
             ada.fit(X_train_selected, y_train)
             models.append(ada)
 
-            # ================================================================
-            # Ensemble Prediction
-            # ================================================================
+            # Ensemble prediction
             predictions = [model.predict_proba(
                 X_val_selected) for model in models]
             weights = chromosome['weights']
@@ -198,31 +207,39 @@
         child1 = {}
         child2 = {}
 
-        # Feature crossover
+        # Feature crossover (only if not skipping feature selection)
+        if self.skip_feature_selection:
+            # Keep all features
+            child1['feature_indices'] = parent1['feature_indices'].copy()
+            child2['feature_indices'] = parent2['feature_indices'].copy()
+        else:
+            # Crossover features
+            mask = np.random.rand(self.n_select) < 0.5
+            child1_features = np.where(
+                mask, parent1['feature_indices'], parent2['feature_indices'])
+            child2_features = np.where(
+                mask, parent2['feature_indices'], parent1['feature_indices'])
+
+            child1_features = np.unique(child1_features)
+            child2_features = np.unique(child2_features)
+
+            # Fill to required size
+            while len(child1_features) < self.n_select:
+                new_feat = random.randint(0, self.n_features - 1)
+                if new_feat not in child1_features:
+                    child1_features = np.append(child1_features, new_feat)
 
+            while len(child2_features) < self.n_select:
+                new_feat = random.randint(0, self.n_features - 1)
+                if new_feat not in child2_features:
+                    child2_features = np.append(child2_features, new_feat)
 
+            child1['feature_indices'] = np.sort(
+                child1_features[:self.n_select])
+            child2['feature_indices'] = np.sort(
+                child2_features[:self.n_select])
 
-        # Hyperparameter crossover
+        # Hyperparameter crossover
         for key in parent1.keys():
             if key != 'feature_indices':
                 if random.random() < 0.5:
@@ -238,21 +255,23 @@
         """Mutation operation"""
         mutated = chromosome.copy()
 
-        # Feature mutation
-        if random.random() < self.mutation_rate:
+        # Feature mutation (only if not skipping feature selection)
+        if not self.skip_feature_selection:
+            if random.random() < self.mutation_rate:
+                n_replace = random.randint(1, min(5, self.n_select))
+                indices_to_replace = np.random.choice(
+                    self.n_select, n_replace, replace=False)
 
-            new_feat = random.randint(0, self.n_features - 1)
-            while new_feat in mutated['feature_indices']:
+                for idx in indices_to_replace:
                     new_feat = random.randint(0, self.n_features - 1)
+                    while new_feat in mutated['feature_indices']:
+                        new_feat = random.randint(0, self.n_features - 1)
+                    mutated['feature_indices'][idx] = new_feat
 
+                mutated['feature_indices'] = np.sort(
+                    mutated['feature_indices'])
 
+        # Hyperparameter mutation
         if random.random() < self.mutation_rate:
             param_keys = [k for k in chromosome.keys() if k not in [
                 'feature_indices', 'weights']]
@@ -286,28 +305,17 @@
                X_val: np.ndarray, y_val: np.ndarray,
                progress_callback: Optional[Callable] = None,
                n_jobs: int = 2) -> Dict:
-        """
-        Main GA evolution loop with parallel evaluation, early stopping, and logging
-
-        Args:
-            X_train, y_train: Training data (NumPy arrays)
-            X_val, y_val: Validation data (NumPy arrays)
-            progress_callback: Optional callback for progress updates
-            n_jobs: Number of parallel jobs
-
-        Returns:
-            Best chromosome found
-        """
+        """Main GA evolution loop"""
 
         self.log("="*70)
         self.log("🧬 GENETIC ALGORITHM OPTIMIZATION")
         self.log("="*70)
         self.log(f"Population size: {self.population_size}")
         self.log(f"Generations: {self.n_generations}")
-        self.log(
+        self.log(
+            f"Feature selection: {'DISABLED (hyperparams only)' if self.skip_feature_selection else f'ENABLED ({self.n_select}/{self.n_features})'}")
         self.log(f"Early stopping patience: {self.early_stopping_patience}")
         self.log(f"Parallel jobs: {n_jobs}")
-        self.log(f"Optimizing AdaBoost algorithm: SAMME vs SAMME.R")
         self.log("="*70)
 
         population = [self.create_chromosome()
@@ -339,13 +347,8 @@
             self.best_chromosome = population[max_idx].copy()
             no_improve_count = 0
             improved = True
-
-            # Log best configuration
-            best_ada_algo = self.best_chromosome.get(
-                'ada_algorithm', 'SAMME')
             self.log(
                 f"    ✨ NEW BEST: {max_fitness:.4f} (+{max_fitness - prev_best:.4f})")
-            self.log(f"    AdaBoost algorithm: {best_ada_algo}")
         else:
             no_improve_count += 1
             self.log(
@@ -421,11 +424,6 @@
         self.log(f"Total time: {total_time/60:.1f} minutes")
         self.log(
             f"Average time per generation: {total_time/len(self.history):.1f}s")
-
-        if self.best_chromosome:
-            self.log(
-                f"\n🎯 Best AdaBoost Algorithm: {self.best_chromosome.get('ada_algorithm', 'SAMME')}")
-
         self.log("="*70)
 
         if self.best_chromosome is None:
src/training.py
CHANGED

@@ -16,9 +16,13 @@ from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
 from xgboost import XGBClassifier
 from lightgbm import LGBMClassifier
 from sklearn.ensemble import GradientBoostingClassifier, AdaBoostClassifier
-
+import warnings
 import config
 from src.genetic_algorithm import GeneticAlgorithm
+# Suppress LightGBM feature name warnings
+warnings.filterwarnings(
+    'ignore', message='X does not have valid feature names')
+warnings.filterwarnings('ignore', category=UserWarning, module='sklearn')
 
 
 def train_models_with_ga(use_ga: bool = True,
@@ -203,7 +207,8 @@ def _train_with_cross_validation(X, y_encoded, label_encoder, n_classes,
                     desc=f"Fold {fold_idx}/{n_folds}: Running GA optimization...")
 
         ga = GeneticAlgorithm(X_train_ga, y_train_ga,
-                              n_features_to_select=n_features_select)
+                              n_features_to_select=n_features_select,
+                              skip_feature_selection=(not optimize_features) or (n_features_select == n_features_available))
         ga.population_size = ga_population
         ga.n_generations = ga_generations
 
@@ -701,7 +706,7 @@ def _train_all_models(X_train, y_train, X_test, y_test, n_classes, config_dict):
         ada = AdaBoostClassifier(
             n_estimators=config_dict['ada_n_estimators'],
             learning_rate=config_dict['ada_learning_rate'],
-            algorithm=config.ADABOOST_ALGORITHM,
+            # algorithm=config.ADABOOST_ALGORITHM,
             random_state=config.RANDOM_STATE
         )
         ada.fit(X_train, y_train)
@@ -772,7 +777,7 @@ def _train_all_models_default(X_train, y_train, X_test, y_test, n_classes,
         ada = AdaBoostClassifier(
             n_estimators=100,
             learning_rate=1.0,
-            algorithm=config.ADABOOST_ALGORITHM,
+            # algorithm=config.ADABOOST_ALGORITHM,
             random_state=config.RANDOM_STATE
         )
         ada.fit(X_train, y_train)