Ahmedik95316 committed on
Commit 5cb20e9
1 Parent(s): 6b4cc07

Create uncertainty_quantification.py


Enhanced Uncertainty Quantification (`utils/uncertainty_quantification.py`)
- Model performance uncertainty quantification
- Feature importance uncertainty with stability rankings
- Prediction-level uncertainty assessment using entropy
- Cross-validation stability analysis
- Comprehensive uncertainty reporting with actionable recommendations
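
For quick reference, a minimal usage sketch of the new quantifier, modeled on the `__main__` example at the bottom of the file. It assumes the module is imported as part of the `utils` package (so its relative import of `statistical_analysis` resolves) and uses illustrative toy data and model names:

```python
# Minimal sketch based on the file's own __main__ example; toy data only.
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

from utils.uncertainty_quantification import EnhancedUncertaintyQuantifier

# Toy binary classification problem
rng = np.random.RandomState(42)
X = rng.randn(300, 15)
y = (X[:, 0] + X[:, 1] > 0).astype(int)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

models = {
    "logistic_regression": LogisticRegression(random_state=42),
    "random_forest": RandomForestClassifier(n_estimators=50, random_state=42),
}

# n_bootstrap lowered from the 1000 default to keep the example fast
quantifier = EnhancedUncertaintyQuantifier(confidence_level=0.95, n_bootstrap=100)
report = quantifier.comprehensive_uncertainty_analysis(models, X_train, X_test, y_train, y_test)

print(f"{len(report.recommendations)} recommendations at confidence level {report.confidence_level}")
report_path = report.save_report()  # JSON report, defaults to /tmp/logs/uncertainty_report.json
```

The `integrate_uncertainty_quantification_with_retrain()` and `integrate_uncertainty_quantification_with_train()` helpers at the end of the file wrap the same analysis as closures for the existing retrain.py and train.py entry points.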

Files changed (1)
  1. utils/uncertainty_quantification.py +775 -0
utils/uncertainty_quantification.py ADDED
@@ -0,0 +1,775 @@
+ # utils/uncertainty_quantification.py
+ # Enhanced uncertainty quantification integration for existing MLOps pipeline
+
+ import numpy as np
+ from typing import Dict, Any, Tuple, Optional, List, Callable
+ from pathlib import Path
+ import json
+ from datetime import datetime
+ from dataclasses import dataclass
+ import logging
+
+ # Import statistical analysis components
+ try:
+     from .statistical_analysis import (
+         MLOpsStatisticalAnalyzer, BootstrapAnalyzer,
+         FeatureImportanceAnalyzer, StatisticalResult
+     )
+     STATISTICAL_ANALYSIS_AVAILABLE = True
+ except ImportError:
+     STATISTICAL_ANALYSIS_AVAILABLE = False
+     logging.warning("Statistical analysis components not available")
+
+ # Import structured logging
+ try:
+     from .structured_logger import StructuredLogger, EventType, MLOpsLoggers
+     STRUCTURED_LOGGING_AVAILABLE = True
+ except ImportError:
+     STRUCTURED_LOGGING_AVAILABLE = False
+     import logging
+
+
+ @dataclass
+ class UncertaintyReport:
+     """Comprehensive uncertainty quantification report"""
+     model_performance_uncertainty: Dict[str, Any]
+     feature_importance_uncertainty: Dict[str, Any]
+     cross_validation_uncertainty: Dict[str, Any]
+     prediction_uncertainty: Dict[str, Any]
+     model_comparison_uncertainty: Dict[str, Any]
+     recommendations: List[Dict[str, Any]]
+     confidence_level: float
+     analysis_timestamp: str
+
+     def to_dict(self) -> Dict[str, Any]:
+         """Convert to dictionary for serialization"""
+         return {
+             'model_performance_uncertainty': self.model_performance_uncertainty,
+             'feature_importance_uncertainty': self.feature_importance_uncertainty,
+             'cross_validation_uncertainty': self.cross_validation_uncertainty,
+             'prediction_uncertainty': self.prediction_uncertainty,
+             'model_comparison_uncertainty': self.model_comparison_uncertainty,
+             'recommendations': self.recommendations,
+             'confidence_level': self.confidence_level,
+             'analysis_timestamp': self.analysis_timestamp
+         }
+
+     def save_report(self, file_path: Path = None) -> Path:
+         """Save uncertainty report to file"""
+         if file_path is None:
+             file_path = Path("/tmp/logs/uncertainty_report.json")
+
+         file_path.parent.mkdir(parents=True, exist_ok=True)
+
+         with open(file_path, 'w') as f:
+             json.dump(self.to_dict(), f, indent=2, default=str)
+
+         return file_path
+
+
+ class EnhancedUncertaintyQuantifier:
+     """Enhanced uncertainty quantification for MLOps pipeline integration"""
+
+     def __init__(self,
+                  confidence_level: float = 0.95,
+                  n_bootstrap: int = 1000,
+                  random_state: int = 42):
+
+         self.confidence_level = confidence_level
+         self.n_bootstrap = n_bootstrap
+         self.random_state = random_state
+
+         if STATISTICAL_ANALYSIS_AVAILABLE:
+             self.statistical_analyzer = MLOpsStatisticalAnalyzer(
+                 confidence_level, n_bootstrap, random_state
+             )
+             self.bootstrap_analyzer = BootstrapAnalyzer(n_bootstrap, confidence_level, random_state)
+             self.feature_analyzer = FeatureImportanceAnalyzer(n_bootstrap, confidence_level, random_state)
+         else:
+             raise ImportError("Statistical analysis components required for uncertainty quantification")
+
+         if STRUCTURED_LOGGING_AVAILABLE:
+             self.logger = MLOpsLoggers.get_logger('uncertainty_quantification')
+         else:
+             self.logger = logging.getLogger(__name__)
+
+     def quantify_model_uncertainty(self,
+                                    model,
+                                    X_train: np.ndarray,
+                                    X_test: np.ndarray,
+                                    y_train: np.ndarray,
+                                    y_test: np.ndarray,
+                                    model_name: str = "model") -> Dict[str, Any]:
+         """Quantify uncertainty in model performance metrics"""
+
+         from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_auc_score
+
+         # Fit model
+         model.fit(X_train, y_train)
+         y_pred = model.predict(X_test)
+         y_pred_proba = model.predict_proba(X_test)[:, 1] if hasattr(model, 'predict_proba') else y_pred
+
+         # Define metric functions
+         metrics = {
+             'accuracy': lambda y_true, y_pred: accuracy_score(y_true, y_pred),
+             'f1': lambda y_true, y_pred: f1_score(y_true, y_pred, average='weighted'),
+             'precision': lambda y_true, y_pred: precision_score(y_true, y_pred, average='weighted'),
+             'recall': lambda y_true, y_pred: recall_score(y_true, y_pred, average='weighted'),
+             'roc_auc': lambda y_true, y_pred_proba: roc_auc_score(y_true, y_pred_proba)
+         }
+
+         # Bootstrap confidence intervals for each metric
+         uncertainty_results = {}
+
+         for metric_name, metric_func in metrics.items():
+             try:
+                 if metric_name == 'roc_auc':
+                     result = self.bootstrap_analyzer.bootstrap_metric(
+                         y_test, y_pred_proba, metric_func
+                     )
+                 else:
+                     result = self.bootstrap_analyzer.bootstrap_metric(
+                         y_test, y_pred, metric_func
+                     )
+
+                 uncertainty_results[metric_name] = {
+                     'point_estimate': result.point_estimate,
+                     'confidence_interval': result.confidence_interval,
+                     'margin_of_error': result.margin_of_error(),
+                     'relative_uncertainty': result.margin_of_error() / result.point_estimate if result.point_estimate > 0 else np.inf,
+                     'confidence_level': result.confidence_level,
+                     'sample_size': result.sample_size,
+                     'metadata': result.metadata
+                 }
+
+             except Exception as e:
+                 uncertainty_results[metric_name] = {'error': str(e)}
+
+         # Overall uncertainty assessment
+         valid_uncertainties = [
+             r['relative_uncertainty'] for r in uncertainty_results.values()
+             if isinstance(r, dict) and 'relative_uncertainty' in r and np.isfinite(r['relative_uncertainty'])
+         ]
+
+         overall_assessment = {
+             'model_name': model_name,
+             'average_relative_uncertainty': float(np.mean(valid_uncertainties)) if valid_uncertainties else np.inf,
+             'max_relative_uncertainty': float(np.max(valid_uncertainties)) if valid_uncertainties else np.inf,
+             'uncertainty_level': self._classify_uncertainty_level(np.mean(valid_uncertainties)) if valid_uncertainties else 'unknown'
+         }
+
+         return {
+             'metric_uncertainties': uncertainty_results,
+             'overall_assessment': overall_assessment,
+             'analysis_metadata': {
+                 'confidence_level': self.confidence_level,
+                 'n_bootstrap': self.n_bootstrap,
+                 'test_size': len(y_test),
+                 'train_size': len(y_train)
+             }
+         }
+
+     def quantify_feature_importance_uncertainty(self,
+                                                 model,
+                                                 X: np.ndarray,
+                                                 y: np.ndarray,
+                                                 feature_names: List[str] = None) -> Dict[str, Any]:
+         """Quantify uncertainty in feature importance rankings"""
+
+         try:
+             # Analyze feature importance stability
+             stability_results = self.feature_analyzer.analyze_importance_stability(
+                 model, X, y, feature_names
+             )
+
+             # Extract uncertainty metrics
+             feature_uncertainties = {}
+             unstable_features = []
+
+             for feature_name, analysis in stability_results['feature_importance_analysis'].items():
+                 cv = analysis['metadata']['coefficient_of_variation']
+
+                 feature_uncertainties[feature_name] = {
+                     'importance_mean': analysis['point_estimate'],
+                     'importance_ci': analysis['confidence_interval'],
+                     'coefficient_of_variation': cv,
+                     'stability_rank': analysis['metadata']['stability_rank'],
+                     'uncertainty_level': self._classify_feature_uncertainty(cv)
+                 }
+
+                 # Flag highly uncertain features
+                 if cv > 0.5:  # 50% coefficient of variation threshold
+                     unstable_features.append({
+                         'feature': feature_name,
+                         'cv': cv,
+                         'reason': 'High variance in importance across bootstrap samples'
+                     })
+
+             return {
+                 'feature_importance_uncertainties': feature_uncertainties,
+                 'stability_ranking': stability_results['stability_ranking'],
+                 'unstable_features': unstable_features,
+                 'uncertainty_summary': {
+                     'total_features': len(feature_uncertainties),
+                     'unstable_features_count': len(unstable_features),
+                     'uncertainty_rate': len(unstable_features) / len(feature_uncertainties) if feature_uncertainties else 0
+                 },
+                 'analysis_metadata': stability_results['analysis_metadata']
+             }
+
+         except Exception as e:
+             return {'error': str(e)}
+
+     def quantify_cross_validation_uncertainty(self,
+                                               model,
+                                               X: np.ndarray,
+                                               y: np.ndarray,
+                                               cv_folds: int = 5) -> Dict[str, Any]:
+         """Quantify uncertainty in cross-validation results"""
+
+         from sklearn.model_selection import cross_val_score, StratifiedKFold
+         from sklearn.metrics import f1_score, accuracy_score
+
+         try:
+             # Define CV strategy
+             cv_strategy = StratifiedKFold(n_splits=cv_folds, shuffle=True, random_state=self.random_state)
+
+             # Comprehensive CV analysis with uncertainty quantification
+             metrics = {
+                 'accuracy': lambda y_true, y_pred: accuracy_score(y_true, y_pred),
+                 'f1': lambda y_true, y_pred: f1_score(y_true, y_pred, average='weighted')
+             }
+
+             cv_analysis = self.statistical_analyzer.cv_analyzer.comprehensive_cv_analysis(
+                 model, X, y, metrics
+             )
+
+             # Extract uncertainty information
+             cv_uncertainties = {}
+
+             for metric_name, analysis in cv_analysis['metrics_analysis'].items():
+                 test_scores = analysis['test_scores']
+
+                 # Calculate additional uncertainty metrics
+                 cv_coefficient = test_scores['std'] / test_scores['mean'] if test_scores['mean'] > 0 else np.inf
+
+                 cv_uncertainties[metric_name] = {
+                     'cv_mean': test_scores['mean'],
+                     'cv_std': test_scores['std'],
+                     'cv_scores': test_scores['scores'],
+                     'coefficient_of_variation': cv_coefficient,
+                     'confidence_interval': test_scores['confidence_interval'],
+                     'stability_level': self._classify_cv_stability(cv_coefficient),
+                     'overfitting_analysis': analysis.get('overfitting_analysis', {}),
+                     'statistical_tests': analysis.get('statistical_tests', {})
+                 }
+
+             return {
+                 'cv_uncertainties': cv_uncertainties,
+                 'cv_metadata': {
+                     'cv_folds': cv_folds,
+                     'sample_size': len(X),
+                     'confidence_level': self.confidence_level
+                 },
+                 'stability_assessment': self._assess_cv_stability(cv_uncertainties)
+             }
+
+         except Exception as e:
+             return {'error': str(e)}
+
+     def quantify_prediction_uncertainty(self,
+                                         model,
+                                         X_new: np.ndarray,
+                                         n_bootstrap_predictions: int = 100) -> Dict[str, Any]:
+         """Quantify uncertainty in individual predictions using bootstrap"""
+
+         try:
+             # This requires the original training data - simplified version for demonstration
+             # In practice, you'd need to store bootstrap models or use other uncertainty methods
+
+             if hasattr(model, 'predict_proba'):
+                 # For probabilistic models, use prediction probabilities as uncertainty proxy
+                 probabilities = model.predict_proba(X_new)
+                 predictions = model.predict(X_new)
+
+                 # Calculate prediction uncertainty metrics
+                 prediction_uncertainties = []
+
+                 for i, (pred, proba) in enumerate(zip(predictions, probabilities)):
+                     max_proba = np.max(proba)
+                     entropy = -np.sum(proba * np.log(proba + 1e-8))  # Add small constant for numerical stability
+
+                     uncertainty_info = {
+                         'prediction': int(pred),
+                         'prediction_probability': float(max_proba),
+                         'entropy': float(entropy),
+                         'uncertainty_level': self._classify_prediction_uncertainty(max_proba),
+                         'all_class_probabilities': proba.tolist()
+                     }
+
+                     prediction_uncertainties.append(uncertainty_info)
+
+                 # Overall prediction uncertainty summary
+                 avg_entropy = np.mean([p['entropy'] for p in prediction_uncertainties])
+                 avg_confidence = np.mean([p['prediction_probability'] for p in prediction_uncertainties])
+
+                 uncertain_predictions = sum(1 for p in prediction_uncertainties if p['uncertainty_level'] in ['high', 'very_high'])
+
+                 return {
+                     'individual_predictions': prediction_uncertainties,
+                     'uncertainty_summary': {
+                         'total_predictions': len(prediction_uncertainties),
+                         'uncertain_predictions': uncertain_predictions,
+                         'uncertainty_rate': uncertain_predictions / len(prediction_uncertainties),
+                         'average_entropy': float(avg_entropy),
+                         'average_confidence': float(avg_confidence)
+                     }
+                 }
+             else:
+                 return {
+                     'error': 'Model does not support probability predictions - uncertainty quantification limited'
+                 }
+
+         except Exception as e:
+             return {'error': str(e)}
+
+     def comprehensive_uncertainty_analysis(self,
+                                            models: Dict[str, Any],
+                                            X_train: np.ndarray,
+                                            X_test: np.ndarray,
+                                            y_train: np.ndarray,
+                                            y_test: np.ndarray,
+                                            feature_names: List[str] = None) -> UncertaintyReport:
+         """Perform comprehensive uncertainty analysis across all components"""
+
+         # Model performance uncertainty
+         model_uncertainties = {}
+         for model_name, model in models.items():
+             model_uncertainties[model_name] = self.quantify_model_uncertainty(
+                 model, X_train, X_test, y_train, y_test, model_name
+             )
+
+         # Feature importance uncertainty (using best model)
+         best_model_name = min(model_uncertainties.keys(),
+                               key=lambda k: model_uncertainties[k]['overall_assessment']['average_relative_uncertainty'])
+         best_model = models[best_model_name]
+
+         feature_uncertainty = self.quantify_feature_importance_uncertainty(
+             best_model, X_train, y_train, feature_names
+         )
+
+         # Cross-validation uncertainty
+         cv_uncertainty = self.quantify_cross_validation_uncertainty(
+             best_model, X_train, y_train
+         )
+
+         # Prediction uncertainty on test set
+         prediction_uncertainty = self.quantify_prediction_uncertainty(
+             best_model, X_test
+         )
+
+         # Model comparison uncertainty
+         if len(models) > 1:
+             comparison_uncertainty = self._quantify_model_comparison_uncertainty(
+                 models, X_train, y_train
+             )
+         else:
+             comparison_uncertainty = {'single_model': 'No comparison available'}
+
+         # Generate recommendations
+         recommendations = self._generate_uncertainty_recommendations(
+             model_uncertainties, feature_uncertainty, cv_uncertainty, prediction_uncertainty
+         )
+
+         return UncertaintyReport(
+             model_performance_uncertainty=model_uncertainties,
+             feature_importance_uncertainty=feature_uncertainty,
+             cross_validation_uncertainty=cv_uncertainty,
+             prediction_uncertainty=prediction_uncertainty,
+             model_comparison_uncertainty=comparison_uncertainty,
+             recommendations=recommendations,
+             confidence_level=self.confidence_level,
+             analysis_timestamp=datetime.now().isoformat()
+         )
+
+     def _quantify_model_comparison_uncertainty(self,
+                                                models: Dict[str, Any],
+                                                X: np.ndarray,
+                                                y: np.ndarray) -> Dict[str, Any]:
+         """Quantify uncertainty in model comparisons"""
+
+         try:
+             # Use comprehensive model comparison with statistical analysis
+             from sklearn.metrics import f1_score, accuracy_score
+
+             metrics = {
+                 'f1': lambda y_true, y_pred: f1_score(y_true, y_pred, average='weighted'),
+                 'accuracy': lambda y_true, y_pred: accuracy_score(y_true, y_pred)
+             }
+
+             comparison_results = self.statistical_analyzer.comparison_analyzer.comprehensive_model_comparison(
+                 models, X, y, metrics
+             )
+
+             # Extract uncertainty information from comparisons
+             comparison_uncertainties = {}
+
+             for comparison_name, comparison_data in comparison_results.get('pairwise_comparisons', {}).items():
+                 overall_comp = comparison_data.get('overall_comparison', {})
+
+                 comparison_uncertainties[comparison_name] = {
+                     'improvement_rate': overall_comp.get('improvement_rate', 0),
+                     'significant_improvements': overall_comp.get('significant_improvements', 0),
+                     'total_comparisons': overall_comp.get('total_comparisons', 0),
+                     'recommendation': overall_comp.get('recommendation', 'No recommendation'),
+                     'uncertainty_level': self._classify_comparison_uncertainty(overall_comp.get('improvement_rate', 0))
+                 }
+
+             # Overall comparison uncertainty
+             ranking = comparison_results.get('model_ranking', {})
+             ranking_uncertainty = self._assess_ranking_uncertainty(ranking)
+
+             return {
+                 'pairwise_comparison_uncertainties': comparison_uncertainties,
+                 'ranking_uncertainty': ranking_uncertainty,
+                 'comparison_metadata': comparison_results.get('analysis_metadata', {})
+             }
+
+         except Exception as e:
+             return {'error': str(e)}
+
+     def _classify_uncertainty_level(self, relative_uncertainty: float) -> str:
+         """Classify overall uncertainty level"""
+         if relative_uncertainty < 0.05:
+             return 'very_low'
+         elif relative_uncertainty < 0.1:
+             return 'low'
+         elif relative_uncertainty < 0.2:
+             return 'medium'
+         elif relative_uncertainty < 0.5:
+             return 'high'
+         else:
+             return 'very_high'
+
+     def _classify_feature_uncertainty(self, cv: float) -> str:
+         """Classify feature importance uncertainty"""
+         if cv < 0.2:
+             return 'stable'
+         elif cv < 0.5:
+             return 'moderately_stable'
+         elif cv < 1.0:
+             return 'unstable'
+         else:
+             return 'very_unstable'
+
+     def _classify_cv_stability(self, cv_coefficient: float) -> str:
+         """Classify cross-validation stability"""
+         if cv_coefficient < 0.1:
+             return 'very_stable'
+         elif cv_coefficient < 0.2:
+             return 'stable'
+         elif cv_coefficient < 0.3:
+             return 'moderately_stable'
+         else:
+             return 'unstable'
+
+     def _classify_prediction_uncertainty(self, max_probability: float) -> str:
+         """Classify individual prediction uncertainty"""
+         if max_probability > 0.95:
+             return 'very_low'
+         elif max_probability > 0.8:
+             return 'low'
+         elif max_probability > 0.6:
+             return 'medium'
+         elif max_probability > 0.5:
+             return 'high'
+         else:
+             return 'very_high'
+
+     def _classify_comparison_uncertainty(self, improvement_rate: float) -> str:
+         """Classify model comparison uncertainty"""
+         if improvement_rate > 0.8:
+             return 'very_confident'
+         elif improvement_rate > 0.6:
+             return 'confident'
+         elif improvement_rate > 0.4:
+             return 'moderate'
+         elif improvement_rate > 0.2:
+             return 'uncertain'
+         else:
+             return 'very_uncertain'
+
+     def _assess_cv_stability(self, cv_uncertainties: Dict[str, Any]) -> Dict[str, Any]:
+         """Assess overall cross-validation stability"""
+
+         stability_levels = [info.get('stability_level', 'unknown') for info in cv_uncertainties.values()]
+
+         stable_count = sum(1 for level in stability_levels if level in ['very_stable', 'stable'])
+
+         return {
+             'stable_metrics': stable_count,
+             'total_metrics': len(stability_levels),
+             'stability_rate': stable_count / len(stability_levels) if stability_levels else 0,
+             # Guard against an empty metric list to avoid division by zero
+             'overall_stability': 'stable' if stability_levels and stable_count / len(stability_levels) > 0.6 else 'unstable'
+         }
+
+     def _assess_ranking_uncertainty(self, ranking: Dict[str, Any]) -> Dict[str, Any]:
+         """Assess uncertainty in model ranking"""
+
+         if not ranking or 'ranking' not in ranking:
+             return {'uncertainty': 'unknown', 'reason': 'No ranking data available'}
+
+         ranking_data = ranking['ranking']
+
+         if len(ranking_data) < 2:
+             return {'uncertainty': 'low', 'reason': 'Only one model'}
+
+         # Check if top model is significantly better than others
+         top_model = ranking_data[0]
+         significantly_better_count = len(top_model.get('significantly_better_than', []))
+         total_other_models = len(ranking_data) - 1
+
+         if significantly_better_count == total_other_models:
+             return {
+                 'uncertainty': 'low',
+                 'reason': 'Top model significantly better than all others',
+                 'confidence': 'high'
+             }
+         elif significantly_better_count > total_other_models / 2:
+             return {
+                 'uncertainty': 'medium',
+                 'reason': 'Top model significantly better than some others',
+                 'confidence': 'medium'
+             }
+         else:
+             return {
+                 'uncertainty': 'high',
+                 'reason': 'No clear statistical winner among models',
+                 'confidence': 'low'
+             }
+
+     def _generate_uncertainty_recommendations(self,
+                                               model_uncertainties: Dict[str, Any],
+                                               feature_uncertainty: Dict[str, Any],
+                                               cv_uncertainty: Dict[str, Any],
+                                               prediction_uncertainty: Dict[str, Any]) -> List[Dict[str, Any]]:
+         """Generate actionable recommendations based on uncertainty analysis"""
+
+         recommendations = []
+
+         # Model performance uncertainty recommendations
+         for model_name, uncertainty in model_uncertainties.items():
+             overall_assessment = uncertainty.get('overall_assessment', {})
+             uncertainty_level = overall_assessment.get('uncertainty_level', 'unknown')
+
+             if uncertainty_level in ['high', 'very_high']:
+                 recommendations.append({
+                     'type': 'model_performance',
+                     'priority': 'high',
+                     'model': model_name,
+                     'issue': f'High performance uncertainty ({uncertainty_level})',
+                     'action': 'Collect more training data or consider model regularization',
+                     'details': {
+                         'avg_relative_uncertainty': overall_assessment.get('average_relative_uncertainty', 0),
+                         'max_relative_uncertainty': overall_assessment.get('max_relative_uncertainty', 0)
+                     }
+                 })
+
+         # Feature importance uncertainty recommendations
+         unstable_features = feature_uncertainty.get('unstable_features', [])
+         if unstable_features:
+             recommendations.append({
+                 'type': 'feature_importance',
+                 'priority': 'medium',
+                 'issue': f'{len(unstable_features)} features have unstable importance rankings',
+                 'action': 'Review feature engineering and consider feature selection',
+                 'details': {
+                     'unstable_features': [f['feature'] for f in unstable_features],
+                     'uncertainty_rate': feature_uncertainty.get('uncertainty_summary', {}).get('uncertainty_rate', 0)
+                 }
+             })
+
+         # Cross-validation stability recommendations
+         cv_stability = cv_uncertainty.get('stability_assessment', {})
+         if cv_stability.get('overall_stability') == 'unstable':
+             recommendations.append({
+                 'type': 'cross_validation',
+                 'priority': 'medium',
+                 'issue': 'Unstable cross-validation performance',
+                 'action': 'Check data quality, consider stratified sampling, or increase CV folds',
+                 'details': {
+                     'stability_rate': cv_stability.get('stability_rate', 0),
+                     'stable_metrics': cv_stability.get('stable_metrics', 0),
+                     'total_metrics': cv_stability.get('total_metrics', 0)
+                 }
+             })
+
+         # Prediction uncertainty recommendations
+         pred_summary = prediction_uncertainty.get('uncertainty_summary', {})
+         uncertainty_rate = pred_summary.get('uncertainty_rate', 0)
+
+         if uncertainty_rate > 0.2:  # More than 20% uncertain predictions
+             recommendations.append({
+                 'type': 'prediction_uncertainty',
+                 'priority': 'high',
+                 'issue': f'{uncertainty_rate:.1%} of predictions have high uncertainty',
+                 'action': 'Consider implementing prediction confidence thresholds or human review for uncertain cases',
+                 'details': {
+                     'uncertain_predictions': pred_summary.get('uncertain_predictions', 0),
+                     'total_predictions': pred_summary.get('total_predictions', 0),
+                     'average_confidence': pred_summary.get('average_confidence', 0)
+                 }
+             })
+
+         return recommendations
+
+
+ # Integration functions for existing codebase
+ def integrate_uncertainty_quantification_with_retrain():
+     """Integration function for retrain.py"""
+
+     def enhanced_model_comparison_with_uncertainty(models_dict, X_train, X_test, y_train, y_test):
+         """Enhanced model comparison with comprehensive uncertainty quantification"""
+
+         try:
+             quantifier = EnhancedUncertaintyQuantifier()
+
+             # Perform comprehensive uncertainty analysis
+             uncertainty_report = quantifier.comprehensive_uncertainty_analysis(
+                 models_dict, X_train, X_test, y_train, y_test
+             )
+
+             # Save uncertainty report
+             report_path = uncertainty_report.save_report()
+
+             # Extract promotion decision based on uncertainty analysis
+             model_uncertainties = uncertainty_report.model_performance_uncertainty
+
+             # Find model with lowest uncertainty
+             best_model_name = min(
+                 model_uncertainties.keys(),
+                 key=lambda k: model_uncertainties[k]['overall_assessment']['average_relative_uncertainty']
+             )
+
+             best_uncertainty = model_uncertainties[best_model_name]['overall_assessment']['average_relative_uncertainty']
+             uncertainty_level = model_uncertainties[best_model_name]['overall_assessment']['uncertainty_level']
+
+             # Decision logic incorporating uncertainty
+             promote_candidate = (
+                 uncertainty_level in ['very_low', 'low', 'medium'] and
+                 len(uncertainty_report.recommendations) <= 2
+             )
+
+             return {
+                 'recommended_model': best_model_name,
+                 'uncertainty_level': uncertainty_level,
+                 'average_uncertainty': best_uncertainty,
+                 'uncertainty_report': uncertainty_report.to_dict(),
+                 'report_path': str(report_path),
+                 'promote_candidate': promote_candidate,
+                 'recommendations': uncertainty_report.recommendations
+             }
+
+         except Exception as e:
+             return {'error': f'Uncertainty quantification failed: {str(e)}'}
+
+     return enhanced_model_comparison_with_uncertainty
+
+ def integrate_uncertainty_quantification_with_train():
+     """Integration function for train.py"""
+
+     def enhanced_ensemble_validation_with_uncertainty(individual_models, ensemble_model, X, y):
+         """Enhanced ensemble validation with uncertainty quantification"""
+
+         try:
+             from sklearn.model_selection import train_test_split
+
+             quantifier = EnhancedUncertaintyQuantifier()
+
+             # Prepare models for analysis
+             models_to_analyze = {**individual_models, 'ensemble': ensemble_model}
+
+             # Split data for uncertainty analysis
+             X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+
+             # Perform uncertainty analysis
+             uncertainty_report = quantifier.comprehensive_uncertainty_analysis(
+                 models_to_analyze, X_train, X_test, y_train, y_test
+             )
+
+             # Determine ensemble recommendation based on uncertainty
+             ensemble_uncertainty = uncertainty_report.model_performance_uncertainty.get('ensemble', {})
+             ensemble_uncertainty_level = ensemble_uncertainty.get('overall_assessment', {}).get('uncertainty_level', 'unknown')
+
+             # Compare ensemble uncertainty with individual models
+             individual_uncertainties = [
+                 uncertainty_report.model_performance_uncertainty[name]['overall_assessment']['average_relative_uncertainty']
+                 for name in individual_models.keys()
+                 if name in uncertainty_report.model_performance_uncertainty
+             ]
+
+             ensemble_avg_uncertainty = ensemble_uncertainty.get('overall_assessment', {}).get('average_relative_uncertainty', np.inf)
+             best_individual_uncertainty = min(individual_uncertainties) if individual_uncertainties else np.inf
+
+             # Decision logic
+             use_ensemble = (
+                 ensemble_uncertainty_level in ['very_low', 'low', 'medium'] and
+                 ensemble_avg_uncertainty <= best_individual_uncertainty * 1.1  # Allow 10% increase in uncertainty
+             )
+
+             return {
+                 'use_ensemble': use_ensemble,
+                 'ensemble_uncertainty_level': ensemble_uncertainty_level,
+                 'ensemble_avg_uncertainty': ensemble_avg_uncertainty,
+                 'best_individual_uncertainty': best_individual_uncertainty,
+                 'uncertainty_analysis': uncertainty_report.to_dict(),
+                 'recommendations': uncertainty_report.recommendations
+             }
+
+         except Exception as e:
+             return {'error': f'Uncertainty quantification failed: {str(e)}'}
+
+     return enhanced_ensemble_validation_with_uncertainty
+
+
+ if __name__ == "__main__":
+     # Example usage and testing
+     print("Testing enhanced uncertainty quantification system...")
+
+     # Generate sample data
+     np.random.seed(42)
+     X = np.random.randn(300, 15)
+     y = (X[:, 0] + X[:, 1] + np.random.randn(300) * 0.2 > 0).astype(int)
+
+     # Create sample models
+     from sklearn.linear_model import LogisticRegression
+     from sklearn.ensemble import RandomForestClassifier
+     from sklearn.model_selection import train_test_split
+
+     models = {
+         'logistic_regression': LogisticRegression(random_state=42),
+         'random_forest': RandomForestClassifier(n_estimators=50, random_state=42)
+     }
+
+     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
+
+     # Test comprehensive uncertainty analysis
+     if STATISTICAL_ANALYSIS_AVAILABLE:
+         quantifier = EnhancedUncertaintyQuantifier(n_bootstrap=100)  # Reduced for testing
+
+         print("Running comprehensive uncertainty analysis...")
+         uncertainty_report = quantifier.comprehensive_uncertainty_analysis(
+             models, X_train, X_test, y_train, y_test
+         )
+
+         print(f"Generated {len(uncertainty_report.recommendations)} uncertainty-based recommendations")
+         print(f"Overall confidence level: {uncertainty_report.confidence_level}")
+
+         # Save report
+         report_path = uncertainty_report.save_report()
+         print(f"Uncertainty report saved to: {report_path}")
+
+         print("Enhanced uncertainty quantification system test completed successfully!")
+
+     else:
+         print("Statistical analysis components not available - skipping test")