Ahmedik95316 committed
Commit 0d47abb · 1 Parent(s): 3f56aad

Create conftest.py

Adding Tests for MLOps Infrastructure Enhancement

Files changed (1):
  1. tests/conftest.py +493 -0
tests/conftest.py ADDED
@@ -0,0 +1,493 @@
+ # tests/conftest.py
+ # Shared test configuration and fixtures
+
+ import pytest
+ import numpy as np
+ import pandas as pd
+ import tempfile
+ import sys
+ import os
+ from pathlib import Path
+ from unittest.mock import patch
+
+ # Add project root to Python path
+ sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
+
+ @pytest.fixture(scope="session")
+ def test_data_dir():
+     """Create temporary directory for test data"""
+     with tempfile.TemporaryDirectory() as temp_dir:
+         yield Path(temp_dir)
+
+ @pytest.fixture(scope="session")
+ def sample_fake_news_data():
+     """Generate realistic fake news dataset for testing"""
+     np.random.seed(42)
+
+     # Realistic fake news patterns
+     fake_texts = [
+         "BREAKING: Scientists discover shocking truth about vaccines that doctors don't want you to know!",
+         "EXCLUSIVE: Celebrity caught in major scandal - you won't believe what happened next!",
+         "ALERT: Government secretly planning massive operation - leaked documents reveal everything!",
+         "AMAZING: Local mom discovers one weird trick that makes millions - experts hate her!",
+         "URGENT: New study proves everything you know about nutrition is completely wrong!",
+     ] * 20
+
+     # Realistic real news patterns
+     real_texts = [
+         "Local city council approves new infrastructure budget for road maintenance and repairs.",
+         "University researchers publish peer-reviewed study on climate change impacts in regional ecosystems.",
+         "Stock market shows mixed results following quarterly earnings reports from major corporations.",
+         "Public health officials recommend updated vaccination schedules based on recent clinical trials.",
+         "Municipal government announces new public transportation routes to improve city connectivity.",
+     ] * 20
+
+     # Combine and create DataFrame
+     all_texts = fake_texts + real_texts
+     all_labels = [1] * len(fake_texts) + [0] * len(real_texts)
+
+     df = pd.DataFrame({
+         'text': all_texts,
+         'label': all_labels
+     })
+
+     return df.sample(frac=1, random_state=42).reset_index(drop=True)
+
+ @pytest.fixture
+ def mock_enhanced_features():
+     """Mock enhanced feature engineering when not available"""
+     with patch('model.retrain.ENHANCED_FEATURES_AVAILABLE', True):
+         with patch('model.retrain.AdvancedFeatureEngineer') as mock_fe:
+             # Configure mock to behave like real feature engineer
+             mock_instance = mock_fe.return_value
+             mock_instance.get_feature_metadata.return_value = {
+                 'total_features': 5000,
+                 'feature_types': {
+                     'tfidf_features': 3000,
+                     'sentiment_features': 10,
+                     'readability_features': 15,
+                     'entity_features': 25,
+                     'linguistic_features': 50
+                 },
+                 'configuration': {'test': True}
+             }
+             mock_instance.get_feature_importance.return_value = {
+                 'feature_1': 0.15,
+                 'feature_2': 0.12,
+                 'feature_3': 0.10
+             }
+             mock_instance.get_feature_names.return_value = [f'feature_{i}' for i in range(5000)]
+
+             yield mock_fe
+
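For orientation (not part of the committed file): pytest injects these fixtures by parameter name, so any test under tests/ can request them directly, and the session scope means the dataset is built once per run. A minimal sketch with a hypothetical test name:

import pandas as pd

def test_sample_data_is_balanced(sample_fake_news_data: pd.DataFrame):
    # The fixture above builds 100 fake (label 1) and 100 real (label 0) rows, shuffled with a fixed seed.
    assert set(sample_fake_news_data.columns) == {'text', 'label'}
    assert sample_fake_news_data['label'].value_counts().to_dict() == {0: 100, 1: 100}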
+ # tests/test_data_processing.py
+ # Test data processing and validation components
+
+ import pytest
+ import pandas as pd
+ import numpy as np
+ from pathlib import Path
+ import tempfile
+
+ from data.data_validator import DataValidator
+ from data.prepare_datasets import DatasetPreparer
+
+ class TestDataValidation:
+     """Test data validation functionality"""
+
+     def test_validate_text_column(self, sample_fake_news_data):
+         """Test text column validation"""
+         validator = DataValidator()
+
+         # Valid data should pass
+         is_valid, issues = validator.validate_dataframe(sample_fake_news_data)
+         assert is_valid == True
+         assert len(issues) == 0
+
+         # Test with invalid data
+         invalid_data = pd.DataFrame({
+             'text': ['', 'x', None, 'Valid text here'],
+             'label': [0, 1, 0, 2]  # Invalid label
+         })
+
+         is_valid, issues = validator.validate_dataframe(invalid_data)
+         assert is_valid == False
+         assert len(issues) > 0
+
+     def test_text_quality_validation(self):
+         """Test text quality validation rules"""
+         validator = DataValidator()
+
+         # Test minimum length requirement
+         short_texts = pd.DataFrame({
+             'text': ['hi', 'ok', 'This is a proper length text for validation'],
+             'label': [0, 1, 0]
+         })
+
+         is_valid, issues = validator.validate_dataframe(short_texts)
+         assert is_valid == False
+         assert any('length' in str(issue).lower() for issue in issues)
+
+
+ # tests/test_train_integration.py
+ # Test integration with train.py to ensure compatibility
+
+ import pytest
+ import tempfile
+ from pathlib import Path
+ from unittest.mock import patch
+
+ class TestTrainRetrainCompatibility:
+     """Test compatibility between train.py and retrain.py"""
+
+     def test_metadata_compatibility(self):
+         """Test metadata format compatibility between train and retrain"""
+         from model.train import EnhancedModelTrainer
+         from model.retrain import EnhancedModelRetrainer
+
+         with tempfile.TemporaryDirectory() as temp_dir:
+             temp_path = Path(temp_dir)
+
+             # Mock trainer to avoid full training
+             trainer = EnhancedModelTrainer(use_enhanced_features=False)
+             trainer.base_dir = temp_path
+             trainer.setup_paths()
+
+             # Create sample metadata as train.py would
+             sample_metadata = {
+                 'model_version': 'v1.0',
+                 'model_type': 'enhanced_pipeline_cv',
+                 'feature_engineering': {'type': 'standard'},
+                 'test_f1': 0.85,
+                 'cross_validation': {
+                     'test_scores': {'f1': {'mean': 0.82, 'std': 0.03}}
+                 }
+             }
+
+             # Save metadata
+             import json
+             with open(trainer.metadata_path, 'w') as f:
+                 json.dump(sample_metadata, f)
+
+             # Test retrainer can read it
+             retrainer = EnhancedModelRetrainer()
+             retrainer.base_dir = temp_path
+             retrainer.setup_paths()
+
+             metadata = retrainer.load_existing_metadata()
+             assert metadata is not None
+             assert metadata['model_version'] == 'v1.0'
+             assert metadata['feature_engineering']['type'] == 'standard'
+
+     def test_model_file_compatibility(self):
+         """Test model file format compatibility"""
+         # Both train.py and retrain.py should save/load models consistently
+         from model.retrain import EnhancedModelRetrainer
+
+         with tempfile.TemporaryDirectory() as temp_dir:
+             temp_path = Path(temp_dir)
+
+             retrainer = EnhancedModelRetrainer()
+             retrainer.base_dir = temp_path
+             retrainer.setup_paths()
+
+             # Create mock pipeline as train.py would save
+             from sklearn.pipeline import Pipeline
+             from sklearn.linear_model import LogisticRegression
+             from sklearn.feature_extraction.text import TfidfVectorizer
+
+             mock_pipeline = Pipeline([
+                 ('vectorize', TfidfVectorizer(max_features=1000)),
+                 ('model', LogisticRegression())
+             ])
+
+             import joblib
+             joblib.dump(mock_pipeline, retrainer.prod_pipeline_path)
+
+             # Test retrainer can load it
+             success, model, message = retrainer.load_production_model()
+             assert success == True
+             assert model is not None
+
+
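The model-file compatibility check above leans on the standard joblib round-trip for scikit-learn pipelines. A self-contained sketch of that contract (the file name below is hypothetical; the real location comes from retrainer.prod_pipeline_path):

import joblib
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.feature_extraction.text import TfidfVectorizer

# What train.py is expected to write and retrain.py is expected to read back.
pipeline = Pipeline([
    ('vectorize', TfidfVectorizer(max_features=1000)),
    ('model', LogisticRegression())
])
joblib.dump(pipeline, 'pipeline.joblib')   # hypothetical path for illustration
restored = joblib.load('pipeline.joblib')
assert isinstance(restored, Pipeline)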
+ # tests/pytest.ini
+ # Pytest configuration file
+ [tool:pytest]
+ testpaths = tests
+ python_files = test_*.py
+ python_classes = Test*
+ python_functions = test_*
+ addopts =
+     -v
+     --tb=short
+     --strict-markers
+     --disable-warnings
+     --color=yes
+ markers =
+     slow: marks tests as slow (deselect with '-m "not slow"')
+     integration: marks tests as integration tests
+     unit: marks tests as unit tests
+     cpu_constraint: marks tests that verify CPU constraint compliance
+ filterwarnings =
+     ignore::UserWarning
+     ignore::FutureWarning
+     ignore::DeprecationWarning
+
+
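Because addopts includes --strict-markers, any marker used in a test must be declared in the markers list above. A short sketch (hypothetical test, not in this commit) of how the declared markers are applied and then selected:

import pytest

@pytest.mark.slow
@pytest.mark.integration
def test_full_retraining_cycle():
    """Hypothetical long-running test, excluded from the default unit run."""
    ...

# Selection then mirrors the -m expressions used in run_tests.py:
#   pytest tests/ -m "not slow and not integration"   # fast unit tests only
#   pytest tests/ -m "cpu_constraint"                  # CPU-budget checks only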
+ # tests/test_lightgbm_integration.py
+ # Specific tests for LightGBM integration
+
+ import pytest
+ import numpy as np
+ from unittest.mock import patch
+ import lightgbm as lgb
+
+ class TestLightGBMIntegration:
+     """Test LightGBM-specific functionality"""
+
+     def test_lightgbm_model_configuration(self):
+         """Test LightGBM model is properly configured for CPU constraints"""
+         from model.retrain import EnhancedModelRetrainer
+
+         retrainer = EnhancedModelRetrainer()
+         lgb_config = retrainer.models['lightgbm']
+         lgb_model = lgb_config['model']
+
+         # Verify CPU-friendly configuration
+         assert isinstance(lgb_model, lgb.LGBMClassifier)
+         assert lgb_model.n_jobs == 1
+         assert lgb_model.verbose == -1
+         assert lgb_model.n_estimators <= 100
+         assert lgb_model.num_leaves <= 31
+
+         # Verify parameter grid is reasonable for CPU
+         param_grid = lgb_config['param_grid']
+         assert all(est <= 100 for est in param_grid['model__n_estimators'])
+         assert all(leaves <= 31 for leaves in param_grid['model__num_leaves'])
+
+     def test_lightgbm_training_integration(self):
+         """Test LightGBM integrates properly in training pipeline"""
+         from model.retrain import EnhancedModelRetrainer
+
+         # Create minimal dataset
+         X = np.random.randn(50, 10)
+         y = np.random.randint(0, 2, 50)
+
+         retrainer = EnhancedModelRetrainer()
+         retrainer.use_enhanced_features = False
+
+         # Test hyperparameter tuning works with LightGBM
+         pipeline = retrainer.create_preprocessing_pipeline()
+
+         try:
+             best_model, results = retrainer.hyperparameter_tuning_with_cv(
+                 pipeline, X, y, 'lightgbm'
+             )
+
+             # Should complete without errors
+             assert best_model is not None
+             assert 'cross_validation' in results or 'error' in results
+
+         except Exception as e:
+             # If tuning fails, should fall back gracefully
+             assert 'fallback' in str(e).lower() or 'error' in str(e).lower()
+
+     def test_lightgbm_cpu_performance(self):
+         """Test LightGBM performance is acceptable under CPU constraints"""
+         import time
+         from model.retrain import EnhancedModelRetrainer
+
+         # Create reasonably sized dataset
+         X = np.random.randn(200, 20)
+         y = np.random.randint(0, 2, 200)
+
+         retrainer = EnhancedModelRetrainer()
+         pipeline = retrainer.create_preprocessing_pipeline()
+         lgb_model = retrainer.models['lightgbm']['model']
+         pipeline.set_params(model=lgb_model)
+
+         # Time the training
+         start_time = time.time()
+         pipeline.fit(X, y)
+         training_time = time.time() - start_time
+
+         # Should complete reasonably quickly on CPU
+         assert training_time < 30  # Should take less than 30 seconds
+
+         # Should produce valid predictions
+         predictions = pipeline.predict(X[:10])
+         assert len(predictions) == 10
+         assert all(pred in [0, 1] for pred in predictions)
+
+
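For reference, a standalone LGBMClassifier configured the way these assertions expect; the actual defaults live in EnhancedModelRetrainer.models, which is outside this commit, so treat this as an illustrative assumption:

import numpy as np
import lightgbm as lgb

# A CPU-friendly configuration consistent with the checks above:
# single-threaded, quiet, and with a small tree budget.
clf = lgb.LGBMClassifier(
    n_estimators=100,   # stays within the test's <= 100 budget
    num_leaves=31,      # <= 31 keeps trees shallow and memory modest
    n_jobs=1,           # no thread fan-out on a constrained CPU
    verbose=-1,         # suppress per-iteration logging
    random_state=42,
)

X = np.random.randn(200, 20)
y = np.random.randint(0, 2, 200)
clf.fit(X, y)
assert set(clf.predict(X[:10])) <= {0, 1}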
+ # tests/test_ensemble_statistical_validation.py
+ # Test ensemble statistical validation logic
+
+ import pytest
+ import numpy as np
+ from scipy import stats
+ from unittest.mock import Mock, patch
+
+ class TestEnsembleStatisticalValidation:
+     """Test statistical validation for ensemble selection"""
+
+     def test_paired_ttest_ensemble_selection(self):
+         """Test paired t-test logic for ensemble vs individual models"""
+         from model.retrain import CVModelComparator
+
+         comparator = CVModelComparator(cv_folds=5, random_state=42)
+
+         # Create mock CV scores where ensemble is significantly better
+         individual_scores = [0.75, 0.74, 0.76, 0.73, 0.75]
+         ensemble_scores = [0.80, 0.81, 0.79, 0.78, 0.82]
+
+         # Test metric comparison
+         comparison = comparator._compare_metric_scores(
+             individual_scores, ensemble_scores, 'f1', 'individual', 'ensemble'
+         )
+
+         assert 'tests' in comparison
+         assert 'paired_ttest' in comparison['tests']
+
+         # Should detect significant improvement
+         t_test_result = comparison['tests']['paired_ttest']
+         assert 'p_value' in t_test_result
+         assert 'significant' in t_test_result
+
+         # With this clear difference, should be significant
+         if t_test_result['p_value'] is not None:
+             assert t_test_result['significant'] == True
+
+     def test_ensemble_not_selected_when_not_significant(self):
+         """Test ensemble is not selected when improvement is not significant"""
+         from model.retrain import CVModelComparator
+
+         comparator = CVModelComparator(cv_folds=5, random_state=42)
+
+         # Create mock CV scores where ensemble is only marginally better
+         individual_scores = [0.75, 0.74, 0.76, 0.73, 0.75]
+         ensemble_scores = [0.751, 0.741, 0.761, 0.731, 0.751]  # Tiny improvement
+
+         comparison = comparator._compare_metric_scores(
+             individual_scores, ensemble_scores, 'f1', 'individual', 'ensemble'
+         )
+
+         # Should not show significant improvement
+         assert comparison['significant_improvement'] == False
+
+     def test_effect_size_calculation(self):
+         """Test Cohen's d effect size calculation"""
+         from model.retrain import CVModelComparator
+
+         comparator = CVModelComparator(cv_folds=5, random_state=42)
+
+         # Create scores with known effect size
+         individual_scores = [0.70, 0.71, 0.69, 0.72, 0.70]
+         ensemble_scores = [0.80, 0.81, 0.79, 0.82, 0.80]  # Large effect
+
+         comparison = comparator._compare_metric_scores(
+             individual_scores, ensemble_scores, 'f1', 'individual', 'ensemble'
+         )
+
+         assert 'effect_size' in comparison
+         effect_size = comparison['effect_size']
+
+         # Should detect large effect size
+         assert abs(effect_size) > 0.5  # Large effect by Cohen's standards
+
+     def test_promotion_decision_with_feature_upgrade(self):
+         """Test promotion decision considers feature engineering upgrades"""
+         from model.retrain import CVModelComparator
+
+         comparator = CVModelComparator()
+
+         # Mock comparison results with feature upgrade
+         mock_results = {
+             'metric_comparisons': {
+                 'f1': {
+                     'improvement': 0.008,  # Small improvement
+                     'significant_improvement': False
+                 },
+                 'accuracy': {
+                     'improvement': 0.005,
+                     'significant_improvement': False
+                 }
+             },
+             'feature_engineering_comparison': {
+                 'feature_upgrade': {
+                     'is_upgrade': True,
+                     'upgrade_type': 'standard_to_enhanced'
+                 }
+             }
+         }
+
+         decision = comparator._make_enhanced_promotion_decision(mock_results)
+
+         # Should promote despite small improvement due to feature upgrade
+         assert decision['promote_candidate'] == True
+         assert decision['feature_engineering_factor'] == True
+         assert 'feature' in decision['reason'].lower()
+
+
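The statistics exercised here can be reproduced directly with scipy. A minimal sketch of the paired t-test and one common paired-samples convention for Cohen's d (CVModelComparator's exact formulas are not part of this commit, so treat this as an assumption about intent):

import numpy as np
from scipy import stats

individual = np.array([0.75, 0.74, 0.76, 0.73, 0.75])
ensemble = np.array([0.80, 0.81, 0.79, 0.78, 0.82])

# Paired t-test: the same CV folds produce both score lists, so the
# comparison is on per-fold differences rather than independent samples.
t_stat, p_value = stats.ttest_rel(ensemble, individual)
significant = p_value < 0.05

# Cohen's d for paired samples (one common convention): mean of the
# per-fold differences divided by their standard deviation.
diff = ensemble - individual
effect_size = diff.mean() / diff.std(ddof=1)

print(f"t={t_stat:.2f}, p={p_value:.4f}, significant={significant}, d={effect_size:.2f}")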
+ # tests/run_tests.py
+ # Test runner script with different test categories
+
+ import pytest
+ import sys
+ from pathlib import Path
+
+ def run_unit_tests():
+     """Run fast unit tests"""
+     return pytest.main([
+         "tests/",
+         "-m", "not slow and not integration",
+         "-v",
+         "--tb=short"
+     ])
+
+ def run_integration_tests():
+     """Run slower integration tests"""
+     return pytest.main([
+         "tests/",
+         "-m", "integration",
+         "-v",
+         "--tb=short"
+     ])
+
+ def run_cpu_constraint_tests():
+     """Run tests that verify CPU constraint compliance"""
+     return pytest.main([
+         "tests/",
+         "-m", "cpu_constraint",
+         "-v",
+         "--tb=short"
+     ])
+
+ def run_all_tests():
+     """Run complete test suite"""
+     return pytest.main([
+         "tests/",
+         "-v",
+         "--tb=short",
+         "--cov=model",
+         "--cov-report=html"
+     ])
+
+ if __name__ == "__main__":
+     if len(sys.argv) > 1:
+         test_type = sys.argv[1]
+         if test_type == "unit":
+             exit_code = run_unit_tests()
+         elif test_type == "integration":
+             exit_code = run_integration_tests()
+         elif test_type == "cpu":
+             exit_code = run_cpu_constraint_tests()
+         elif test_type == "all":
+             exit_code = run_all_tests()
+         else:
+             print("Usage: python run_tests.py [unit|integration|cpu|all]")
+             exit_code = 1
+     else:
+         exit_code = run_unit_tests()  # Default to unit tests
+
+     sys.exit(exit_code)
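
Usage note: the --cov=model and --cov-report=html flags in run_all_tests() come from the pytest-cov plugin rather than pytest itself, so the full-suite entry point assumes that plugin is installed. Based on the script's own argument handling, invocation from the repository root would look like:

python tests/run_tests.py unit          # fast unit tests (default)
python tests/run_tests.py integration   # slower integration tests
python tests/run_tests.py cpu           # CPU-constraint compliance checks
python tests/run_tests.py all           # full suite; requires pytest-cov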