Ahmedik95316 committed
Commit c474963 · 1 Parent(s): 63682de

Update app/fastapi_server.py


Adding LightGBM for Ensemble Model
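The change guards the new dependency behind an optional import and replaces ModelManager with an ensemble-aware EnhancedModelManager (see the diff below). As a hedged, self-contained sketch of the kind of artifact such a manager could load — not the repo's actual training code — here is a TF-IDF pipeline feeding a soft-voting ensemble that includes LightGBM only when the import succeeds. The step names 'vectorizer' and 'model' match the named_steps lookups the server already performs; the hyperparameters are illustrative assumptions:

from sklearn.ensemble import VotingClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline

# Same optional-import guard this commit adds to the server.
try:
    from lightgbm import LGBMClassifier
    LIGHTGBM_AVAILABLE = True
except ImportError:
    LIGHTGBM_AVAILABLE = False

estimators = [("lr", LogisticRegression(max_iter=1000))]
if LIGHTGBM_AVAILABLE:
    # Illustrative settings; the commit does not pin hyperparameters here.
    estimators.append(("lgbm", LGBMClassifier(n_estimators=200)))

# Soft voting averages predict_proba across members, so the server's
# predict_proba-based confidence handling keeps working unchanged.
pipeline = Pipeline([
    ("vectorizer", TfidfVectorizer(max_features=50000)),
    ("model", VotingClassifier(estimators=estimators, voting="soft")),
])

texts = ["An example article that reads like real news.",
         "An example article that reads like fake news."]
labels = [0, 1]
pipeline.fit(texts, labels)
print(pipeline.predict_proba(["Some article text to classify."]))

Because the ensemble sits behind the same Pipeline interface, joblib can persist it to the path the server already checks, and no endpoint code needs to change.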

Files changed (1)
1. app/fastapi_server.py +396 -1205
app/fastapi_server.py CHANGED
@@ -1,3 +1,5 @@
 import json
 import time
 import joblib
@@ -24,6 +26,13 @@ from fastapi.middleware.trustedhost import TrustedHostMiddleware
 from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
 from fastapi import FastAPI, HTTPException, Depends, Request, BackgroundTasks, status
 
 from data.data_validator import (
     DataValidationPipeline, validate_text, validate_articles_list,
     get_validation_stats, generate_quality_report
@@ -39,12 +48,10 @@ from deployment.traffic_router import TrafficRouter
 from deployment.model_registry import ModelRegistry
 from deployment.blue_green_manager import BlueGreenDeploymentManager
 
-
-# Import the new path manager
 try:
     from path_config import path_manager
 except ImportError:
-    # Fallback for development environments
     import sys
     import os
     sys.path.append(os.path.dirname(os.path.abspath(__file__)))
@@ -53,26 +60,21 @@ except ImportError:
 # Configure logging with fallback for permission issues
 def setup_logging():
     """Setup logging with fallback for environments with restricted file access"""
-    handlers = [logging.StreamHandler()]  # Always include console output
 
     try:
-        # Try to create log file in the logs directory
         log_file_path = path_manager.get_logs_path('fastapi_server.log')
         log_file_path.parent.mkdir(parents=True, exist_ok=True)
 
-        # Test if we can write to the file
         test_handler = logging.FileHandler(log_file_path)
         test_handler.close()
 
-        # If successful, add file handler
         handlers.append(logging.FileHandler(log_file_path))
-        print(f"Logging to file: {log_file_path}")  # Use print instead of logger
 
     except (PermissionError, OSError) as e:
-        # If file logging fails, just use console logging
         print(f"Cannot create log file, using console only: {e}")
 
-        # Try alternative locations for file logging
         try:
             import tempfile
             temp_log = tempfile.NamedTemporaryFile(mode='w', suffix='.log', delete=False, prefix='fastapi_')
@@ -84,7 +86,7 @@ def setup_logging():
 
     return handlers
 
-# Setup logging with error handling
 logging.basicConfig(
     level=logging.INFO,
     format='%(asctime)s - %(levelname)s - %(message)s',
@@ -92,7 +94,7 @@ logging.basicConfig(
 )
 logger = logging.getLogger(__name__)
 
-# Now that logger is defined, log the environment info
 try:
     path_manager.log_environment_info()
 except Exception as e:
@@ -105,49 +107,86 @@ security = HTTPBearer(auto_error=False)
 rate_limit_storage = defaultdict(list)
 
 
-class ModelManager:
-    """Manages model loading and health checks with dynamic paths"""
 
     def __init__(self):
         self.model = None
         self.vectorizer = None
         self.pipeline = None
         self.model_metadata = {}
         self.last_health_check = None
         self.health_status = "unknown"
         self.load_model()
 
     def load_model(self):
-        """Load model with comprehensive error handling and dynamic paths"""
         try:
-            logger.info("Loading ML model...")
 
            # Initialize all to None first
            self.model = None
            self.vectorizer = None
            self.pipeline = None
 
-            # Try to load pipeline first (preferred)
-            pipeline_path = path_manager.get_pipeline_path()
-            logger.info(f"Checking for pipeline at: {pipeline_path}")
 
-            if pipeline_path.exists():
                 try:
-                    self.pipeline = joblib.load(pipeline_path)
-                    # Extract components from pipeline
-                    if hasattr(self.pipeline, 'named_steps'):
-                        self.model = self.pipeline.named_steps.get('model')
-                        self.vectorizer = (self.pipeline.named_steps.get('vectorizer') or
-                                           self.pipeline.named_steps.get('vectorize'))
-                    logger.info("Loaded model pipeline successfully")
-                    logger.info(f"Pipeline steps: {list(self.pipeline.named_steps.keys()) if hasattr(self.pipeline, 'named_steps') else 'No named_steps'}")
                 except Exception as e:
-                    logger.warning(f"Failed to load pipeline: {e}, falling back to individual components")
-                    self.pipeline = None
-            else:
-                logger.info(f"Pipeline file not found at {pipeline_path}")
 
-            # If pipeline loading failed or doesn't exist, load individual components
             if self.pipeline is None:
                 model_path = path_manager.get_model_file_path()
                 vectorizer_path = path_manager.get_vectorizer_path()
@@ -159,35 +198,52 @@ class ModelManager:
                 try:
                     self.model = joblib.load(model_path)
                     self.vectorizer = joblib.load(vectorizer_path)
                     logger.info("Loaded model components successfully")
                 except Exception as e:
                     logger.error(f"Failed to load individual components: {e}")
                     raise e
             else:
-                raise FileNotFoundError(f"No model files found. Checked:\n- {pipeline_path}\n- {model_path}\n- {vectorizer_path}")
-
-            # Verify we have what we need for predictions
-            if self.pipeline is None and (self.model is None or self.vectorizer is None):
-                raise ValueError("Neither complete pipeline nor individual model components are available")
 
             # Load metadata
             metadata_path = path_manager.get_metadata_path()
             if metadata_path.exists():
                 with open(metadata_path, 'r') as f:
                     self.model_metadata = json.load(f)
                 logger.info(f"Loaded model metadata: {self.model_metadata.get('model_version', 'Unknown')}")
             else:
                 logger.warning(f"Metadata file not found at: {metadata_path}")
                 self.model_metadata = {"model_version": "unknown"}
 
             self.health_status = "healthy"
             self.last_health_check = datetime.now()
 
             # Log what was successfully loaded
             logger.info(f"Model loading summary:")
             logger.info(f"  Pipeline available: {self.pipeline is not None}")
-            logger.info(f"  Model available: {self.model is not None}")
             logger.info(f"  Vectorizer available: {self.vectorizer is not None}")
 
         except Exception as e:
             logger.error(f"Failed to load model: {e}")
@@ -196,15 +252,21 @@ class ModelManager:
             self.model = None
             self.vectorizer = None
             self.pipeline = None
 
     def predict(self, text: str) -> tuple[str, float]:
-        """Make prediction with error handling"""
         try:
             if self.pipeline:
-                # Use pipeline for prediction
                 prediction = self.pipeline.predict([text])[0]
                 probabilities = self.pipeline.predict_proba([text])[0]
-                logger.debug("Used pipeline for prediction")
             elif self.model and self.vectorizer:
                 # Use individual components
                 X = self.vectorizer.transform([text])
@@ -231,7 +293,7 @@ class ModelManager:
             )
 
     def health_check(self) -> Dict[str, Any]:
-        """Perform health check"""
        try:
             # Test prediction with sample text
             test_text = "This is a test article for health check purposes."
@@ -240,26 +302,44 @@ class ModelManager:
             self.health_status = "healthy"
             self.last_health_check = datetime.now()
 
-            return {
                 "status": "healthy",
                 "last_check": self.last_health_check.isoformat(),
                 "model_available": self.model is not None,
                 "vectorizer_available": self.vectorizer is not None,
                 "pipeline_available": self.pipeline is not None,
                 "test_prediction": {"label": label, "confidence": confidence},
                 "environment": path_manager.environment,
-                "model_path": str(path_manager.get_model_file_path()),
-                "vectorizer_path": str(path_manager.get_vectorizer_path()),
-                "pipeline_path": str(path_manager.get_pipeline_path()),
-                "data_path": str(path_manager.get_data_path()),
                 "file_exists": {
                     "model": path_manager.get_model_file_path().exists(),
                     "vectorizer": path_manager.get_vectorizer_path().exists(),
-                    "pipeline": path_manager.get_pipeline_path().exists(),
-                    "metadata": path_manager.get_metadata_path().exists()
                 }
             }
 
         except Exception as e:
             self.health_status = "unhealthy"
             self.last_health_check = datetime.now()
@@ -271,21 +351,15 @@ class ModelManager:
                 "model_available": self.model is not None,
                 "vectorizer_available": self.vectorizer is not None,
                 "pipeline_available": self.pipeline is not None,
                 "environment": path_manager.environment,
-                "model_path": str(path_manager.get_model_file_path()),
-                "vectorizer_path": str(path_manager.get_vectorizer_path()),
-                "pipeline_path": str(path_manager.get_pipeline_path()),
-                "data_path": str(path_manager.get_data_path()),
-                "file_exists": {
-                    "model": path_manager.get_model_file_path().exists(),
-                    "vectorizer": path_manager.get_vectorizer_path().exists(),
-                    "pipeline": path_manager.get_pipeline_path().exists(),
-                    "metadata": path_manager.get_metadata_path().exists()
-                }
             }
 
 
-# Background task functions
 async def log_prediction(text: str, prediction: str, confidence: float, client_ip: str, processing_time: float):
     """Log prediction details with error handling for file access"""
     try:
@@ -296,7 +370,9 @@ async def log_prediction(text: str, prediction: str, confidence: float, client_i
             "prediction": prediction,
             "confidence": confidence,
             "processing_time": processing_time,
-            "text_hash": hashlib.md5(text.encode()).hexdigest()
         }
 
         # Try to save to log file
@@ -325,7 +401,6 @@ async def log_prediction(text: str, prediction: str, confidence: float, client_i
             await f.write(json.dumps(logs, indent=2))
 
     except (PermissionError, OSError) as e:
-        # If file logging fails, just log to console
         logger.warning(f"Cannot write prediction log to file: {e}")
         logger.info(f"Prediction logged: {json.dumps(log_entry)}")
@@ -333,27 +408,8 @@ async def log_prediction(text: str, prediction: str, confidence: float, client_i
         logger.error(f"Failed to log prediction: {e}")
 
 
-async def log_batch_prediction(total_texts: int, successful_predictions: int, client_ip: str, processing_time: float):
-    """Log batch prediction details"""
-    try:
-        log_entry = {
-            "timestamp": datetime.now().isoformat(),
-            "type": "batch_prediction",
-            "client_ip": client_ip,
-            "total_texts": total_texts,
-            "successful_predictions": successful_predictions,
-            "processing_time": processing_time,
-            "success_rate": successful_predictions / total_texts if total_texts > 0 else 0
-        }
-
-        logger.info(f"Batch prediction logged: {json.dumps(log_entry)}")
-
-    except Exception as e:
-        logger.error(f"Failed to log batch prediction: {e}")
-
-
 # Global variables
-model_manager = ModelManager()
 
 # Initialize automation manager
 automation_manager = None
@@ -363,17 +419,21 @@ deployment_manager = None
 traffic_router = None
 model_registry = None
 
-
 @asynccontextmanager
 async def lifespan(app: FastAPI):
-    """Manage application lifespan with deployment system"""
     global deployment_manager, traffic_router, model_registry
 
-    logger.info("Starting FastAPI application...")
 
     # Startup tasks
     model_manager.load_model()
 
     # Initialize deployment components
     try:
         deployment_manager = BlueGreenDeploymentManager()
@@ -383,72 +443,37 @@ async def lifespan(app: FastAPI):
     except Exception as e:
         logger.error(f"Failed to initialize deployment system: {e}")
 
-    # Initialize monitoring and automation...
 
     yield
 
     # Shutdown tasks
-    logger.info("Shutting down FastAPI application...")
-
-# Initialize monitoring components
-prediction_monitor = PredictionMonitor(base_dir=Path("/tmp"))
-metrics_collector = MetricsCollector(base_dir=Path("/tmp"))
-alert_system = AlertSystem(base_dir=Path("/tmp"))
-
-# Start monitoring
-prediction_monitor.start_monitoring()
-
-alert_system.add_notification_handler("console", console_notification_handler)
-
-
-@asynccontextmanager
-async def lifespan(app: FastAPI):
-    """Manage application lifespan"""
-    logger.info("Starting FastAPI application...")
-
-    # Startup tasks
-    model_manager.load_model()
-
-    # Schedule periodic health checks
-    asyncio.create_task(periodic_health_check())
-
-    yield
-
-    # Shutdown tasks
-    logger.info("Shutting down FastAPI application...")
-
-
-# Background tasks
-async def periodic_health_check():
-    """Periodic health check"""
-    while True:
-        try:
-            await asyncio.sleep(300)  # Check every 5 minutes
-            health_status = model_manager.health_check()
-
-            if health_status["status"] == "unhealthy":
-                logger.warning(
-                    "Model health check failed, attempting to reload...")
-                model_manager.load_model()
-
-        except Exception as e:
-            logger.error(f"Periodic health check failed: {e}")
-
 
 # Create FastAPI app
 app = FastAPI(
-    title="Fake News Detection API",
-    description="Production-ready API for fake news detection with comprehensive monitoring and security features",
-    version="2.0.0",
     docs_url="/docs",
     redoc_url="/redoc",
     lifespan=lifespan
 )
 
-# Add middleware
 app.add_middleware(
     CORSMiddleware,
-    allow_origins=["*"],  # Configure appropriately for production
     allow_credentials=True,
     allow_methods=["*"],
     allow_headers=["*"],
@@ -456,38 +481,31 @@ app.add_middleware(
 
 app.add_middleware(
     TrustedHostMiddleware,
-    allowed_hosts=["*"]  # Configure appropriately for production
 )
 
-# Custom OpenAPI setup - RIGHT AFTER app creation
-def custom_openapi():
-    if app.openapi_schema:
-        return app.openapi_schema
-
-    openapi_schema = get_openapi(
-        title="Fake News Detection API",
-        version="2.0.0",
-        description="Production-ready API for fake news detection with comprehensive monitoring and security features",
-        routes=app.routes,
-    )
-
-    # Add security definitions
-    openapi_schema["components"]["securitySchemes"] = {
-        "Bearer": {
-            "type": "http",
-            "scheme": "bearer",
-            "bearerFormat": "JWT",
-        }
-    }
-
-    app.openapi_schema = openapi_schema
-    return app.openapi_schema
-
-# Set the custom OpenAPI function
-app.openapi = custom_openapi
 
 
-# Request/Response models
 class PredictionRequest(BaseModel):
     text: str = Field(..., min_length=1, max_length=10000,
                       description="Text to analyze for fake news detection")
@@ -496,67 +514,15 @@ class PredictionRequest(BaseModel):
     def validate_text(cls, v):
         if not v or not v.strip():
             raise ValueError('Text cannot be empty')
-
-        # Basic content validation
         if len(v.strip()) < 10:
             raise ValueError('Text must be at least 10 characters long')
-
-        # Check for suspicious patterns
         suspicious_patterns = ['<script', 'javascript:', 'data:']
         if any(pattern in v.lower() for pattern in suspicious_patterns):
             raise ValueError('Text contains suspicious content')
-
         return v.strip()
 
 
-class PredictionResponse(BaseModel):
-    prediction: str = Field(...,
-                            description="Prediction result: 'Real' or 'Fake'")
-    confidence: float = Field(..., ge=0.0, le=1.0,
-                              description="Confidence score between 0 and 1")
-    model_version: str = Field(...,
-                               description="Version of the model used for prediction")
-    timestamp: str = Field(..., description="Timestamp of the prediction")
-    processing_time: float = Field(...,
-                                   description="Time taken for processing in seconds")
-
-
-class BatchPredictionRequest(BaseModel):
-    texts: List[str] = Field(..., min_items=1, max_items=10,
-                             description="List of texts to analyze")
-
-    @validator('texts')
-    def validate_texts(cls, v):
-        if not v:
-            raise ValueError('Texts list cannot be empty')
-
-        for text in v:
-            if not text or not text.strip():
-                raise ValueError('All texts must be non-empty')
-
-            if len(text.strip()) < 10:
-                raise ValueError(
-                    'All texts must be at least 10 characters long')
-
-        return [text.strip() for text in v]
-
-
-class BatchPredictionResponse(BaseModel):
-    predictions: List[PredictionResponse]
-    total_count: int
-    processing_time: float
-
-
-class HealthResponse(BaseModel):
-    status: str
-    timestamp: str
-    model_health: Dict[str, Any]
-    system_health: Dict[str, Any]
-    api_health: Dict[str, Any]
-    environment_info: Dict[str, Any]
-
-
-# Rate limiting
 async def rate_limit_check(request: Request):
     """Check rate limits"""
     client_ip = request.client.host
@@ -565,7 +531,7 @@ async def rate_limit_check(request: Request):
     # Clean old entries
     rate_limit_storage[client_ip] = [
         timestamp for timestamp in rate_limit_storage[client_ip]
-        if current_time - timestamp < 3600  # 1 hour window
     ]
 
     # Check rate limit (100 requests per hour)
@@ -579,14 +545,11 @@ async def rate_limit_check(request: Request):
     rate_limit_storage[client_ip].append(current_time)
 
 
-# Logging middleware
 @app.middleware("http")
 async def log_requests(request: Request, call_next):
-    """Log all requests"""
     start_time = time.time()
-
     response = await call_next(request)
-
     process_time = time.time() - start_time
 
     log_data = {
@@ -595,76 +558,42 @@ async def log_requests(request: Request, call_next):
         "client_ip": request.client.host,
         "status_code": response.status_code,
         "process_time": process_time,
-        "timestamp": datetime.now().isoformat()
     }
 
     logger.info(f"Request: {json.dumps(log_data)}")
-
     return response
 
 
-# Error handlers
-@app.exception_handler(HTTPException)
-async def http_exception_handler(request: Request, exc: HTTPException):
-    """Handle HTTP exceptions"""
-    error_data = {
-        "error": True,
-        "message": exc.detail,
-        "status_code": exc.status_code,
-        "timestamp": datetime.now().isoformat(),
-        "path": request.url.path
-    }
-
-    logger.error(f"HTTP Exception: {json.dumps(error_data)}")
-
-    return JSONResponse(
-        status_code=exc.status_code,
-        content=error_data
-    )
-
-
-@app.exception_handler(Exception)
-async def general_exception_handler(request: Request, exc: Exception):
-    """Handle general exceptions"""
-    error_data = {
-        "error": True,
-        "message": "Internal server error",
-        "timestamp": datetime.now().isoformat(),
-        "path": request.url.path
-    }
-
-    logger.error(f"General Exception: {str(exc)}\n{traceback.format_exc()}")
-
-    return JSONResponse(
-        status_code=500,
-        content=error_data
-    )
-
-
-# API Routes
-@app.get("/", response_model=Dict[str, str])
 async def root():
-    """Root endpoint"""
     return {
-        "message": "Fake News Detection API",
-        "version": "2.0.0",
         "environment": path_manager.environment,
         "documentation": "/docs",
        "health_check": "/health"
     }
 
 
-@app.post("/predict", response_model=PredictionResponse)
 async def predict(
     request: PredictionRequest,
     background_tasks: BackgroundTasks,
     http_request: Request,
     _: None = Depends(rate_limit_check)
-):
     """
-    Predict whether a news article is fake or real using blue-green deployment routing
     - **text**: The news article text to analyze
-    - **returns**: Prediction result with confidence score
     """
     start_time = time.time()
     client_ip = http_request.client.host
@@ -678,62 +607,49 @@ async def predict(
                 detail="Model is not available. Please try again later."
             )
 
-        # Prepare request data for routing
-        request_data = {
-            'client_id': client_ip,
-            'user_agent': user_agent,
-            'timestamp': datetime.now().isoformat()
-        }
-
-        # Use traffic router if available, otherwise fallback to model manager
-        if traffic_router and (traffic_router.blue_model or traffic_router.green_model):
-            try:
-                environment, result = traffic_router.make_prediction(request.text, request_data)
-
-                # Extract results from traffic router response
-                label = result['prediction']
-                confidence = result['confidence']
-                processing_time = result['processing_time']
-
-                logger.debug(f"Used {environment} environment for prediction")
-
-            except Exception as e:
-                logger.warning(f"Traffic router failed, falling back to model manager: {e}")
-                # Fallback to original model manager
-                label, confidence = model_manager.predict(request.text)
-                processing_time = time.time() - start_time
-                environment = "blue"  # Default fallback
-        else:
-            # Fallback to original model manager
-            label, confidence = model_manager.predict(request.text)
-            processing_time = time.time() - start_time
-            environment = "blue"  # Default when no traffic router
 
         # Record prediction for monitoring
-        prediction_monitor.record_prediction(
-            prediction=label,
-            confidence=confidence,
-            processing_time=processing_time,
-            text=request.text,
-            model_version=model_manager.model_metadata.get('model_version', 'unknown'),
-            client_id=client_ip,
-            user_agent=user_agent
-        )
 
         # Record API request metrics
-        metrics_collector.record_api_request(
-            endpoint="/predict",
-            method="POST",
-            response_time=processing_time,
-            status_code=200,
-            client_ip=client_ip
-        )
 
-        # Create response
-        response = PredictionResponse(
             prediction=label,
             confidence=confidence,
             model_version=model_manager.model_metadata.get('model_version', 'unknown'),
             timestamp=datetime.now().isoformat(),
             processing_time=processing_time
         )
@@ -753,36 +669,40 @@ async def predict(
     except HTTPException:
         # Record error for failed requests
         processing_time = time.time() - start_time
-        prediction_monitor.record_error(
-            error_type="http_error",
-            error_message="Service unavailable",
-            context={"status_code": 503}
-        )
-        metrics_collector.record_api_request(
-            endpoint="/predict",
-            method="POST",
-            response_time=processing_time,
-            status_code=503,
-            client_ip=client_ip
-        )
         raise
     except Exception as e:
         processing_time = time.time() - start_time
 
         # Record error
-        prediction_monitor.record_error(
-            error_type="prediction_error",
-            error_message=str(e),
-            context={"text_length": len(request.text)}
-        )
 
-        metrics_collector.record_api_request(
-            endpoint="/predict",
-            method="POST",
-            response_time=processing_time,
-            status_code=500,
-            client_ip=client_ip
-        )
 
         logger.error(f"Prediction failed: {e}")
         raise HTTPException(
@@ -791,90 +711,11 @@ async def predict(
         )
 
 
-@app.post("/predict/batch", response_model=BatchPredictionResponse)
-async def predict_batch(
-    request: BatchPredictionRequest,
-    background_tasks: BackgroundTasks,
-    http_request: Request,
-    _: None = Depends(rate_limit_check)
-):
-    """
-    Predict multiple news articles in batch
-    - **texts**: List of news article texts to analyze
-    - **returns**: List of prediction results
-    """
-    start_time = time.time()
-
-    try:
-        # Check model health
-        if model_manager.health_status != "healthy":
-            raise HTTPException(
-                status_code=503,
-                detail="Model is not available. Please try again later."
-            )
-
-        predictions = []
-
-        for text in request.texts:
-            try:
-                label, confidence = model_manager.predict(text)
-
-                prediction = PredictionResponse(
-                    prediction=label,
-                    confidence=confidence,
-                    model_version=model_manager.model_metadata.get(
-                        'model_version', 'unknown'),
-                    timestamp=datetime.now().isoformat(),
-                    processing_time=0.0  # Will be updated with total time
-                )
-
-                predictions.append(prediction)
-
-            except Exception as e:
-                logger.error(f"Batch prediction failed for text: {e}")
-                # Continue with other texts
-                continue
-
-        # Calculate total processing time
-        total_processing_time = time.time() - start_time
-
-        # Update processing time for all predictions
-        for prediction in predictions:
-            prediction.processing_time = total_processing_time / \
-                len(predictions)
-
-        response = BatchPredictionResponse(
-            predictions=predictions,
-            total_count=len(predictions),
-            processing_time=total_processing_time
-        )
-
-        # Log batch prediction (background task)
-        background_tasks.add_task(
-            log_batch_prediction,
-            len(request.texts),
-            len(predictions),
-            http_request.client.host,
-            total_processing_time
-        )
-
-        return response
-
-    except HTTPException:
-        raise
-    except Exception as e:
-        logger.error(f"Batch prediction failed: {e}")
-        raise HTTPException(
-            status_code=500,
-            detail=f"Batch prediction failed: {str(e)}"
-        )
-
-
-@app.get("/health", response_model=HealthResponse)
 async def health_check():
     """
-    Comprehensive health check endpoint
-    - **returns**: Detailed health status of the API and model
     """
     try:
         # Model health
@@ -897,843 +738,193 @@ async def health_check():
 
         # Environment info
         environment_info = path_manager.get_environment_info()
 
         # Overall status
         overall_status = "healthy" if model_health["status"] == "healthy" else "unhealthy"
 
-        return HealthResponse(
             status=overall_status,
             timestamp=datetime.now().isoformat(),
             model_health=model_health,
             system_health=system_health,
             api_health=api_health,
-            environment_info=environment_info
         )
 
     except Exception as e:
         logger.error(f"Health check failed: {e}")
-        return HealthResponse(
             status="unhealthy",
             timestamp=datetime.now().isoformat(),
             model_health={"status": "unhealthy", "error": str(e)},
             system_health={"error": str(e)},
             api_health={"error": str(e)},
-            environment_info={"error": str(e)}
         )
 
 
-@app.get("/health/detailed")
-async def detailed_health_check():
     """
-    Detailed health check endpoint with comprehensive CV results
-    - **returns**: Detailed health status including cross-validation metrics
     """
     try:
-        # Get basic health information
-        basic_health = await health_check()
-
-        # Load metadata to get CV results
-        metadata_path = path_manager.get_metadata_path()
-        cv_details = {}
-
-        if metadata_path.exists():
-            try:
-                with open(metadata_path, 'r') as f:
-                    metadata = json.load(f)
-
-                # Extract cross-validation information
-                cv_info = metadata.get('cross_validation', {})
-                if cv_info:
-                    cv_details = {
-                        'cross_validation_available': True,
-                        'n_splits': cv_info.get('n_splits', 'Unknown'),
-                        'test_scores': cv_info.get('test_scores', {}),
-                        'train_scores': cv_info.get('train_scores', {}),
-                        'overfitting_score': cv_info.get('overfitting_score', 'Unknown'),
-                        'stability_score': cv_info.get('stability_score', 'Unknown'),
-                        'individual_fold_results': cv_info.get('individual_fold_results', [])
-                    }
-
-                    # Add summary statistics
-                    test_scores = cv_info.get('test_scores', {})
-                    if 'f1' in test_scores:
-                        cv_details['cv_f1_summary'] = {
-                            'mean': test_scores['f1'].get('mean', 'Unknown'),
-                            'std': test_scores['f1'].get('std', 'Unknown'),
-                            'min': test_scores['f1'].get('min', 'Unknown'),
-                            'max': test_scores['f1'].get('max', 'Unknown'),
-                            'scores': test_scores['f1'].get('scores', [])
-                        }
-
-                    if 'accuracy' in test_scores:
-                        cv_details['cv_accuracy_summary'] = {
-                            'mean': test_scores['accuracy'].get('mean', 'Unknown'),
-                            'std': test_scores['accuracy'].get('std', 'Unknown'),
-                            'min': test_scores['accuracy'].get('min', 'Unknown'),
-                            'max': test_scores['accuracy'].get('max', 'Unknown'),
-                            'scores': test_scores['accuracy'].get('scores', [])
-                        }
-
-                # Add model comparison results if available
-                statistical_validation = metadata.get('statistical_validation', {})
-                if statistical_validation:
-                    cv_details['statistical_validation'] = statistical_validation
-
-                promotion_validation = metadata.get('promotion_validation', {})
-                if promotion_validation:
-                    cv_details['promotion_validation'] = promotion_validation
-
-                # Add model version and training info
-                cv_details['model_info'] = {
-                    'model_version': metadata.get('model_version', 'Unknown'),
-                    'model_type': metadata.get('model_type', 'Unknown'),
-                    'training_timestamp': metadata.get('timestamp', 'Unknown'),
-                    'promotion_timestamp': metadata.get('promotion_timestamp'),
-                    'cv_f1_mean': metadata.get('cv_f1_mean'),
-                    'cv_f1_std': metadata.get('cv_f1_std'),
-                    'cv_accuracy_mean': metadata.get('cv_accuracy_mean'),
-                    'cv_accuracy_std': metadata.get('cv_accuracy_std')
-                }
-
-            except Exception as e:
-                cv_details = {
-                    'cross_validation_available': False,
-                    'error': f"Failed to load CV details: {str(e)}"
-                }
-        else:
-            cv_details = {
-                'cross_validation_available': False,
-                'error': "No metadata file found"
-            }
-
-        # Combine basic health with detailed CV information
-        detailed_response = {
-            'basic_health': basic_health,
-            'cross_validation_details': cv_details,
-            'detailed_check_timestamp': datetime.now().isoformat()
-        }
-
-        return detailed_response
-
-    except Exception as e:
-        logger.error(f"Detailed health check failed: {e}")
-        return {
-            'basic_health': {'status': 'unhealthy', 'error': str(e)},
-            'cross_validation_details': {
-                'cross_validation_available': False,
-                'error': f"Detailed health check failed: {str(e)}"
             },
-            'detailed_check_timestamp': datetime.now().isoformat()
         }
 
-
-@app.get("/cv/results")
-async def get_cv_results():
-    """
-    Get detailed cross-validation results for the current model
-    - **returns**: Comprehensive CV metrics and fold-by-fold results
-    """
-    try:
-        metadata_path = path_manager.get_metadata_path()
-
-        if not metadata_path.exists():
-            raise HTTPException(
-                status_code=404,
-                detail="Model metadata not found. Train a model first."
-            )
-
-        with open(metadata_path, 'r') as f:
-            metadata = json.load(f)
-
-        cv_info = metadata.get('cross_validation', {})
-
-        if not cv_info:
-            raise HTTPException(
-                status_code=404,
-                detail="No cross-validation results found. Model may not have been trained with CV."
-            )
-
-        # Structure the CV results for API response
-        cv_response = {
-            'model_version': metadata.get('model_version', 'Unknown'),
-            'model_type': metadata.get('model_type', 'Unknown'),
-            'training_timestamp': metadata.get('timestamp', 'Unknown'),
-            'cross_validation': {
-                'methodology': {
-                    'n_splits': cv_info.get('n_splits', 'Unknown'),
-                    'cv_type': 'StratifiedKFold',
-                    'random_state': 42
-                },
-                'test_scores': cv_info.get('test_scores', {}),
-                'train_scores': cv_info.get('train_scores', {}),
-                'performance_indicators': {
-                    'overfitting_score': cv_info.get('overfitting_score', 'Unknown'),
-                    'stability_score': cv_info.get('stability_score', 'Unknown')
-                },
-                'individual_fold_results': cv_info.get('individual_fold_results', [])
-            },
-            'statistical_validation': metadata.get('statistical_validation', {}),
-            'promotion_validation': metadata.get('promotion_validation', {}),
-            'summary_statistics': {
-                'cv_f1_mean': metadata.get('cv_f1_mean'),
-                'cv_f1_std': metadata.get('cv_f1_std'),
-                'cv_accuracy_mean': metadata.get('cv_accuracy_mean'),
-                'cv_accuracy_std': metadata.get('cv_accuracy_std')
             }
-        }
-
-        return cv_response
-
-    except HTTPException:
-        raise
-    except Exception as e:
-        logger.error(f"CV results retrieval failed: {e}")
-        raise HTTPException(
-            status_code=500,
-            detail=f"Failed to retrieve CV results: {str(e)}"
-        )
 
 
-@app.get("/cv/comparison")
-async def get_model_comparison_results():
-    """
-    Get latest model comparison results from retraining
-    - **returns**: Statistical comparison results between models
-    """
-    try:
-        # Load comparison logs
-        comparison_log_path = path_manager.get_logs_path("model_comparison.json")
-
-        if not comparison_log_path.exists():
-            raise HTTPException(
-                status_code=404,
-                detail="No model comparison results found."
-            )
-
-        with open(comparison_log_path, 'r') as f:
-            comparison_logs = json.load(f)
-
-        if not comparison_logs:
-            raise HTTPException(
-                status_code=404,
-                detail="No comparison entries found."
-            )
-
-        # Get the most recent comparison
-        latest_comparison = comparison_logs[-1]
-        comparison_details = latest_comparison.get('comparison_details', {})
-
-        # Structure the response
-        comparison_response = {
-            'comparison_timestamp': latest_comparison.get('timestamp', 'Unknown'),
-            'session_id': latest_comparison.get('session_id', 'Unknown'),
-            'models_compared': {
-                'model1_name': comparison_details.get('model1_name', 'Production'),
-                'model2_name': comparison_details.get('model2_name', 'Candidate')
-            },
-            'cv_methodology': {
-                'cv_folds': comparison_details.get('cv_folds', 'Unknown')
-            },
-            'model_performance': {
-                'production_model': comparison_details.get('model1_cv_results', {}),
-                'candidate_model': comparison_details.get('model2_cv_results', {})
-            },
-            'metric_comparisons': comparison_details.get('metric_comparisons', {}),
-            'statistical_tests': comparison_details.get('statistical_tests', {}),
-            'promotion_decision': comparison_details.get('promotion_decision', {}),
-            'summary': {
-                'decision': comparison_details.get('promotion_decision', {}).get('promote_candidate', False),
-                'reason': comparison_details.get('promotion_decision', {}).get('reason', 'Unknown'),
-                'confidence': comparison_details.get('promotion_decision', {}).get('confidence', 0)
-            }
-        }
-
-        return comparison_response
-
-    except HTTPException:
-        raise
     except Exception as e:
-        logger.error(f"Model comparison results retrieval failed: {e}")
         raise HTTPException(
             status_code=500,
-            detail=f"Failed to retrieve model comparison results: {str(e)}"
         )
-
 
-@app.get("/metrics")
-async def get_metrics():
     """
-    Get comprehensive API metrics including CV results
-    - **returns**: Usage statistics, performance metrics, and CV information
     """
     try:
-        # Calculate metrics from rate limiting storage
-        total_requests = sum(len(requests)
-                             for requests in rate_limit_storage.values())
-        unique_clients = len(rate_limit_storage)
-
-        # Load metadata for CV information
-        metadata_path = path_manager.get_metadata_path()
-        cv_summary = {}
-
-        if metadata_path.exists():
-            try:
-                with open(metadata_path, 'r') as f:
-                    metadata = json.load(f)
-
-                # Extract CV summary
-                cv_info = metadata.get('cross_validation', {})
-                if cv_info:
-                    test_scores = cv_info.get('test_scores', {})
-                    cv_summary = {
-                        'cv_available': True,
-                        'cv_folds': cv_info.get('n_splits', 'Unknown'),
-                        'cv_f1_mean': test_scores.get('f1', {}).get('mean'),
-                        'cv_f1_std': test_scores.get('f1', {}).get('std'),
-                        'cv_accuracy_mean': test_scores.get('accuracy', {}).get('mean'),
-                        'cv_accuracy_std': test_scores.get('accuracy', {}).get('std'),
-                        'overfitting_score': cv_info.get('overfitting_score'),
-                        'stability_score': cv_info.get('stability_score')
-                    }
-                else:
-                    cv_summary = {'cv_available': False}
-
-            except Exception as e:
-                cv_summary = {'cv_available': False, 'cv_error': str(e)}
-        else:
-            cv_summary = {'cv_available': False, 'cv_error': 'No metadata file'}
-
-        metrics = {
-            'api_metrics': {
-                'total_requests': total_requests,
-                'unique_clients': unique_clients,
-                'timestamp': datetime.now().isoformat()
-            },
-            'model_info': {
-                'model_version': model_manager.model_metadata.get('model_version', 'unknown'),
-                'model_health': model_manager.health_status,
-                'last_health_check': model_manager.last_health_check.isoformat() if model_manager.last_health_check else None
             },
-            'cross_validation_summary': cv_summary,
-            'environment_info': {
-                'environment': path_manager.environment,
-                'available_datasets': path_manager.list_available_datasets(),
-                'available_models': path_manager.list_available_models()
             }
         }
 
-        return metrics
-
-    except Exception as e:
-        logger.error(f"Metrics retrieval failed: {e}")
-        raise HTTPException(
-            status_code=500,
-            detail=f"Metrics retrieval failed: {str(e)}"
-        )
 
-@app.get("/validation/statistics")
-async def get_validation_statistics():
-    """Get comprehensive validation statistics"""
-    try:
-        stats = get_validation_stats()
-
-        if not stats:
-            return {
-                'statistics_available': False,
-                'message': 'No validation statistics available yet',
-                'timestamp': datetime.now().isoformat()
-            }
-
-        enhanced_stats = {
-            'statistics_available': True,
-            'last_updated': stats.get('last_updated'),
-            'overall_metrics': {
-                'total_validations': stats.get('total_validations', 0),
-                'total_articles_processed': stats.get('total_articles', 0),
-                'overall_success_rate': (stats.get('total_valid_articles', 0) /
-                                         max(stats.get('total_articles', 1), 1)),
-                'average_quality_score': stats.get('average_quality_score', 0.0)
-            },
-            'source_breakdown': stats.get('source_statistics', {}),
-            'recent_performance': {
-                'validation_history': stats.get('validation_history', [])[-10:],
-                'quality_trends': stats.get('quality_trends', [])[-10:]
-            },
-            'timestamp': datetime.now().isoformat()
-        }
-
-        return enhanced_stats
-
-    except Exception as e:
-        logger.error(f"Failed to get validation statistics: {e}")
-        raise HTTPException(
-            status_code=500,
-            detail=f"Failed to retrieve validation statistics: {str(e)}"
-        )
 
-@app.get("/validation/quality-report")
-async def get_quality_report():
-    """Get comprehensive data quality report"""
-    try:
-        report = generate_quality_report()
-
-        if 'error' in report:
-            raise HTTPException(
-                status_code=404,
-                detail=report['error']
-            )
-
-        return report
-
-    except HTTPException:
-        raise
     except Exception as e:
-        logger.error(f"Failed to generate quality report: {e}")
         raise HTTPException(
            status_code=500,
-            detail=f"Failed to generate quality report: {str(e)}"
-        )
-
-@app.get("/validation/health")
-async def get_validation_health():
-    """Get validation system health status"""
-    try:
-        stats = get_validation_stats()
-
-        health_indicators = {
-            'validation_system_active': True,
-            'statistics_available': bool(stats),
-            'recent_activity': False,
-            'quality_status': 'unknown'
-        }
-
-        if stats:
-            last_updated = stats.get('last_updated')
-            if last_updated:
-                try:
-                    last_update_time = datetime.fromisoformat(last_updated)
-                    hours_since_update = (datetime.now() - last_update_time).total_seconds() / 3600
-                    health_indicators['recent_activity'] = hours_since_update <= 24
-                    health_indicators['hours_since_last_validation'] = hours_since_update
-                except:
-                    pass
-
-            avg_quality = stats.get('average_quality_score', 0)
-            success_rate = stats.get('total_valid_articles', 0) / max(stats.get('total_articles', 1), 1)
-
-            if avg_quality >= 0.7 and success_rate >= 0.8:
-                health_indicators['quality_status'] = 'excellent'
-            elif avg_quality >= 0.5 and success_rate >= 0.6:
-                health_indicators['quality_status'] = 'good'
-            elif avg_quality >= 0.3 and success_rate >= 0.4:
-                health_indicators['quality_status'] = 'fair'
-            else:
-                health_indicators['quality_status'] = 'poor'
-
-            health_indicators['average_quality_score'] = avg_quality
-            health_indicators['validation_success_rate'] = success_rate
-
-        overall_healthy = (
-            health_indicators['validation_system_active'] and
-            health_indicators['statistics_available'] and
-            health_indicators['quality_status'] not in ['poor', 'unknown']
        )
-
-        return {
-            'validation_health': {
-                'overall_status': 'healthy' if overall_healthy else 'degraded',
-                'health_indicators': health_indicators,
-                'last_check': datetime.now().isoformat()
-            }
-        }
-
-    except Exception as e:
-        logger.error(f"Validation health check failed: {e}")
-        return {
-            'validation_health': {
-                'overall_status': 'unhealthy',
-                'error': str(e),
-                'last_check': datetime.now().isoformat()
-            }
-        }
-
-
-# New monitoring endpoints
-@app.get("/monitor/metrics/current")
-async def get_current_metrics():
-    """Get current real-time metrics"""
-    try:
-        prediction_metrics = prediction_monitor.get_current_metrics()
-        system_metrics = metrics_collector.collect_system_metrics()
-        api_metrics = metrics_collector.collect_api_metrics()
-
-        return {
-            "timestamp": datetime.now().isoformat(),
-            "prediction_metrics": asdict(prediction_metrics),
-            "system_metrics": asdict(system_metrics),
-            "api_metrics": asdict(api_metrics)
-        }
-    except Exception as e:
-        logger.error(f"Failed to get current metrics: {e}")
-        raise HTTPException(status_code=500, detail=str(e))
-
-@app.get("/monitor/metrics/historical")
-async def get_historical_metrics(hours: int = 24):
-    """Get historical metrics"""
-    try:
-        return {
-            "prediction_metrics": [asdict(m) for m in prediction_monitor.get_historical_metrics(hours)],
-            "aggregated_metrics": metrics_collector.get_aggregated_metrics(hours)
-        }
-    except Exception as e:
-        logger.error(f"Failed to get historical metrics: {e}")
-        raise HTTPException(status_code=500, detail=str(e))
 
-@app.get("/monitor/alerts")
-async def get_alerts():
-    """Get active alerts and statistics"""
-    try:
-        return {
-            "active_alerts": [asdict(alert) for alert in alert_system.get_active_alerts()],
-            "alert_statistics": alert_system.get_alert_statistics()
-        }
-    except Exception as e:
-        logger.error(f"Failed to get alerts: {e}")
-        raise HTTPException(status_code=500, detail=str(e))
 
-@app.get("/monitor/health")
-async def get_monitoring_health():
-    """Get monitoring system health"""
-    try:
-        dashboard_data = metrics_collector.get_real_time_dashboard_data()
-        confidence_analysis = prediction_monitor.get_confidence_analysis()
-
-        return {
-            "monitoring_status": "active",
-            "dashboard_data": dashboard_data,
-            "confidence_analysis": confidence_analysis,
-            "total_predictions": prediction_monitor.total_predictions
-        }
-    except Exception as e:
-        logger.error(f"Failed to get monitoring health: {e}")
-        raise HTTPException(status_code=500, detail=str(e))
 
-@app.get("/monitor/patterns")
-async def get_prediction_patterns(hours: int = 24):
-    """Get prediction patterns and anomaly analysis"""
-    try:
-        return prediction_monitor.get_prediction_patterns(hours)
-    except Exception as e:
-        logger.error(f"Failed to get prediction patterns: {e}")
-        raise HTTPException(status_code=500, detail=str(e))
-
-@app.post("/monitor/alerts/{alert_id}/acknowledge")
-async def acknowledge_alert(alert_id: str):
-    """Acknowledge an alert"""
-    try:
-        success = alert_system.acknowledge_alert(alert_id, "api_user")
-        if success:
-            return {"message": f"Alert {alert_id} acknowledged"}
-        else:
-            raise HTTPException(status_code=404, detail="Alert not found")
-    except HTTPException:
-        raise
-    except Exception as e:
-        logger.error(f"Failed to acknowledge alert: {e}")
-        raise HTTPException(status_code=500, detail=str(e))
-
-@app.post("/monitor/alerts/{alert_id}/resolve")
-async def resolve_alert(alert_id: str, resolution_note: str = ""):
-    """Resolve an alert"""
-    try:
-        success = alert_system.resolve_alert(alert_id, "api_user", resolution_note)
-        if success:
-            return {"message": f"Alert {alert_id} resolved"}
-        else:
-            raise HTTPException(status_code=404, detail="Alert not found")
-    except HTTPException:
-        raise
-    except Exception as e:
-        logger.error(f"Failed to resolve alert: {e}")
-        raise HTTPException(status_code=500, detail=str(e))
-
-
-@app.get("/automation/status")
-async def get_automation_status():
-    """Get automation system status"""
-    try:
-        if automation_manager is None:
-            raise HTTPException(status_code=503, detail="Automation system not available")
-
-        # Get automation status
-        automation_status = automation_manager.get_automation_status()
-
-        # Get drift monitoring status
-        drift_status = automation_manager.drift_monitor.get_automation_status()
-
-        return {
-            "timestamp": datetime.now().isoformat(),
-            "automation_system": automation_status,
-            "drift_monitoring": drift_status,
-            "system_health": "active" if automation_manager.retraining_active else "inactive"
-        }
-
-    except Exception as e:
-        logger.error(f"Failed to get automation status: {e}")
-        raise HTTPException(status_code=500, detail=str(e))
-
-@app.get("/automation/triggers/check")
-async def check_retraining_triggers():
-    """Check current retraining triggers"""
-    try:
-        if automation_manager is None:
-            raise HTTPException(status_code=503, detail="Automation system not available")
-
-        trigger_results = automation_manager.drift_monitor.check_retraining_triggers()
-
-        return {
-            "timestamp": datetime.now().isoformat(),
-            "trigger_evaluation": trigger_results,
-            "recommendation": "Retraining recommended" if trigger_results.get('should_retrain') else "No retraining needed"
-        }
-
-    except Exception as e:
-        logger.error(f"Failed to check triggers: {e}")
-        raise HTTPException(status_code=500, detail=str(e))
-
-@app.post("/automation/retrain/trigger")
-async def trigger_manual_retraining(reason: str = "manual_api_trigger"):
-    """Manually trigger retraining"""
     try:
-        if automation_manager is None:
-            raise HTTPException(status_code=503, detail="Automation system not available")
-
-        result = automation_manager.trigger_manual_retraining(reason)
-
-        if result['success']:
             return {
-                "message": "Retraining triggered successfully",
-                "timestamp": datetime.now().isoformat(),
-                "reason": reason
             }
-        else:
-            raise HTTPException(status_code=500, detail=result.get('error', 'Unknown error'))
-
-    except HTTPException:
-        raise
-    except Exception as e:
-        logger.error(f"Failed to trigger retraining: {e}")
-        raise HTTPException(status_code=500, detail=str(e))
-
-@app.get("/automation/queue")
-async def get_retraining_queue():
-    """Get current retraining queue"""
-    try:
-        if automation_manager is None:
-            raise HTTPException(status_code=503, detail="Automation system not available")
-
-        queue = automation_manager.load_retraining_queue()
-        recent_logs = automation_manager.get_recent_automation_logs(hours=24)
-
-        return {
-            "timestamp": datetime.now().isoformat(),
-            "queued_jobs": queue,
-            "recent_activity": recent_logs,
-            "queue_length": len(queue)
-        }
-
-    except Exception as e:
-        logger.error(f"Failed to get retraining queue: {e}")
-        raise HTTPException(status_code=500, detail=str(e))
 
-@app.get("/automation/drift/status")
-async def get_drift_monitoring_status():
-    """Get drift monitoring status"""
-    try:
-        if automation_manager is None:
-            raise HTTPException(status_code=503, detail="Automation system not available")
-
-        # Get recent drift results
-        drift_logs = automation_manager.get_recent_automation_logs(hours=48)
-        drift_checks = [log for log in drift_logs if 'drift' in log.get('event', '')]
-
-        # Get current drift status
-        drift_status = automation_manager.drift_monitor.get_automation_status()
-
-        return {
-            "timestamp": datetime.now().isoformat(),
-            "drift_monitoring_active": True,
-            "recent_drift_checks": drift_checks[-10:],  # Last 10 checks
-            "drift_status": drift_status
         }
-
-    except Exception as e:
-        logger.error(f"Failed to get drift status: {e}")
-        raise HTTPException(status_code=500, detail=str(e))
 
-@app.post("/automation/settings/update")
-async def update_automation_settings(settings: Dict[str, Any]):
-    """Update automation settings"""
-    try:
-        if automation_manager is None:
-            raise HTTPException(status_code=503, detail="Automation system not available")
-
-        # Update settings
-        automation_manager.automation_config.update(settings)
-        automation_manager.save_automation_config()
-
-        return {
-            "message": "Automation settings updated",
-            "timestamp": datetime.now().isoformat(),
-            "updated_settings": settings
-        }
-
-    except Exception as e:
-        logger.error(f"Failed to update automation settings: {e}")
-        raise HTTPException(status_code=500, detail=str(e))
-
-
-# Deployment endpoints
-@app.get("/deployment/status")
-async def get_deployment_status():
-    """Get deployment system status"""
-    try:
-        if not deployment_manager:
-            raise HTTPException(status_code=503, detail="Deployment system not available")
-
-        return deployment_manager.get_deployment_status()
-
-    except Exception as e:
-        logger.error(f"Failed to get deployment status: {e}")
-        raise HTTPException(status_code=500, detail=str(e))
-
-@app.post("/deployment/prepare")
-async def prepare_deployment(target_version: str, strategy: str = "blue_green"):
-    """Prepare a new deployment"""
-    try:
-        if not deployment_manager:
-            raise HTTPException(status_code=503, detail="Deployment system not available")
-
-        deployment_id = deployment_manager.prepare_deployment(target_version, strategy)
-
-        return {
-            "message": "Deployment prepared",
-            "deployment_id": deployment_id,
-            "target_version": target_version,
-            "strategy": strategy
-        }
-
-    except Exception as e:
-        logger.error(f"Failed to prepare deployment: {e}")
-        raise HTTPException(status_code=500, detail=str(e))
-
-@app.post("/deployment/start/{deployment_id}")
-async def start_deployment(deployment_id: str):
-    """Start a prepared deployment"""
-    try:
-        if not deployment_manager:
-            raise HTTPException(status_code=503, detail="Deployment system not available")
-
-        success = deployment_manager.start_deployment(deployment_id)
-
-        if success:
-            return {"message": "Deployment started successfully", "deployment_id": deployment_id}
-        else:
-            raise HTTPException(status_code=500, detail="Deployment failed to start")
-
-    except Exception as e:
-        logger.error(f"Failed to start deployment: {e}")
-        raise HTTPException(status_code=500, detail=str(e))
-
-@app.post("/deployment/rollback")
-async def rollback_deployment(reason: str = "Manual rollback"):
-    """Rollback current deployment"""
-    try:
-        if not deployment_manager:
-            raise HTTPException(status_code=503, detail="Deployment system not available")
-
-        success = deployment_manager.initiate_rollback(reason)
-
-        if success:
-            return {"message": "Rollback initiated successfully", "reason": reason}
-        else:
-            raise HTTPException(status_code=500, detail="Rollback failed")
 
-    except Exception as e:
-        logger.error(f"Failed to rollback deployment: {e}")
-        raise HTTPException(status_code=500, detail=str(e))
-
-@app.get("/deployment/traffic")
-async def get_traffic_status():
-    """Get traffic routing status"""
-    try:
-        if not traffic_router:
-            raise HTTPException(status_code=503, detail="Traffic router not available")
-
-        return traffic_router.get_routing_status()
-
-    except Exception as e:
-        logger.error(f"Failed to get traffic status: {e}")
-        raise HTTPException(status_code=500, detail=str(e))
-
-@app.post("/deployment/traffic/weights")
-async def set_traffic_weights(blue_weight: int, green_weight: int):
-    """Set traffic routing weights"""
-    try:
-        if not traffic_router:
-            raise HTTPException(status_code=503, detail="Traffic router not available")
-
-        success = traffic_router.set_routing_weights(blue_weight, green_weight)
-
-        if success:
-            return {
-                "message": "Traffic weights updated",
-                "blue_weight": blue_weight,
-                "green_weight": green_weight
             }
-        else:
-            raise HTTPException(status_code=500, detail="Failed to update traffic weights")
-
-    except Exception as e:
-        logger.error(f"Failed to set traffic weights: {e}")
-        raise HTTPException(status_code=500, detail=str(e))
 
-@app.get("/deployment/performance")
-async def get_deployment_performance(window_minutes: int = 60):
-    """Get deployment performance comparison"""
-    try:
-        if not traffic_router:
-            raise HTTPException(status_code=503, detail="Traffic router not available")
-
-        return traffic_router.compare_environment_performance(window_minutes)
-
-    except Exception as e:
-        logger.error(f"Failed to get deployment performance: {e}")
-        raise HTTPException(status_code=500, detail=str(e))
 
-@app.get("/registry/models")
-async def list_registry_models(status: str = None, limit: int = 10):
-    """List models in registry"""
-    try:
-        if not model_registry:
-            raise HTTPException(status_code=503, detail="Model registry not available")
-
-        models = model_registry.list_models(status=status, limit=limit)
-        return {"models": [asdict(model) for model in models]}
-
     except Exception as e:
-        logger.error(f"Failed to list registry models: {e}")
-        raise HTTPException(status_code=500, detail=str(e))
-
-@app.get("/registry/stats")
-async def get_registry_stats():
-    """Get model registry statistics"""
-    try:
-        if not model_registry:
-            raise HTTPException(status_code=503, detail="Model registry not available")
-
-        return model_registry.get_registry_stats()
-
-    except Exception as e:
-        logger.error(f"Failed to get registry stats: {e}")
-        raise HTTPException(status_code=500, detail=str(e))
+# Enhanced app/fastapi_server.py with LightGBM ensemble support
+
 import json
 import time
 import joblib

 from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
 from fastapi import FastAPI, HTTPException, Depends, Request, BackgroundTasks, status

+# LightGBM availability check
+try:
+    import lightgbm as lgb
+    LIGHTGBM_AVAILABLE = True
+except ImportError:
+    LIGHTGBM_AVAILABLE = False
+
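The module-level flag lets the server degrade gracefully when LightGBM is not installed, and the training side can use the same pattern to decide whether a LightGBM learner joins the ensemble at all. A minimal sketch, not part of this commit; the soft-voting setup and component names are assumptions:

from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB

try:
    from lightgbm import LGBMClassifier
    LIGHTGBM_AVAILABLE = True
except ImportError:
    LIGHTGBM_AVAILABLE = False

def build_ensemble() -> VotingClassifier:
    # Baseline learners that are always available
    estimators = [
        ("logistic_regression", LogisticRegression(max_iter=1000)),
        ("naive_bayes", MultinomialNB()),
    ]
    # LightGBM joins the ensemble only when its import succeeded
    if LIGHTGBM_AVAILABLE:
        estimators.append(("lightgbm", LGBMClassifier(n_estimators=200)))
    # Soft voting averages predict_proba across components, matching
    # how the server calls predict_proba on the loaded ensemble
    return VotingClassifier(estimators=estimators, voting="soft")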
 from data.data_validator import (
     DataValidationPipeline, validate_text, validate_articles_list,
     get_validation_stats, generate_quality_report

 from deployment.model_registry import ModelRegistry
 from deployment.blue_green_manager import BlueGreenDeploymentManager

+# Import the path manager
 try:
     from path_config import path_manager
 except ImportError:
     import sys
     import os
     sys.path.append(os.path.dirname(os.path.abspath(__file__)))

 # Configure logging with fallback for permission issues
 def setup_logging():
     """Setup logging with fallback for environments with restricted file access"""
+    handlers = [logging.StreamHandler()]

     try:
         log_file_path = path_manager.get_logs_path('fastapi_server.log')
         log_file_path.parent.mkdir(parents=True, exist_ok=True)

         test_handler = logging.FileHandler(log_file_path)
         test_handler.close()

         handlers.append(logging.FileHandler(log_file_path))
+        print(f"Logging to file: {log_file_path}")

     except (PermissionError, OSError) as e:
         print(f"Cannot create log file, using console only: {e}")

         try:
             import tempfile
             temp_log = tempfile.NamedTemporaryFile(mode='w', suffix='.log', delete=False, prefix='fastapi_')

     return handlers

+# Setup logging
 logging.basicConfig(
     level=logging.INFO,
     format='%(asctime)s - %(levelname)s - %(message)s',

 )
 logger = logging.getLogger(__name__)

+# Log environment info
 try:
     path_manager.log_environment_info()
 except Exception as e:

 rate_limit_storage = defaultdict(list)

+class EnhancedModelManager:
+    """Enhanced model manager with LightGBM ensemble support"""

     def __init__(self):
         self.model = None
         self.vectorizer = None
         self.pipeline = None
+        self.ensemble = None
         self.model_metadata = {}
+        self.ensemble_metadata = {}
         self.last_health_check = None
         self.health_status = "unknown"
+        self.model_type = "unknown"
+        self.is_ensemble = False
         self.load_model()

     def load_model(self):
+        """Load model with comprehensive error handling and ensemble support"""
         try:
+            logger.info("Loading ML model with ensemble support...")

             # Initialize all to None first
             self.model = None
             self.vectorizer = None
             self.pipeline = None
+            self.ensemble = None
+            self.is_ensemble = False

+            # Check for ensemble model first
+            ensemble_path = Path("/tmp/ensemble.pkl")
+            ensemble_metadata_path = Path("/tmp/ensemble_metadata.json")

+            if ensemble_path.exists():
                 try:
+                    self.ensemble = joblib.load(ensemble_path)
+                    self.pipeline = self.ensemble  # Use ensemble as pipeline
+                    self.model_type = "ensemble"
+                    self.is_ensemble = True
+
+                    # Load ensemble metadata
+                    if ensemble_metadata_path.exists():
+                        with open(ensemble_metadata_path, 'r') as f:
+                            self.ensemble_metadata = json.load(f)
+                        logger.info(f"Loaded ensemble metadata: {self.ensemble_metadata.get('ensemble_type', 'unknown')}")
+
+                    logger.info("Loaded ensemble model successfully")
+                    logger.info(f"Ensemble type: {self.ensemble_metadata.get('ensemble_type', 'voting_classifier')}")
+                    logger.info(f"Component models: {self.ensemble_metadata.get('component_models', [])}")
+
                 except Exception as e:
+                    logger.warning(f"Failed to load ensemble model: {e}, falling back to individual pipeline")
+                    self.ensemble = None
+
+            # Try to load pipeline if ensemble not available
+            if self.pipeline is None:
+                pipeline_path = path_manager.get_pipeline_path()
+                logger.info(f"Checking for pipeline at: {pipeline_path}")
+
+                if pipeline_path.exists():
+                    try:
+                        self.pipeline = joblib.load(pipeline_path)
+                        # Extract components from pipeline
+                        if hasattr(self.pipeline, 'named_steps'):
+                            self.model = self.pipeline.named_steps.get('model')
+                            self.vectorizer = (self.pipeline.named_steps.get('vectorizer') or
+                                               self.pipeline.named_steps.get('vectorize'))
+
+                            # Check if this is actually an ensemble pipeline
+                            if 'ensemble' in self.pipeline.named_steps:
+                                self.model_type = "ensemble_pipeline"
+                                self.is_ensemble = True
+                                logger.info("Detected ensemble within pipeline")
+
+                        logger.info("Loaded model pipeline successfully")
+                        logger.info(f"Pipeline steps: {list(self.pipeline.named_steps.keys()) if hasattr(self.pipeline, 'named_steps') else 'No named_steps'}")
+                    except Exception as e:
+                        logger.warning(f"Failed to load pipeline: {e}, falling back to individual components")
+                        self.pipeline = None

+            # If pipeline loading failed, load individual components
             if self.pipeline is None:
                 model_path = path_manager.get_model_file_path()
                 vectorizer_path = path_manager.get_vectorizer_path()

                 try:
                     self.model = joblib.load(model_path)
                     self.vectorizer = joblib.load(vectorizer_path)
+                    self.model_type = "individual_components"
                     logger.info("Loaded model components successfully")
                 except Exception as e:
                     logger.error(f"Failed to load individual components: {e}")
                     raise e
             else:
+                raise FileNotFoundError("No model files found")

             # Load metadata
             metadata_path = path_manager.get_metadata_path()
             if metadata_path.exists():
                 with open(metadata_path, 'r') as f:
                     self.model_metadata = json.load(f)
+
+                # Update model type and ensemble status from metadata
+                if self.model_metadata.get('is_ensemble', False):
+                    self.is_ensemble = True
+                    if not self.model_type.startswith('ensemble'):
+                        self.model_type = "ensemble_from_metadata"
+
                 logger.info(f"Loaded model metadata: {self.model_metadata.get('model_version', 'Unknown')}")
+                logger.info(f"Model type from metadata: {self.model_metadata.get('model_type', 'unknown')}")
+                logger.info(f"Is ensemble: {self.is_ensemble}")
+
+                if self.is_ensemble and 'ensemble_details' in self.model_metadata:
+                    ensemble_details = self.model_metadata['ensemble_details']
+                    logger.info(f"Ensemble details: {ensemble_details}")
             else:
                 logger.warning(f"Metadata file not found at: {metadata_path}")
                 self.model_metadata = {"model_version": "unknown"}

+            # Verify we have what we need for predictions
+            if self.pipeline is None and (self.model is None or self.vectorizer is None):
+                raise ValueError("Neither complete pipeline nor individual model components are available")
+
             self.health_status = "healthy"
             self.last_health_check = datetime.now()

             # Log what was successfully loaded
             logger.info(f"Model loading summary:")
             logger.info(f"  Pipeline available: {self.pipeline is not None}")
+            logger.info(f"  Individual model available: {self.model is not None}")
             logger.info(f"  Vectorizer available: {self.vectorizer is not None}")
+            logger.info(f"  Ensemble available: {self.ensemble is not None}")
+            logger.info(f"  Model type: {self.model_type}")
+            logger.info(f"  Is ensemble: {self.is_ensemble}")

         except Exception as e:
             logger.error(f"Failed to load model: {e}")

             self.model = None
             self.vectorizer = None
             self.pipeline = None
+            self.ensemble = None
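For reference, load_model reads an optional /tmp/ensemble_metadata.json alongside the pickled ensemble. A plausible shape can be inferred from the .get() calls above; the values below are illustrative only, not taken from the commit:

{
    "ensemble_type": "voting_classifier",
    "component_models": ["logistic_regression", "naive_bayes", "lightgbm"]
}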

     def predict(self, text: str) -> tuple[str, float]:
+        """Make prediction with enhanced ensemble support"""
         try:
             if self.pipeline:
+                # Use pipeline for prediction (works for both ensemble and individual models)
                 prediction = self.pipeline.predict([text])[0]
                 probabilities = self.pipeline.predict_proba([text])[0]
+
+                if self.is_ensemble:
+                    logger.debug("Used ensemble pipeline for prediction")
+                else:
+                    logger.debug("Used individual model pipeline for prediction")
+
             elif self.model and self.vectorizer:
                 # Use individual components
                 X = self.vectorizer.transform([text])

         )

     def health_check(self) -> Dict[str, Any]:
+        """Perform health check with ensemble information"""
         try:
             # Test prediction with sample text
             test_text = "This is a test article for health check purposes."

             self.health_status = "healthy"
             self.last_health_check = datetime.now()

+            health_info = {
                 "status": "healthy",
                 "last_check": self.last_health_check.isoformat(),
                 "model_available": self.model is not None,
                 "vectorizer_available": self.vectorizer is not None,
                 "pipeline_available": self.pipeline is not None,
+                "ensemble_available": self.ensemble is not None,
+                "model_type": self.model_type,
+                "is_ensemble": self.is_ensemble,
                 "test_prediction": {"label": label, "confidence": confidence},
                 "environment": path_manager.environment,
+                "lightgbm_available": LIGHTGBM_AVAILABLE,
+                "model_paths": {
+                    "pipeline": str(path_manager.get_pipeline_path()),
+                    "ensemble": "/tmp/ensemble.pkl",
+                    "model": str(path_manager.get_model_file_path()),
+                    "vectorizer": str(path_manager.get_vectorizer_path())
+                },
                 "file_exists": {
+                    "pipeline": path_manager.get_pipeline_path().exists(),
+                    "ensemble": Path("/tmp/ensemble.pkl").exists(),
                     "model": path_manager.get_model_file_path().exists(),
                     "vectorizer": path_manager.get_vectorizer_path().exists(),
+                    "metadata": path_manager.get_metadata_path().exists(),
+                    "ensemble_metadata": Path("/tmp/ensemble_metadata.json").exists()
                 }
             }

+            # Add ensemble-specific information
+            if self.is_ensemble:
+                health_info["ensemble_info"] = {
+                    "ensemble_type": self.ensemble_metadata.get('ensemble_type', 'unknown'),
+                    "component_models": self.ensemble_metadata.get('component_models', []),
+                    "voting_type": self.model_metadata.get('ensemble_details', {}).get('voting_type', 'unknown')
+                }
+
+            return health_info
+
         except Exception as e:
             self.health_status = "unhealthy"
             self.last_health_check = datetime.now()

             "model_available": self.model is not None,
             "vectorizer_available": self.vectorizer is not None,
             "pipeline_available": self.pipeline is not None,
+            "ensemble_available": self.ensemble is not None,
+            "model_type": self.model_type,
+            "is_ensemble": self.is_ensemble,
             "environment": path_manager.environment,
+            "lightgbm_available": LIGHTGBM_AVAILABLE
         }
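A minimal interactive check of the manager, assuming trained artifacts already exist on disk (the sample text and printed values are illustrative):

manager = EnhancedModelManager()
label, confidence = manager.predict("Officials confirmed the policy change at a press briefing today.")
print(label, round(confidence, 3))        # e.g. Real 0.912
print(manager.health_check()["status"])   # "healthy" when loading and the test prediction succeed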


+# Background task functions remain the same...
 async def log_prediction(text: str, prediction: str, confidence: float, client_ip: str, processing_time: float):
     """Log prediction details with error handling for file access"""
     try:

         "prediction": prediction,
         "confidence": confidence,
         "processing_time": processing_time,
+        "text_hash": hashlib.md5(text.encode()).hexdigest(),
+        "model_type": model_manager.model_type,
+        "is_ensemble": model_manager.is_ensemble
     }

     # Try to save to log file

         await f.write(json.dumps(logs, indent=2))

     except (PermissionError, OSError) as e:
         logger.warning(f"Cannot write prediction log to file: {e}")
         logger.info(f"Prediction logged: {json.dumps(log_entry)}")

         logger.error(f"Failed to log prediction: {e}")


 # Global variables
+model_manager = EnhancedModelManager()

 # Initialize automation manager
 automation_manager = None

 traffic_router = None
 model_registry = None

 @asynccontextmanager
 async def lifespan(app: FastAPI):
+    """Manage application lifespan with enhanced model support"""
     global deployment_manager, traffic_router, model_registry

+    logger.info("Starting Enhanced FastAPI application with ensemble support...")

     # Startup tasks
     model_manager.load_model()

+    # Log model information
+    logger.info(f"Model loaded: {model_manager.model_type}")
+    logger.info(f"Ensemble support: {model_manager.is_ensemble}")
+    logger.info(f"LightGBM available: {LIGHTGBM_AVAILABLE}")
+
     # Initialize deployment components
     try:
         deployment_manager = BlueGreenDeploymentManager()

     except Exception as e:
         logger.error(f"Failed to initialize deployment system: {e}")

+    # Initialize monitoring
+    try:
+        # Declare as globals so the `'prediction_monitor' in globals()` checks
+        # in the request handlers below can actually see these objects
+        global prediction_monitor, metrics_collector, alert_system
+        prediction_monitor = PredictionMonitor(base_dir=Path("/tmp"))
+        metrics_collector = MetricsCollector(base_dir=Path("/tmp"))
+        alert_system = AlertSystem(base_dir=Path("/tmp"))
+
+        prediction_monitor.start_monitoring()
+        alert_system.add_notification_handler("console", console_notification_handler)
+        logger.info("Monitoring system initialized")
+    except Exception as e:
+        logger.error(f"Failed to initialize monitoring: {e}")

     yield

     # Shutdown tasks
+    logger.info("Shutting down Enhanced FastAPI application...")

 # Create FastAPI app
 app = FastAPI(
+    title="Enhanced Fake News Detection API with Ensemble Support",
+    description="Production-ready API for fake news detection with LightGBM ensemble support and comprehensive monitoring",
+    version="2.1.0",
     docs_url="/docs",
     redoc_url="/redoc",
     lifespan=lifespan
 )

+# Add middleware (same as before)
 app.add_middleware(
     CORSMiddleware,
+    allow_origins=["*"],
     allow_credentials=True,
     allow_methods=["*"],
     allow_headers=["*"],

 app.add_middleware(
     TrustedHostMiddleware,
+    allowed_hosts=["*"]
 )

+# Enhanced prediction response model
+class EnhancedPredictionResponse(BaseModel):
+    prediction: str = Field(..., description="Prediction result: 'Real' or 'Fake'")
+    confidence: float = Field(..., ge=0.0, le=1.0, description="Confidence score between 0 and 1")
+    model_version: str = Field(..., description="Version of the model used for prediction")
+    model_type: str = Field(..., description="Type of model: individual, ensemble, etc.")
+    is_ensemble: bool = Field(..., description="Whether an ensemble model was used")
+    ensemble_info: Optional[Dict[str, Any]] = Field(None, description="Ensemble-specific information")
+    timestamp: str = Field(..., description="Timestamp of the prediction")
+    processing_time: float = Field(..., description="Time taken for processing in seconds")
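A response serialized from this model might look like the following (all values illustrative):

{
    "prediction": "Fake",
    "confidence": 0.87,
    "model_version": "v2024.01.15",
    "model_type": "ensemble",
    "is_ensemble": true,
    "ensemble_info": {
        "ensemble_type": "voting_classifier",
        "component_models": ["logistic_regression", "naive_bayes", "lightgbm"],
        "voting_type": "soft"
    },
    "timestamp": "2024-01-15T12:00:00",
    "processing_time": 0.042
}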

+# Enhanced health response model
+class EnhancedHealthResponse(BaseModel):
+    status: str
+    timestamp: str
+    model_health: Dict[str, Any]
+    system_health: Dict[str, Any]
+    api_health: Dict[str, Any]
+    environment_info: Dict[str, Any]
+    ensemble_info: Optional[Dict[str, Any]] = None

+# Request models remain the same...
 class PredictionRequest(BaseModel):
     text: str = Field(..., min_length=1, max_length=10000,
                       description="Text to analyze for fake news detection")

     def validate_text(cls, v):
         if not v or not v.strip():
             raise ValueError('Text cannot be empty')
         if len(v.strip()) < 10:
             raise ValueError('Text must be at least 10 characters long')
         suspicious_patterns = ['<script', 'javascript:', 'data:']
         if any(pattern in v.lower() for pattern in suspicious_patterns):
             raise ValueError('Text contains suspicious content')
         return v.strip()

+# Rate limiting and error handlers remain the same...
 async def rate_limit_check(request: Request):
     """Check rate limits"""
     client_ip = request.client.host

     # Clean old entries
     rate_limit_storage[client_ip] = [
         timestamp for timestamp in rate_limit_storage[client_ip]
+        if current_time - timestamp < 3600
     ]

     # Check rate limit (100 requests per hour)

     rate_limit_storage[client_ip].append(current_time)
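The limiter keeps one timestamp list per client and prunes entries older than 3600 seconds on every call, so a client that has made 100 requests within the past hour is rejected until its oldest timestamp ages out. A self-contained sketch of the same sliding-window logic (the names and boolean interface are assumptions; the handler above raises an HTTPException instead):

import time
from collections import defaultdict

WINDOW_SECONDS = 3600
MAX_REQUESTS = 100
_hits = defaultdict(list)

def allow_request(client_ip: str) -> bool:
    now = time.time()
    # Drop timestamps that have fallen out of the one-hour window
    _hits[client_ip] = [t for t in _hits[client_ip] if now - t < WINDOW_SECONDS]
    if len(_hits[client_ip]) >= MAX_REQUESTS:
        return False  # over the limit for this window
    _hits[client_ip].append(now)
    return True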

 @app.middleware("http")
 async def log_requests(request: Request, call_next):
+    """Log all requests with ensemble information"""
     start_time = time.time()
     response = await call_next(request)
     process_time = time.time() - start_time

     log_data = {

         "client_ip": request.client.host,
         "status_code": response.status_code,
         "process_time": process_time,
+        "timestamp": datetime.now().isoformat(),
+        "model_type": model_manager.model_type,
+        "is_ensemble": model_manager.is_ensemble
     }

     logger.info(f"Request: {json.dumps(log_data)}")
     return response


+# Enhanced API Routes
+@app.get("/")
 async def root():
+    """Root endpoint with ensemble information"""
     return {
+        "message": "Enhanced Fake News Detection API with Ensemble Support",
+        "version": "2.1.0",
         "environment": path_manager.environment,
+        "model_type": model_manager.model_type,
+        "ensemble_support": model_manager.is_ensemble,
+        "lightgbm_available": LIGHTGBM_AVAILABLE,
         "documentation": "/docs",
         "health_check": "/health"
     }


+@app.post("/predict", response_model=EnhancedPredictionResponse)
 async def predict(
     request: PredictionRequest,
     background_tasks: BackgroundTasks,
     http_request: Request,
     _: None = Depends(rate_limit_check)
+):
     """
+    Enhanced prediction with ensemble model support
     - **text**: The news article text to analyze
+    - **returns**: Enhanced prediction result with ensemble information
     """
     start_time = time.time()
     client_ip = http_request.client.host

             detail="Model is not available. Please try again later."
         )

+        # Make prediction using enhanced model manager
+        label, confidence = model_manager.predict(request.text)
+        processing_time = time.time() - start_time
+
+        # Prepare ensemble information
+        ensemble_info = None
+        if model_manager.is_ensemble:
+            ensemble_info = {
+                "ensemble_type": model_manager.ensemble_metadata.get('ensemble_type', 'unknown'),
+                "component_models": model_manager.ensemble_metadata.get('component_models', []),
+                "voting_type": model_manager.model_metadata.get('ensemble_details', {}).get('voting_type', 'soft')
+            }

         # Record prediction for monitoring
+        if 'prediction_monitor' in globals():
+            prediction_monitor.record_prediction(
+                prediction=label,
+                confidence=confidence,
+                processing_time=processing_time,
+                text=request.text,
+                model_version=model_manager.model_metadata.get('model_version', 'unknown'),
+                client_id=client_ip,
+                user_agent=user_agent
+            )

         # Record API request metrics
+        if 'metrics_collector' in globals():
+            metrics_collector.record_api_request(
+                endpoint="/predict",
+                method="POST",
+                response_time=processing_time,
+                status_code=200,
+                client_ip=client_ip
+            )

+        # Create enhanced response
+        response = EnhancedPredictionResponse(
             prediction=label,
             confidence=confidence,
             model_version=model_manager.model_metadata.get('model_version', 'unknown'),
+            model_type=model_manager.model_type,
+            is_ensemble=model_manager.is_ensemble,
+            ensemble_info=ensemble_info,
             timestamp=datetime.now().isoformat(),
             processing_time=processing_time
         )

     except HTTPException:
         # Record error for failed requests
         processing_time = time.time() - start_time
+        if 'prediction_monitor' in globals():
+            prediction_monitor.record_error(
+                error_type="http_error",
+                error_message="Service unavailable",
+                context={"status_code": 503}
+            )
+        if 'metrics_collector' in globals():
+            metrics_collector.record_api_request(
+                endpoint="/predict",
+                method="POST",
+                response_time=processing_time,
+                status_code=503,
+                client_ip=client_ip
+            )
         raise
     except Exception as e:
         processing_time = time.time() - start_time

         # Record error
+        if 'prediction_monitor' in globals():
+            prediction_monitor.record_error(
+                error_type="prediction_error",
+                error_message=str(e),
+                context={"text_length": len(request.text)}
+            )

+        if 'metrics_collector' in globals():
+            metrics_collector.record_api_request(
+                endpoint="/predict",
+                method="POST",
+                response_time=processing_time,
+                status_code=500,
+                client_ip=client_ip
+            )

         logger.error(f"Prediction failed: {e}")
         raise HTTPException(

         )
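Calling the endpoint from a client could look like this (host, port, and the sample text are assumptions):

import httpx

resp = httpx.post(
    "http://localhost:8000/predict",
    json={"text": "Scientists claim a common fruit cures every known disease overnight."},
)
resp.raise_for_status()
body = resp.json()
print(body["prediction"], body["confidence"], body["is_ensemble"])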

+@app.get("/health", response_model=EnhancedHealthResponse)
 async def health_check():
     """
+    Enhanced health check endpoint with ensemble information
+    - **returns**: Detailed health status including ensemble information
     """
     try:
         # Model health

         # Environment info
         environment_info = path_manager.get_environment_info()
+        environment_info["lightgbm_available"] = LIGHTGBM_AVAILABLE
+
+        # Ensemble information
+        ensemble_info = None
+        if model_manager.is_ensemble:
+            ensemble_info = {
+                "is_ensemble": True,
+                "ensemble_type": model_manager.ensemble_metadata.get('ensemble_type', 'unknown'),
+                "component_models": model_manager.ensemble_metadata.get('component_models', []),
+                "ensemble_health": model_health.get('ensemble_info', {}),
+                "ensemble_metadata_available": Path("/tmp/ensemble_metadata.json").exists()
+            }

         # Overall status
         overall_status = "healthy" if model_health["status"] == "healthy" else "unhealthy"

+        return EnhancedHealthResponse(
             status=overall_status,
             timestamp=datetime.now().isoformat(),
             model_health=model_health,
             system_health=system_health,
             api_health=api_health,
+            environment_info=environment_info,
+            ensemble_info=ensemble_info
         )

     except Exception as e:
         logger.error(f"Health check failed: {e}")
+        return EnhancedHealthResponse(
             status="unhealthy",
             timestamp=datetime.now().isoformat(),
             model_health={"status": "unhealthy", "error": str(e)},
             system_health={"error": str(e)},
             api_health={"error": str(e)},
+            environment_info={"error": str(e)},
+            ensemble_info={"error": str(e)} if model_manager.is_ensemble else None
         )


+@app.get("/model/info")
+async def get_model_info():
     """
+    Get detailed model information including ensemble details
+    - **returns**: Comprehensive model information
     """
     try:
+        model_info = {
+            "model_version": model_manager.model_metadata.get('model_version', 'unknown'),
+            "model_type": model_manager.model_type,
+            "is_ensemble": model_manager.is_ensemble,
+            "lightgbm_available": LIGHTGBM_AVAILABLE,
+            "training_method": model_manager.model_metadata.get('training_method', 'unknown'),
+            "timestamp": model_manager.model_metadata.get('timestamp', 'unknown'),
+            "performance_metrics": {
+                "test_accuracy": model_manager.model_metadata.get('test_accuracy', 'unknown'),
+                "test_f1": model_manager.model_metadata.get('test_f1', 'unknown'),
+                "cv_f1_mean": model_manager.model_metadata.get('cv_f1_mean', 'unknown'),
+                "cv_f1_std": model_manager.model_metadata.get('cv_f1_std', 'unknown')
             },
+            "feature_engineering": model_manager.model_metadata.get('feature_engineering', {}),
+            "training_config": model_manager.model_metadata.get('training_config', {})
         }

+        # Add ensemble-specific information
+        if model_manager.is_ensemble:
+            ensemble_details = model_manager.model_metadata.get('ensemble_details', {})
+            model_info["ensemble_details"] = {
+                "ensemble_type": ensemble_details.get('ensemble_type', 'unknown'),
+                "component_models": ensemble_details.get('component_models', []),
+                "voting_type": ensemble_details.get('voting_type', 'soft'),
+                "component_performance": model_manager.model_metadata.get('component_performance', {})
             }
+
+            # Add ensemble metadata if available
+            if model_manager.ensemble_metadata:
+                model_info["ensemble_metadata"] = model_manager.ensemble_metadata

+        return model_info

     except Exception as e:
+        logger.error(f"Model info retrieval failed: {e}")
         raise HTTPException(
             status_code=500,
+            detail=f"Failed to retrieve model info: {str(e)}"
         )


+@app.get("/model/performance")
+async def get_model_performance():
     """
+    Get detailed model performance metrics including ensemble comparison
+    - **returns**: Performance metrics and comparisons
     """
     try:
+        performance_info = {
+            "current_model": {
+                "model_type": model_manager.model_type,
+                "is_ensemble": model_manager.is_ensemble,
+                "test_metrics": {
+                    "accuracy": model_manager.model_metadata.get('test_accuracy', 'unknown'),
+                    "f1": model_manager.model_metadata.get('test_f1', 'unknown'),
+                    "precision": model_manager.model_metadata.get('test_precision', 'unknown'),
+                    "recall": model_manager.model_metadata.get('test_recall', 'unknown'),
+                    "roc_auc": model_manager.model_metadata.get('test_roc_auc', 'unknown')
+                },
+                "cross_validation": model_manager.model_metadata.get('cross_validation', {})
             },
+            "training_info": {
+                "training_method": model_manager.model_metadata.get('training_method', 'unknown'),
+                "lightgbm_used": model_manager.model_metadata.get('lightgbm_used', False),
+                "enhanced_features": model_manager.model_metadata.get('feature_engineering', {}).get('enhanced_features_used', False)
             }
         }

+        # Add ensemble-specific performance information
+        if model_manager.is_ensemble:
+            component_performance = model_manager.model_metadata.get('component_performance', {})
+            if component_performance:
+                performance_info["component_comparison"] = component_performance
+
+                # Calculate ensemble advantage
+                ensemble_f1 = model_manager.model_metadata.get('test_f1', 0)
+                if isinstance(ensemble_f1, (int, float)):
+                    best_individual_f1 = max([comp.get('f1', 0) for comp in component_performance.values()], default=0)
+                    if best_individual_f1 > 0:
+                        ensemble_advantage = ensemble_f1 - best_individual_f1
+                        performance_info["ensemble_advantage"] = {
+                            "f1_improvement": ensemble_advantage,
+                            "relative_improvement": (ensemble_advantage / best_individual_f1) * 100 if best_individual_f1 > 0 else 0
+                        }

+        return performance_info

     except Exception as e:
+        logger.error(f"Performance info retrieval failed: {e}")
         raise HTTPException(
             status_code=500,
+            detail=f"Failed to retrieve performance info: {str(e)}"
         )
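A worked example of the advantage computation with invented numbers: if the ensemble's test F1 is 0.93 and its best component scores 0.90, then f1_improvement = 0.93 - 0.90 = 0.03 and relative_improvement = 0.03 / 0.90 * 100 ≈ 3.33 percent. A negative f1_improvement would mean the best individual model outperformed the ensemble on the test set.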

+# Keep all other existing endpoints (cv/results, metrics, etc.) but enhance them with ensemble information where relevant

+@app.get("/ensemble/status")
+async def get_ensemble_status():
+    """
+    Get ensemble-specific status information
+    - **returns**: Ensemble status and configuration
+    """
     try:
+        if not model_manager.is_ensemble:
             return {
+                "ensemble_active": False,
+                "message": "Current model is not an ensemble",
+                "model_type": model_manager.model_type,
+                "lightgbm_available": LIGHTGBM_AVAILABLE
             }

+        ensemble_status = {
+            "ensemble_active": True,
+            "ensemble_type": model_manager.ensemble_metadata.get('ensemble_type', 'unknown'),
+            "component_models": model_manager.ensemble_metadata.get('component_models', []),
+            "ensemble_health": model_manager.health_status,
+            "lightgbm_available": LIGHTGBM_AVAILABLE,
+            "lightgbm_used": 'lightgbm' in model_manager.ensemble_metadata.get('component_models', []),
+            "voting_type": model_manager.model_metadata.get('ensemble_details', {}).get('voting_type', 'unknown'),
+            "model_version": model_manager.model_metadata.get('model_version', 'unknown'),
+            "training_timestamp": model_manager.model_metadata.get('timestamp', 'unknown')
+        }

+        # Add performance comparison if available
+        component_performance = model_manager.model_metadata.get('component_performance', {})
+        if component_performance:
+            ensemble_status["component_performance"] = component_performance

+            # Calculate which model would have been best individually
+            best_individual = max(component_performance.items(), key=lambda x: x[1].get('f1', 0), default=('none', {'f1': 0}))
+            ensemble_status["best_individual_model"] = {
+                "name": best_individual[0],
+                "f1_score": best_individual[1].get('f1', 0)
+            }

+        return ensemble_status

     except Exception as e:
+        logger.error(f"Ensemble status retrieval failed: {e}")
+        raise HTTPException(
+            status_code=500,
+            detail=f"Failed to retrieve ensemble status: {str(e)}"
+        )
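Once the server is running, the new route can be exercised like any other (host and port are assumptions):

import httpx

status = httpx.get("http://localhost:8000/ensemble/status").json()
if status["ensemble_active"]:
    print(status["ensemble_type"], status["component_models"])
else:
    print("Single model:", status["model_type"])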