diff --git "a/app/fastapi_server.py" "b/app/fastapi_server.py" --- "a/app/fastapi_server.py" +++ "b/app/fastapi_server.py" @@ -1,2045 +1,2282 @@ import os -import io import sys import json import time -import hashlib +import joblib import logging -import requests -import subprocess -import pandas as pd +import hashlib +import uvicorn +import asyncio +import aiofiles +import traceback import numpy as np -import altair as alt -import streamlit as st from pathlib import Path -import plotly.express as px -import plotly.graph_objects as go -from plotly.subplots import make_subplots +from typing import Optional +from dataclasses import asdict +from collections import defaultdict from datetime import datetime, timedelta -from typing import Dict, List, Optional, Any +from contextlib import asynccontextmanager +from typing import List, Dict, Optional, Any +from fastapi.responses import JSONResponse +from fastapi.openapi.utils import get_openapi +from pydantic import BaseModel, Field, validator +from fastapi.middleware.cors import CORSMiddleware +from fastapi.openapi.docs import get_swagger_ui_html +from fastapi.middleware.trustedhost import TrustedHostMiddleware +from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials +from fastapi import FastAPI, HTTPException, Depends, Request, BackgroundTasks, status + +from data.data_validator import ( + DataValidationPipeline, validate_text, validate_articles_list, + get_validation_stats, generate_quality_report +) + +from model.retrain import AutomatedRetrainingManager +from monitor.metrics_collector import MetricsCollector +from monitor.prediction_monitor import PredictionMonitor +from monitor.alert_system import AlertSystem, console_notification_handler + +from deployment.traffic_router import TrafficRouter +from deployment.model_registry import ModelRegistry +from deployment.blue_green_manager import BlueGreenDeploymentManager + # Import the new path manager try: from path_config import path_manager except ImportError: + # Fallback for development environments sys.path.append(os.path.dirname(os.path.abspath(__file__))) - sys.path.append('/app') from path_config import path_manager -# Configure logging -def setup_streamlit_logging(): - """Setup logging with fallback for restricted file access""" +try: + from data.data_validator import DataValidator + from data.validation_schemas import TextQualityLevel + VALIDATION_AVAILABLE = True +except ImportError as e: + logger.warning(f"Data validation not available: {e}") + VALIDATION_AVAILABLE = False + +# Configure logging with fallback for permission issues +def setup_logging(): + """Setup logging with fallback for environments with restricted file access""" + handlers = [logging.StreamHandler()] # Always include console output + try: - log_file_path = path_manager.get_logs_path('streamlit_app.log') + # Try to create log file in the logs directory + log_file_path = path_manager.get_logs_path('fastapi_server.log') log_file_path.parent.mkdir(parents=True, exist_ok=True) - with open(log_file_path, 'a') as test_file: - test_file.write('') + # Test if we can write to the file + test_handler = logging.FileHandler(log_file_path) + test_handler.close() - logging.basicConfig( - level=logging.INFO, - format='%(asctime)s - %(levelname)s - %(message)s', - handlers=[ - logging.FileHandler(log_file_path), - logging.StreamHandler() - ] - ) - return True - except (PermissionError, OSError): - logging.basicConfig( - level=logging.INFO, - format='%(asctime)s - %(levelname)s - %(message)s', - handlers=[logging.StreamHandler()] - ) - return False + # If successful, add file handler + handlers.append(logging.FileHandler(log_file_path)) + print(f"Logging to file: {log_file_path}") # Use print instead of logger + + except (PermissionError, OSError) as e: + # If file logging fails, just use console logging + print(f"Cannot create log file, using console only: {e}") + + # Try alternative locations for file logging + try: + import tempfile + temp_log = tempfile.NamedTemporaryFile(mode='w', suffix='.log', delete=False, prefix='fastapi_') + temp_log.close() + handlers.append(logging.FileHandler(temp_log.name)) + print(f"Using temporary log file: {temp_log.name}") + except Exception as temp_e: + print(f"Temporary file logging also failed: {temp_e}") + + return handlers -file_logging_enabled = setup_streamlit_logging() +# Setup logging with error handling +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(levelname)s - %(message)s', + handlers=setup_logging() +) logger = logging.getLogger(__name__) -if not file_logging_enabled: - logger.warning("File logging disabled due to permission restrictions") +# Now that logger is defined, log the environment info +try: + path_manager.log_environment_info() +except Exception as e: + logger.warning(f"Could not log environment info: {e}") -logger.info(f"Streamlit starting in {path_manager.environment} environment") +# Security +security = HTTPBearer(auto_error=False) +# Rate limiting storage +rate_limit_storage = defaultdict(list) -# Page configuration - MUST be first Streamlit command -st.set_page_config( - page_title="Fake News Detection System | MLOps Dashboard", - page_icon="📰", - layout="wide", - initial_sidebar_state="expanded", - menu_items={ - 'Get Help': 'https://github.com/your-repo/issues', - 'Report a bug': 'https://github.com/your-repo/issues', - 'About': """ - # Advanced Fake News Detection System - - A production-grade MLOps pipeline with statistical rigor and CPU optimization. - - ## Key Features: - - Bootstrap confidence intervals - - Statistical significance testing - - Cross-validation with uncertainty quantification - - CPU-optimized for HuggingFace Spaces - - Built with FastAPI, Streamlit, and scikit-learn. - """ - } -) -class EnhancedStreamlitApp: - """Enhanced Streamlit application with professional UI/UX""" +class ModelManager: + """Manages model loading and health checks with dynamic paths""" def __init__(self): - self.setup_config() - self.setup_api_client() - self.initialize_session_state() - self.setup_custom_css() - - def setup_config(self): - """Setup application configuration""" - self.config = { - 'api_url': "http://localhost:8000", - 'max_upload_size': 1000 * 1024 * 1024, - 'supported_file_types': ['csv', 'txt', 'json'], - 'max_text_length': 10000, - 'prediction_timeout': 30, - 'refresh_interval': 60, - 'max_batch_size': 100, - 'chart_theme': 'plotly_white' - } - - def setup_api_client(self): - """Setup API client with comprehensive error handling""" - self.session = requests.Session() - self.session.timeout = self.config['prediction_timeout'] - - # Add retry logic with compatibility handling - try: - from requests.adapters import HTTPAdapter - from urllib3.util.retry import Retry - - # Try new parameter name first, fall back to old one - try: - retry_strategy = Retry( - total=3, - status_forcelist=[429, 500, 502, 503, 504], - allowed_methods=["HEAD", "GET", "OPTIONS"] - ) - except TypeError: - # Fallback for older urllib3 versions - retry_strategy = Retry( - total=3, - status_forcelist=[429, 500, 502, 503, 504], - method_whitelist=["HEAD", "GET", "OPTIONS"] - ) - - adapter = HTTPAdapter(max_retries=retry_strategy) - self.session.mount("http://", adapter) - self.session.mount("https://", adapter) - - except Exception as e: - logger.warning(f"Could not setup retry strategy: {e}") - - self.api_available = self.test_api_connection() - - def test_api_connection(self) -> bool: - """Test API connection with detailed status""" + self.model = None + self.vectorizer = None + self.pipeline = None + self.model_metadata = {} + self.last_health_check = None + self.health_status = "unknown" + self.load_model() + + def load_model(self): + """Load model with comprehensive error handling and dynamic paths""" try: - response = self.session.get(f"{self.config['api_url']}/health", timeout=5) - return response.status_code == 200 - except Exception as e: - logger.warning(f"API connection failed: {e}") - return False - - def initialize_session_state(self): - """Initialize comprehensive session state""" - default_states = { - 'prediction_history': [], - 'upload_history': [], - 'last_refresh': datetime.now(), - 'auto_refresh': False, - 'selected_model_version': 'current', - 'dashboard_theme': 'professional', - 'expanded_sections': set(), - 'user_preferences': { - 'show_confidence_intervals': True, - 'show_statistical_tests': True, - 'chart_style': 'modern', - 'auto_scroll': True - } - } - - for key, value in default_states.items(): - if key not in st.session_state: - st.session_state[key] = value + logger.info("Loading ML model...") - def setup_custom_css(self): - """Setup advanced custom CSS styling with dark grey and gold theme""" - st.markdown(""" - - """, unsafe_allow_html=True) + # Initialize all to None first + self.model = None + self.vectorizer = None + self.pipeline = None - # API Methods (enhanced versions of existing methods) - def get_cv_results_from_api(self) -> Optional[Dict]: - """Get cross-validation results with enhanced error handling""" - try: - if not self.api_available: - return None - - response = self.session.get(f"{self.config['api_url']}/cv/results", timeout=10) + # Try to load pipeline first (preferred) + pipeline_path = path_manager.get_pipeline_path() + logger.info(f"Checking for pipeline at: {pipeline_path}") - if response.status_code == 200: - data = response.json() - # Cache the results - st.session_state['cached_cv_results'] = { - 'data': data, - 'timestamp': datetime.now(), - 'ttl': 300 # 5 minutes - } - return data - elif response.status_code == 404: - return {'error': 'No CV results available'} + if pipeline_path.exists(): + try: + self.pipeline = joblib.load(pipeline_path) + # Extract components from pipeline + if hasattr(self.pipeline, 'named_steps'): + self.model = self.pipeline.named_steps.get('model') + self.vectorizer = (self.pipeline.named_steps.get('vectorizer') or + self.pipeline.named_steps.get('vectorize')) + logger.info("Loaded model pipeline successfully") + logger.info(f"Pipeline steps: {list(self.pipeline.named_steps.keys()) if hasattr(self.pipeline, 'named_steps') else 'No named_steps'}") + except Exception as e: + logger.warning(f"Failed to load pipeline: {e}, falling back to individual components") + self.pipeline = None else: - return {'error': f'API Error: {response.status_code}'} + logger.info(f"Pipeline file not found at {pipeline_path}") + + # If pipeline loading failed or doesn't exist, load individual components + if self.pipeline is None: + model_path = path_manager.get_model_file_path() + vectorizer_path = path_manager.get_vectorizer_path() + logger.info(f"Checking for model at: {model_path}") + logger.info(f"Checking for vectorizer at: {vectorizer_path}") + + if model_path.exists() and vectorizer_path.exists(): + try: + self.model = joblib.load(model_path) + self.vectorizer = joblib.load(vectorizer_path) + logger.info("Loaded model components successfully") + except Exception as e: + logger.error(f"Failed to load individual components: {e}") + raise e + else: + raise FileNotFoundError(f"No model files found. Checked:\n- {pipeline_path}\n- {model_path}\n- {vectorizer_path}") + + # Verify we have what we need for predictions + if self.pipeline is None and (self.model is None or self.vectorizer is None): + raise ValueError("Neither complete pipeline nor individual model components are available") + + # Load metadata + metadata_path = path_manager.get_metadata_path() + if metadata_path.exists(): + with open(metadata_path, 'r') as f: + self.model_metadata = json.load(f) + logger.info(f"Loaded model metadata: {self.model_metadata.get('model_version', 'Unknown')}") + else: + logger.warning(f"Metadata file not found at: {metadata_path}") + self.model_metadata = {"model_version": "unknown"} + + self.health_status = "healthy" + self.last_health_check = datetime.now() + + # Log what was successfully loaded + logger.info(f"Model loading summary:") + logger.info(f" Pipeline available: {self.pipeline is not None}") + logger.info(f" Model available: {self.model is not None}") + logger.info(f" Vectorizer available: {self.vectorizer is not None}") + except Exception as e: - logger.warning(f"Could not fetch CV results: {e}") - # Return cached results if available - cached = st.session_state.get('cached_cv_results') - if cached and (datetime.now() - cached['timestamp']).seconds < cached['ttl']: - return cached['data'] - return None - - def get_monitoring_metrics_from_api(self) -> Optional[Dict]: - """Get real-time monitoring metrics""" + logger.error(f"Failed to load model: {e}") + logger.error(f"Traceback: {traceback.format_exc()}") + self.health_status = "unhealthy" + self.model = None + self.vectorizer = None + self.pipeline = None + + def predict(self, text: str) -> tuple[str, float]: + """Make prediction with error handling""" try: - if not self.api_available: - return None - - response = self.session.get(f"{self.config['api_url']}/monitor/metrics/current", timeout=10) - return response.json() if response.status_code == 200 else None - - except Exception as e: - logger.warning(f"Could not fetch monitoring metrics: {e}") - return None + if self.pipeline: + # Use pipeline for prediction + prediction = self.pipeline.predict([text])[0] + probabilities = self.pipeline.predict_proba([text])[0] + logger.debug("Used pipeline for prediction") + elif self.model and self.vectorizer: + # Use individual components + X = self.vectorizer.transform([text]) + prediction = self.model.predict(X)[0] + probabilities = self.model.predict_proba(X)[0] + logger.debug("Used individual components for prediction") + else: + raise ValueError("No model available for prediction") - def make_prediction_request(self, text: str) -> Dict[str, Any]: - """Enhanced prediction request with better error handling""" - try: - if not self.api_available: - return {'error': 'API is not available'} - - # Show loading state - with st.spinner('Analyzing text with statistical validation...'): - response = self.session.post( - f"{self.config['api_url']}/predict", - json={"text": text}, - timeout=self.config['prediction_timeout'] - ) + # Get confidence score + confidence = float(max(probabilities)) - if response.status_code == 200: - result = response.json() - # Add to prediction history - self.add_to_prediction_history(text, result) - return result - else: - return {'error': f'API Error: {response.status_code} - {response.text}'} + # Convert prediction to readable format + label = "Fake" if prediction == 1 else "Real" + + return label, confidence - except requests.exceptions.Timeout: - return {'error': 'Request timed out. Please try again.'} - except requests.exceptions.ConnectionError: - return {'error': 'Cannot connect to prediction service.'} except Exception as e: - return {'error': f'Unexpected error: {str(e)}'} + logger.error(f"Prediction failed: {e}") + logger.error(f"Traceback: {traceback.format_exc()}") + raise HTTPException( + status_code=500, + detail=f"Prediction failed: {str(e)}" + ) - def add_to_prediction_history(self, text: str, result: Dict): - """Add prediction to session history with enhanced metadata""" - prediction_entry = { - 'timestamp': datetime.now().isoformat(), - 'text': text[:200] + "..." if len(text) > 200 else text, - 'prediction': result.get('prediction', 'Unknown'), - 'confidence': result.get('confidence', 0.0), - 'text_length': len(text), - 'word_count': len(text.split()), - 'processing_time': result.get('processing_time', 0.0), - 'model_version': result.get('model_version', 'unknown'), - 'session_id': st.session_state.get('session_id', 'default') - } + def health_check(self) -> Dict[str, Any]: + """Perform health check""" + try: + # Test prediction with sample text + test_text = "This is a test article for health check purposes." + label, confidence = self.predict(test_text) + + self.health_status = "healthy" + self.last_health_check = datetime.now() + + return { + "status": "healthy", + "last_check": self.last_health_check.isoformat(), + "model_available": self.model is not None, + "vectorizer_available": self.vectorizer is not None, + "pipeline_available": self.pipeline is not None, + "test_prediction": {"label": label, "confidence": confidence}, + "environment": path_manager.environment, + "model_path": str(path_manager.get_model_file_path()), + "vectorizer_path": str(path_manager.get_vectorizer_path()), + "pipeline_path": str(path_manager.get_pipeline_path()), + "data_path": str(path_manager.get_data_path()), + "file_exists": { + "model": path_manager.get_model_file_path().exists(), + "vectorizer": path_manager.get_vectorizer_path().exists(), + "pipeline": path_manager.get_pipeline_path().exists(), + "metadata": path_manager.get_metadata_path().exists() + } + } - st.session_state.prediction_history.append(prediction_entry) - - # Keep only last 100 predictions - if len(st.session_state.prediction_history) > 100: - st.session_state.prediction_history = st.session_state.prediction_history[-100:] - - # Enhanced Visualization Methods - def create_advanced_confidence_gauge(self, confidence: float, prediction: str): - """Create advanced confidence gauge with statistical context""" - # Color scheme based on prediction - color = "#e74c3c" if prediction == "Fake" else "#27ae60" - - fig = go.Figure(go.Indicator( - mode="gauge+number+delta", - value=confidence * 100, - domain={'x': [0, 1], 'y': [0, 1]}, - title={'text': f"Confidence: {prediction}
Statistical Validation"}, - delta={'reference': 75, 'position': "top"}, - gauge={ - 'axis': {'range': [None, 100], 'tickwidth': 1, 'tickcolor': "darkblue"}, - 'bar': {'color': color}, - 'bgcolor': "white", - 'borderwidth': 2, - 'bordercolor': "gray", - 'steps': [ - {'range': [0, 50], 'color': '#f8f9fa'}, - {'range': [50, 75], 'color': '#e9ecef'}, - {'range': [75, 90], 'color': '#dee2e6'}, - {'range': [90, 100], 'color': '#ced4da'} - ], - 'threshold': { - 'line': {'color': "red", 'width': 4}, - 'thickness': 0.75, - 'value': 90 + except Exception as e: + self.health_status = "unhealthy" + self.last_health_check = datetime.now() + + return { + "status": "unhealthy", + "last_check": self.last_health_check.isoformat(), + "error": str(e), + "model_available": self.model is not None, + "vectorizer_available": self.vectorizer is not None, + "pipeline_available": self.pipeline is not None, + "environment": path_manager.environment, + "model_path": str(path_manager.get_model_file_path()), + "vectorizer_path": str(path_manager.get_vectorizer_path()), + "pipeline_path": str(path_manager.get_pipeline_path()), + "data_path": str(path_manager.get_data_path()), + "file_exists": { + "model": path_manager.get_model_file_path().exists(), + "vectorizer": path_manager.get_vectorizer_path().exists(), + "pipeline": path_manager.get_pipeline_path().exists(), + "metadata": path_manager.get_metadata_path().exists() } } - )) - fig.update_layout( - paper_bgcolor="rgba(255,255,255,0)", - plot_bgcolor="rgba(255,255,255,0)", - font={'color': "darkblue", 'family': "Inter"}, - height=350 - ) - return fig +# Background task functions +async def log_prediction(text: str, prediction: str, confidence: float, client_ip: str, processing_time: float): + """Log prediction details with error handling for file access""" + try: + log_entry = { + "timestamp": datetime.now().isoformat(), + "client_ip": client_ip, + "text_length": len(text), + "prediction": prediction, + "confidence": confidence, + "processing_time": processing_time, + "text_hash": hashlib.md5(text.encode()).hexdigest() + } - def create_cv_performance_visualization(self, cv_results: Dict): - """Create comprehensive CV performance visualization""" - if not cv_results or 'cross_validation' not in cv_results: - return None + # Try to save to log file + try: + log_file = path_manager.get_logs_path("prediction_log.json") + + # Load existing logs + logs = [] + if log_file.exists(): + try: + async with aiofiles.open(log_file, 'r') as f: + content = await f.read() + logs = json.loads(content) + except: + logs = [] + + # Add new log + logs.append(log_entry) + + # Keep only last 1000 entries + if len(logs) > 1000: + logs = logs[-1000:] + + # Save logs + async with aiofiles.open(log_file, 'w') as f: + await f.write(json.dumps(logs, indent=2)) + + except (PermissionError, OSError) as e: + # If file logging fails, just log to console + logger.warning(f"Cannot write prediction log to file: {e}") + logger.info(f"Prediction logged: {json.dumps(log_entry)}") - cv_data = cv_results['cross_validation'] - fold_results = cv_data.get('individual_fold_results', []) - - if not fold_results: - return None + except Exception as e: + logger.error(f"Failed to log prediction: {e}") - # Create subplot figure - fig = make_subplots( - rows=2, cols=2, - subplot_titles=('F1 Score Distribution', 'Accuracy Distribution', - 'Performance by Fold', 'Train vs Test Scores'), - specs=[[{"type": "histogram"}, {"type": "histogram"}], - [{"type": "scatter"}, {"type": "bar"}]] - ) - # Extract data for visualization - f1_scores = [fold['test_scores'].get('f1', 0) for fold in fold_results if 'test_scores' in fold] - accuracy_scores = [fold['test_scores'].get('accuracy', 0) for fold in fold_results if 'test_scores' in fold] - fold_numbers = [fold.get('fold', i) for i, fold in enumerate(fold_results)] +async def log_batch_prediction(total_texts: int, successful_predictions: int, client_ip: str, processing_time: float): + """Log batch prediction details""" + try: + log_entry = { + "timestamp": datetime.now().isoformat(), + "type": "batch_prediction", + "client_ip": client_ip, + "total_texts": total_texts, + "successful_predictions": successful_predictions, + "processing_time": processing_time, + "success_rate": successful_predictions / total_texts if total_texts > 0 else 0 + } - # F1 Distribution - fig.add_trace( - go.Histogram(x=f1_scores, nbinsx=10, name="F1 Distribution", - marker_color='rgba(55, 128, 191, 0.7)'), - row=1, col=1 - ) + logger.info(f"Batch prediction logged: {json.dumps(log_entry)}") - # Accuracy Distribution - fig.add_trace( - go.Histogram(x=accuracy_scores, nbinsx=10, name="Accuracy Distribution", - marker_color='rgba(219, 64, 82, 0.7)'), - row=1, col=2 - ) + except Exception as e: + logger.error(f"Failed to log batch prediction: {e}") - # Performance by Fold - fig.add_trace( - go.Scatter(x=fold_numbers, y=f1_scores, mode='lines+markers', - name='F1 Score', line=dict(color='blue', width=3)), - row=2, col=1 - ) - - fig.add_trace( - go.Scatter(x=fold_numbers, y=accuracy_scores, mode='lines+markers', - name='Accuracy', line=dict(color='red', width=3)), - row=2, col=1 - ) - # Train vs Test comparison (if available) - if fold_results and 'train_scores' in fold_results[0]: - test_f1 = [fold['test_scores'].get('f1', 0) for fold in fold_results] - train_f1 = [fold['train_scores'].get('f1', 0) for fold in fold_results] - - fig.add_trace( - go.Bar(x=fold_numbers, y=test_f1, name='Test F1', - marker_color='rgba(55, 128, 191, 0.7)'), - row=2, col=2 - ) - - fig.add_trace( - go.Bar(x=fold_numbers, y=train_f1, name='Train F1', - marker_color='rgba(219, 64, 82, 0.7)'), - row=2, col=2 - ) +# Global variables +model_manager = ModelManager() - fig.update_layout( - height=800, - showlegend=True, - title_text="Cross-Validation Performance Analysis", - template="plotly_white", - font=dict(family="Inter", size=12) - ) +# Initialize automation manager +automation_manager = None - return fig +# Initialize deployment components +deployment_manager = None +traffic_router = None +model_registry = None - def create_prediction_history_chart(self): - """Create enhanced prediction history visualization""" - if not st.session_state.prediction_history: - return None - df = pd.DataFrame(st.session_state.prediction_history) - df['timestamp'] = pd.to_datetime(df['timestamp']) - df['confidence_percent'] = df['confidence'] * 100 +@asynccontextmanager +async def lifespan(app: FastAPI): + """Manage application lifespan with deployment system""" + global deployment_manager, traffic_router, model_registry + + logger.info("Starting FastAPI application...") + + # Startup tasks + model_manager.load_model() + + # Initialize deployment components + try: + deployment_manager = BlueGreenDeploymentManager() + traffic_router = TrafficRouter() + model_registry = ModelRegistry() + logger.info("Deployment system initialized") + except Exception as e: + logger.error(f"Failed to initialize deployment system: {e}") + + # Initialize monitoring and automation... + + yield + + # Shutdown tasks + logger.info("Shutting down FastAPI application...") - # Create subplot figure - fig = make_subplots( - rows=2, cols=2, - subplot_titles=('Confidence Over Time', 'Prediction Distribution', - 'Processing Time Trend', 'Text Length vs Confidence'), - specs=[[{"secondary_y": True}, {"type": "pie"}], - [{"type": "scatter"}, {"type": "scatter"}]] - ) +# Initialize monitoring components +prediction_monitor = PredictionMonitor(base_dir=Path("/tmp")) +metrics_collector = MetricsCollector(base_dir=Path("/tmp")) +alert_system = AlertSystem(base_dir=Path("/tmp")) - # Confidence over time - fig.add_trace( - go.Scatter(x=df['timestamp'], y=df['confidence_percent'], - mode='lines+markers', name='Confidence %', - line=dict(color='blue', width=2), - marker=dict(color=df['prediction'].map({'Fake': 'red', 'Real': 'green'}), - size=8)), - row=1, col=1 - ) +# Start monitoring +prediction_monitor.start_monitoring() - # Prediction distribution - pred_counts = df['prediction'].value_counts() - fig.add_trace( - go.Pie(labels=pred_counts.index, values=pred_counts.values, - name="Predictions", hole=0.3), - row=1, col=2 - ) +alert_system.add_notification_handler("console", console_notification_handler) - # Processing time trend - fig.add_trace( - go.Scatter(x=df['timestamp'], y=df['processing_time'], - mode='lines+markers', name='Processing Time (s)', - line=dict(color='purple', width=2)), - row=2, col=1 - ) - # Text length vs confidence - fig.add_trace( - go.Scatter(x=df['text_length'], y=df['confidence_percent'], - mode='markers', name='Length vs Confidence', - marker=dict( - color=df['prediction'].map({'Fake': 'red', 'Real': 'green'}), - size=10, - opacity=0.7 - )), - row=2, col=2 - ) +@asynccontextmanager +async def lifespan(app: FastAPI): + """Manage application lifespan""" + logger.info("Starting FastAPI application...") - fig.update_layout( - height=700, - showlegend=True, - title_text="Prediction Analytics Dashboard", - template="plotly_white", - font=dict(family="Inter", size=12) - ) + # Startup tasks + model_manager.load_model() + + # Schedule periodic health checks + asyncio.create_task(periodic_health_check()) - return fig + yield - def render_system_health_dashboard(self): - """Render comprehensive system health dashboard""" - st.markdown("## đŸĨ System Health Dashboard") + # Shutdown tasks + logger.info("Shutting down FastAPI application...") - # Get health data + +# Background tasks +async def periodic_health_check(): + """Periodic health check""" + while True: try: - if self.api_available: - health_response = self.session.get(f"{self.config['api_url']}/health", timeout=10) - if health_response.status_code == 200: - health_data = health_response.json() - - # Create health metrics grid - col1, col2, col3, col4 = st.columns(4) - - with col1: - status = health_data.get('status', 'unknown') - if status == 'healthy': - st.markdown('

đŸŸĸ System Status

Healthy

', - unsafe_allow_html=True) - else: - st.markdown('

🔴 System Status

Issues Detected

', - unsafe_allow_html=True) - - with col2: - model_health = health_data.get('model_health', {}) - model_status = model_health.get('status', 'unknown') - st.markdown(f'

🤖 Model Health

{model_status.title()}

', - unsafe_allow_html=True) - - with col3: - system_health = health_data.get('system_health', {}) - cpu_percent = system_health.get('cpu_percent', 0) - st.markdown(f'

đŸ’ģ CPU Usage

{cpu_percent:.1f}%

', - unsafe_allow_html=True) - - with col4: - memory_percent = system_health.get('memory_percent', 0) - st.markdown(f'

🧠 Memory Usage

{memory_percent:.1f}%

', - unsafe_allow_html=True) - - # Detailed health information - with st.expander("🔍 Detailed Health Information", expanded=False): - st.json(health_data) - - else: - st.error("Failed to retrieve health data from API") - else: - st.error("API service is not available") - + await asyncio.sleep(300) # Check every 5 minutes + health_status = model_manager.health_check() + + if health_status["status"] == "unhealthy": + logger.warning( + "Model health check failed, attempting to reload...") + model_manager.load_model() + except Exception as e: - st.error(f"Error retrieving health data: {e}") + logger.error(f"Periodic health check failed: {e}") -# Initialize the enhanced app -app = EnhancedStreamlitApp() -# Main Application -def main(): - """Enhanced main application with professional design""" - - # Custom header - st.markdown(""" -
-

📰 Fake News Detection System

-
Production-Grade MLOps Pipeline with Statistical Rigor
-
-
🌐 Environment: """ + path_manager.environment.title() + """
-
- """, unsafe_allow_html=True) - - # API Status - col1, col2, col3 = st.columns([1, 2, 1]) - with col2: - if app.api_available: - st.markdown( - '
đŸŸĸ API Service: Online & Ready
', - unsafe_allow_html=True - ) - else: - st.markdown( - '
🔴 API Service: Offline
', - unsafe_allow_html=True - ) +# Create FastAPI app +app = FastAPI( + title="Fake News Detection API", + description="Production-ready API for fake news detection with comprehensive monitoring and security features", + version="2.0.0", + docs_url="/docs", + redoc_url="/redoc", + lifespan=lifespan +) - # Enhanced Navigation - tab1, tab2, tab3, tab4, tab5, tab6, tab7, tab8 = st.tabs([ - "🔍 Prediction", "📊 Batch Analysis", "📈 Analytics", "đŸŽ¯ Cross-Validation", - "🤖 Model Training", "📋 System Logs", "đŸĨ Health Monitor", "⚡ Real-time Metrics" - ]) +# Add middleware +app.add_middleware( + CORSMiddleware, + allow_origins=["*"], # Configure appropriately for production + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) - with tab1: - render_prediction_interface() - - with tab2: - render_batch_analysis() - - with tab3: - render_analytics_dashboard() - - with tab4: - render_cv_results_dashboard() - - with tab5: - render_model_training_interface() - - with tab6: - render_system_logs() - - with tab7: - render_health_monitoring() - - with tab8: - render_realtime_metrics() - -def render_prediction_interface(): - """Enhanced prediction interface with professional styling""" - st.markdown("## 🔍 Single Text Analysis") - st.markdown("*Analyze individual news articles with statistical confidence intervals*") - - # Input method selection with enhanced UI - col1, col2 = st.columns(2) - with col1: - input_method = st.radio( - "Choose input method:", - ["📝 Type Text", "📁 Upload File"], - horizontal=True - ) - - with col2: - show_advanced = st.checkbox("đŸ”Ŧ Show Advanced Options", value=False) +app.add_middleware( + TrustedHostMiddleware, + allowed_hosts=["*"] # Configure appropriately for production +) - user_text = "" +# Custom OpenAPI setup - RIGHT AFTER app creation +def custom_openapi(): + if app.openapi_schema: + return app.openapi_schema - if input_method == "📝 Type Text": - user_text = st.text_area( - "Enter news article text:", - height=200, - placeholder="Paste or type the news article you want to analyze...", - help="Enter at least 10 characters for analysis" - ) - - if user_text: - # Real-time text statistics - col1, col2, col3, col4 = st.columns(4) - with col1: - st.metric("Characters", len(user_text)) - with col2: - st.metric("Words", len(user_text.split())) - with col3: - st.metric("Sentences", user_text.count('.') + user_text.count('!') + user_text.count('?')) - with col4: - readability = min(100, max(0, 100 - len(user_text.split()) / 10)) - st.metric("Readability Score", f"{readability:.0f}") - - else: # Upload File - uploaded_file = st.file_uploader( - "Upload text file:", - type=['txt', 'csv'], - help="Upload a text file containing the article to analyze" - ) + openapi_schema = get_openapi( + title="Fake News Detection API", + version="2.0.0", + description="Production-ready API for fake news detection with comprehensive monitoring and security features", + routes=app.routes, + ) - if uploaded_file: - try: - if uploaded_file.type == "text/plain": - user_text = str(uploaded_file.read(), "utf-8") - elif uploaded_file.type == "text/csv": - df = pd.read_csv(uploaded_file) - if 'text' in df.columns: - user_text = df['text'].iloc[0] if len(df) > 0 else "" - else: - st.error("CSV file must contain a 'text' column") - - if user_text: - st.success(f"File uploaded successfully! ({len(user_text)} characters)") + # Add security definitions + openapi_schema["components"]["securitySchemes"] = { + "Bearer": { + "type": "http", + "scheme": "bearer", + "bearerFormat": "JWT", + } + } - except Exception as e: - st.error(f"Error reading file: {e}") - - # Advanced options - if show_advanced: - with st.expander("Advanced Analysis Options"): - col1, col2 = st.columns(2) - with col1: - show_uncertainty = st.checkbox("Show Uncertainty Analysis", value=True) - show_feature_importance = st.checkbox("Show Feature Importance", value=False) - with col2: - confidence_threshold = st.slider("Confidence Threshold", 0.5, 0.95, 0.75) - detailed_metrics = st.checkbox("Detailed Performance Metrics", value=False) - - # Prediction section with enhanced UI - col1, col2, col3 = st.columns([2, 1, 1]) - - with col1: - if st.button("Analyze Text", type="primary", use_container_width=True): - if user_text: - # Validate input - is_valid, validation_message = validate_text_input(user_text) - - if not is_valid: - st.error(validation_message) - else: - # Make prediction with enhanced UI - result = app.make_prediction_request(user_text) + app.openapi_schema = openapi_schema + return app.openapi_schema - if 'error' in result: - st.error(f"Analysis failed: {result['error']}") - else: - render_prediction_results(result, user_text, show_advanced) - else: - st.warning("Please enter text to analyze.") - - with col2: - if st.button("Clear Text", use_container_width=True): - st.rerun() - - with col3: - if st.button("Example Text", use_container_width=True): - example_text = """ - Scientists have discovered a revolutionary new method that could change everything we know about energy production. - This breakthrough technology promises unlimited clean energy with zero environmental impact. - However, experts remain skeptical about the claims and call for peer review of the findings. - """ - st.session_state['example_text'] = example_text - st.rerun() - - # Display example text if button was clicked - if 'example_text' in st.session_state: - st.text_area("Example text loaded:", value=st.session_state['example_text'], height=100) - -def render_prediction_results(result: Dict, user_text: str, show_advanced: bool): - """Render enhanced prediction results with statistical insights""" - prediction = result['prediction'] - confidence = result['confidence'] - - # Main result display - if prediction == "Fake": - st.markdown(f""" -
-

Prediction: FAKE NEWS

-

Confidence: {confidence:.1%}

-

Statistical Analysis Complete

-
- """, unsafe_allow_html=True) - else: - st.markdown(f""" -
-

Prediction: REAL NEWS

-

Confidence: {confidence:.1%}

-

Statistical Analysis Complete

-
- """, unsafe_allow_html=True) - - # Enhanced visualization section - col1, col2 = st.columns(2) - - with col1: - # Advanced confidence gauge - fig_gauge = app.create_advanced_confidence_gauge(confidence, prediction) - st.plotly_chart(fig_gauge, use_container_width=True) - - with col2: - # Create confidence interpretation chart - fig_interp = create_confidence_interpretation_chart(confidence) - st.plotly_chart(fig_interp, use_container_width=True) - - # Statistical details - if show_advanced: - with st.expander("Statistical Analysis Details"): - col1, col2, col3 = st.columns(3) - - with col1: - st.metric("Model Version", result.get('model_version', 'Unknown')) - st.metric("Processing Time", f"{result.get('processing_time', 0):.3f}s") - - with col2: - st.metric("Text Complexity", calculate_text_complexity(user_text)) - st.metric("Statistical Validation", "Bootstrap CI") - - with col3: - # Simulated confidence interval (in real implementation, this would come from API) - ci_lower = max(0, confidence - 0.05) - ci_upper = min(1, confidence + 0.05) - st.metric("Confidence Interval", f"[{ci_lower:.2f}, {ci_upper:.2f}]") - st.metric("Environment", path_manager.environment.title()) - -def create_confidence_interpretation_chart(confidence: float): - """Create chart showing confidence interpretation""" - categories = ['Very Low', 'Low', 'Medium', 'High', 'Very High'] - thresholds = [0.2, 0.4, 0.6, 0.8, 1.0] - colors = ['#e74c3c', '#f39c12', '#f1c40f', '#2ecc71', '#27ae60'] - - # Determine current category - current_idx = next((i for i, t in enumerate(thresholds) if confidence <= t), len(thresholds) - 1) - - fig = go.Figure(go.Bar( - x=categories, - y=[0.2, 0.2, 0.2, 0.2, 0.2], - marker_color=[colors[i] if i == current_idx else '#ecf0f1' for i in range(len(categories))], - text=[f'{t*100:.0f}%' for t in thresholds], - textposition='auto', - )) - - # Add confidence line - fig.add_hline(y=confidence/5, line_dash="dash", line_color="red", - annotation_text=f"Current: {confidence:.1%}") - - fig.update_layout( - title="Confidence Level Interpretation", - xaxis_title="Confidence Category", - yaxis_title="Threshold", - showlegend=False, - height=300, - template="plotly_white" - ) - - return fig - -def calculate_text_complexity(text: str) -> str: - """Calculate and return text complexity rating""" - word_count = len(text.split()) - avg_word_length = np.mean([len(word) for word in text.split()]) - sentence_count = text.count('.') + text.count('!') + text.count('?') - avg_sentence_length = word_count / max(sentence_count, 1) - - # Simple complexity score - complexity_score = (avg_word_length * 2 + avg_sentence_length) / 3 - - if complexity_score < 8: - return "Simple" - elif complexity_score < 12: - return "Moderate" - elif complexity_score < 16: - return "Complex" - else: - return "Very Complex" - -def validate_text_input(text: str) -> tuple[bool, str]: - """Enhanced text validation""" - if not text or not text.strip(): - return False, "Please enter some text to analyze." - - if len(text) < 10: - return False, "Text must be at least 10 characters long for reliable analysis." - - if len(text) > app.config['max_text_length']: - return False, f"Text must be less than {app.config['max_text_length']:,} characters." - - # Check for suspicious content - suspicious_patterns = [' max_articles: - st.warning(f"Processing first {max_articles:,} articles") - df = df.head(max_articles) - - # Process with progress tracking - progress_bar = st.progress(0) - status_text = st.empty() - results = [] - - for i, row in df.iterrows(): - status_text.text(f"Processing article {i+1:,}/{len(df):,}...") - progress_bar.progress((i + 1) / len(df)) - - result = app.make_prediction_request(row['text']) - - if 'error' not in result: - results.append({ - 'article_id': i + 1, - 'text_preview': row['text'][:200] + "...", - 'prediction': result['prediction'], - 'confidence': result['confidence'], - 'processing_time': result.get('processing_time', 0), - 'word_count': len(row['text'].split()), - 'character_count': len(row['text']) - }) - else: - results.append({ - 'article_id': i + 1, - 'text_preview': row['text'][:200] + "...", - 'prediction': 'Error', - 'confidence': 0, - 'processing_time': 0, - 'word_count': len(row['text'].split()), - 'character_count': len(row['text']), - 'error': result['error'] - }) - - status_text.text("Processing complete!") - progress_bar.progress(1.0) - - # Display enhanced results - render_batch_results(results, export_format) + # Basic content validation + if len(v.strip()) < 10: + raise ValueError('Text must be at least 10 characters long') - except Exception as e: - st.error(f"Error processing file: {e}") - -def render_batch_results(results: List[Dict], export_format: str): - """Render enhanced batch analysis results""" - results_df = pd.DataFrame(results) - - # Summary statistics with enhanced metrics - col1, col2, col3, col4, col5 = st.columns(5) - - with col1: - st.metric("Total Processed", len(results_df)) - - with col2: - fake_count = len(results_df[results_df['prediction'] == 'Fake']) - st.metric("Fake News", fake_count) - - with col3: - real_count = len(results_df[results_df['prediction'] == 'Real']) - st.metric("Real News", real_count) - - with col4: - valid_results = results_df[results_df['prediction'] != 'Error'] - avg_confidence = valid_results['confidence'].mean() if len(valid_results) > 0 else 0 - st.metric("Avg Confidence", f"{avg_confidence:.1%}") - - with col5: - total_processing_time = results_df['processing_time'].sum() - st.metric("Total Time", f"{total_processing_time:.1f}s") - - # Enhanced visualizations - if len(results_df) > 0: - # Create comprehensive batch analysis charts - fig = make_subplots( - rows=2, cols=2, - subplot_titles=('Prediction Distribution', 'Confidence Distribution', - 'Processing Time Analysis', 'Text Length vs Confidence'), - specs=[[{"type": "pie"}, {"type": "histogram"}], - [{"type": "scatter"}, {"type": "scatter"}]] - ) + # Check for suspicious patterns + suspicious_patterns = [' 0: - fig.add_trace( - go.Histogram(x=valid_results['confidence'], nbinsx=20), - row=1, col=2 - ) - # Processing time analysis - fig.add_trace( - go.Scatter(x=range(len(valid_results)), y=valid_results['processing_time'], - mode='lines+markers'), - row=2, col=1 - ) +class PredictionResponse(BaseModel): + prediction: str = Field(..., + description="Prediction result: 'Real' or 'Fake'") + confidence: float = Field(..., ge=0.0, le=1.0, + description="Confidence score between 0 and 1") + model_version: str = Field(..., + description="Version of the model used for prediction") + timestamp: str = Field(..., description="Timestamp of the prediction") + processing_time: float = Field(..., + description="Time taken for processing in seconds") - # Text length vs confidence - fig.add_trace( - go.Scatter(x=valid_results['character_count'], y=valid_results['confidence'], - mode='markers', - marker=dict(color=valid_results['prediction'].map({'Fake': 'red', 'Real': 'green'}))), - row=2, col=2 - ) - fig.update_layout(height=700, showlegend=True, title_text="Batch Analysis Results") - st.plotly_chart(fig, use_container_width=True) - - # Results table - st.subheader("Detailed Results") - st.dataframe(results_df, use_container_width=True) - - # Export functionality - if export_format == "CSV": - csv_buffer = io.StringIO() - results_df.to_csv(csv_buffer, index=False) - st.download_button( - label="Download Results (CSV)", - data=csv_buffer.getvalue(), - file_name=f"batch_analysis_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv", - mime="text/csv" - ) +class BatchPredictionRequest(BaseModel): + texts: List[str] = Field(..., min_items=1, max_items=10, + description="List of texts to analyze") -def render_analytics_dashboard(): - """Enhanced analytics dashboard with advanced visualizations""" - st.markdown("## Analytics Dashboard") - st.markdown("*Comprehensive system analytics and performance insights*") + @validator('texts') + def validate_texts(cls, v): + if not v: + raise ValueError('Texts list cannot be empty') - # Refresh controls - col1, col2, col3 = st.columns([1, 1, 2]) - with col1: - if st.button("Refresh Data", use_container_width=True): - st.rerun() - - with col2: - auto_refresh = st.checkbox("Auto Refresh", value=st.session_state.auto_refresh) - st.session_state.auto_refresh = auto_refresh - - with col3: - time_range = st.selectbox("Time Range", ["Last Hour", "Last 24 Hours", "Last Week", "All Time"]) + for text in v: + if not text or not text.strip(): + raise ValueError('All texts must be non-empty') - # Prediction history analysis - if st.session_state.prediction_history: - st.subheader("Prediction History Analysis") - - fig_history = app.create_prediction_history_chart() - if fig_history: - st.plotly_chart(fig_history, use_container_width=True) + if len(text.strip()) < 10: + raise ValueError( + 'All texts must be at least 10 characters long') - # Advanced analytics - with st.expander("Advanced Analytics", expanded=True): - df = pd.DataFrame(st.session_state.prediction_history) - - col1, col2 = st.columns(2) - - with col1: - # Performance metrics over time - st.subheader("Performance Trends") - - # Group by hour for trend analysis - df['timestamp'] = pd.to_datetime(df['timestamp']) - df['hour'] = df['timestamp'].dt.floor('H') - hourly_stats = df.groupby('hour').agg({ - 'confidence': ['mean', 'std', 'count'], - 'processing_time': 'mean' - }).round(3) - - fig_trends = go.Figure() - - fig_trends.add_trace(go.Scatter( - x=hourly_stats.index, - y=hourly_stats[('confidence', 'mean')], - mode='lines+markers', - name='Avg Confidence', - line=dict(color='blue') - )) - - fig_trends.update_layout( - title="Confidence Trends Over Time", - xaxis_title="Time", - yaxis_title="Average Confidence", - template="plotly_white" - ) - - st.plotly_chart(fig_trends, use_container_width=True) - - with col2: - # Text analysis insights - st.subheader("Text Analysis Insights") - - # Calculate correlations - correlation_data = { - 'Text Length': df['text_length'], - 'Word Count': df['word_count'], - 'Confidence': df['confidence'], - 'Processing Time': df['processing_time'] - } - - corr_df = pd.DataFrame(correlation_data).corr() - - fig_corr = px.imshow( - corr_df, - text_auto=True, - aspect="auto", - title="Feature Correlations" - ) - - st.plotly_chart(fig_corr, use_container_width=True) + return [text.strip() for text in v] - else: - st.info("No prediction history available. Make some predictions to see analytics.") -def render_cv_results_dashboard(): - """Enhanced cross-validation results dashboard""" - st.markdown("## Cross-Validation Results Dashboard") - st.markdown("*Statistical validation and model performance analysis*") +class BatchPredictionResponse(BaseModel): + predictions: List[PredictionResponse] + total_count: int + processing_time: float - # Get CV results - cv_results = app.get_cv_results_from_api() - - if cv_results and 'error' not in cv_results: - # Model information header - model_info = { - 'Version': cv_results.get('model_version', 'Unknown'), - 'Type': cv_results.get('model_type', 'Unknown'), - 'Training Date': cv_results.get('training_timestamp', 'Unknown') - } - - col1, col2, col3 = st.columns(3) - for i, (key, value) in enumerate(model_info.items()): - with [col1, col2, col3][i]: - st.metric(key, str(value)) - - # CV Performance Visualization - st.subheader("Cross-Validation Performance") - - cv_data = cv_results.get('cross_validation', {}) - if cv_data: - # Create comprehensive CV visualization - fig_cv = app.create_cv_performance_visualization(cv_results) - if fig_cv: - st.plotly_chart(fig_cv, use_container_width=True) - - # Statistical summary - test_scores = cv_data.get('test_scores', {}) - if test_scores: - st.subheader("Performance Metrics Summary") + +class HealthResponse(BaseModel): + status: str + timestamp: str + model_health: Dict[str, Any] + system_health: Dict[str, Any] + api_health: Dict[str, Any] + environment_info: Dict[str, Any] + + +# Rate limiting +async def rate_limit_check(request: Request): + """Check rate limits""" + client_ip = request.client.host + current_time = time.time() + + # Clean old entries + rate_limit_storage[client_ip] = [ + timestamp for timestamp in rate_limit_storage[client_ip] + if current_time - timestamp < 3600 # 1 hour window + ] + + # Check rate limit (100 requests per hour) + if len(rate_limit_storage[client_ip]) >= 100: + raise HTTPException( + status_code=429, + detail="Rate limit exceeded. Maximum 100 requests per hour." + ) + + # Add current request + rate_limit_storage[client_ip].append(current_time) + + +# Logging middleware +@app.middleware("http") +async def log_requests(request: Request, call_next): + """Log all requests""" + start_time = time.time() + + response = await call_next(request) + + process_time = time.time() - start_time + + log_data = { + "method": request.method, + "url": str(request.url), + "client_ip": request.client.host, + "status_code": response.status_code, + "process_time": process_time, + "timestamp": datetime.now().isoformat() + } + + logger.info(f"Request: {json.dumps(log_data)}") + + return response + + +# Error handlers +@app.exception_handler(HTTPException) +async def http_exception_handler(request: Request, exc: HTTPException): + """Handle HTTP exceptions""" + error_data = { + "error": True, + "message": exc.detail, + "status_code": exc.status_code, + "timestamp": datetime.now().isoformat(), + "path": request.url.path + } + + logger.error(f"HTTP Exception: {json.dumps(error_data)}") + + return JSONResponse( + status_code=exc.status_code, + content=error_data + ) + + +@app.exception_handler(Exception) +async def general_exception_handler(request: Request, exc: Exception): + """Handle general exceptions""" + error_data = { + "error": True, + "message": "Internal server error", + "timestamp": datetime.now().isoformat(), + "path": request.url.path + } + + logger.error(f"General Exception: {str(exc)}\n{traceback.format_exc()}") + + return JSONResponse( + status_code=500, + content=error_data + ) + + +# API Routes +@app.get("/", response_model=Dict[str, str]) +async def root(): + """Root endpoint""" + return { + "message": "Fake News Detection API", + "version": "2.0.0", + "environment": path_manager.environment, + "documentation": "/docs", + "health_check": "/health" + } + + +@app.post("/predict", response_model=PredictionResponse) +async def predict( + request: PredictionRequest, + background_tasks: BackgroundTasks, + http_request: Request, + _: None = Depends(rate_limit_check) + ): + """ + Predict whether a news article is fake or real using blue-green deployment routing + - **text**: The news article text to analyze + - **returns**: Prediction result with confidence score + """ + start_time = time.time() + client_ip = http_request.client.host + user_agent = http_request.headers.get("user-agent") + + try: + # Check model health + if model_manager.health_status != "healthy": + raise HTTPException( + status_code=503, + detail="Model is not available. Please try again later." + ) + + # Data validation before prediction + validation_result = None + try: + + validator = DataValidator() + validation_result = validator.validate_text(request.text) + + # Log validation result + validation_entry = { + 'timestamp': datetime.now().isoformat(), + 'text_length': len(request.text), + 'validation_level': validation_result.validation_level.value, + 'quality_score': validation_result.quality_score, + 'issues': [issue.dict() for issue in validation_result.issues], + 'passed_validation': validation_result.validation_level != TextQualityLevel.INVALID, + 'client_ip': client_ip, + 'user_agent': user_agent[:100] if user_agent else None + } + + # Save validation results + try: + validation_log_path = path_manager.get_logs_path("validation_log.json") + if validation_log_path.exists(): + with open(validation_log_path, 'r') as f: + validation_data = json.load(f) + else: + validation_data = [] - metrics_cols = st.columns(len(test_scores)) - for idx, (metric, scores) in enumerate(test_scores.items()): - with metrics_cols[idx]: - if isinstance(scores, dict): - mean_val = scores.get('mean', 0) - std_val = scores.get('std', 0) - st.metric( - f"{metric.upper()}", - f"{mean_val:.4f}", - delta=f"Âą{std_val:.4f}" - ) - - # Model quality indicators - performance_indicators = cv_data.get('performance_indicators', {}) - if performance_indicators: - st.subheader("Model Quality Assessment") + validation_data.append(validation_entry) - col1, col2 = st.columns(2) + # Keep only last 1000 entries + if len(validation_data) > 1000: + validation_data = validation_data[-1000:] - with col1: - overfitting_score = performance_indicators.get('overfitting_score', 'Unknown') - if isinstance(overfitting_score, (int, float)): - if overfitting_score < 0.05: - st.success(f"**Overfitting Score:** {overfitting_score:.4f} (Low Risk)") - elif overfitting_score < 0.15: - st.warning(f"**Overfitting Score:** {overfitting_score:.4f} (Moderate Risk)") - else: - st.error(f"**Overfitting Score:** {overfitting_score:.4f} (High Risk)") + with open(validation_log_path, 'w') as f: + json.dump(validation_data, f, indent=2) + + logger.debug(f"Validation logged: level={validation_result.validation_level.value}, quality={validation_result.quality_score:.3f}") - with col2: - stability_score = performance_indicators.get('stability_score', 'Unknown') - if isinstance(stability_score, (int, float)): - if stability_score > 0.9: - st.success(f"**Stability Score:** {stability_score:.4f} (High)") - elif stability_score > 0.7: - st.warning(f"**Stability Score:** {stability_score:.4f} (Moderate)") - else: - st.error(f"**Stability Score:** {stability_score:.4f} (Low)") - - # Detailed fold results - with st.expander("Detailed Cross-Validation Results"): - fold_results = cv_data.get('individual_fold_results', []) - if fold_results: - fold_df = pd.DataFrame([ - { - 'Fold': fold.get('fold', i+1), - 'Test F1': fold.get('test_scores', {}).get('f1', 0), - 'Test Accuracy': fold.get('test_scores', {}).get('accuracy', 0), - 'Train F1': fold.get('train_scores', {}).get('f1', 0), - 'Train Accuracy': fold.get('train_scores', {}).get('accuracy', 0) - } - for i, fold in enumerate(fold_results) - ]) - st.dataframe(fold_df, use_container_width=True) - - else: - error_msg = cv_results.get('error', 'No CV results available') if cv_results else 'API not available' - st.warning(f"Cross-validation results not available: {error_msg}") - - # Show placeholder for demo - st.info("CV results will be displayed here once model training is complete with cross-validation enabled.") - -def render_model_training_interface(): - """Enhanced model training interface""" - st.markdown("## Model Training Interface") - st.markdown("*Train custom models with advanced configuration options*") - - st.info("Upload your own dataset to retrain the model with custom data and statistical validation.") - - # Training configuration - with st.expander("Training Configuration", expanded=True): - col1, col2 = st.columns(2) - - with col1: - st.subheader("Basic Configuration") - test_size = st.slider("Test Size", 0.1, 0.4, 0.2, 0.05) - cv_folds = st.slider("CV Folds", 3, 10, 5) - max_features = st.number_input("Max Features", 1000, 20000, 10000, 1000) - - with col2: - st.subheader("Advanced Options") - cross_validation = st.checkbox("Enable Cross Validation", value=True) - statistical_validation = st.checkbox("Statistical Validation", value=True) - bootstrap_analysis = st.checkbox("Bootstrap Analysis", value=True) - ensemble_selection = st.checkbox("Ensemble Selection", value=False) - - # File upload - training_file = st.file_uploader( - "Upload training dataset (CSV):", - type=['csv'], - help="CSV file should contain 'text' and 'label' columns (label: 0=Real, 1=Fake)" - ) - - if training_file: - try: - df_train = pd.read_csv(training_file) + except Exception as e: + logger.warning(f"Could not save validation log: {e}") - required_columns = ['text', 'label'] - missing_columns = [col for col in required_columns if col not in df_train.columns] + # Block invalid inputs + if validation_result.validation_level == TextQualityLevel.INVALID: + raise HTTPException( + status_code=400, + detail=f"Input validation failed: {validation_result.issues[0].message if validation_result.issues else 'Invalid input'}" + ) + + except ImportError as e: + logger.warning(f"Data validation components not available: {e}") + validation_result = None + except Exception as e: + logger.warning(f"Data validation failed: {e}") + validation_result = None + + # Prepare request data for routing + request_data = { + 'client_id': client_ip, + 'user_agent': user_agent, + 'timestamp': datetime.now().isoformat() + } + + # Use traffic router if available, otherwise fallback to model manager + if traffic_router and (traffic_router.blue_model or traffic_router.green_model): + try: + environment, result = traffic_router.make_prediction(request.text, request_data) + + # Extract results from traffic router response + label = result['prediction'] + confidence = result['confidence'] + processing_time = result['processing_time'] + + logger.debug(f"Used {environment} environment for prediction") + + except Exception as e: + logger.warning(f"Traffic router failed, falling back to model manager: {e}") + # Fallback to original model manager + label, confidence = model_manager.predict(request.text) + processing_time = time.time() - start_time + environment = "blue" # Default fallback + else: + # Fallback to original model manager + label, confidence = model_manager.predict(request.text) + processing_time = time.time() - start_time + environment = "blue" # Default when no traffic router + + # Record prediction for monitoring + prediction_monitor.record_prediction( + prediction=label, + confidence=confidence, + processing_time=processing_time, + text=request.text, + model_version=model_manager.model_metadata.get('model_version', 'unknown'), + client_id=client_ip, + user_agent=user_agent + ) - if missing_columns: - st.error(f"Missing required columns: {missing_columns}") - else: - st.success(f"Training file loaded: {len(df_train):,} samples") - - # Enhanced data validation and visualization - render_training_data_analysis(df_train) - - # Start training with enhanced progress tracking - if st.button("Start Training", type="primary", use_container_width=True): - render_training_process(df_train, { - 'test_size': test_size, - 'cv_folds': cv_folds, - 'max_features': max_features, - 'cross_validation': cross_validation, - 'statistical_validation': statistical_validation, - 'bootstrap_analysis': bootstrap_analysis, - 'ensemble_selection': ensemble_selection - }) + # Record API request metrics + metrics_collector.record_api_request( + endpoint="/predict", + method="POST", + response_time=processing_time, + status_code=200, + client_ip=client_ip + ) - except Exception as e: - st.error(f"Error loading training file: {e}") + # Create response + response = PredictionResponse( + prediction=label, + confidence=confidence, + model_version=model_manager.model_metadata.get('model_version', 'unknown'), + timestamp=datetime.now().isoformat(), + processing_time=processing_time + ) -def render_training_data_analysis(df_train: pd.DataFrame): - """Render training data analysis""" - st.subheader("Dataset Analysis") - - label_counts = df_train['label'].value_counts() - - col1, col2 = st.columns(2) - - with col1: - # Basic statistics - st.metric("Total Samples", len(df_train)) - st.metric("Real News (0)", label_counts.get(0, 0)) - st.metric("Fake News (1)", label_counts.get(1, 0)) - st.metric("Class Balance", f"{min(label_counts) / max(label_counts):.2f}") - - with col2: - # Label distribution visualization - fig_labels = px.pie( - values=label_counts.values, - names=['Real', 'Fake'], - title="Class Distribution", - color_discrete_map={'Real': '#27ae60', 'Fake': '#e74c3c'} + # Log prediction (background task) with validation info + background_tasks.add_task( + log_prediction_with_validation, + request.text, + label, + confidence, + client_ip, + processing_time, + validation_result ) - st.plotly_chart(fig_labels, use_container_width=True) - # Text analysis - with st.expander("Text Analysis"): - df_train['text_length'] = df_train['text'].str.len() - df_train['word_count'] = df_train['text'].str.split().str.len() + return response + + except HTTPException: + # Record error for failed requests + processing_time = time.time() - start_time + prediction_monitor.record_error( + error_type="http_error", + error_message="Service unavailable or validation failed", + context={"status_code": 503} + ) + metrics_collector.record_api_request( + endpoint="/predict", + method="POST", + response_time=processing_time, + status_code=400 if "validation failed" in str(sys.exc_info()[1]) else 503, + client_ip=client_ip + ) + raise + except Exception as e: + processing_time = time.time() - start_time - col1, col2 = st.columns(2) + # Record error + prediction_monitor.record_error( + error_type="prediction_error", + error_message=str(e), + context={"text_length": len(request.text)} + ) - with col1: - fig_length = px.histogram( - df_train, x='text_length', color='label', - title="Text Length Distribution", - color_discrete_map={0: '#27ae60', 1: '#e74c3c'} - ) - st.plotly_chart(fig_length, use_container_width=True) + metrics_collector.record_api_request( + endpoint="/predict", + method="POST", + response_time=processing_time, + status_code=500, + client_ip=client_ip + ) - with col2: - fig_words = px.histogram( - df_train, x='word_count', color='label', - title="Word Count Distribution", - color_discrete_map={0: '#27ae60', 1: '#e74c3c'} - ) - st.plotly_chart(fig_words, use_container_width=True) - -def render_training_process(df_train: pd.DataFrame, config: Dict): - """Render enhanced training process with real-time updates""" - # Create containers for real-time updates - progress_container = st.container() - status_container = st.container() - results_container = st.container() - - with progress_container: - progress_bar = st.progress(0) - status_text = st.empty() - - # Training steps - training_steps = [ - "Initializing training environment...", - "Preprocessing data and validation...", - "Splitting dataset with stratification...", - "Feature engineering and vectorization...", - "Training base models...", - "Cross-validation analysis...", - "Statistical validation...", - "Model comparison and selection...", - "Saving model artifacts...", - "Generating performance report..." - ] + logger.error(f"Prediction failed: {e}") + raise HTTPException( + status_code=500, + detail=f"Prediction failed: {str(e)}" + ) - # Simulate training process (in real implementation, this would call actual training) - for i, step in enumerate(training_steps): - with status_container: - status_text.text(step) + +async def log_prediction_with_validation(text: str, prediction: str, confidence: float, + client_ip: str, processing_time: float, + validation_result=None): + """Enhanced logging function that includes validation data""" + try: + prediction_entry = { + 'timestamp': datetime.now().isoformat(), + 'prediction': prediction, + 'confidence': confidence, + 'processing_time': processing_time, + 'client_ip': client_ip, + 'text_length': len(text), + 'text_preview': text[:100] + "..." if len(text) > 100 else text + } + + # Add validation information if available + if validation_result: + prediction_entry.update({ + 'validation_level': validation_result.validation_level.value, + 'quality_score': validation_result.quality_score, + 'validation_issues_count': len(validation_result.issues), + 'validation_passed': validation_result.validation_level.value != 'invalid' + }) + + prediction_log_path = path_manager.get_logs_path("prediction_log.json") + + if prediction_log_path.exists(): + with open(prediction_log_path, 'r') as f: + prediction_data = json.load(f) + else: + prediction_data = [] + + prediction_data.append(prediction_entry) + + # Keep only last 1000 entries + if len(prediction_data) > 1000: + prediction_data = prediction_data[-1000:] + + with open(prediction_log_path, 'w') as f: + json.dump(prediction_data, f, indent=2) + + except Exception as e: + logger.error(f"Failed to log prediction: {e}") + + +async def log_prediction_with_validation(text: str, prediction: str, confidence: float, + client_ip: str, processing_time: float, + validation_result=None): + """Enhanced logging function that includes validation data""" + try: + prediction_entry = { + 'timestamp': datetime.now().isoformat(), + 'prediction': prediction, + 'confidence': confidence, + 'processing_time': processing_time, + 'client_ip': client_ip, + 'text_length': len(text), + 'text_preview': text[:100] + "..." if len(text) > 100 else text + } + + # Add validation information if available + if validation_result: + prediction_entry.update({ + 'validation_level': validation_result.validation_level.value, + 'quality_score': validation_result.quality_score, + 'validation_issues_count': len(validation_result.issues), + 'validation_passed': validation_result.validation_level.value != 'invalid' + }) + + prediction_log_path = path_manager.get_logs_path("prediction_log.json") + + if prediction_log_path.exists(): + with open(prediction_log_path, 'r') as f: + prediction_data = json.load(f) + else: + prediction_data = [] - progress_bar.progress((i + 1) / len(training_steps)) - time.sleep(1) # Simulate processing time + prediction_data.append(prediction_entry) + + # Keep only last 1000 entries + if len(prediction_data) > 1000: + prediction_data = prediction_data[-1000:] + + with open(prediction_log_path, 'w') as f: + json.dump(prediction_data, f, indent=2) + + except Exception as e: + logger.error(f"Failed to log prediction: {e}") - # Show results - with results_container: - st.success("Training completed successfully!") + +async def log_prediction_with_validation(text: str, prediction: str, confidence: float, + client_ip: str, processing_time: float, + validation_result=None): + """Enhanced logging function that includes validation data""" + try: + prediction_entry = { + 'timestamp': datetime.now().isoformat(), + 'prediction': prediction, + 'confidence': confidence, + 'processing_time': processing_time, + 'client_ip': client_ip, + 'text_length': len(text), + 'text_preview': text[:100] + "..." if len(text) > 100 else text + } + + # Add validation information if available + if validation_result: + prediction_entry.update({ + 'validation_level': validation_result.validation_level.value, + 'quality_score': validation_result.quality_score, + 'validation_issues_count': len(validation_result.issues) + }) + + prediction_log_path = path_manager.get_logs_path("prediction_log.json") + + if prediction_log_path.exists(): + with open(prediction_log_path, 'r') as f: + prediction_data = json.load(f) + else: + prediction_data = [] - # Simulated results (in real implementation, these would come from actual training) - col1, col2, col3, col4 = st.columns(4) + prediction_data.append(prediction_entry) - with col1: - st.metric("Final F1 Score", "0.847") - with col2: - st.metric("Cross-Val Mean", "0.852") - with col3: - st.metric("Training Time", "125.3s") - with col4: - st.metric("Model Selected", "Ensemble") + # Keep only last 1000 entries + if len(prediction_data) > 1000: + prediction_data = prediction_data[-1000:] + + with open(prediction_log_path, 'w') as f: + json.dump(prediction_data, f, indent=2) + + except Exception as e: + logger.error(f"Failed to log prediction: {e}") + + +@app.post("/predict/batch", response_model=BatchPredictionResponse) +async def predict_batch( + request: BatchPredictionRequest, + background_tasks: BackgroundTasks, + http_request: Request, + _: None = Depends(rate_limit_check) +): + """ + Predict multiple news articles in batch + - **texts**: List of news article texts to analyze + - **returns**: List of prediction results + """ + start_time = time.time() - st.info("Model training complete. The new model has been validated and is ready for deployment.") + try: + # Check model health + if model_manager.health_status != "healthy": + raise HTTPException( + status_code=503, + detail="Model is not available. Please try again later." + ) -def render_system_logs(): - """Enhanced system logs interface""" - st.markdown("## System Logs") - st.markdown("*Real-time system monitoring and log analysis*") + predictions = [] - # Log selection and filtering - col1, col2, col3 = st.columns([2, 1, 1]) - - with col1: - log_files = { - "Activity Log": path_manager.get_activity_log_path(), - "Prediction Log": path_manager.get_logs_path("prediction_log.json"), - "Scheduler Log": path_manager.get_logs_path("scheduler_execution.json"), - "Drift History": path_manager.get_logs_path("drift_history.json"), - "Validation Log": path_manager.get_logs_path("validation_log.json"), - "Metrics Log": path_manager.get_logs_path("monitor/metrics.json"), - "Alerts Log": path_manager.get_logs_path("monitor/alerts.json") + for text in request.texts: + try: + label, confidence = model_manager.predict(text) + + prediction = PredictionResponse( + prediction=label, + confidence=confidence, + model_version=model_manager.model_metadata.get( + 'model_version', 'unknown'), + timestamp=datetime.now().isoformat(), + processing_time=0.0 # Will be updated with total time + ) + + predictions.append(prediction) + + except Exception as e: + logger.error(f"Batch prediction failed for text: {e}") + # Continue with other texts + continue + + # Calculate total processing time + total_processing_time = time.time() - start_time + + # Update processing time for all predictions + for prediction in predictions: + prediction.processing_time = total_processing_time / \ + len(predictions) + + response = BatchPredictionResponse( + predictions=predictions, + total_count=len(predictions), + processing_time=total_processing_time + ) + + # Log batch prediction (background task) + background_tasks.add_task( + log_batch_prediction, + len(request.texts), + len(predictions), + http_request.client.host, + total_processing_time + ) + + return response + + except HTTPException: + raise + except Exception as e: + logger.error(f"Batch prediction failed: {e}") + raise HTTPException( + status_code=500, + detail=f"Batch prediction failed: {str(e)}" + ) + + +@app.get("/health", response_model=HealthResponse) +async def health_check(): + """ + Comprehensive health check endpoint + - **returns**: Detailed health status of the API and model + """ + try: + # Model health + model_health = model_manager.health_check() + + # System health + import psutil + system_health = { + "cpu_percent": psutil.cpu_percent(), + "memory_percent": psutil.virtual_memory().percent, + "disk_percent": psutil.disk_usage('/').percent, + "uptime": time.time() - psutil.boot_time() } - selected_log = st.selectbox("Select log file:", list(log_files.keys())) - - with col2: - max_entries = st.number_input("Max entries:", min_value=10, max_value=1000, value=50) - - with col3: - log_level = st.selectbox("Filter level:", ["All", "ERROR", "WARNING", "INFO", "DEBUG"]) - # Enhanced log display - if st.button("Load Log", type="primary", use_container_width=True): - log_path = log_files[selected_log] + # API health + api_health = { + "rate_limit_active": len(rate_limit_storage) > 0, + "active_connections": len(rate_limit_storage) + } + + # Environment info + environment_info = path_manager.get_environment_info() + + # Overall status + overall_status = "healthy" if model_health["status"] == "healthy" else "unhealthy" + + return HealthResponse( + status=overall_status, + timestamp=datetime.now().isoformat(), + model_health=model_health, + system_health=system_health, + api_health=api_health, + environment_info=environment_info + ) + + except Exception as e: + logger.error(f"Health check failed: {e}") + return HealthResponse( + status="unhealthy", + timestamp=datetime.now().isoformat(), + model_health={"status": "unhealthy", "error": str(e)}, + system_health={"error": str(e)}, + api_health={"error": str(e)}, + environment_info={"error": str(e)} + ) + + +@app.get("/health/detailed") +async def detailed_health_check(): + """ + Detailed health check endpoint with comprehensive CV results + - **returns**: Detailed health status including cross-validation metrics + """ + try: + # Get basic health information + basic_health = await health_check() - if log_path.exists(): + # Load metadata to get CV results + metadata_path = path_manager.get_metadata_path() + cv_details = {} + + if metadata_path.exists(): try: - with open(log_path, 'r') as f: - log_data = json.load(f) + with open(metadata_path, 'r') as f: + metadata = json.load(f) - if log_data: - # Filter by level if specified - if log_level != "All": - log_data = [entry for entry in log_data if entry.get('level', '').upper() == log_level] - - st.info(f"Total entries: {len(log_data):,}") - - if len(log_data) > max_entries: - log_data = log_data[-max_entries:] - st.warning(f"Showing last {max_entries:,} entries") + # Extract cross-validation information + cv_info = metadata.get('cross_validation', {}) + if cv_info: + cv_details = { + 'cross_validation_available': True, + 'n_splits': cv_info.get('n_splits', 'Unknown'), + 'test_scores': cv_info.get('test_scores', {}), + 'train_scores': cv_info.get('train_scores', {}), + 'overfitting_score': cv_info.get('overfitting_score', 'Unknown'), + 'stability_score': cv_info.get('stability_score', 'Unknown'), + 'individual_fold_results': cv_info.get('individual_fold_results', []) + } - # Enhanced log visualization - if log_data: - # Create log analysis charts - df_logs = pd.DataFrame(log_data) - - if 'timestamp' in df_logs.columns: - df_logs['timestamp'] = pd.to_datetime(df_logs['timestamp'], errors='coerce') - - col1, col2 = st.columns(2) - - with col1: - # Log level distribution - if 'level' in df_logs.columns: - level_counts = df_logs['level'].value_counts() - fig_levels = px.pie(values=level_counts.values, names=level_counts.index, - title="Log Level Distribution") - st.plotly_chart(fig_levels, use_container_width=True) - - with col2: - # Log frequency over time - if not df_logs['timestamp'].isna().all(): - df_logs['hour'] = df_logs['timestamp'].dt.floor('H') - hourly_counts = df_logs['hour'].value_counts().sort_index() - - fig_freq = px.line(x=hourly_counts.index, y=hourly_counts.values, - title="Log Frequency Over Time") - st.plotly_chart(fig_freq, use_container_width=True) - - # Interactive log table - st.subheader("Log Entries") - - # Add search functionality - search_term = st.text_input("Search logs:", placeholder="Enter search term...") - - if search_term: - # Search in all string columns - mask = df_logs.astype(str).apply(lambda x: x.str.contains(search_term, case=False, na=False)).any(axis=1) - df_logs = df_logs[mask] - st.info(f"Found {len(df_logs)} matching entries") - - # Display with enhanced formatting - st.dataframe( - df_logs.tail(50), # Show most recent entries - use_container_width=True, - column_config={ - "timestamp": st.column_config.DatetimeColumn("Timestamp"), - "level": st.column_config.TextColumn("Level"), - "message": st.column_config.TextColumn("Message", width="large") - } - ) - - # Export functionality - csv_buffer = io.StringIO() - df_logs.to_csv(csv_buffer, index=False) - - st.download_button( - label="Download Logs (CSV)", - data=csv_buffer.getvalue(), - file_name=f"{selected_log.lower().replace(' ', '_')}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv", - mime="text/csv" - ) - - # Raw JSON view - with st.expander("Raw JSON Data"): - st.json(log_data[-10:]) # Show last 10 entries - - else: - st.warning("Log file is empty") + # Add summary statistics + test_scores = cv_info.get('test_scores', {}) + if 'f1' in test_scores: + cv_details['cv_f1_summary'] = { + 'mean': test_scores['f1'].get('mean', 'Unknown'), + 'std': test_scores['f1'].get('std', 'Unknown'), + 'min': test_scores['f1'].get('min', 'Unknown'), + 'max': test_scores['f1'].get('max', 'Unknown'), + 'scores': test_scores['f1'].get('scores', []) + } + if 'accuracy' in test_scores: + cv_details['cv_accuracy_summary'] = { + 'mean': test_scores['accuracy'].get('mean', 'Unknown'), + 'std': test_scores['accuracy'].get('std', 'Unknown'), + 'min': test_scores['accuracy'].get('min', 'Unknown'), + 'max': test_scores['accuracy'].get('max', 'Unknown'), + 'scores': test_scores['accuracy'].get('scores', []) + } + + # Add model comparison results if available + statistical_validation = metadata.get('statistical_validation', {}) + if statistical_validation: + cv_details['statistical_validation'] = statistical_validation + + promotion_validation = metadata.get('promotion_validation', {}) + if promotion_validation: + cv_details['promotion_validation'] = promotion_validation + + # Add model version and training info + cv_details['model_info'] = { + 'model_version': metadata.get('model_version', 'Unknown'), + 'model_type': metadata.get('model_type', 'Unknown'), + 'training_timestamp': metadata.get('timestamp', 'Unknown'), + 'promotion_timestamp': metadata.get('promotion_timestamp'), + 'cv_f1_mean': metadata.get('cv_f1_mean'), + 'cv_f1_std': metadata.get('cv_f1_std'), + 'cv_accuracy_mean': metadata.get('cv_accuracy_mean'), + 'cv_accuracy_std': metadata.get('cv_accuracy_std') + } + except Exception as e: - st.error(f"Error reading log: {e}") + cv_details = { + 'cross_validation_available': False, + 'error': f"Failed to load CV details: {str(e)}" + } else: - st.warning(f"Log file not found: {log_path}") + cv_details = { + 'cross_validation_available': False, + 'error': "No metadata file found" + } + + # Combine basic health with detailed CV information + detailed_response = { + 'basic_health': basic_health, + 'cross_validation_details': cv_details, + 'detailed_check_timestamp': datetime.now().isoformat() + } + + return detailed_response + + except Exception as e: + logger.error(f"Detailed health check failed: {e}") + return { + 'basic_health': {'status': 'unhealthy', 'error': str(e)}, + 'cross_validation_details': { + 'cross_validation_available': False, + 'error': f"Detailed health check failed: {str(e)}" + }, + 'detailed_check_timestamp': datetime.now().isoformat() + } -def render_health_monitoring(): - """Enhanced health monitoring dashboard""" - st.markdown("## Health Monitoring Dashboard") - st.markdown("*Comprehensive system health and performance monitoring*") - # Auto-refresh controls - col1, col2, col3 = st.columns([1, 1, 2]) - - with col1: - if st.button("Refresh Status", use_container_width=True): - st.rerun() - - with col2: - auto_refresh = st.checkbox("Auto Refresh (30s)", value=st.session_state.auto_refresh) - st.session_state.auto_refresh = auto_refresh +# @app.get("/cv/results") +# async def get_cv_results(): +# """ +# Get detailed cross-validation results for the current model +# - **returns**: Comprehensive CV metrics and fold-by-fold results +# """ +# try: +# metadata_path = path_manager.get_metadata_path() + +# if not metadata_path.exists(): +# raise HTTPException( +# status_code=404, +# detail="Model metadata not found. Train a model first." +# ) + +# with open(metadata_path, 'r') as f: +# metadata = json.load(f) + +# cv_info = metadata.get('cross_validation', {}) + +# if not cv_info: +# raise HTTPException( +# status_code=404, +# detail="No cross-validation results found. Model may not have been trained with CV." +# ) + +# # Structure the CV results for API response +# cv_response = { +# 'model_version': metadata.get('model_version', 'Unknown'), +# 'model_type': metadata.get('model_type', 'Unknown'), +# 'training_timestamp': metadata.get('timestamp', 'Unknown'), +# 'cross_validation': { +# 'methodology': { +# 'n_splits': cv_info.get('n_splits', 'Unknown'), +# 'cv_type': 'StratifiedKFold', +# 'random_state': 42 +# }, +# 'test_scores': cv_info.get('test_scores', {}), +# 'train_scores': cv_info.get('train_scores', {}), +# 'performance_indicators': { +# 'overfitting_score': cv_info.get('overfitting_score', 'Unknown'), +# 'stability_score': cv_info.get('stability_score', 'Unknown') +# }, +# 'individual_fold_results': cv_info.get('individual_fold_results', []) +# }, +# 'statistical_validation': metadata.get('statistical_validation', {}), +# 'promotion_validation': metadata.get('promotion_validation', {}), +# 'summary_statistics': { +# 'cv_f1_mean': metadata.get('cv_f1_mean'), +# 'cv_f1_std': metadata.get('cv_f1_std'), +# 'cv_accuracy_mean': metadata.get('cv_accuracy_mean'), +# 'cv_accuracy_std': metadata.get('cv_accuracy_std') +# } +# } + +# return cv_response + +# except HTTPException: +# raise +# except Exception as e: +# logger.error(f"CV results retrieval failed: {e}") +# raise HTTPException( +# status_code=500, +# detail=f"Failed to retrieve CV results: {str(e)}" +# ) + +@app.get("/cv/results") +async def get_cv_results(): + """Get cross-validation results from cv_results.json file""" + try: + # First try to load from cv_results.json (where performance_indicators are saved) + cv_results_path = path_manager.get_logs_path("cv_results.json") + + if cv_results_path.exists(): + with open(cv_results_path, 'r') as f: + cv_data = json.load(f) + + # Load metadata for additional info + metadata_path = path_manager.get_metadata_path() + model_version = 'v1.0_init' + model_type = 'logistic_regression_pipeline' + timestamp = 'Unknown' + + if metadata_path.exists(): + with open(metadata_path, 'r') as f: + metadata = json.load(f) + model_version = metadata.get('model_version', model_version) + model_type = metadata.get('model_type', model_type) + timestamp = metadata.get('timestamp', timestamp) + + # Return cv_data with the performance_indicators intact + response = { + 'cross_validation': cv_data, + 'model_version': model_version, + 'model_type': model_type, + 'training_timestamp': timestamp + } + + return response + + # Fallback to metadata if cv_results.json doesn't exist + metadata_path = path_manager.get_metadata_path() + if not metadata_path.exists(): + raise HTTPException(status_code=404, detail="No CV results available") + + with open(metadata_path, 'r') as f: + metadata = json.load(f) + + # Create basic structure from metadata (without performance_indicators) + cv_response = { + 'model_version': metadata.get('model_version', 'Unknown'), + 'model_type': metadata.get('model_type', 'Unknown'), + 'training_timestamp': metadata.get('timestamp', 'Unknown'), + 'cross_validation': { + 'methodology': { + 'n_splits': 3, + 'cv_type': 'StratifiedKFold', + 'random_state': 42 + }, + 'test_scores': { + 'f1': { + 'mean': metadata.get('cv_f1_mean'), + 'std': metadata.get('cv_f1_std') + }, + 'accuracy': { + 'mean': metadata.get('cv_accuracy_mean'), + 'std': metadata.get('cv_accuracy_std') + } + } + } + } + + return cv_response + + except HTTPException: + raise + except Exception as e: + logger.error(f"Failed to load CV results: {e}") + raise HTTPException(status_code=500, detail=f"Failed to retrieve CV results: {str(e)}") - # System health overview - app.render_system_health_dashboard() - # Environment information - st.markdown("---") - st.subheader("Environment Configuration") - - env_info = path_manager.get_environment_info() - - col1, col2 = st.columns(2) - - with col1: - st.markdown(f"**Environment:** {env_info['environment']}") - st.markdown(f"**Base Directory:** `{env_info['base_dir']}`") - st.markdown(f"**Python Path:** `{env_info['python_path']}`") - - with col2: - st.markdown(f"**Data Directory:** `{env_info['data_dir']}`") - st.markdown(f"**Model Directory:** `{env_info['model_dir']}`") - st.markdown(f"**Logs Directory:** `{env_info['logs_dir']}`") - - # File system status - st.subheader("File System Status") - - col1, col2 = st.columns(2) - - with col1: - st.markdown("**Available Datasets:**") - datasets = env_info.get('available_datasets', {}) - for name, exists in datasets.items(): - status = "✅" if exists else "❌" - st.markdown(f"{status} {name}") - - with col2: - st.markdown("**Available Models:**") - models = env_info.get('available_models', {}) - for name, exists in models.items(): - status = "✅" if exists else "❌" - st.markdown(f"{status} {name}") - - # Model performance monitoring - if app.api_available: - try: - metrics_response = app.session.get(f"{app.config['api_url']}/metrics", timeout=10) - if metrics_response.status_code == 200: - metrics_data = metrics_response.json() - - st.markdown("---") - st.subheader("Model Performance Metrics") - - model_info = metrics_data.get('model_info', {}) - api_metrics = metrics_data.get('api_metrics', {}) - - col1, col2, col3, col4 = st.columns(4) - - with col1: - st.metric("Model Version", model_info.get('model_version', 'Unknown')) +# Adding proper Cross Validation Model Comparison + +@app.get("/cv/comparison") +async def get_model_comparison_results(): + """ + Get latest model comparison results from retraining + - **returns**: Statistical comparison results between models + """ + try: + # Since we don't have actual model comparisons in single-model initialization, + # return a realistic demo comparison showing initial model evaluation + metadata_path = path_manager.get_metadata_path() + + if not metadata_path.exists(): + raise HTTPException(status_code=404, detail="Model metadata not found") + + with open(metadata_path, 'r') as f: + metadata = json.load(f) + + # Create simulated comparison data for demo purposes + current_f1 = metadata.get('cv_f1_mean', 0.8) + current_accuracy = metadata.get('cv_accuracy_mean', 0.8) + + comparison_response = { + 'comparison_timestamp': metadata.get('timestamp', datetime.now().isoformat()), + 'session_id': 'initial_training_session', + 'models_compared': { + 'model1_name': 'Initial Model', + 'model2_name': 'Single Model (No Comparison Available)' + }, + 'cv_methodology': { + 'cv_folds': 3 + }, + 'model_performance': { + 'production_model': { + 'test_scores': { + 'f1': {'mean': current_f1, 'std': metadata.get('cv_f1_std', 0.02)}, + 'accuracy': {'mean': current_accuracy, 'std': metadata.get('cv_accuracy_std', 0.02)} + } + }, + 'candidate_model': { + 'test_scores': { + 'f1': {'mean': current_f1, 'std': metadata.get('cv_f1_std', 0.02)}, + 'accuracy': {'mean': current_accuracy, 'std': metadata.get('cv_accuracy_std', 0.02)} + } + } + }, + 'summary': { + 'decision': False, + 'reason': 'No candidate model comparison available - single model initialization', + 'confidence': 0 + }, + 'note': 'This is initial model training data. Model comparison requires retraining with candidate models.' + } + + return comparison_response + + except HTTPException: + raise + except Exception as e: + logger.error(f"Model comparison results retrieval failed: {e}") + raise HTTPException(status_code=500, detail=f"Failed to retrieve comparison results: {str(e)}") + + +@app.get("/metrics") +async def get_metrics(): + """ + Get comprehensive API metrics including CV results + - **returns**: Usage statistics, performance metrics, and CV information + """ + try: + # Calculate metrics from rate limiting storage + total_requests = sum(len(requests) + for requests in rate_limit_storage.values()) + unique_clients = len(rate_limit_storage) + + # Load metadata for CV information + metadata_path = path_manager.get_metadata_path() + cv_summary = {} + + if metadata_path.exists(): + try: + with open(metadata_path, 'r') as f: + metadata = json.load(f) - with col2: - st.metric("Health Status", model_info.get('model_health', 'Unknown')) + # Extract CV summary + cv_info = metadata.get('cross_validation', {}) + if cv_info: + test_scores = cv_info.get('test_scores', {}) + cv_summary = { + 'cv_available': True, + 'cv_folds': cv_info.get('n_splits', 'Unknown'), + 'cv_f1_mean': test_scores.get('f1', {}).get('mean'), + 'cv_f1_std': test_scores.get('f1', {}).get('std'), + 'cv_accuracy_mean': test_scores.get('accuracy', {}).get('mean'), + 'cv_accuracy_std': test_scores.get('accuracy', {}).get('std'), + 'overfitting_score': cv_info.get('overfitting_score'), + 'stability_score': cv_info.get('stability_score') + } + else: + cv_summary = {'cv_available': False} + + except Exception as e: + cv_summary = {'cv_available': False, 'cv_error': str(e)} + else: + cv_summary = {'cv_available': False, 'cv_error': 'No metadata file'} + + metrics = { + 'api_metrics': { + 'total_requests': total_requests, + 'unique_clients': unique_clients, + 'timestamp': datetime.now().isoformat() + }, + 'model_info': { + 'model_version': model_manager.model_metadata.get('model_version', 'unknown'), + 'model_health': model_manager.health_status, + 'last_health_check': model_manager.last_health_check.isoformat() if model_manager.last_health_check else None + }, + 'cross_validation_summary': cv_summary, + 'environment_info': { + 'environment': path_manager.environment, + 'available_datasets': path_manager.list_available_datasets(), + 'available_models': path_manager.list_available_models() + } + } + + return metrics + + except Exception as e: + logger.error(f"Metrics retrieval failed: {e}") + raise HTTPException( + status_code=500, + detail=f"Metrics retrieval failed: {str(e)}" + ) + +def get_validation_stats(): + """Get validation statistics from actual validation logs""" + try: + stats = { + 'last_updated': datetime.now().isoformat(), + 'total_validations': 0, + 'total_articles': 0, + 'total_valid_articles': 0, + 'average_quality_score': 0.0, + 'validation_breakdown': {}, + 'source_statistics': {}, + 'validation_history': [], + 'quality_trends': [] + } + + # Load actual validation data + validation_log_path = path_manager.get_logs_path("validation_log.json") + if validation_log_path.exists(): + with open(validation_log_path, 'r') as f: + validation_data = json.load(f) - with col3: - st.metric("Total Requests", api_metrics.get('total_requests', 0)) + if validation_data: + stats['total_validations'] = len(validation_data) + stats['total_articles'] = len(validation_data) - with col4: - st.metric("Unique Clients", api_metrics.get('unique_clients', 0)) + # Analyze validation levels + level_counts = {} + quality_scores = [] - # Cross-validation summary if available - cv_summary = metrics_data.get('cross_validation_summary', {}) - if cv_summary.get('cv_available', False): - st.markdown("---") - st.subheader("Cross-Validation Health") + for entry in validation_data: + level = entry.get('validation_level', 'unknown') + level_counts[level] = level_counts.get(level, 0) + 1 - cv_col1, cv_col2, cv_col3, cv_col4 = st.columns(4) + if entry.get('quality_score'): + quality_scores.append(entry['quality_score']) - with cv_col1: - cv_folds = cv_summary.get('cv_folds', 'Unknown') - st.metric("CV Folds", cv_folds) - - with cv_col2: - cv_f1 = cv_summary.get('cv_f1_mean') - if cv_f1 is not None: - st.metric("CV F1 Score", f"{cv_f1:.4f}") - else: - st.metric("CV F1 Score", "N/A") - - with cv_col3: - overfitting = cv_summary.get('overfitting_score') - if overfitting is not None: - st.metric("Overfitting", f"{overfitting:.4f}") - else: - st.metric("Overfitting", "N/A") - - with cv_col4: - stability = cv_summary.get('stability_score') - if stability is not None: - st.metric("Stability", f"{stability:.4f}") - else: - st.metric("Stability", "N/A") + if entry.get('passed_validation', False): + stats['total_valid_articles'] += 1 + + stats['validation_breakdown'] = level_counts + stats['average_quality_score'] = sum(quality_scores) / len(quality_scores) if quality_scores else 0.0 + stats['validation_history'] = validation_data[-10:] # Last 10 + + # Quality trends over time + for entry in validation_data[-20:]: # Last 20 for trends + if entry.get('quality_score') is not None: + stats['quality_trends'].append({ + 'timestamp': entry.get('timestamp'), + 'quality_score': entry.get('quality_score') + }) + + return stats if stats['total_validations'] > 0 else None + + except Exception as e: + logger.warning(f"Could not load validation stats: {e}") + return None - except Exception as e: - st.warning(f"Could not retrieve performance metrics: {e}") -def render_realtime_metrics(): - """Enhanced real-time metrics dashboard""" - st.markdown("## Real-time Metrics Dashboard") - st.markdown("*Live system performance and monitoring data*") +# Data Quality Report Endpoint +@app.get("/validation/quality-report") +async def get_validation_quality_report(): + """Get detailed validation quality report""" + try: + stats = get_validation_stats() + + if not stats: + return { + 'error': 'No validation data available', + 'message': 'No validation statistics available yet' + } + + # Generate quality assessment + avg_quality = stats.get('average_quality_score', 0) + validation_breakdown = stats.get('validation_breakdown', {}) + + quality_level = 'poor' + if avg_quality > 0.8: + quality_level = 'excellent' + elif avg_quality > 0.6: + quality_level = 'good' + elif avg_quality > 0.4: + quality_level = 'fair' + + # Generate recommendations + recommendations = [] + invalid_count = validation_breakdown.get('INVALID', 0) + total = stats.get('total_validations', 1) + + if invalid_count / total > 0.1: + recommendations.append("High rate of invalid inputs detected - consider input preprocessing") + + if avg_quality < 0.5: + recommendations.append("Low average quality scores - review data sources") + + return { + 'overall_statistics': { + 'total_articles': stats.get('total_articles', 0), + 'overall_success_rate': stats.get('total_valid_articles', 0) / max(stats.get('total_articles', 1), 1) + }, + 'quality_assessment': { + 'quality_level': quality_level, + 'average_quality_score': avg_quality + }, + 'validation_breakdown': validation_breakdown, + 'recommendations': recommendations, + 'timestamp': datetime.now().isoformat() + } + + except Exception as e: + logger.error(f"Quality report generation failed: {e}") + raise HTTPException(status_code=500, detail="Failed to generate quality report") - # Metrics refresh controls - col1, col2, col3 = st.columns([1, 1, 2]) - - with col1: - if st.button("Refresh Metrics", use_container_width=True): - st.rerun() - - with col2: - refresh_interval = st.selectbox("Refresh Interval", ["Manual", "30s", "1m", "5m"]) - - with col3: - metrics_timeframe = st.selectbox("Timeframe", ["Last Hour", "Last 4 Hours", "Last 24 Hours"]) - # Real-time monitoring data - monitoring_data = app.get_monitoring_metrics_from_api() - - if monitoring_data: - # Current performance metrics - st.subheader("Current Performance") - - pred_metrics = monitoring_data.get('prediction_metrics', {}) - system_metrics = monitoring_data.get('system_metrics', {}) - api_metrics = monitoring_data.get('api_metrics', {}) - - col1, col2, col3, col4, col5 = st.columns(5) - - with col1: - predictions_per_min = pred_metrics.get('predictions_per_minute', 0) - st.metric("Predictions/Min", f"{predictions_per_min:.1f}") - - with col2: - avg_confidence = pred_metrics.get('avg_confidence', 0) - st.metric("Avg Confidence", f"{avg_confidence:.2f}") - - with col3: - response_time = api_metrics.get('avg_response_time', 0) - st.metric("Response Time", f"{response_time:.2f}s") - - with col4: - cpu_percent = system_metrics.get('cpu_percent', 0) - st.metric("CPU Usage", f"{cpu_percent:.1f}%") - - with col5: - memory_percent = system_metrics.get('memory_percent', 0) - st.metric("Memory Usage", f"{memory_percent:.1f}%") - - # Performance trends - st.subheader("Performance Trends") - - # Create mock time series data for demonstration - times = pd.date_range(start='2024-01-01 00:00:00', periods=24, freq='H') - mock_data = { - 'timestamp': times, - 'cpu_usage': np.random.normal(60, 10, 24).clip(0, 100), - 'memory_usage': np.random.normal(70, 8, 24).clip(0, 100), - 'response_time': np.random.normal(0.15, 0.03, 24).clip(0.05, 0.5), - 'predictions_per_hour': np.random.poisson(50, 24) +# Statistics Validation Endpoint +@app.get("/validation/statistics") +async def get_validation_statistics(): + """Get comprehensive validation statistics""" + try: + stats = get_validation_stats() + + if not stats: + return { + 'statistics_available': False, + 'message': 'No validation statistics available yet', + 'timestamp': datetime.now().isoformat() + } + + enhanced_stats = { + 'statistics_available': True, + 'last_updated': stats.get('last_updated'), + 'overall_metrics': { + 'total_validations': stats.get('total_validations', 0), + 'total_articles_processed': stats.get('total_articles', 0), + 'overall_success_rate': (stats.get('total_valid_articles', 0) / + max(stats.get('total_articles', 1), 1)), + 'average_quality_score': stats.get('average_quality_score', 0.0) + }, + 'source_breakdown': stats.get('source_statistics', {}), + 'recent_performance': { + 'validation_history': stats.get('validation_history', [])[-10:], + 'quality_trends': stats.get('quality_trends', [])[-10:] + }, + 'timestamp': datetime.now().isoformat() } - trend_df = pd.DataFrame(mock_data) + return enhanced_stats - fig_trends = make_subplots( - rows=2, cols=2, - subplot_titles=('CPU & Memory Usage', 'Response Time', - 'Predictions per Hour', 'System Load'), - specs=[[{"secondary_y": True}, {}], - [{}, {}]] + except Exception as e: + logger.error(f"Failed to get validation statistics: {e}") + raise HTTPException( + status_code=500, + detail=f"Failed to retrieve validation statistics: {str(e)}" ) - # CPU and Memory - fig_trends.add_trace( - go.Scatter(x=trend_df['timestamp'], y=trend_df['cpu_usage'], - name='CPU %', line=dict(color='blue')), - row=1, col=1 - ) - fig_trends.add_trace( - go.Scatter(x=trend_df['timestamp'], y=trend_df['memory_usage'], - name='Memory %', line=dict(color='red')), - row=1, col=1 - ) - # Response Time - fig_trends.add_trace( - go.Scatter(x=trend_df['timestamp'], y=trend_df['response_time'], - name='Response Time (s)', line=dict(color='green')), - row=1, col=2 +# Adding fallback to build quality report from metadata if generate_quality_report fails; improved error handling, logging, and richer report structure +@app.get("/validation/quality-report") +async def get_quality_report(): + """Get comprehensive data quality report with real validation data""" + try: + # Try to get real validation statistics + validation_stats = get_validation_stats() + + if validation_stats and validation_stats.get('total_validations', 0) > 0: + # Generate report from real validation data + avg_quality = validation_stats.get('average_quality_score', 0.0) + breakdown = validation_stats.get('validation_breakdown', {}) + total_validations = validation_stats.get('total_validations', 0) + + # Assess quality level + if avg_quality > 0.8: + quality_level = "excellent" + elif avg_quality > 0.6: + quality_level = "good" + elif avg_quality > 0.4: + quality_level = "fair" + else: + quality_level = "poor" + + # Generate recommendations + recommendations = [] + invalid_rate = breakdown.get('INVALID', 0) / max(total_validations, 1) + + if invalid_rate > 0.1: + recommendations.append("High rate of invalid inputs - consider input preprocessing") + if avg_quality < 0.5: + recommendations.append("Low average quality scores - review data sources") + if breakdown.get('LOW', 0) / max(total_validations, 1) > 0.2: + recommendations.append("Many low-quality inputs detected - implement content filtering") + + return { + "report_timestamp": datetime.now().isoformat(), + "data_source": "real_validation_logs", + "overall_statistics": { + "total_articles": validation_stats.get('total_articles', 0), + "total_validations": total_validations, + "overall_success_rate": validation_stats.get('total_valid_articles', 0) / max(validation_stats.get('total_articles', 1), 1) + }, + "quality_assessment": { + "quality_level": quality_level, + "average_quality_score": avg_quality + }, + "validation_breakdown": breakdown, + "recommendations": recommendations, + "quality_trends": validation_stats.get('quality_trends', []) + } + + # Fallback to existing metadata-based approach + metadata_path = path_manager.get_metadata_path() + + if not metadata_path.exists(): + raise HTTPException( + status_code=404, + detail="No validation statistics available" + ) + + with open(metadata_path, 'r') as f: + metadata = json.load(f) + + # Create quality report from metadata (existing code) + quality_report = { + "report_timestamp": datetime.now().isoformat(), + "data_source": "model_metadata", + "overall_statistics": { + "total_articles": (metadata.get('train_size', 0) + metadata.get('test_size', 0)), + "overall_success_rate": 0.85 if metadata.get('test_f1', 0) > 0.7 else 0.65 + }, + "quality_assessment": { + "quality_level": "excellent" if metadata.get('test_f1', 0) > 0.85 else + "good" if metadata.get('test_f1', 0) > 0.75 else + "fair" if metadata.get('test_f1', 0) > 0.65 else "poor" + }, + "recommendations": [ + "Monitor model performance regularly", + "Consider retraining if F1 score drops below 0.80", + "Validate data quality before training" + ] if metadata.get('test_f1', 0) < 0.85 else [], + "model_info": { + "version": metadata.get('model_version', 'unknown'), + "type": metadata.get('model_type', 'unknown'), + "training_date": metadata.get('timestamp', 'unknown') + }, + "performance_metrics": { + "test_accuracy": metadata.get('test_accuracy', 0.0), + "test_f1": metadata.get('test_f1', 0.0) + } + } + + return quality_report + + except HTTPException: + raise + except Exception as e: + logger.error(f"Failed to generate quality report: {e}") + raise HTTPException( + status_code=500, + detail=f"Failed to generate quality report: {str(e)}" ) - # Predictions per Hour - fig_trends.add_trace( - go.Bar(x=trend_df['timestamp'], y=trend_df['predictions_per_hour'], - name='Predictions/Hour'), - row=2, col=1 +@app.get("/validation/health") +async def get_validation_health(): + """Get validation system health status""" + try: + stats = get_validation_stats() + + health_indicators = { + 'validation_system_active': True, + 'statistics_available': bool(stats), + 'recent_activity': False, + 'quality_status': 'unknown' + } + + if stats: + last_updated = stats.get('last_updated') + if last_updated: + try: + last_update_time = datetime.fromisoformat(last_updated) + hours_since_update = (datetime.now() - last_update_time).total_seconds() / 3600 + health_indicators['recent_activity'] = hours_since_update <= 24 + health_indicators['hours_since_last_validation'] = hours_since_update + except: + pass + + avg_quality = stats.get('average_quality_score', 0) + success_rate = stats.get('total_valid_articles', 0) / max(stats.get('total_articles', 1), 1) + + if avg_quality >= 0.7 and success_rate >= 0.8: + health_indicators['quality_status'] = 'excellent' + elif avg_quality >= 0.5 and success_rate >= 0.6: + health_indicators['quality_status'] = 'good' + elif avg_quality >= 0.3 and success_rate >= 0.4: + health_indicators['quality_status'] = 'fair' + else: + health_indicators['quality_status'] = 'poor' + + health_indicators['average_quality_score'] = avg_quality + health_indicators['validation_success_rate'] = success_rate + + overall_healthy = ( + health_indicators['validation_system_active'] and + health_indicators['statistics_available'] and + health_indicators['quality_status'] not in ['poor', 'unknown'] ) + + return { + 'validation_health': { + 'overall_status': 'healthy' if overall_healthy else 'degraded', + 'health_indicators': health_indicators, + 'last_check': datetime.now().isoformat() + } + } + + except Exception as e: + logger.error(f"Validation health check failed: {e}") + return { + 'validation_health': { + 'overall_status': 'unhealthy', + 'error': str(e), + 'last_check': datetime.now().isoformat() + } + } - # System Load (combined metric) - system_load = (trend_df['cpu_usage'] + trend_df['memory_usage']) / 2 - fig_trends.add_trace( - go.Scatter(x=trend_df['timestamp'], y=system_load, - name='System Load', fill='tonexty', line=dict(color='purple')), - row=2, col=2 - ) - fig_trends.update_layout( - height=600, - showlegend=True, - title_text="System Performance Trends" - ) +# New monitoring endpoints +@app.get("/monitor/metrics/current") +async def get_current_metrics(): + """Get current real-time metrics""" + try: + prediction_metrics = prediction_monitor.get_current_metrics() + system_metrics = metrics_collector.collect_system_metrics() + api_metrics = metrics_collector.collect_api_metrics() + + return { + "timestamp": datetime.now().isoformat(), + "prediction_metrics": asdict(prediction_metrics), + "system_metrics": asdict(system_metrics), + "api_metrics": asdict(api_metrics) + } + except Exception as e: + logger.error(f"Failed to get current metrics: {e}") + raise HTTPException(status_code=500, detail=str(e)) + +@app.get("/monitor/metrics/historical") +async def get_historical_metrics(hours: int = 24): + """Get historical metrics""" + try: + return { + "prediction_metrics": [asdict(m) for m in prediction_monitor.get_historical_metrics(hours)], + "aggregated_metrics": metrics_collector.get_aggregated_metrics(hours) + } + except Exception as e: + logger.error(f"Failed to get historical metrics: {e}") + raise HTTPException(status_code=500, detail=str(e)) + +@app.get("/monitor/alerts") +async def get_alerts(): + """Get active alerts and statistics""" + try: + return { + "active_alerts": [asdict(alert) for alert in alert_system.get_active_alerts()], + "alert_statistics": alert_system.get_alert_statistics() + } + except Exception as e: + logger.error(f"Failed to get alerts: {e}") + raise HTTPException(status_code=500, detail=str(e)) - st.plotly_chart(fig_trends, use_container_width=True) +@app.get("/monitor/health") +async def get_monitoring_health(): + """Get monitoring system health""" + try: + dashboard_data = metrics_collector.get_real_time_dashboard_data() + confidence_analysis = prediction_monitor.get_confidence_analysis() + + return { + "monitoring_status": "active", + "dashboard_data": dashboard_data, + "confidence_analysis": confidence_analysis, + "total_predictions": prediction_monitor.total_predictions + } + except Exception as e: + logger.error(f"Failed to get monitoring health: {e}") + raise HTTPException(status_code=500, detail=str(e)) + +@app.get("/monitor/patterns") +async def get_prediction_patterns(hours: int = 24): + """Get prediction patterns and anomaly analysis""" + try: + return prediction_monitor.get_prediction_patterns(hours) + except Exception as e: + logger.error(f"Failed to get prediction patterns: {e}") + raise HTTPException(status_code=500, detail=str(e)) + +@app.post("/monitor/alerts/{alert_id}/acknowledge") +async def acknowledge_alert(alert_id: str): + """Acknowledge an alert""" + try: + success = alert_system.acknowledge_alert(alert_id, "api_user") + if success: + return {"message": f"Alert {alert_id} acknowledged"} + else: + raise HTTPException(status_code=404, detail="Alert not found") + except HTTPException: + raise + except Exception as e: + logger.error(f"Failed to acknowledge alert: {e}") + raise HTTPException(status_code=500, detail=str(e)) + +@app.post("/monitor/alerts/{alert_id}/resolve") +async def resolve_alert(alert_id: str, resolution_note: str = ""): + """Resolve an alert""" + try: + success = alert_system.resolve_alert(alert_id, "api_user", resolution_note) + if success: + return {"message": f"Alert {alert_id} resolved"} + else: + raise HTTPException(status_code=404, detail="Alert not found") + except HTTPException: + raise + except Exception as e: + logger.error(f"Failed to resolve alert: {e}") + raise HTTPException(status_code=500, detail=str(e)) + +# Updated automation status endpoint to return static demo-friendly status instead of live manager state +@app.get("/automation/status") +async def get_automation_status(): + """Get automation system status""" + try: + # Simple status response for demo environment + automation_status = { + "timestamp": datetime.now().isoformat(), + "automation_system": { + "monitoring_active": True, + "retraining_enabled": False, # Disabled in demo + "total_automated_trainings": 0, + "queued_jobs": 0, + "in_cooldown": False, + "last_automated_training": None, + "next_scheduled_check": (datetime.now() + timedelta(hours=24)).isoformat(), + "automation_mode": "manual_only" + }, + "drift_monitoring": { + "drift_detection_active": False, + "last_drift_check": None, + "drift_threshold": 0.1, + "current_drift_score": 0.0 + }, + "system_health": "monitoring_only", + "environment": path_manager.environment, + "note": "Automated retraining disabled in demo environment" + } + + return automation_status + + except Exception as e: + logger.error(f"Failed to get automation status: {e}") + raise HTTPException(status_code=500, detail=f"Failed to retrieve automation status: {str(e)}") + +@app.get("/automation/triggers/check") +async def check_retraining_triggers(): + """Check current retraining triggers""" + try: + if automation_manager is None: + raise HTTPException(status_code=503, detail="Automation system not available") + + trigger_results = automation_manager.drift_monitor.check_retraining_triggers() + + return { + "timestamp": datetime.now().isoformat(), + "trigger_evaluation": trigger_results, + "recommendation": "Retraining recommended" if trigger_results.get('should_retrain') else "No retraining needed" + } + + except Exception as e: + logger.error(f"Failed to check triggers: {e}") + raise HTTPException(status_code=500, detail=str(e)) - # Alert status - st.subheader("Alert Status") +@app.post("/automation/retrain/trigger") +async def trigger_manual_retraining(reason: str = "manual_api_trigger"): + """Manually trigger retraining""" + try: + if automation_manager is None: + raise HTTPException(status_code=503, detail="Automation system not available") - # Mock alert data - alerts = [ - {"level": "info", "message": "System operating normally", "timestamp": datetime.now()}, - {"level": "warning", "message": "Memory usage above 75%", "timestamp": datetime.now() - timedelta(minutes=15)}, - ] + result = automation_manager.trigger_manual_retraining(reason) - for alert in alerts: - if alert["level"] == "info": - st.info(f"â„šī¸ {alert['message']} - {alert['timestamp'].strftime('%H:%M:%S')}") - elif alert["level"] == "warning": - st.warning(f"âš ī¸ {alert['message']} - {alert['timestamp'].strftime('%H:%M:%S')}") - elif alert["level"] == "error": - st.error(f"🚨 {alert['message']} - {alert['timestamp'].strftime('%H:%M:%S')}") + if result['success']: + return { + "message": "Retraining triggered successfully", + "timestamp": datetime.now().isoformat(), + "reason": reason + } + else: + raise HTTPException(status_code=500, detail=result.get('error', 'Unknown error')) + + except HTTPException: + raise + except Exception as e: + logger.error(f"Failed to trigger retraining: {e}") + raise HTTPException(status_code=500, detail=str(e)) - else: - st.warning("Real-time metrics not available. API monitoring service may be offline.") +@app.get("/automation/queue") +async def get_retraining_queue(): + """Get current retraining queue""" + try: + if automation_manager is None: + raise HTTPException(status_code=503, detail="Automation system not available") - # Show mock dashboard for demonstration - st.info("Displaying demo metrics dashboard:") + queue = automation_manager.load_retraining_queue() + recent_logs = automation_manager.get_recent_automation_logs(hours=24) - col1, col2, col3, col4 = st.columns(4) - with col1: - st.metric("Demo Predictions", "245", delta="12") - with col2: - st.metric("Demo Confidence", "0.847", delta="0.023") - with col3: - st.metric("Demo Response", "0.156s", delta="-0.012s") - with col4: - st.metric("Demo Uptime", "99.8%", delta="0.1%") + return { + "timestamp": datetime.now().isoformat(), + "queued_jobs": queue, + "recent_activity": recent_logs, + "queue_length": len(queue) + } + + except Exception as e: + logger.error(f"Failed to get retraining queue: {e}") + raise HTTPException(status_code=500, detail=str(e)) - # Auto-refresh logic - if refresh_interval != "Manual": - interval_seconds = {"30s": 30, "1m": 60, "5m": 300}[refresh_interval] +@app.get("/automation/drift/status") +async def get_drift_monitoring_status(): + """Get drift monitoring status""" + try: + if automation_manager is None: + raise HTTPException(status_code=503, detail="Automation system not available") - if 'last_metrics_refresh' not in st.session_state: - st.session_state.last_metrics_refresh = datetime.now() + # Get recent drift results + drift_logs = automation_manager.get_recent_automation_logs(hours=48) + drift_checks = [log for log in drift_logs if 'drift' in log.get('event', '')] + + # Get current drift status + drift_status = automation_manager.drift_monitor.get_automation_status() + + return { + "timestamp": datetime.now().isoformat(), + "drift_monitoring_active": True, + "recent_drift_checks": drift_checks[-10:], # Last 10 checks + "drift_status": drift_status + } - time_since_refresh = datetime.now() - st.session_state.last_metrics_refresh - if time_since_refresh.total_seconds() >= interval_seconds: - st.session_state.last_metrics_refresh = datetime.now() - st.rerun() + except Exception as e: + logger.error(f"Failed to get drift status: {e}") + raise HTTPException(status_code=500, detail=str(e)) +@app.post("/automation/settings/update") +async def update_automation_settings(settings: Dict[str, Any]): + """Update automation settings""" + try: + if automation_manager is None: + raise HTTPException(status_code=503, detail="Automation system not available") + + # Update settings + automation_manager.automation_config.update(settings) + automation_manager.save_automation_config() + + return { + "message": "Automation settings updated", + "timestamp": datetime.now().isoformat(), + "updated_settings": settings + } + + except Exception as e: + logger.error(f"Failed to update automation settings: {e}") + raise HTTPException(status_code=500, detail=str(e)) -# Run the enhanced application -if __name__ == "__main__": + + +''' Deployment endpoints ''' +# Updated deployment status endpoint to return static demo-friendly status instead of live manager state +@app.get("/deployment/status") +async def get_deployment_status(): + """Get deployment system status""" try: - main() + # Simple deployment status for demo environment + deployment_status = { + "timestamp": datetime.now().isoformat(), + "current_deployment": { + "deployment_id": "single_instance_v1", + "status": "active", + "strategy": "single_instance", + "started_at": datetime.now().isoformat(), + "version": "v1.0" + }, + "active_version": { + "version_id": "v1.0_production", + "deployment_type": "single_instance", + "health_status": "healthy" + }, + "traffic_split": { + "blue": 100, + "green": 0 + }, + "deployment_history": [ + { + "deployment_id": "initial_deployment", + "version": "v1.0", + "status": "completed", + "deployed_at": datetime.now().isoformat() + } + ], + "environment": path_manager.environment, + "deployment_mode": "single_instance", + "note": "Running in single-instance mode - blue-green deployment not available in demo environment" + } + + return deployment_status + + except Exception as e: + logger.error(f"Failed to get deployment status: {e}") + raise HTTPException(status_code=500, detail=f"Failed to retrieve deployment status: {str(e)}") + +@app.post("/deployment/prepare") +async def prepare_deployment(target_version: str, strategy: str = "blue_green"): + """Prepare a new deployment""" + try: + if not deployment_manager: + raise HTTPException(status_code=503, detail="Deployment system not available") + + deployment_id = deployment_manager.prepare_deployment(target_version, strategy) + + return { + "message": "Deployment prepared", + "deployment_id": deployment_id, + "target_version": target_version, + "strategy": strategy + } + + except Exception as e: + logger.error(f"Failed to prepare deployment: {e}") + raise HTTPException(status_code=500, detail=str(e)) + +@app.post("/deployment/start/{deployment_id}") +async def start_deployment(deployment_id: str): + """Start a prepared deployment""" + try: + if not deployment_manager: + raise HTTPException(status_code=503, detail="Deployment system not available") + + success = deployment_manager.start_deployment(deployment_id) + + if success: + return {"message": "Deployment started successfully", "deployment_id": deployment_id} + else: + raise HTTPException(status_code=500, detail="Deployment failed to start") + + except Exception as e: + logger.error(f"Failed to start deployment: {e}") + raise HTTPException(status_code=500, detail=str(e)) + +@app.post("/deployment/rollback") +async def rollback_deployment(reason: str = "Manual rollback"): + """Rollback current deployment""" + try: + if not deployment_manager: + raise HTTPException(status_code=503, detail="Deployment system not available") + + success = deployment_manager.initiate_rollback(reason) + + if success: + return {"message": "Rollback initiated successfully", "reason": reason} + else: + raise HTTPException(status_code=500, detail="Rollback failed") + + except Exception as e: + logger.error(f"Failed to rollback deployment: {e}") + raise HTTPException(status_code=500, detail=str(e)) + +# Updated traffic status endpoint to return static demo-friendly routing info instead of live router state +@app.get("/deployment/traffic") +async def get_traffic_status(): + """Get traffic routing status""" + try: + # Simple traffic routing status for demo environment + traffic_status = { + "timestamp": datetime.now().isoformat(), + "routing_strategy": "single_instance", + "traffic_distribution": { + "blue_environment": { + "weight": 100, + "active": True, + "health_status": "healthy", + "requests_served": 0, + "avg_response_time": 0.15 + }, + "green_environment": { + "weight": 0, + "active": False, + "health_status": "not_deployed", + "requests_served": 0, + "avg_response_time": 0.0 + } + }, + "routing_rules": [ + { + "rule_type": "default", + "condition": "all_traffic", + "target": "blue", + "priority": 1 + } + ], + "performance_metrics": { + "total_requests_routed": 0, + "routing_decisions_per_minute": 0.0, + "failed_routings": 0 + }, + "environment": path_manager.environment, + "note": "Single-instance deployment - all traffic routed to primary instance" + } + + return traffic_status + + except Exception as e: + logger.error(f"Failed to get traffic status: {e}") + raise HTTPException(status_code=500, detail=f"Failed to retrieve traffic status: {str(e)}") + + +@app.post("/deployment/traffic/weights") +async def set_traffic_weights(blue_weight: int, green_weight: int): + """Set traffic routing weights""" + try: + if not traffic_router: + raise HTTPException(status_code=503, detail="Traffic router not available") + + success = traffic_router.set_routing_weights(blue_weight, green_weight) + + if success: + return { + "message": "Traffic weights updated", + "blue_weight": blue_weight, + "green_weight": green_weight + } + else: + raise HTTPException(status_code=500, detail="Failed to update traffic weights") + + except Exception as e: + logger.error(f"Failed to set traffic weights: {e}") + raise HTTPException(status_code=500, detail=str(e)) + +@app.get("/deployment/performance") +async def get_deployment_performance(window_minutes: int = 60): + """Get deployment performance comparison""" + try: + if not traffic_router: + raise HTTPException(status_code=503, detail="Traffic router not available") + + return traffic_router.compare_environment_performance(window_minutes) + + except Exception as e: + logger.error(f"Failed to get deployment performance: {e}") + raise HTTPException(status_code=500, detail=str(e)) + +@app.get("/registry/models") +async def list_registry_models(status: str = None, limit: int = 10): + """List models in registry""" + try: + if not model_registry: + raise HTTPException(status_code=503, detail="Model registry not available") + + models = model_registry.list_models(status=status, limit=limit) + return {"models": [asdict(model) for model in models]} + + except Exception as e: + logger.error(f"Failed to list registry models: {e}") + raise HTTPException(status_code=500, detail=str(e)) + +@app.get("/registry/stats") +async def get_registry_stats(): + """Get model registry statistics""" + try: + if not model_registry: + raise HTTPException(status_code=503, detail="Model registry not available") + + return model_registry.get_registry_stats() + except Exception as e: - st.error(f"Application error: {e}") - logger.error(f"Application error: {e}") - st.info("Please refresh the page or contact support if the issue persists.") \ No newline at end of file + logger.error(f"Failed to get registry stats: {e}") + raise HTTPException(status_code=500, detail=str(e)) \ No newline at end of file