Ahmedik95316 committed on
Commit
04e5963
·
1 Parent(s): 34841ba

Update app/streamlit_app.py

Files changed (1)
  1. app/streamlit_app.py +560 -282
app/streamlit_app.py CHANGED
@@ -15,7 +15,7 @@ import plotly.express as px
15
  import plotly.graph_objects as go
16
  from datetime import datetime, timedelta
17
  from typing import Dict, List, Optional, Any
18
-
19
 
20
  # Configure logging
21
  logging.basicConfig(level=logging.INFO)
@@ -24,15 +24,26 @@ logger = logging.getLogger(__name__)
24
  # Add root to sys.path for imports
25
  sys.path.append(str(Path(__file__).resolve().parent.parent))
26

27
  class StreamlitAppManager:
28
  """Manages Streamlit application state and functionality"""
29
-
30
  def __init__(self):
31
  self.setup_config()
32
  self.setup_paths()
33
  self.setup_api_client()
34
  self.initialize_session_state()
35
-
36
  def setup_config(self):
37
  """Setup application configuration"""
38
  self.config = {
@@ -44,7 +55,7 @@ class StreamlitAppManager:
44
  'refresh_interval': 60,
45
  'max_batch_size': 10
46
  }
47
-
48
  def setup_paths(self):
49
  """Setup file paths"""
50
  self.paths = {
@@ -56,37 +67,39 @@ class StreamlitAppManager:
56
  'scheduler_log': Path("/tmp/logs/scheduler_execution.json"),
57
  'error_log': Path("/tmp/logs/scheduler_errors.json")
58
  }
59
-
60
  def setup_api_client(self):
61
  """Setup API client with error handling"""
62
  self.session = requests.Session()
63
  self.session.timeout = self.config['prediction_timeout']
64
-
65
  # Test API connection
66
  self.api_available = self.test_api_connection()
67
-
68
  def test_api_connection(self) -> bool:
69
  """Test API connection"""
70
  try:
71
- response = self.session.get(f"{self.config['api_url']}/health", timeout=5)
72
  return response.status_code == 200
73
  except:
74
  return False
75
-
76
  def initialize_session_state(self):
77
  """Initialize Streamlit session state"""
78
  if 'prediction_history' not in st.session_state:
79
  st.session_state.prediction_history = []
80
-
81
  if 'upload_history' not in st.session_state:
82
  st.session_state.upload_history = []
83
-
84
  if 'last_refresh' not in st.session_state:
85
  st.session_state.last_refresh = datetime.now()
86
-
87
  if 'auto_refresh' not in st.session_state:
88
  st.session_state.auto_refresh = False
89
 
 
90
  # Initialize app manager
91
  app_manager = StreamlitAppManager()
92
 
@@ -142,6 +155,7 @@ st.markdown("""
142
  </style>
143
  """, unsafe_allow_html=True)
144
 
 
145
  def load_json_file(file_path: Path, default: Any = None) -> Any:
146
  """Safely load JSON file with error handling"""
147
  try:
@@ -153,6 +167,7 @@ def load_json_file(file_path: Path, default: Any = None) -> Any:
153
  logger.error(f"Failed to load {file_path}: {e}")
154
  return default or {}
155
 
 
156
  def save_prediction_to_history(text: str, prediction: str, confidence: float):
157
  """Save prediction to session history"""
158
  prediction_entry = {
@@ -162,30 +177,31 @@ def save_prediction_to_history(text: str, prediction: str, confidence: float):
162
  'confidence': confidence,
163
  'text_length': len(text)
164
  }
165
-
166
  st.session_state.prediction_history.append(prediction_entry)
167
-
168
  # Keep only last 50 predictions
169
  if len(st.session_state.prediction_history) > 50:
170
  st.session_state.prediction_history = st.session_state.prediction_history[-50:]
171
 
 
172
  def make_prediction_request(text: str) -> Dict[str, Any]:
173
  """Make prediction request to API"""
174
  try:
175
  if not app_manager.api_available:
176
  return {'error': 'API is not available'}
177
-
178
  response = app_manager.session.post(
179
  f"{app_manager.config['api_url']}/predict",
180
  json={"text": text},
181
  timeout=app_manager.config['prediction_timeout']
182
  )
183
-
184
  if response.status_code == 200:
185
  return response.json()
186
  else:
187
  return {'error': f'API Error: {response.status_code} - {response.text}'}
188
-
189
  except requests.exceptions.Timeout:
190
  return {'error': 'Request timed out. Please try again.'}
191
  except requests.exceptions.ConnectionError:
@@ -193,33 +209,35 @@ def make_prediction_request(text: str) -> Dict[str, Any]:
193
  except Exception as e:
194
  return {'error': f'Unexpected error: {str(e)}'}
195
 
 
196
  def validate_text_input(text: str) -> tuple[bool, str]:
197
  """Validate text input"""
198
  if not text or not text.strip():
199
  return False, "Please enter some text to analyze."
200
-
201
  if len(text) < 10:
202
  return False, "Text must be at least 10 characters long."
203
-
204
  if len(text) > app_manager.config['max_text_length']:
205
  return False, f"Text must be less than {app_manager.config['max_text_length']} characters."
206
-
207
  # Check for suspicious content
208
  suspicious_patterns = ['<script', 'javascript:', 'data:']
209
  if any(pattern in text.lower() for pattern in suspicious_patterns):
210
  return False, "Text contains suspicious content."
211
-
212
  return True, "Valid"
213
 
 
214
  def create_confidence_gauge(confidence: float, prediction: str):
215
  """Create confidence gauge visualization"""
216
  fig = go.Figure(go.Indicator(
217
- mode = "gauge+number+delta",
218
- value = confidence * 100,
219
- domain = {'x': [0, 1], 'y': [0, 1]},
220
- title = {'text': f"Confidence: {prediction}"},
221
- delta = {'reference': 50},
222
- gauge = {
223
  'axis': {'range': [None, 100]},
224
  'bar': {'color': "red" if prediction == "Fake" else "green"},
225
  'steps': [
@@ -234,22 +252,23 @@ def create_confidence_gauge(confidence: float, prediction: str):
234
  }
235
  }
236
  ))
237
-
238
  fig.update_layout(height=300)
239
  return fig
240
 
 
241
  def create_prediction_history_chart():
242
  """Create prediction history visualization"""
243
  if not st.session_state.prediction_history:
244
  return None
245
-
246
  df = pd.DataFrame(st.session_state.prediction_history)
247
  df['timestamp'] = pd.to_datetime(df['timestamp'])
248
  df['confidence_percent'] = df['confidence'] * 100
249
-
250
  fig = px.scatter(
251
- df,
252
- x='timestamp',
253
  y='confidence_percent',
254
  color='prediction',
255
  size='text_length',
@@ -257,61 +276,326 @@ def create_prediction_history_chart():
257
  title="Prediction History",
258
  labels={'confidence_percent': 'Confidence (%)', 'timestamp': 'Time'}
259
  )
260
-
261
  fig.update_layout(height=400)
262
  return fig
263

264
  # Main application
265
  def main():
266
  """Main Streamlit application"""
267
-
268
  # Header
269
- st.markdown('<h1 class="main-header">📰 Fake News Detection System</h1>', unsafe_allow_html=True)
270
-
271
  # API Status indicator
272
  col1, col2, col3 = st.columns([1, 2, 1])
273
  with col2:
274
  if app_manager.api_available:
275
- st.markdown('<div class="success-message">🟢 API Service: Online</div>', unsafe_allow_html=True)
276
  else:
277
- st.markdown('<div class="error-message">🔴 API Service: Offline</div>', unsafe_allow_html=True)
278
-
279
  # Main content area
280
  tab1, tab2, tab3, tab4, tab5 = st.tabs([
281
- "🔍 Prediction",
282
- "📊 Batch Analysis",
283
- "📈 Analytics",
284
- "🎯 Model Training",
285
  "⚙️ System Status"
286
  ])
287
-
288
  # Tab 1: Individual Prediction
289
  with tab1:
290
  st.header("Single Text Analysis")
291
-
292
  # Input methods
293
  input_method = st.radio(
294
  "Choose input method:",
295
  ["Type Text", "Upload File"],
296
  horizontal=True
297
  )
298
-
299
  user_text = ""
300
-
301
  if input_method == "Type Text":
302
  user_text = st.text_area(
303
  "Enter news article text:",
304
  height=200,
305
  placeholder="Paste or type the news article you want to analyze..."
306
  )
307
-
308
  else: # Upload File
309
  uploaded_file = st.file_uploader(
310
  "Upload text file:",
311
  type=['txt', 'csv'],
312
  help="Upload a text file containing the article to analyze"
313
  )
314
-
315
  if uploaded_file:
316
  try:
317
  if uploaded_file.type == "text/plain":
@@ -319,44 +603,48 @@ def main():
319
  elif uploaded_file.type == "text/csv":
320
  df = pd.read_csv(uploaded_file)
321
  if 'text' in df.columns:
322
- user_text = df['text'].iloc[0] if len(df) > 0 else ""
323
  else:
324
  st.error("CSV file must contain a 'text' column")
325
-
326
- st.success(f"File uploaded successfully! ({len(user_text)} characters)")
327
-
328
  except Exception as e:
329
  st.error(f"Error reading file: {e}")
330
-
331
  # Prediction section
332
  col1, col2 = st.columns([3, 1])
333
-
334
  with col1:
335
  if st.button("🧠 Analyze Text", type="primary", use_container_width=True):
336
  if user_text:
337
  # Validate input
338
- is_valid, validation_message = validate_text_input(user_text)
339
-
340
  if not is_valid:
341
  st.error(validation_message)
342
  else:
343
  # Show progress
344
  with st.spinner("Analyzing text..."):
345
  result = make_prediction_request(user_text)
346
-
347
  if 'error' in result:
348
  st.error(f"❌ {result['error']}")
349
  else:
350
  # Display results
351
  prediction = result['prediction']
352
  confidence = result['confidence']
353
-
354
  # Save to history
355
- save_prediction_to_history(user_text, prediction, confidence)
356
-
357
  # Results display
358
  col_result1, col_result2 = st.columns(2)
359
-
360
  with col_result1:
361
  if prediction == "Fake":
362
  st.markdown(f"""
@@ -372,12 +660,14 @@ def main():
372
  <p>Confidence: {confidence:.2%}</p>
373
  </div>
374
  """, unsafe_allow_html=True)
375
-
376
  with col_result2:
377
  # Confidence gauge
378
- fig_gauge = create_confidence_gauge(confidence, prediction)
379
- st.plotly_chart(fig_gauge, use_container_width=True)
380
-
381
  # Additional information
382
  with st.expander("📋 Analysis Details"):
383
  st.json({
@@ -389,51 +679,53 @@ def main():
389
  })
390
  else:
391
  st.warning("Please enter text to analyze.")
392
-
393
  with col2:
394
  if st.button("🔄 Clear Text", use_container_width=True):
395
  st.rerun()
396
-
397
  # Tab 2: Batch Analysis
398
  with tab2:
399
  st.header("Batch Text Analysis")
400
-
401
  # File upload for batch processing
402
  batch_file = st.file_uploader(
403
  "Upload CSV file for batch analysis:",
404
  type=['csv'],
405
  help="CSV file should contain a 'text' column with articles to analyze"
406
  )
407
-
408
  if batch_file:
409
  try:
410
  df = pd.read_csv(batch_file)
411
-
412
  if 'text' not in df.columns:
413
  st.error("CSV file must contain a 'text' column")
414
  else:
415
  st.success(f"File loaded: {len(df)} articles found")
416
-
417
  # Preview data
418
  st.subheader("Data Preview")
419
  st.dataframe(df.head(10))
420
-
421
  # Batch processing
422
  if st.button("🚀 Process Batch", type="primary"):
423
  if len(df) > app_manager.config['max_batch_size']:
424
- st.warning(f"Only processing first {app_manager.config['max_batch_size']} articles")
425
  df = df.head(app_manager.config['max_batch_size'])
426
-
427
  progress_bar = st.progress(0)
428
  status_text = st.empty()
429
  results = []
430
-
431
  for i, row in df.iterrows():
432
- status_text.text(f"Processing article {i+1}/{len(df)}...")
433
  progress_bar.progress((i + 1) / len(df))
434
-
435
  result = make_prediction_request(row['text'])
436
-
437
  if 'error' not in result:
438
  results.append({
439
  'text': row['text'][:100] + "...",
@@ -448,28 +740,31 @@ def main():
448
  'confidence': 0,
449
  'processing_time': 0
450
  })
451
-
452
  # Display results
453
  results_df = pd.DataFrame(results)
454
-
455
  # Summary statistics
456
  col1, col2, col3, col4 = st.columns(4)
457
-
458
  with col1:
459
  st.metric("Total Processed", len(results_df))
460
-
461
  with col2:
462
- fake_count = len(results_df[results_df['prediction'] == 'Fake'])
463
  st.metric("Fake News", fake_count)
464
-
465
  with col3:
466
- real_count = len(results_df[results_df['prediction'] == 'Real'])
467
  st.metric("Real News", real_count)
468
-
469
  with col4:
470
  avg_confidence = results_df['confidence'].mean()
471
- st.metric("Avg Confidence", f"{avg_confidence:.2%}")
472
-
473
  # Results visualization
474
  if len(results_df) > 0:
475
  fig = px.histogram(
@@ -479,268 +774,183 @@ def main():
479
  title="Batch Analysis Results"
480
  )
481
  st.plotly_chart(fig, use_container_width=True)
482
-
483
  # Download results
484
  csv_buffer = io.StringIO()
485
  results_df.to_csv(csv_buffer, index=False)
486
-
487
  st.download_button(
488
  label="📥 Download Results",
489
  data=csv_buffer.getvalue(),
490
  file_name=f"batch_analysis_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv",
491
  mime="text/csv"
492
  )
493
-
494
  except Exception as e:
495
  st.error(f"Error processing file: {e}")
496
-
497
  # Tab 3: Analytics
498
  with tab3:
499
  st.header("System Analytics")
500
-
501
  # Prediction history
502
  if st.session_state.prediction_history:
503
  st.subheader("Recent Predictions")
504
-
505
  # History chart
506
  fig_history = create_prediction_history_chart()
507
  if fig_history:
508
  st.plotly_chart(fig_history, use_container_width=True)
509
-
510
  # History table
511
  history_df = pd.DataFrame(st.session_state.prediction_history)
512
  st.dataframe(history_df.tail(20), use_container_width=True)
513
-
514
  else:
515
- st.info("No prediction history available. Make some predictions to see analytics.")
516
-
517
  # System metrics
518
  st.subheader("System Metrics")
519
-
520
  # Load various log files for analytics
521
  try:
522
  # API health check
523
  if app_manager.api_available:
524
- response = app_manager.session.get(f"{app_manager.config['api_url']}/metrics")
525
  if response.status_code == 200:
526
  metrics = response.json()
527
-
528
  col1, col2, col3, col4 = st.columns(4)
529
-
530
  with col1:
531
- st.metric("Total API Requests", metrics.get('total_requests', 0))
532
-
533
  with col2:
534
- st.metric("Unique Clients", metrics.get('unique_clients', 0))
535
-
536
  with col3:
537
- st.metric("Model Version", metrics.get('model_version', 'Unknown'))
538
-
539
  with col4:
540
  status = metrics.get('model_health', 'unknown')
541
  st.metric("Model Status", status)
542
-
543
  except Exception as e:
544
  st.warning(f"Could not load API metrics: {e}")
545
-
546
  # Tab 4: Model Training
547
  with tab4:
548
- st.header("Custom Model Training")
549
-
550
- st.info("Upload your own dataset to retrain the model with custom data.")
551
-
552
  # File upload for training
553
  training_file = st.file_uploader(
554
  "Upload training dataset (CSV):",
555
  type=['csv'],
556
  help="CSV file should contain 'text' and 'label' columns (label: 0=Real, 1=Fake)"
557
  )
558
-
559
  if training_file:
560
  try:
561
  df_train = pd.read_csv(training_file)
562
-
563
  required_columns = ['text', 'label']
564
- missing_columns = [col for col in required_columns if col not in df_train.columns]
565
-
566
  if missing_columns:
567
  st.error(f"Missing required columns: {missing_columns}")
568
  else:
569
- st.success(f"Training file loaded: {len(df_train)} samples")
570
-
571
- # Data validation
572
- label_counts = df_train['label'].value_counts()
573
-
574
- col1, col2 = st.columns(2)
575
-
576
- with col1:
577
- st.subheader("Dataset Overview")
578
- st.write(f"Total samples: {len(df_train)}")
579
- st.write(f"Real news (0): {label_counts.get(0, 0)}")
580
- st.write(f"Fake news (1): {label_counts.get(1, 0)}")
581
-
582
- with col2:
583
- # Label distribution chart
584
- fig_labels = px.pie(
585
- values=label_counts.values,
586
- names=['Real', 'Fake'],
587
- title="Label Distribution"
588
- )
589
- st.plotly_chart(fig_labels)
590
-
591
- # Training options
592
- st.subheader("Training Configuration")
593
-
594
- col1, col2 = st.columns(2)
595
-
596
- with col1:
597
- test_size = st.slider("Test Size", 0.1, 0.4, 0.2, 0.05)
598
- max_features = st.number_input("Max Features", 1000, 20000, 10000, 1000)
599
-
600
- with col2:
601
- cross_validation = st.checkbox("Cross Validation", value=True)
602
- hyperparameter_tuning = st.checkbox("Hyperparameter Tuning", value=False)
603
-
604
- # Start training
605
- if st.button("🏃‍♂️ Start Training", type="primary"):
606
- # Save training data
607
- app_manager.paths['custom_data'].parent.mkdir(parents=True, exist_ok=True)
608
- df_train.to_csv(app_manager.paths['custom_data'], index=False)
609
-
610
- # Progress simulation
611
- progress_bar = st.progress(0)
612
- status_text = st.empty()
613
-
614
- training_steps = [
615
- "Preprocessing data...",
616
- "Splitting dataset...",
617
- "Training model...",
618
- "Evaluating performance...",
619
- "Saving model..."
620
- ]
621
-
622
- for i, step in enumerate(training_steps):
623
- status_text.text(step)
624
- progress_bar.progress((i + 1) / len(training_steps))
625
- time.sleep(2) # Simulate processing time
626
-
627
- # Run actual training
628
- try:
629
- result = subprocess.run(
630
- [sys.executable, "model/train.py",
631
- "--data_path", str(app_manager.paths['custom_data'])],
632
- capture_output=True,
633
- text=True,
634
- timeout=300
635
- )
636
-
637
- if result.returncode == 0:
638
- st.success("🎉 Training completed successfully!")
639
-
640
- # Try to extract accuracy from output
641
- try:
642
- output_lines = result.stdout.strip().split('\n')
643
- for line in output_lines:
644
- if 'accuracy' in line.lower():
645
- st.info(f"Model performance: {line}")
646
- except:
647
- pass
648
-
649
- # Reload API model
650
- if app_manager.api_available:
651
- try:
652
- reload_response = app_manager.session.post(
653
- f"{app_manager.config['api_url']}/model/reload"
654
- )
655
- if reload_response.status_code == 200:
656
- st.success("✅ Model reloaded in API successfully!")
657
- except:
658
- st.warning("⚠️ Model trained but API reload failed")
659
-
660
- else:
661
- st.error(f"Training failed: {result.stderr}")
662
-
663
- except subprocess.TimeoutExpired:
664
- st.error("Training timed out. Please try with a smaller dataset.")
665
- except Exception as e:
666
- st.error(f"Training error: {e}")
667
-
668
  except Exception as e:
669
  st.error(f"Error loading training file: {e}")
670
-
671
  # Tab 5: System Status
672
  with tab5:
673
  render_system_status()
674
 
 
675
  def render_system_status():
676
  """Render system status tab"""
677
  st.header("System Status & Monitoring")
678
-
679
  # Auto-refresh toggle
680
  col1, col2 = st.columns([1, 4])
681
  with col1:
682
- st.session_state.auto_refresh = st.checkbox("Auto Refresh", value=st.session_state.auto_refresh)
683
-
684
  with col2:
685
  if st.button("🔄 Refresh Now"):
686
  st.session_state.last_refresh = datetime.now()
687
  st.rerun()
688
-
689
  # System health overview
690
  st.subheader("🏥 System Health")
691
-
692
  if app_manager.api_available:
693
  try:
694
- health_response = app_manager.session.get(f"{app_manager.config['api_url']}/health")
695
  if health_response.status_code == 200:
696
  health_data = health_response.json()
697
-
698
  # Overall status
699
  overall_status = health_data.get('status', 'unknown')
700
  if overall_status == 'healthy':
701
  st.success("🟢 System Status: Healthy")
702
  else:
703
  st.error("🔴 System Status: Unhealthy")
704
-
705
  # Detailed health metrics
706
  col1, col2, col3 = st.columns(3)
707
-
708
  with col1:
709
  st.subheader("🤖 Model Health")
710
  model_health = health_data.get('model_health', {})
711
-
712
  for key, value in model_health.items():
713
  if key != 'test_prediction':
714
- st.write(f"**{key.replace('_', ' ').title()}:** {value}")
715
-
716
  with col2:
717
  st.subheader("💻 System Resources")
718
  system_health = health_data.get('system_health', {})
719
-
720
  for key, value in system_health.items():
721
  if isinstance(value, (int, float)):
722
- st.metric(key.replace('_', ' ').title(), f"{value:.1f}%")
723
-
724
  with col3:
725
  st.subheader("🔗 API Health")
726
  api_health = health_data.get('api_health', {})
727
-
728
  for key, value in api_health.items():
729
- st.write(f"**{key.replace('_', ' ').title()}:** {value}")
730
-
731
  except Exception as e:
732
  st.error(f"Failed to get health status: {e}")
733
-
734
  else:
735
  st.error("🔴 API Service is not available")
736
-
737
  # Model information
738
  st.subheader("🎯 Model Information")
739
-
740
  metadata = load_json_file(app_manager.paths['metadata'], {})
741
  if metadata:
742
  col1, col2 = st.columns(2)
743
-
744
  with col1:
745
  for key in ['model_version', 'test_accuracy', 'test_f1', 'model_type']:
746
  if key in metadata:
@@ -750,7 +960,7 @@ def render_system_status():
750
  st.metric(display_key, f"{value:.4f}")
751
  else:
752
  st.metric(display_key, str(value))
753
-
754
  with col2:
755
  for key in ['train_size', 'timestamp', 'data_version']:
756
  if key in metadata:
@@ -758,49 +968,52 @@ def render_system_status():
758
  value = metadata[key]
759
  if key == 'timestamp':
760
  try:
761
- dt = datetime.fromisoformat(value.replace('Z', '+00:00'))
762
  value = dt.strftime('%Y-%m-%d %H:%M:%S')
763
  except:
764
  pass
765
  st.write(f"**{display_key}:** {value}")
766
-
767
  else:
768
  st.warning("No model metadata available")
769
-
770
  # Recent activity
771
  st.subheader("📜 Recent Activity")
772
-
773
  activity_log = load_json_file(app_manager.paths['activity_log'], [])
774
  if activity_log:
775
- recent_activities = activity_log[-10:] if len(activity_log) > 10 else activity_log
776
-
777
  for entry in reversed(recent_activities):
778
  timestamp = entry.get('timestamp', 'Unknown')
779
  event = entry.get('event', 'Unknown event')
780
  level = entry.get('level', 'INFO')
781
-
782
  if level == 'ERROR':
783
  st.error(f"🔴 {timestamp} - {event}")
784
  elif level == 'WARNING':
785
  st.warning(f"🟡 {timestamp} - {event}")
786
  else:
787
  st.info(f"🔵 {timestamp} - {event}")
788
-
789
  else:
790
  st.info("No recent activity logs found")
791
-
792
  # File system status
793
  st.subheader("📁 File System Status")
794
-
795
  critical_files = [
796
- ("/tmp/model.pkl", "Main Model"),
797
  ("/tmp/vectorizer.pkl", "Vectorizer"),
798
- ("/tmp/data/combined_dataset.csv", "Training Dataset"),
799
- ("/tmp/metadata.json", "Model Metadata")
800
  ]
801
-
802
  col1, col2 = st.columns(2)
803
-
804
  with col1:
805
  st.write("**Critical Files:**")
806
  for file_path, description in critical_files:
@@ -808,18 +1021,18 @@ def render_system_status():
808
  st.success(f"✅ {description}")
809
  else:
810
  st.error(f"❌ {description}")
811
-
812
  with col2:
813
  # Disk usage information
814
  try:
815
  import shutil
816
  total, used, free = shutil.disk_usage("/tmp")
817
-
818
  st.write("**Disk Usage (/tmp):**")
819
  st.write(f"Total: {total // (1024**3)} GB")
820
  st.write(f"Used: {used // (1024**3)} GB")
821
  st.write(f"Free: {free // (1024**3)} GB")
822
-
823
  usage_percent = (used / total) * 100
824
  if usage_percent > 90:
825
  st.error(f"⚠️ Disk usage: {usage_percent:.1f}%")
@@ -827,34 +1040,90 @@ def render_system_status():
827
  st.warning(f"⚠️ Disk usage: {usage_percent:.1f}%")
828
  else:
829
  st.success(f"✅ Disk usage: {usage_percent:.1f}%")
830
-
831
  except Exception as e:
832
  st.error(f"Cannot check disk usage: {e}")
833
 
834
- # Initialize system button
835
- if st.button("🔧 Initialize System", help="Run system initialization if components are missing"):
836
- with st.spinner("Running system initialization..."):
837
  try:
838
- result = subprocess.run(
839
- [sys.executable, "/app/initialize_system.py"],
840
- capture_output=True,
841
- text=True,
842
- timeout=300
843
- )
844
 
845
- if result.returncode == 0:
846
- st.success("✅ System initialization completed successfully!")
847
- st.code(result.stdout)
848
- time.sleep(2)
849
- st.rerun()
850
- else:
851
- st.error("❌ System initialization failed")
852
- st.code(result.stderr)
853
-
854
- except subprocess.TimeoutExpired:
855
- st.error("⏰ Initialization timed out")
856
  except Exception as e:
857
- st.error(f"❌ Initialization error: {e}")
858
 
859
  # Auto-refresh logic
860
  if st.session_state.auto_refresh:
@@ -863,6 +1132,15 @@ if st.session_state.auto_refresh:
863
  st.session_state.last_refresh = datetime.now()
864
  st.rerun()
865

866
  # Run main application
867
  if __name__ == "__main__":
868
  main()
 
15
  import plotly.graph_objects as go
16
  from datetime import datetime, timedelta
17
  from typing import Dict, List, Optional, Any
18
+ import contextlib
19
 
20
  # Configure logging
21
  logging.basicConfig(level=logging.INFO)
 
24
  # Add root to sys.path for imports
25
  sys.path.append(str(Path(__file__).resolve().parent.parent))
26
 
27
+ # Try to import trainer directly for better progress tracking
28
+ try:
29
+ from model.train import RobustModelTrainer, estimate_training_time
30
+ DIRECT_TRAINING_AVAILABLE = True
31
+ except ImportError:
32
+ RobustModelTrainer = None
33
+ estimate_training_time = None
34
+ DIRECT_TRAINING_AVAILABLE = False
35
+ logger.warning("Direct training import failed, using subprocess fallback")
36
+
37
+
38
  class StreamlitAppManager:
39
  """Manages Streamlit application state and functionality"""
40
+
41
  def __init__(self):
42
  self.setup_config()
43
  self.setup_paths()
44
  self.setup_api_client()
45
  self.initialize_session_state()
46
+
47
  def setup_config(self):
48
  """Setup application configuration"""
49
  self.config = {
 
55
  'refresh_interval': 60,
56
  'max_batch_size': 10
57
  }
58
+
59
  def setup_paths(self):
60
  """Setup file paths"""
61
  self.paths = {
 
67
  'scheduler_log': Path("/tmp/logs/scheduler_execution.json"),
68
  'error_log': Path("/tmp/logs/scheduler_errors.json")
69
  }
70
+
71
  def setup_api_client(self):
72
  """Setup API client with error handling"""
73
  self.session = requests.Session()
74
  self.session.timeout = self.config['prediction_timeout']
75
+
76
  # Test API connection
77
  self.api_available = self.test_api_connection()
78
+
79
  def test_api_connection(self) -> bool:
80
  """Test API connection"""
81
  try:
82
+ response = self.session.get(
83
+ f"{self.config['api_url']}/health", timeout=5)
84
  return response.status_code == 200
85
  except:
86
  return False
87
+
88
  def initialize_session_state(self):
89
  """Initialize Streamlit session state"""
90
  if 'prediction_history' not in st.session_state:
91
  st.session_state.prediction_history = []
92
+
93
  if 'upload_history' not in st.session_state:
94
  st.session_state.upload_history = []
95
+
96
  if 'last_refresh' not in st.session_state:
97
  st.session_state.last_refresh = datetime.now()
98
+
99
  if 'auto_refresh' not in st.session_state:
100
  st.session_state.auto_refresh = False
101
 
102
+
103
  # Initialize app manager
104
  app_manager = StreamlitAppManager()
105
 
 
155
  </style>
156
  """, unsafe_allow_html=True)
157
 
158
+
159
  def load_json_file(file_path: Path, default: Any = None) -> Any:
160
  """Safely load JSON file with error handling"""
161
  try:
 
167
  logger.error(f"Failed to load {file_path}: {e}")
168
  return default or {}
169
 
170
+
171
  def save_prediction_to_history(text: str, prediction: str, confidence: float):
172
  """Save prediction to session history"""
173
  prediction_entry = {
 
177
  'confidence': confidence,
178
  'text_length': len(text)
179
  }
180
+
181
  st.session_state.prediction_history.append(prediction_entry)
182
+
183
  # Keep only last 50 predictions
184
  if len(st.session_state.prediction_history) > 50:
185
  st.session_state.prediction_history = st.session_state.prediction_history[-50:]
186
 
187
+
188
  def make_prediction_request(text: str) -> Dict[str, Any]:
189
  """Make prediction request to API"""
190
  try:
191
  if not app_manager.api_available:
192
  return {'error': 'API is not available'}
193
+
194
  response = app_manager.session.post(
195
  f"{app_manager.config['api_url']}/predict",
196
  json={"text": text},
197
  timeout=app_manager.config['prediction_timeout']
198
  )
199
+
200
  if response.status_code == 200:
201
  return response.json()
202
  else:
203
  return {'error': f'API Error: {response.status_code} - {response.text}'}
204
+
205
  except requests.exceptions.Timeout:
206
  return {'error': 'Request timed out. Please try again.'}
207
  except requests.exceptions.ConnectionError:
 
209
  except Exception as e:
210
  return {'error': f'Unexpected error: {str(e)}'}
211
 
212
+
213
  def validate_text_input(text: str) -> tuple[bool, str]:
214
  """Validate text input"""
215
  if not text or not text.strip():
216
  return False, "Please enter some text to analyze."
217
+
218
  if len(text) < 10:
219
  return False, "Text must be at least 10 characters long."
220
+
221
  if len(text) > app_manager.config['max_text_length']:
222
  return False, f"Text must be less than {app_manager.config['max_text_length']} characters."
223
+
224
  # Check for suspicious content
225
  suspicious_patterns = ['<script', 'javascript:', 'data:']
226
  if any(pattern in text.lower() for pattern in suspicious_patterns):
227
  return False, "Text contains suspicious content."
228
+
229
  return True, "Valid"
230
 
231
+
232
  def create_confidence_gauge(confidence: float, prediction: str):
233
  """Create confidence gauge visualization"""
234
  fig = go.Figure(go.Indicator(
235
+ mode="gauge+number+delta",
236
+ value=confidence * 100,
237
+ domain={'x': [0, 1], 'y': [0, 1]},
238
+ title={'text': f"Confidence: {prediction}"},
239
+ delta={'reference': 50},
240
+ gauge={
241
  'axis': {'range': [None, 100]},
242
  'bar': {'color': "red" if prediction == "Fake" else "green"},
243
  'steps': [
 
252
  }
253
  }
254
  ))
255
+
256
  fig.update_layout(height=300)
257
  return fig
258
 
259
+
260
  def create_prediction_history_chart():
261
  """Create prediction history visualization"""
262
  if not st.session_state.prediction_history:
263
  return None
264
+
265
  df = pd.DataFrame(st.session_state.prediction_history)
266
  df['timestamp'] = pd.to_datetime(df['timestamp'])
267
  df['confidence_percent'] = df['confidence'] * 100
268
+
269
  fig = px.scatter(
270
+ df,
271
+ x='timestamp',
272
  y='confidence_percent',
273
  color='prediction',
274
  size='text_length',
 
276
  title="Prediction History",
277
  labels={'confidence_percent': 'Confidence (%)', 'timestamp': 'Time'}
278
  )
279
+
280
  fig.update_layout(height=400)
281
  return fig
282
 
283
+
284
+ def estimate_training_time_streamlit(dataset_size: int) -> dict:
285
+ """Estimate training time for Streamlit display"""
286
+ if estimate_training_time:
287
+ # Use the imported function
288
+ detailed_estimate = estimate_training_time(dataset_size, enable_tuning=True, cv_folds=3)
289
+ return {
290
+ 'detailed': detailed_estimate,
291
+ 'simple_range': f"{int(detailed_estimate['total_seconds']//60)}:{int(detailed_estimate['total_seconds']%60):02d}",
292
+ 'category': 'small' if dataset_size < 100 else 'medium' if dataset_size < 1000 else 'large'
293
+ }
294
+ else:
295
+ # Fallback estimation
296
+ if dataset_size < 100:
297
+ return {'simple_range': '0:30-1:00', 'category': 'small'}
298
+ elif dataset_size < 1000:
299
+ return {'simple_range': '1:00-3:00', 'category': 'medium'}
300
+ else:
301
+ return {'simple_range': '3:00+', 'category': 'large'}
302
+
303
+
304
+ def render_enhanced_training_section(df_train):
305
+ """Enhanced training section with progress tracking"""
306
+ st.header("Custom Model Training")
307
+ st.info("Upload your own dataset to retrain the model with custom data.")
308
+
309
+ # Show dataset info and time estimate
310
+ dataset_size = len(df_train)
311
+ time_estimate = estimate_training_time_streamlit(dataset_size)
312
+
313
+ # Training information display
314
+ st.markdown("### 📊 Training Information")
315
+ col1, col2, col3, col4 = st.columns(4)
316
+
317
+ with col1:
318
+ st.metric("Dataset Size", f"{dataset_size} samples")
319
+ with col2:
320
+ if 'detailed' in time_estimate:
321
+ est_time = time_estimate['detailed']['total_formatted']
322
+ else:
323
+ est_time = time_estimate['simple_range']
324
+ st.metric("Estimated Time", est_time)
325
+ with col3:
326
+ st.metric("Category", time_estimate['category'].title())
327
+ with col4:
328
+ training_method = "Full Pipeline" if dataset_size >= 50 else "Simplified"
329
+ st.metric("Training Mode", training_method)
330
+
331
+ # Dataset preview
332
+ with st.expander("👀 Dataset Preview"):
333
+ st.dataframe(df_train.head(10))
334
+
335
+ # Dataset statistics
336
+ label_counts = df_train['label'].value_counts()
337
+ col1, col2 = st.columns(2)
338
+
339
+ with col1:
340
+ st.subheader("Class Distribution")
341
+ st.write(f"Real news (0): {label_counts.get(0, 0)}")
342
+ st.write(f"Fake news (1): {label_counts.get(1, 0)}")
343
+
344
+ with col2:
345
+ # Label distribution chart
346
+ fig_labels = px.pie(
347
+ values=label_counts.values,
348
+ names=['Real', 'Fake'],
349
+ title="Label Distribution"
350
+ )
351
+ st.plotly_chart(fig_labels, use_container_width=True)
352
+
353
+ # Training configuration
354
+ with st.expander("⚙️ Training Configuration"):
355
+ col1, col2 = st.columns(2)
356
+
357
+ with col1:
358
+ if dataset_size < 20:
359
+ st.warning("⚠️ Very small dataset: Hyperparameter tuning will be skipped")
360
+ st.info("• Simple training only")
361
+ st.info("• Minimal cross-validation")
362
+ elif dataset_size < 50:
363
+ st.info("ℹ️ Small dataset: Limited hyperparameter tuning")
364
+ st.info("• Reduced parameter grids")
365
+ st.info("• 2-3 fold cross-validation")
366
+ else:
367
+ st.success("✅ Standard dataset: Full training pipeline")
368
+ st.info("• Complete hyperparameter tuning")
369
+ st.info("• 3-fold cross-validation")
370
+ st.info("• Model comparison")
371
+
372
+ with col2:
373
+ st.write("**Expected Features:**")
374
+ st.write(f"• TF-IDF vectorization")
375
+ st.write(f"• Feature selection")
376
+ st.write(f"• Logistic Regression")
377
+ if dataset_size >= 50:
378
+ st.write(f"• Random Forest comparison")
379
+ st.write(f"• Performance evaluation")
380
+
381
+ # Training button and execution
382
+ if st.button("🏃‍♂️ Start Training", type="primary", use_container_width=True):
383
+ # Save training data
384
+ app_manager.paths['custom_data'].parent.mkdir(parents=True, exist_ok=True)
385
+ df_train.to_csv(app_manager.paths['custom_data'], index=False)
386
+
387
+ st.markdown("---")
388
+ st.markdown("### 🔄 Training Progress")
389
+
390
+ # Progress containers
391
+ progress_col1, progress_col2 = st.columns([3, 1])
392
+
393
+ with progress_col1:
394
+ progress_bar = st.progress(0)
395
+ status_text = st.empty()
396
+
397
+ with progress_col2:
398
+ time_display = st.empty()
399
+
400
+ # Start training
401
+ start_time = time.time()
402
+
403
+ if DIRECT_TRAINING_AVAILABLE:
404
+ # Method 1: Direct function call (shows progress in real-time)
405
+ status_text.text("Status: Initializing direct training...")
406
+ progress_bar.progress(5)
407
+
408
+ try:
409
+ # Create output capture
410
+ output_buffer = io.StringIO()
411
+
412
+ with st.spinner("Training model (direct method)..."):
413
+ # Redirect stdout to capture progress
414
+ with contextlib.redirect_stdout(output_buffer):
415
+ trainer = RobustModelTrainer()
416
+ success, message = trainer.train_model(
417
+ data_path=str(app_manager.paths['custom_data'])
418
+ )
419
+
420
+ elapsed_time = time.time() - start_time
421
+ time_display.text(f"Elapsed: {timedelta(seconds=int(elapsed_time))}")
422
+
423
+ # Show final progress
424
+ progress_bar.progress(100)
425
+ status_text.text("Status: Training completed!")
426
+
427
+ # Get captured output
428
+ captured_output = output_buffer.getvalue()
429
+
430
+ if success:
431
+ st.success("🎉 **Training Completed Successfully!**")
432
+ st.info(f"📊 **{message}**")
433
+
434
+ # Show captured progress if available
435
+ if captured_output:
436
+ with st.expander("📈 Training Progress Details"):
437
+ st.code(captured_output)
438
+
439
+ else:
440
+ st.error(f"❌ **Training Failed:** {message}")
441
+ if captured_output:
442
+ with st.expander("🔍 Debug Output"):
443
+ st.code(captured_output)
444
+
445
+ except Exception as e:
446
+ st.error(f"❌ **Training Error:** {str(e)}")
447
+
448
+ else:
449
+ # Method 2: Subprocess with progress simulation
450
+ status_text.text("Status: Starting subprocess training...")
451
+ progress_bar.progress(10)
452
+
453
+ try:
454
+ # Simulate progress during subprocess execution
455
+ progress_steps = [
456
+ (20, "Loading and validating data..."),
457
+ (40, "Creating preprocessing pipeline..."),
458
+ (60, "Training models..."),
459
+ (80, "Evaluating performance..."),
460
+ (95, "Saving model artifacts...")
461
+ ]
462
+
463
+ # Start subprocess
464
+ process = subprocess.Popen(
465
+ [sys.executable, "model/train.py", "--data_path", str(app_manager.paths['custom_data'])],
466
+ stdout=subprocess.PIPE,
467
+ stderr=subprocess.STDOUT,
468
+ universal_newlines=True
469
+ )
470
+
471
+ # Simulate progress while waiting
472
+ step_idx = 0
473
+ while process.poll() is None:
474
+ elapsed = time.time() - start_time
475
+ time_display.text(f"Elapsed: {timedelta(seconds=int(elapsed))}")
476
+
477
+ # Update progress based on elapsed time
478
+ if step_idx < len(progress_steps):
479
+ expected_time = dataset_size * 0.1 # Rough estimate
480
+ if elapsed > expected_time * (step_idx + 1) / len(progress_steps):
481
+ progress, status = progress_steps[step_idx]
482
+ progress_bar.progress(progress)
483
+ status_text.text(f"Status: {status}")
484
+ step_idx += 1
485
+
486
+ time.sleep(1)
487
+
488
+ # Get final output
489
+ stdout, _ = process.communicate()
490
+
491
+ # Final progress
492
+ progress_bar.progress(100)
493
+ status_text.text("Status: Training completed!")
494
+
495
+ elapsed_time = time.time() - start_time
496
+ time_display.text(f"Completed: {timedelta(seconds=int(elapsed_time))}")
497
+
498
+ if process.returncode == 0:
499
+ st.success("🎉 **Training Completed Successfully!**")
500
+
501
+ # Extract performance info from output
502
+ if stdout:
503
+ lines = stdout.strip().split('\n')
504
+ for line in lines[-10:]: # Check last 10 lines
505
+ if 'Best model:' in line:
506
+ st.info(f"📊 **{line}**")
507
+ elif any(keyword in line.lower() for keyword in ['accuracy', 'f1']):
508
+ if line.strip():
509
+ st.info(f"📈 **Performance:** {line}")
510
+
511
+ # Show full output in expander
512
+ with st.expander("📋 Complete Training Log"):
513
+ st.code(stdout)
514
+
515
+ else:
516
+ st.error("❌ **Training Failed**")
517
+ st.code(stdout)
518
+
519
+ except Exception as e:
520
+ st.error(f"❌ **Training Error:** {str(e)}")
521
+
522
+ # Try to reload model in API regardless of training method
523
+ if app_manager.api_available:
524
+ try:
525
+ with st.spinner("Reloading model in API..."):
526
+ reload_response = app_manager.session.post(
527
+ f"{app_manager.config['api_url']}/model/reload",
528
+ timeout=30
529
+ )
530
+ if reload_response.status_code == 200:
531
+ st.success("✅ **Model reloaded in API successfully!**")
532
+ else:
533
+ st.warning("⚠️ Model trained but API reload failed")
534
+ except Exception as e:
535
+ st.warning(f"⚠️ Model trained but API reload failed: {str(e)}")
536
+
537
+ # Training tips
538
+ st.markdown("---")
539
+ st.markdown("### 💡 Training Tips")
540
+ st.info("✓ **Model saved successfully** - You can now test predictions")
541
+ st.info("✓ **Try different datasets** to improve performance")
542
+ st.info("✓ **Larger datasets** (50+ samples) enable full hyperparameter tuning")
543
+
544
+
545
  # Main application
546
  def main():
547
  """Main Streamlit application"""
548
+
549
  # Header
550
+ st.markdown('<h1 class="main-header">📰 Fake News Detection System</h1>',
551
+ unsafe_allow_html=True)
552
+
553
  # API Status indicator
554
  col1, col2, col3 = st.columns([1, 2, 1])
555
  with col2:
556
  if app_manager.api_available:
557
+ st.markdown(
558
+ '<div class="success-message">🟢 API Service: Online</div>', unsafe_allow_html=True)
559
  else:
560
+ st.markdown(
561
+ '<div class="error-message">🔴 API Service: Offline</div>', unsafe_allow_html=True)
562
+
563
  # Main content area
564
  tab1, tab2, tab3, tab4, tab5 = st.tabs([
565
+ "🔍 Prediction",
566
+ "📊 Batch Analysis",
567
+ "📈 Analytics",
568
+ "🎯 Model Training",
569
  "⚙️ System Status"
570
  ])
571
+
572
  # Tab 1: Individual Prediction
573
  with tab1:
574
  st.header("Single Text Analysis")
575
+
576
  # Input methods
577
  input_method = st.radio(
578
  "Choose input method:",
579
  ["Type Text", "Upload File"],
580
  horizontal=True
581
  )
582
+
583
  user_text = ""
584
+
585
  if input_method == "Type Text":
586
  user_text = st.text_area(
587
  "Enter news article text:",
588
  height=200,
589
  placeholder="Paste or type the news article you want to analyze..."
590
  )
591
+
592
  else: # Upload File
593
  uploaded_file = st.file_uploader(
594
  "Upload text file:",
595
  type=['txt', 'csv'],
596
  help="Upload a text file containing the article to analyze"
597
  )
598
+
599
  if uploaded_file:
600
  try:
601
  if uploaded_file.type == "text/plain":
 
603
  elif uploaded_file.type == "text/csv":
604
  df = pd.read_csv(uploaded_file)
605
  if 'text' in df.columns:
606
+ user_text = df['text'].iloc[0] if len(
607
+ df) > 0 else ""
608
  else:
609
  st.error("CSV file must contain a 'text' column")
610
+
611
+ st.success(
612
+ f"File uploaded successfully! ({len(user_text)} characters)")
613
+
614
  except Exception as e:
615
  st.error(f"Error reading file: {e}")
616
+
617
  # Prediction section
618
  col1, col2 = st.columns([3, 1])
619
+
620
  with col1:
621
  if st.button("🧠 Analyze Text", type="primary", use_container_width=True):
622
  if user_text:
623
  # Validate input
624
+ is_valid, validation_message = validate_text_input(
625
+ user_text)
626
+
627
  if not is_valid:
628
  st.error(validation_message)
629
  else:
630
  # Show progress
631
  with st.spinner("Analyzing text..."):
632
  result = make_prediction_request(user_text)
633
+
634
  if 'error' in result:
635
  st.error(f"❌ {result['error']}")
636
  else:
637
  # Display results
638
  prediction = result['prediction']
639
  confidence = result['confidence']
640
+
641
  # Save to history
642
+ save_prediction_to_history(
643
+ user_text, prediction, confidence)
644
+
645
  # Results display
646
  col_result1, col_result2 = st.columns(2)
647
+
648
  with col_result1:
649
  if prediction == "Fake":
650
  st.markdown(f"""
 
660
  <p>Confidence: {confidence:.2%}</p>
661
  </div>
662
  """, unsafe_allow_html=True)
663
+
664
  with col_result2:
665
  # Confidence gauge
666
+ fig_gauge = create_confidence_gauge(
667
+ confidence, prediction)
668
+ st.plotly_chart(
669
+ fig_gauge, use_container_width=True)
670
+
671
  # Additional information
672
  with st.expander("📋 Analysis Details"):
673
  st.json({
 
679
  })
680
  else:
681
  st.warning("Please enter text to analyze.")
682
+
683
  with col2:
684
  if st.button("🔄 Clear Text", use_container_width=True):
685
  st.rerun()
686
+
687
  # Tab 2: Batch Analysis
688
  with tab2:
689
  st.header("Batch Text Analysis")
690
+
691
  # File upload for batch processing
692
  batch_file = st.file_uploader(
693
  "Upload CSV file for batch analysis:",
694
  type=['csv'],
695
  help="CSV file should contain a 'text' column with articles to analyze"
696
  )
697
+
698
  if batch_file:
699
  try:
700
  df = pd.read_csv(batch_file)
701
+
702
  if 'text' not in df.columns:
703
  st.error("CSV file must contain a 'text' column")
704
  else:
705
  st.success(f"File loaded: {len(df)} articles found")
706
+
707
  # Preview data
708
  st.subheader("Data Preview")
709
  st.dataframe(df.head(10))
710
+
711
  # Batch processing
712
  if st.button("🚀 Process Batch", type="primary"):
713
  if len(df) > app_manager.config['max_batch_size']:
714
+ st.warning(
715
+ f"Only processing first {app_manager.config['max_batch_size']} articles")
716
  df = df.head(app_manager.config['max_batch_size'])
717
+
718
  progress_bar = st.progress(0)
719
  status_text = st.empty()
720
  results = []
721
+
722
  for i, row in df.iterrows():
723
+ status_text.text(
724
+ f"Processing article {i+1}/{len(df)}...")
725
  progress_bar.progress((i + 1) / len(df))
726
+
727
  result = make_prediction_request(row['text'])
728
+
729
  if 'error' not in result:
730
  results.append({
731
  'text': row['text'][:100] + "...",
 
740
  'confidence': 0,
741
  'processing_time': 0
742
  })
743
+
744
  # Display results
745
  results_df = pd.DataFrame(results)
746
+
747
  # Summary statistics
748
  col1, col2, col3, col4 = st.columns(4)
749
+
750
  with col1:
751
  st.metric("Total Processed", len(results_df))
752
+
753
  with col2:
754
+ fake_count = len(
755
+ results_df[results_df['prediction'] == 'Fake'])
756
  st.metric("Fake News", fake_count)
757
+
758
  with col3:
759
+ real_count = len(
760
+ results_df[results_df['prediction'] == 'Real'])
761
  st.metric("Real News", real_count)
762
+
763
  with col4:
764
  avg_confidence = results_df['confidence'].mean()
765
+ st.metric("Avg Confidence",
766
+ f"{avg_confidence:.2%}")
767
+
768
  # Results visualization
769
  if len(results_df) > 0:
770
  fig = px.histogram(
 
774
  title="Batch Analysis Results"
775
  )
776
  st.plotly_chart(fig, use_container_width=True)
777
+
778
  # Download results
779
  csv_buffer = io.StringIO()
780
  results_df.to_csv(csv_buffer, index=False)
781
+
782
  st.download_button(
783
  label="📥 Download Results",
784
  data=csv_buffer.getvalue(),
785
  file_name=f"batch_analysis_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv",
786
  mime="text/csv"
787
  )
788
+
789
  except Exception as e:
790
  st.error(f"Error processing file: {e}")
791
+
792
  # Tab 3: Analytics
793
  with tab3:
794
  st.header("System Analytics")
795
+
796
  # Prediction history
797
  if st.session_state.prediction_history:
798
  st.subheader("Recent Predictions")
799
+
800
  # History chart
801
  fig_history = create_prediction_history_chart()
802
  if fig_history:
803
  st.plotly_chart(fig_history, use_container_width=True)
804
+
805
  # History table
806
  history_df = pd.DataFrame(st.session_state.prediction_history)
807
  st.dataframe(history_df.tail(20), use_container_width=True)
808
+
809
  else:
810
+ st.info(
811
+ "No prediction history available. Make some predictions to see analytics.")
812
+
813
  # System metrics
814
  st.subheader("System Metrics")
815
+
816
  # Load various log files for analytics
817
  try:
818
  # API health check
819
  if app_manager.api_available:
820
+ response = app_manager.session.get(
821
+ f"{app_manager.config['api_url']}/metrics")
822
  if response.status_code == 200:
823
  metrics = response.json()
824
+
825
  col1, col2, col3, col4 = st.columns(4)
826
+
827
  with col1:
828
+ st.metric("Total API Requests",
829
+ metrics.get('total_requests', 0))
830
+
831
  with col2:
832
+ st.metric("Unique Clients", metrics.get(
833
+ 'unique_clients', 0))
834
+
835
  with col3:
836
+ st.metric("Model Version", metrics.get(
837
+ 'model_version', 'Unknown'))
838
+
839
  with col4:
840
  status = metrics.get('model_health', 'unknown')
841
  st.metric("Model Status", status)
842
+
843
  except Exception as e:
844
  st.warning(f"Could not load API metrics: {e}")
845
+
846
  # Tab 4: Model Training
847
  with tab4:
848
  # File upload for training
849
  training_file = st.file_uploader(
850
  "Upload training dataset (CSV):",
851
  type=['csv'],
852
  help="CSV file should contain 'text' and 'label' columns (label: 0=Real, 1=Fake)"
853
  )
854
+
855
  if training_file:
856
  try:
857
  df_train = pd.read_csv(training_file)
858
+
859
  required_columns = ['text', 'label']
860
+ missing_columns = [
861
+ col for col in required_columns if col not in df_train.columns]
862
+
863
  if missing_columns:
864
  st.error(f"Missing required columns: {missing_columns}")
865
  else:
866
+ st.success(
867
+ f"Training file loaded: {len(df_train)} samples")
868
+
869
+ # Enhanced training section
870
+ render_enhanced_training_section(df_train)
871
+
872
  except Exception as e:
873
  st.error(f"Error loading training file: {e}")
874
+
875
  # Tab 5: System Status
876
  with tab5:
877
  render_system_status()
878
 
879
+
880
  def render_system_status():
881
  """Render system status tab"""
882
  st.header("System Status & Monitoring")
883
+
884
  # Auto-refresh toggle
885
  col1, col2 = st.columns([1, 4])
886
  with col1:
887
+ st.session_state.auto_refresh = st.checkbox(
888
+ "Auto Refresh", value=st.session_state.auto_refresh)
889
+
890
  with col2:
891
  if st.button("🔄 Refresh Now"):
892
  st.session_state.last_refresh = datetime.now()
893
  st.rerun()
894
+
895
  # System health overview
896
  st.subheader("🏥 System Health")
897
+
898
  if app_manager.api_available:
899
  try:
900
+ health_response = app_manager.session.get(
901
+ f"{app_manager.config['api_url']}/health")
902
  if health_response.status_code == 200:
903
  health_data = health_response.json()
904
+
905
  # Overall status
906
  overall_status = health_data.get('status', 'unknown')
907
  if overall_status == 'healthy':
908
  st.success("🟢 System Status: Healthy")
909
  else:
910
  st.error("🔴 System Status: Unhealthy")
911
+
912
  # Detailed health metrics
913
  col1, col2, col3 = st.columns(3)
914
+
915
  with col1:
916
  st.subheader("🤖 Model Health")
917
  model_health = health_data.get('model_health', {})
918
+
919
  for key, value in model_health.items():
920
  if key != 'test_prediction':
921
+ st.write(
922
+ f"**{key.replace('_', ' ').title()}:** {value}")
923
+
924
  with col2:
925
  st.subheader("💻 System Resources")
926
  system_health = health_data.get('system_health', {})
927
+
928
  for key, value in system_health.items():
929
  if isinstance(value, (int, float)):
930
+ st.metric(key.replace('_', ' ').title(),
931
+ f"{value:.1f}%")
932
+
933
  with col3:
934
  st.subheader("🔗 API Health")
935
  api_health = health_data.get('api_health', {})
936
+
937
  for key, value in api_health.items():
938
+ st.write(
939
+ f"**{key.replace('_', ' ').title()}:** {value}")
940
+
941
  except Exception as e:
942
  st.error(f"Failed to get health status: {e}")
943
+
944
  else:
945
  st.error("🔴 API Service is not available")
946
+
947
  # Model information
948
  st.subheader("🎯 Model Information")
949
+
950
  metadata = load_json_file(app_manager.paths['metadata'], {})
951
  if metadata:
952
  col1, col2 = st.columns(2)
953
+
954
  with col1:
955
  for key in ['model_version', 'test_accuracy', 'test_f1', 'model_type']:
956
  if key in metadata:
 
960
  st.metric(display_key, f"{value:.4f}")
961
  else:
962
  st.metric(display_key, str(value))
963
+
964
  with col2:
965
  for key in ['train_size', 'timestamp', 'data_version']:
966
  if key in metadata:
 
968
  value = metadata[key]
969
  if key == 'timestamp':
970
  try:
971
+ dt = datetime.fromisoformat(
972
+ value.replace('Z', '+00:00'))
973
  value = dt.strftime('%Y-%m-%d %H:%M:%S')
974
  except:
975
  pass
976
  st.write(f"**{display_key}:** {value}")
977
+
978
  else:
979
  st.warning("No model metadata available")
980
+
981
  # Recent activity
982
  st.subheader("📜 Recent Activity")
983
+
984
  activity_log = load_json_file(app_manager.paths['activity_log'], [])
985
  if activity_log:
986
+ recent_activities = activity_log[-10:] if len(
987
+ activity_log) > 10 else activity_log
988
+
989
  for entry in reversed(recent_activities):
990
  timestamp = entry.get('timestamp', 'Unknown')
991
  event = entry.get('event', 'Unknown event')
992
  level = entry.get('level', 'INFO')
993
+
994
  if level == 'ERROR':
995
  st.error(f"🔴 {timestamp} - {event}")
996
  elif level == 'WARNING':
997
  st.warning(f"🟡 {timestamp} - {event}")
998
  else:
999
  st.info(f"🔵 {timestamp} - {event}")
1000
+
1001
  else:
1002
  st.info("No recent activity logs found")
1003
+
1004
  # File system status
1005
  st.subheader("📁 File System Status")
1006
+
1007
  critical_files = [
1008
+ ("/tmp/pipeline.pkl", "Pipeline Model"),
1009
+ ("/tmp/model.pkl", "Model Component"),
1010
  ("/tmp/vectorizer.pkl", "Vectorizer"),
1011
+ ("/tmp/metadata.json", "Model Metadata"),
1012
+ ("/tmp/data/combined_dataset.csv", "Training Dataset")
1013
  ]
1014
+
1015
  col1, col2 = st.columns(2)
1016
+
1017
  with col1:
1018
  st.write("**Critical Files:**")
1019
  for file_path, description in critical_files:
 
1021
  st.success(f"✅ {description}")
1022
  else:
1023
  st.error(f"❌ {description}")
1024
+
1025
  with col2:
1026
  # Disk usage information
1027
  try:
1028
  import shutil
1029
  total, used, free = shutil.disk_usage("/tmp")
1030
+
1031
  st.write("**Disk Usage (/tmp):**")
1032
  st.write(f"Total: {total // (1024**3)} GB")
1033
  st.write(f"Used: {used // (1024**3)} GB")
1034
  st.write(f"Free: {free // (1024**3)} GB")
1035
+
1036
  usage_percent = (used / total) * 100
1037
  if usage_percent > 90:
1038
  st.error(f"⚠️ Disk usage: {usage_percent:.1f}%")
 
1040
  st.warning(f"⚠️ Disk usage: {usage_percent:.1f}%")
1041
  else:
1042
  st.success(f"✅ Disk usage: {usage_percent:.1f}%")
1043
+
1044
  except Exception as e:
1045
  st.error(f"Cannot check disk usage: {e}")
1046
+
1047
+ # System actions
1048
+ st.subheader("🔧 System Actions")
1049
+
1050
+ col1, col2, col3 = st.columns(3)
1051
+
1052
+ with col1:
1053
+ # Initialize system button
1054
+ if st.button("🔧 Initialize System", help="Run system initialization if components are missing"):
1055
+ with st.spinner("Running system initialization..."):
1056
+ try:
1057
+ result = subprocess.run(
1058
+ [sys.executable, "/app/initialize_system.py"],
1059
+ capture_output=True,
1060
+ text=True,
1061
+ timeout=300
1062
+ )
1063
+
1064
+ if result.returncode == 0:
1065
+ st.success(
1066
+ "✅ System initialization completed successfully!")
1067
+ with st.expander("📋 Initialization Output"):
1068
+ st.code(result.stdout)
1069
+ time.sleep(2)
1070
+ st.rerun()
1071
+ else:
1072
+ st.error("❌ System initialization failed")
1073
+ st.code(result.stderr)
1074
+
1075
+ except subprocess.TimeoutExpired:
1076
+ st.error("⏰ Initialization timed out")
1077
+ except Exception as e:
1078
+ st.error(f"❌ Initialization error: {e}")
1079
+
1080
+ with col2:
1081
+ # Reload API model
1082
+ if st.button("🔄 Reload API Model", help="Reload the model in the API service"):
1083
+ if app_manager.api_available:
1084
+ try:
1085
+ with st.spinner("Reloading model in API..."):
1086
+ reload_response = app_manager.session.post(
1087
+ f"{app_manager.config['api_url']}/model/reload",
1088
+ timeout=30
1089
+ )
1090
+ if reload_response.status_code == 200:
1091
+ st.success("✅ Model reloaded successfully!")
1092
+ st.json(reload_response.json())
1093
+ else:
1094
+ st.error(f"❌ Model reload failed: {reload_response.status_code}")
1095
+ except Exception as e:
1096
+ st.error(f"❌ Model reload error: {e}")
1097
+ else:
1098
+ st.error("❌ API service not available")
1099
 
1100
+ with col3:
1101
+ # Clear cache
1102
+ if st.button("🗑️ Clear Cache", help="Clear prediction history and temporary data"):
1103
  try:
1104
+ # Clear session state
1105
+ st.session_state.prediction_history = []
1106
+ st.session_state.upload_history = []
1107
+
1108
+ # Clear temporary files
1109
+ temp_files = [
1110
+ "/tmp/custom_upload.csv",
1111
+ "/tmp/prediction_log.json"
1112
+ ]
1113
+
1114
+ cleared_count = 0
1115
+ for temp_file in temp_files:
1116
+ if Path(temp_file).exists():
1117
+ Path(temp_file).unlink()
1118
+ cleared_count += 1
1119
+
1120
+ st.success(f"✅ Cache cleared! Removed {cleared_count} temporary files")
1121
+ time.sleep(1)
1122
+ st.rerun()
1123

1124
  except Exception as e:
1125
+ st.error(f"❌ Cache clear error: {e}")
1126
+
1127
 
1128
  # Auto-refresh logic
1129
  if st.session_state.auto_refresh:
 
1132
  st.session_state.last_refresh = datetime.now()
1133
  st.rerun()
1134
 
1135
+ # Footer
1136
+ st.markdown("---")
1137
+ st.markdown("""
1138
+ <div style='text-align: center; color: #666; padding: 20px;'>
1139
+ <p>📰 <strong>Fake News Detection System</strong> | Advanced MLOps Pipeline</p>
1140
+ <p>Built with Streamlit, FastAPI, and Scikit-learn | Production-ready with comprehensive monitoring</p>
1141
+ </div>
1142
+ """, unsafe_allow_html=True)
1143
+
1144
  # Run main application
1145
  if __name__ == "__main__":
1146
  main()
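
The training flow this commit adds captures the trainer's console output so it can be replayed inside the Streamlit UI: the direct path wraps the call in contextlib.redirect_stdout writing into an io.StringIO buffer, and a subprocess.Popen fallback polls a child process instead. A minimal, self-contained sketch of that capture pattern, with a hypothetical train() standing in for model.train.RobustModelTrainer.train_model:

import contextlib
import io

def train(data_path: str) -> tuple[bool, str]:
    # Stand-in for the repo's trainer; whatever it prints is the
    # "progress" the app wants to show the user afterwards.
    print(f"Loading data from {data_path}...")
    print("Training model...")
    return True, "Training finished"

# Capture everything the trainer writes to stdout.
buffer = io.StringIO()
with contextlib.redirect_stdout(buffer):
    success, message = train("/tmp/custom_upload.csv")

captured = buffer.getvalue()

# In the app, `message` feeds st.success()/st.error() and `captured`
# is shown inside an st.expander(); here we simply print them back.
print("success:", success, "|", message)
print("--- captured trainer output ---")
print(captured, end="")

One limitation, which is why the subprocess fallback simulates progress instead: redirect_stdout only diverts Python-level writes to sys.stdout in the current process, so output from child processes or C extensions never reaches the buffer.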