Commit 8cf2942
Parent(s): 9d6a9cd

Update app/streamlit_app.py
Cross Validation Implementation

app/streamlit_app.py  CHANGED  (+574 -25)
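The additions in this commit fetch cross-validation artifacts from two new API endpoints (/cv/results and /cv/comparison) and render them in the analytics tab. As a reading aid, here is a minimal sketch of the payload shape the new rendering code appears to expect, inferred only from the keys it reads in this diff; the field names and values are illustrative assumptions, not a documented API contract:

    # Hypothetical /cv/results payload, inferred from the keys read in this diff.
    example_cv_results = {
        "model_version": "v1.2.0",              # example value, assumed
        "model_type": "LogisticRegression",     # example value, assumed
        "training_timestamp": "2024-01-01T00:00:00",
        "cross_validation": {
            "methodology": {"n_splits": 5, "cv_type": "StratifiedKFold", "random_state": 42},
            "test_scores": {"f1": {"mean": 0.91, "std": 0.02}},        # per-metric mean/std
            "individual_fold_results": [
                {"fold": 1, "test_scores": {"f1": 0.90}, "train_scores": {"f1": 0.95}},
            ],
            "performance_indicators": {"overfitting_score": 0.04, "stability_score": 0.93},
        },
        # Optional blocks the UI also handles when present:
        "statistical_validation": {},
        "promotion_validation": {},
    }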
@@ -117,6 +117,48 @@ class StreamlitAppManager:
         if 'auto_refresh' not in st.session_state:
             st.session_state.auto_refresh = False


 # Initialize app manager
 app_manager = StreamlitAppManager()

@@ -244,7 +286,372 @@ def show_logs_section():
     else:
         st.warning(f"Log file not found: {log_path}")


 def save_prediction_to_history(text: str, prediction: str, confidence: float):
     """Save prediction to session history"""
     prediction_entry = {

@@ -357,6 +764,81 @@ def create_prediction_history_chart():
     fig.update_layout(height=400)
     return fig


 def render_environment_info():
     """Render environment information"""

@@ -628,62 +1110,129 @@ def main():
     # Tab 3: Analytics
     with tab3:
         st.header("System Analytics")
-
-        #
         if st.session_state.prediction_history:
             st.subheader("Recent Predictions")
-
             # History chart
             fig_history = create_prediction_history_chart()
             if fig_history:
                 st.plotly_chart(fig_history, use_container_width=True)
-
             # History table
             history_df = pd.DataFrame(st.session_state.prediction_history)
             st.dataframe(history_df.tail(20), use_container_width=True)
-
         else:
             st.info(
                 "No prediction history available. Make some predictions to see analytics.")
-
-        # System metrics
         st.subheader("System Metrics")
-
         # Load various log files for analytics
         try:
-            # API health check
             if app_manager.api_available:
                 response = app_manager.session.get(
                     f"{app_manager.config['api_url']}/metrics")
                 if response.status_code == 200:
                     metrics = response.json()
-
                     col1, col2, col3, col4 = st.columns(4)
-
                     with col1:
                         st.metric("Total API Requests",
-
-
                     with col2:
-                        st.metric("Unique Clients",
-
-
                     with col3:
-                        st.metric("Model Version",
-
-
                     with col4:
-                        status =
                         st.metric("Model Status", status)
-
                     # Environment details
                     st.subheader("Environment Details")
-
                     st.info(f"Running in: {env_data}")

                     # Available files
-                    datasets =
-                    models =

                     col1, col2 = st.columns(2)
                     with col1:

@@ -697,7 +1246,7 @@ def main():
                         for name, exists in models.items():
                             status = "✅" if exists else "❌"
                             st.write(f"{status} {name}")
-
         except Exception as e:
             st.warning(f"Could not load API metrics: {e}")

@@ -117,6 +117,48 @@ class StreamlitAppManager:
         if 'auto_refresh' not in st.session_state:
             st.session_state.auto_refresh = False

+    def get_cv_results_from_api(self):
+        """Get cross-validation results from API"""
+        try:
+            if not self.api_available:
+                return None
+
+            response = self.session.get(
+                f"{self.config['api_url']}/cv/results",
+                timeout=10
+            )
+
+            if response.status_code == 200:
+                return response.json()
+            elif response.status_code == 404:
+                return {'error': 'No CV results available'}
+            else:
+                return None
+        except Exception as e:
+            logger.warning(f"Could not fetch CV results: {e}")
+            return None
+
+    def get_model_comparison_from_api(self):
+        """Get model comparison results from API"""
+        try:
+            if not self.api_available:
+                return None
+
+            response = self.session.get(
+                f"{self.config['api_url']}/cv/comparison",
+                timeout=10
+            )
+
+            if response.status_code == 200:
+                return response.json()
+            elif response.status_code == 404:
+                return {'error': 'No comparison results available'}
+            else:
+                return None
+        except Exception as e:
+            logger.warning(f"Could not fetch model comparison: {e}")
+            return None
+

 # Initialize app manager
 app_manager = StreamlitAppManager()

@@ -244,7 +286,372 @@ def show_logs_section():
     else:
         st.warning(f"Log file not found: {log_path}")

+# ADD STANDALONE FS HERE
+def render_cv_results_section(self):
+    """Render cross-validation results section"""
+    st.subheader("🎯 Cross-Validation Results")
+
+    cv_results = self.get_cv_results_from_api()
+
+    if cv_results is None:
+        st.warning("API not available - showing local CV results if available")
+
+        # Try to load local metadata
+        try:
+            from path_config import path_manager
+            metadata_path = path_manager.get_metadata_path()
+
+            if metadata_path.exists():
+                with open(metadata_path, 'r') as f:
+                    metadata = json.load(f)
+                cv_results = {'cross_validation': metadata.get('cross_validation', {})}
+            else:
+                st.info("No local CV results found")
+                return
+        except Exception as e:
+            st.error(f"Could not load local CV results: {e}")
+            return
+
+    if cv_results and 'error' not in cv_results:
+        # Display model information
+        if 'model_version' in cv_results:
+            st.info(f"**Model Version:** {cv_results.get('model_version', 'Unknown')} | "
+                    f"**Type:** {cv_results.get('model_type', 'Unknown')} | "
+                    f"**Trained:** {cv_results.get('training_timestamp', 'Unknown')}")
+
+        cv_data = cv_results.get('cross_validation', {})
+
+        if cv_data:
+            # CV Methodology
+            methodology = cv_data.get('methodology', {})
+            col1, col2, col3 = st.columns(3)
+
+            with col1:
+                st.metric("CV Folds", methodology.get('n_splits', 'Unknown'))
+            with col2:
+                st.metric("CV Type", methodology.get('cv_type', 'StratifiedKFold'))
+            with col3:
+                st.metric("Random State", methodology.get('random_state', 42))
+
+            # Performance Metrics Summary
+            st.subheader("📊 Performance Summary")
+
+            test_scores = cv_data.get('test_scores', {})
+            if test_scores:
+
+                metrics_cols = st.columns(len(test_scores))
+                for idx, (metric, scores) in enumerate(test_scores.items()):
+                    with metrics_cols[idx]:
+                        if isinstance(scores, dict):
+                            mean_val = scores.get('mean', 0)
+                            std_val = scores.get('std', 0)
+                            st.metric(
+                                f"{metric.upper()}",
+                                f"{mean_val:.4f}",
+                                delta=f"±{std_val:.4f}"
+                            )
+
+                # Detailed CV Scores Visualization
+                st.subheader("📈 Cross-Validation Scores by Metric")
+
+                # Create a comprehensive chart
+                chart_data = []
+                fold_results = cv_data.get('individual_fold_results', [])
+
+                if fold_results:
+                    for fold_result in fold_results:
+                        fold_num = fold_result.get('fold', 0)
+                        test_scores_fold = fold_result.get('test_scores', {})
+
+                        for metric, score in test_scores_fold.items():
+                            chart_data.append({
+                                'Fold': f"Fold {fold_num}",
+                                'Metric': metric.upper(),
+                                'Score': score,
+                                'Type': 'Test'
+                            })
+
+                        # Add train scores if available
+                        train_scores_fold = fold_result.get('train_scores', {})
+                        for metric, score in train_scores_fold.items():
+                            chart_data.append({
+                                'Fold': f"Fold {fold_num}",
+                                'Metric': metric.upper(),
+                                'Score': score,
+                                'Type': 'Train'
+                            })
+
+                if chart_data:
+                    df_cv = pd.DataFrame(chart_data)
+
+                    # Create separate charts for each metric
+                    for metric in df_cv['Metric'].unique():
+                        metric_data = df_cv[df_cv['Metric'] == metric]
+
+                        fig = px.bar(
+                            metric_data,
+                            x='Fold',
+                            y='Score',
+                            color='Type',
+                            title=f'{metric} Scores Across CV Folds',
+                            barmode='group'
+                        )
+
+                        fig.update_layout(height=400)
+                        st.plotly_chart(fig, use_container_width=True)
+
+                # Performance Indicators
+                st.subheader("🔍 Model Quality Indicators")
+
+                performance_indicators = cv_data.get('performance_indicators', {})
+                col1, col2 = st.columns(2)
+
+                with col1:
+                    overfitting_score = performance_indicators.get('overfitting_score', 'Unknown')
+                    if isinstance(overfitting_score, (int, float)):
+                        if overfitting_score < 0.05:
+                            st.success(f"**Overfitting Score:** {overfitting_score:.4f} (Low)")
+                        elif overfitting_score < 0.15:
+                            st.warning(f"**Overfitting Score:** {overfitting_score:.4f} (Moderate)")
+                        else:
+                            st.error(f"**Overfitting Score:** {overfitting_score:.4f} (High)")
+                    else:
+                        st.info(f"**Overfitting Score:** {overfitting_score}")
+
+                with col2:
+                    stability_score = performance_indicators.get('stability_score', 'Unknown')
+                    if isinstance(stability_score, (int, float)):
+                        if stability_score > 0.9:
+                            st.success(f"**Stability Score:** {stability_score:.4f} (High)")
+                        elif stability_score > 0.7:
+                            st.warning(f"**Stability Score:** {stability_score:.4f} (Moderate)")
+                        else:
+                            st.error(f"**Stability Score:** {stability_score:.4f} (Low)")
+                    else:
+                        st.info(f"**Stability Score:** {stability_score}")
+
+                # Statistical Validation Results
+                if 'statistical_validation' in cv_results:
+                    st.subheader("📈 Statistical Validation")
+
+                    stat_validation = cv_results['statistical_validation']
+
+                    for metric, validation_data in stat_validation.items():
+                        if isinstance(validation_data, dict):
+                            with st.expander(f"Statistical Tests - {metric.upper()}"):
+
+                                col1, col2 = st.columns(2)
+
+                                with col1:
+                                    st.write(f"**Improvement:** {validation_data.get('improvement', 0):.4f}")
+                                    st.write(f"**Effect Size:** {validation_data.get('effect_size', 0):.4f}")
+
+                                with col2:
+                                    sig_improvement = validation_data.get('significant_improvement', False)
+                                    if sig_improvement:
+                                        st.success("**Significant Improvement:** Yes")
+                                    else:
+                                        st.info("**Significant Improvement:** No")
+
+                                # Display test results
+                                tests = validation_data.get('tests', {})
+                                if tests:
+                                    st.write("**Statistical Test Results:**")
+                                    for test_name, test_result in tests.items():
+                                        if isinstance(test_result, dict):
+                                            p_value = test_result.get('p_value', 1.0)
+                                            significant = test_result.get('significant', False)
+
+                                            status = "✅ Significant" if significant else "❌ Not Significant"
+                                            st.write(f"- {test_name}: p-value = {p_value:.4f} ({status})")
+
+                # Promotion Validation
+                if 'promotion_validation' in cv_results:
+                    st.subheader("🚀 Model Promotion Validation")
+
+                    promotion_val = cv_results['promotion_validation']
+
+                    col1, col2, col3 = st.columns(3)
+
+                    with col1:
+                        confidence = promotion_val.get('decision_confidence', 'Unknown')
+                        if isinstance(confidence, (int, float)):
+                            st.metric("Decision Confidence", f"{confidence:.2%}")
+                        else:
+                            st.metric("Decision Confidence", str(confidence))
+
+                    with col2:
+                        st.write(f"**Promotion Reason:**")
+                        st.write(promotion_val.get('promotion_reason', 'Unknown'))
+
+                    with col3:
+                        st.write(f"**Comparison Method:**")
+                        st.write(promotion_val.get('comparison_method', 'Unknown'))
+
+                # Raw CV Data (expandable)
+                with st.expander("🔍 Detailed CV Data"):
+                    st.json(cv_data)
+
+            else:
+                st.info("No detailed CV test scores available")
+        else:
+            st.info("No cross-validation data available")
+    else:
+        error_msg = cv_results.get('error', 'Unknown error') if cv_results else 'No CV results available'
+        st.warning(f"Cross-validation results not available: {error_msg}")
+
+def render_model_comparison_section(self):
+    """Render model comparison results section"""
+    st.subheader("⚖️ Model Comparison Results")
+
+    comparison_results = self.get_model_comparison_from_api()
+
+    if comparison_results is None:
+        st.warning("API not available - comparison results not accessible")
+        return
+
+    if comparison_results and 'error' not in comparison_results:
+
+        # Comparison Summary
+        summary = comparison_results.get('summary', {})
+        models_compared = comparison_results.get('models_compared', {})
+
+        st.info(f"**Comparison:** {models_compared.get('model1_name', 'Model 1')} vs "
+                f"{models_compared.get('model2_name', 'Model 2')} | "
+                f"**Timestamp:** {comparison_results.get('comparison_timestamp', 'Unknown')}")
+
+        # Decision Summary
+        col1, col2, col3 = st.columns(3)
+
+        with col1:
+            decision = summary.get('decision', False)
+            if decision:
+                st.success("**Decision:** Promote New Model")
+            else:
+                st.info("**Decision:** Keep Current Model")
+
+        with col2:
+            confidence = summary.get('confidence', 0)
+            st.metric("Decision Confidence", f"{confidence:.2%}")
+
+        with col3:
+            st.write("**Reason:**")
+            st.write(summary.get('reason', 'Unknown'))
+
+        # Performance Comparison
+        st.subheader("📊 Performance Comparison")
+
+        prod_performance = comparison_results.get('model_performance', {}).get('production_model', {})
+        cand_performance = comparison_results.get('model_performance', {}).get('candidate_model', {})
+
+        # Create comparison chart
+        if prod_performance.get('test_scores') and cand_performance.get('test_scores'):
+
+            comparison_data = []
+
+            prod_scores = prod_performance['test_scores']
+            cand_scores = cand_performance['test_scores']
+
+            for metric in set(prod_scores.keys()) & set(cand_scores.keys()):
+                prod_mean = prod_scores[metric].get('mean', 0)
+                cand_mean = cand_scores[metric].get('mean', 0)
+
+                comparison_data.extend([
+                    {'Model': 'Production', 'Metric': metric.upper(), 'Score': prod_mean},
+                    {'Model': 'Candidate', 'Metric': metric.upper(), 'Score': cand_mean}
+                ])

+            if comparison_data:
+                df_comparison = pd.DataFrame(comparison_data)
+
+                fig = px.bar(
+                    df_comparison,
+                    x='Metric',
+                    y='Score',
+                    color='Model',
+                    title='Model Performance Comparison',
+                    barmode='group'
+                )
+
+                fig.update_layout(height=400)
+                st.plotly_chart(fig, use_container_width=True)
+
+        # Detailed Metric Comparisons
+        st.subheader("🔍 Detailed Metric Analysis")
+
+        metric_comparisons = comparison_results.get('metric_comparisons', {})
+
+        if metric_comparisons:
+            for metric, comparison_data in metric_comparisons.items():
+                if isinstance(comparison_data, dict):
+
+                    with st.expander(f"{metric.upper()} Analysis"):
+
+                        col1, col2, col3 = st.columns(3)
+
+                        with col1:
+                            improvement = comparison_data.get('improvement', 0)
+                            rel_improvement = comparison_data.get('relative_improvement', 0)
+
+                            if improvement > 0:
+                                st.success(f"**Improvement:** +{improvement:.4f}")
+                                st.success(f"**Relative:** +{rel_improvement:.2f}%")
+                            else:
+                                st.info(f"**Improvement:** {improvement:.4f}")
+                                st.info(f"**Relative:** {rel_improvement:.2f}%")
+
+                        with col2:
+                            effect_size = comparison_data.get('effect_size', 0)
+
+                            if abs(effect_size) > 0.8:
+                                st.success(f"**Effect Size:** {effect_size:.4f} (Large)")
+                            elif abs(effect_size) > 0.5:
+                                st.warning(f"**Effect Size:** {effect_size:.4f} (Medium)")
+                            else:
+                                st.info(f"**Effect Size:** {effect_size:.4f} (Small)")
+
+                        with col3:
+                            sig_improvement = comparison_data.get('significant_improvement', False)
+                            practical_sig = comparison_data.get('practical_significance', False)
+
+                            if sig_improvement:
+                                st.success("**Statistical Significance:** Yes")
+                            else:
+                                st.info("**Statistical Significance:** No")
+
+                            if practical_sig:
+                                st.success("**Practical Significance:** Yes")
+                            else:
+                                st.info("**Practical Significance:** No")
+
+                        # Statistical test results
+                        tests = comparison_data.get('tests', {})
+                        if tests:
+                            st.write("**Statistical Tests:**")
+                            for test_name, test_result in tests.items():
+                                if isinstance(test_result, dict):
+                                    p_value = test_result.get('p_value', 1.0)
+                                    significant = test_result.get('significant', False)
+
+                                    status = "✅" if significant else "❌"
+                                    st.write(f"- {test_name}: p = {p_value:.4f} {status}")
+
+        # CV Methodology
+        cv_methodology = comparison_results.get('cv_methodology', {})
+        if cv_methodology:
+            st.subheader("🎯 Cross-Validation Methodology")
+            st.info(f"**CV Folds:** {cv_methodology.get('cv_folds', 'Unknown')} | "
+                    f"**Session ID:** {comparison_results.get('session_id', 'Unknown')}")
+
+        # Raw comparison data (expandable)
+        with st.expander("🔍 Raw Comparison Data"):
+            st.json(comparison_results)
+
+    else:
+        error_msg = comparison_results.get('error', 'Unknown error') if comparison_results else 'No comparison results available'
+        st.warning(f"Model comparison results not available: {error_msg}")
+
+
 def save_prediction_to_history(text: str, prediction: str, confidence: float):
     """Save prediction to session history"""
     prediction_entry = {

@@ -357,6 +764,81 @@ def create_prediction_history_chart():
     fig.update_layout(height=400)
     return fig

+def create_cv_performance_chart(cv_results: dict) -> Optional[Any]:
+    """Create a comprehensive CV performance visualization"""
+    try:
+        if not cv_results or 'cross_validation' not in cv_results:
+            return None
+
+        cv_data = cv_results['cross_validation']
+        fold_results = cv_data.get('individual_fold_results', [])
+
+        if not fold_results:
+            return None
+
+        # Prepare data for visualization
+        chart_data = []
+
+        for fold_result in fold_results:
+            fold_num = fold_result.get('fold', 0)
+            test_scores = fold_result.get('test_scores', {})
+            train_scores = fold_result.get('train_scores', {})
+
+            for metric, score in test_scores.items():
+                chart_data.append({
+                    'Fold': fold_num,
+                    'Metric': metric.upper(),
+                    'Score': score,
+                    'Type': 'Test',
+                    'Fold_Label': f"Fold {fold_num}"
+                })
+
+            for metric, score in train_scores.items():
+                chart_data.append({
+                    'Fold': fold_num,
+                    'Metric': metric.upper(),
+                    'Score': score,
+                    'Type': 'Train',
+                    'Fold_Label': f"Fold {fold_num}"
+                })
+
+        if not chart_data:
+            return None
+
+        df_cv = pd.DataFrame(chart_data)
+
+        # Create faceted chart showing all metrics
+        fig = px.box(
+            df_cv[df_cv['Type'] == 'Test'],  # Focus on test scores
+            x='Metric',
+            y='Score',
+            title='Cross-Validation Performance Distribution',
+            points='all'
+        )
+
+        # Add mean lines
+        for metric in df_cv['Metric'].unique():
+            metric_data = df_cv[(df_cv['Metric'] == metric) & (df_cv['Type'] == 'Test')]
+            mean_score = metric_data['Score'].mean()
+
+            fig.add_hline(
+                y=mean_score,
+                line_dash="dash",
+                line_color="red",
+                annotation_text=f"Mean: {mean_score:.3f}"
+            )
+
+        fig.update_layout(
+            height=500,
+            showlegend=True
+        )
+
+        return fig
+
+    except Exception as e:
+        logger.error(f"Failed to create CV chart: {e}")
+        return None
+

 def render_environment_info():
     """Render environment information"""

@@ -628,62 +1110,129 @@ def main():
     # Tab 3: Analytics
     with tab3:
         st.header("System Analytics")
+
+        # Add CV and Model Comparison sections
+        col1, col2 = st.columns([1, 1])
+
+        with col1:
+            if st.button("🔄 Refresh CV Results", use_container_width=True):
+                st.rerun()
+
+        with col2:
+            show_detailed_cv = st.checkbox("Show Detailed CV Analysis", value=True)
+
+        if show_detailed_cv:
+            # Render cross-validation results
+            app_manager.render_cv_results_section()
+
+            # Add separator
+            st.divider()
+
+            # Render model comparison results
+            app_manager.render_model_comparison_section()
+
+            # Add separator
+            st.divider()
+
+        # Prediction history (existing content)
         if st.session_state.prediction_history:
             st.subheader("Recent Predictions")
+
             # History chart
             fig_history = create_prediction_history_chart()
             if fig_history:
                 st.plotly_chart(fig_history, use_container_width=True)
+
             # History table
             history_df = pd.DataFrame(st.session_state.prediction_history)
             st.dataframe(history_df.tail(20), use_container_width=True)
+
         else:
             st.info(
                 "No prediction history available. Make some predictions to see analytics.")
+
+        # System metrics (existing content with CV enhancement)
         st.subheader("System Metrics")
+
        # Load various log files for analytics
         try:
+            # API health check with CV information
             if app_manager.api_available:
                 response = app_manager.session.get(
                     f"{app_manager.config['api_url']}/metrics")
                 if response.status_code == 200:
                     metrics = response.json()
+
+                    # Basic metrics
+                    api_metrics = metrics.get('api_metrics', {})
+                    model_info = metrics.get('model_info', {})
+                    cv_summary = metrics.get('cross_validation_summary', {})
+
                     col1, col2, col3, col4 = st.columns(4)
+
                     with col1:
                         st.metric("Total API Requests",
+                                  api_metrics.get('total_requests', 0))
+
                     with col2:
+                        st.metric("Unique Clients",
+                                  api_metrics.get('unique_clients', 0))
+
                     with col3:
+                        st.metric("Model Version",
+                                  model_info.get('model_version', 'Unknown'))
+
                     with col4:
+                        status = model_info.get('model_health', 'unknown')
                         st.metric("Model Status", status)
+
+                    # Cross-validation summary metrics
+                    if cv_summary.get('cv_available', False):
+                        st.subheader("Cross-Validation Summary")
+
+                        cv_col1, cv_col2, cv_col3, cv_col4 = st.columns(4)
+
+                        with cv_col1:
+                            cv_folds = cv_summary.get('cv_folds', 'Unknown')
+                            st.metric("CV Folds", cv_folds)
+
+                        with cv_col2:
+                            cv_f1 = cv_summary.get('cv_f1_mean')
+                            cv_f1_std = cv_summary.get('cv_f1_std')
+                            if cv_f1 is not None and cv_f1_std is not None:
+                                st.metric("CV F1 Score", f"{cv_f1:.4f}", f"±{cv_f1_std:.4f}")
+                            else:
+                                st.metric("CV F1 Score", "N/A")
+
+                        with cv_col3:
+                            cv_acc = cv_summary.get('cv_accuracy_mean')
+                            cv_acc_std = cv_summary.get('cv_accuracy_std')
+                            if cv_acc is not None and cv_acc_std is not None:
+                                st.metric("CV Accuracy", f"{cv_acc:.4f}", f"±{cv_acc_std:.4f}")
+                            else:
+                                st.metric("CV Accuracy", "N/A")
+
+                        with cv_col4:
+                            overfitting = cv_summary.get('overfitting_score')
+                            if overfitting is not None:
+                                if overfitting < 0.05:
+                                    st.metric("Overfitting", f"{overfitting:.4f}", "Low", delta_color="normal")
+                                elif overfitting < 0.15:
+                                    st.metric("Overfitting", f"{overfitting:.4f}", "Moderate", delta_color="off")
+                                else:
+                                    st.metric("Overfitting", f"{overfitting:.4f}", "High", delta_color="inverse")
+                            else:
+                                st.metric("Overfitting", "N/A")
+
                     # Environment details
                     st.subheader("Environment Details")
+                    env_info = metrics.get('environment_info', {})
+                    env_data = env_info.get('environment', 'Unknown')
                     st.info(f"Running in: {env_data}")

                     # Available files
+                    datasets = env_info.get('available_datasets', {})
+                    models = env_info.get('available_models', {})

                     col1, col2 = st.columns(2)
                     with col1:

@@ -697,7 +1246,7 @@ def main():
                         for name, exists in models.items():
                             status = "✅" if exists else "❌"
                             st.write(f"{status} {name}")
+
         except Exception as e:
             st.warning(f"Could not load API metrics: {e}")
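For reference, the analytics tab above reads a handful of fields from the /metrics response. A minimal sketch of that payload, inferred only from the keys accessed in this diff, with placeholder values; it is an assumption, not a documented response schema:

    # Hypothetical /metrics payload, inferred from the keys read in this diff.
    example_metrics = {
        "api_metrics": {"total_requests": 120, "unique_clients": 4},
        "model_info": {"model_version": "v1.2.0", "model_health": "healthy"},
        "cross_validation_summary": {
            "cv_available": True,
            "cv_folds": 5,
            "cv_f1_mean": 0.91, "cv_f1_std": 0.02,
            "cv_accuracy_mean": 0.92, "cv_accuracy_std": 0.02,
            "overfitting_score": 0.04,
        },
        "environment_info": {
            "environment": "docker",                      # example value, assumed
            "available_datasets": {"train.csv": True},    # filename is illustrative
            "available_models": {"model.joblib": True},   # filename is illustrative
        },
    }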