# Source: Hugging Face Spaces: Dhruv-18 / neural-data-anlayst (status: Sleeping)
# File size: 29,915 Bytes
import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import json
from datetime import datetime, timedelta
from typing import Dict, List, Any
import base64
from io import BytesIO

# Additional advanced features for Neural Data Analyst

class AdvancedFeatures:
    """Advanced features and utilities for the Neural Data Analyst"""
    
    def __init__(self, db_manager):
        """Keep a reference to the database manager on the instance.

        Args:
            db_manager: Object stored as ``self.db_manager``. None of the
                methods visible in this class read it.
        """
        self.db_manager = db_manager
        
    def render_advanced_analytics_dashboard(self, df: pd.DataFrame):
        """Draw the tabbed advanced-analytics dashboard for ``df``.

        Each tab label is paired with the renderer method that fills that tab;
        the tabs are created in one call and then populated in order.
        """
        st.markdown("## 🔬 Advanced Analytics Dashboard")

        sections = [
            ("📊 Interactive Plots", self.render_interactive_plots),
            ("🎯 Smart Recommendations", self.render_smart_recommendations),
            ("📈 Trend Analysis", self.render_trend_analysis),
            ("🔍 Anomaly Detection", self.render_anomaly_detection),
            ("📋 Report Generator", self.render_report_generator),
        ]
        tab_handles = st.tabs([label for label, _ in sections])

        for handle, (_, renderer) in zip(tab_handles, sections):
            with handle:
                renderer(df)
            
    def render_interactive_plots(self, df: pd.DataFrame):
        """Show the plot-builder controls and, when requested, the built chart.

        The first row of widgets picks the plot type and the X/Y columns, the
        second row picks optional colour and size columns. The quick gallery
        is always rendered inside an expander at the bottom.
        """
        st.markdown("### 📊 Interactive Plot Builder")

        plot_types = ["Scatter", "Line", "Bar", "Histogram", "Box", "Violin", "Heatmap", "3D Scatter"]
        type_slot, x_slot, y_slot = st.columns(3)
        with type_slot:
            plot_type = st.selectbox("Plot Type", plot_types)
        with x_slot:
            x_column = st.selectbox("X-axis", df.columns)
        with y_slot:
            y_column = st.selectbox("Y-axis", df.columns)

        # "None" is the sentinel option meaning "do not use this encoding".
        numeric_names = list(df.select_dtypes(include=[np.number]).columns)
        color_slot, size_slot = st.columns(2)
        with color_slot:
            color_column = st.selectbox("Color by", ["None"] + list(df.columns))
        with size_slot:
            size_column = st.selectbox("Size by", ["None"] + numeric_names)

        clicked = st.button("🎨 Generate Plot")
        if clicked:
            fig = self.create_dynamic_plot(df, plot_type, x_column, y_column, color_column, size_column)
            if fig:
                st.plotly_chart(fig, use_container_width=True)

        with st.expander("🖼️ Quick Plot Gallery"):
            self.render_plot_gallery(df)
            
    def create_dynamic_plot(self, df: pd.DataFrame, plot_type: str, x_col: str, y_col: str, 
                           color_col: str = None, size_col: str = None):
        """Create dynamic plot based on user selections"""
        try:
            kwargs = {
                'data_frame': df,
                'x': x_col,
                'title': f'{plot_type} Plot: {x_col} vs {y_col}'
            }
            
            if y_col and y_col != x_col:
                kwargs['y'] = y_col
                
            if color_col and color_col != "None":
                kwargs['color'] = color_col
                
            if size_col and size_col != "None" and plot_type in ["Scatter", "3D Scatter"]:
                kwargs['size'] = size_col
                
            if plot_type == "Scatter":
                fig = px.scatter(**kwargs)
            elif plot_type == "Line":
                fig = px.line(**kwargs)
            elif plot_type == "Bar":
                fig = px.bar(**kwargs)
            elif plot_type == "Histogram":
                fig = px.histogram(df, x=x_col, title=f'Histogram: {x_col}')
            elif plot_type == "Box":
                fig = px.box(**kwargs)
            elif plot_type == "Violin":
                fig = px.violin(**kwargs)
            elif plot_type == "3D Scatter":
                z_col = st.selectbox("Z-axis", df.select_dtypes(include=[np.number]).columns)
                kwargs['z'] = z_col
                fig = px.scatter_3d(**kwargs)
            elif plot_type == "Heatmap":
                numeric_df = df.select_dtypes(include=[np.number])
                corr_matrix = numeric_df.corr()
                fig = px.imshow(corr_matrix, text_auto=True, title="Correlation Heatmap")
            else:
                return None
                
            fig.update_layout(
                plot_bgcolor='rgba(0,0,0,0)',
                paper_bgcolor='rgba(0,0,0,0)',
                font=dict(color='white')
            )
            
            return fig
            
        except Exception as e:
            st.error(f"Error creating plot: {str(e)}")
            return None
            
    def render_plot_gallery(self, df: pd.DataFrame):
        """Render quick plot gallery"""
        numeric_cols = df.select_dtypes(include=[np.number]).columns
        
        if len(numeric_cols) >= 2:
            col1, col2 = st.columns(2)
            
            with col1:
                # Quick correlation plot
                fig = px.scatter(df, x=numeric_cols[0], y=numeric_cols[1], 
                               title="Quick Correlation View")
                fig.update_layout(height=300)
                st.plotly_chart(fig, use_container_width=True)
                
            with col2:
                # Quick distribution plot
                fig = px.histogram(df, x=numeric_cols[0], title="Quick Distribution")
                fig.update_layout(height=300)
                st.plotly_chart(fig, use_container_width=True)
                
    def render_smart_recommendations(self, df: pd.DataFrame):
        """List the generated recommendations, one expander per entry.

        Only the first expander starts open. Each expander carries its own
        "Apply Recommendation" button, keyed by position so the keys are unique.
        """
        st.markdown("### 🎯 Smart Analysis Recommendations")

        for idx, rec in enumerate(self.generate_analysis_recommendations(df)):
            is_first = idx == 0
            with st.expander(f"💡 {rec['title']}", expanded=is_first):
                st.markdown(f"**Recommendation:** {rec['description']}")
                st.markdown(f"**Rationale:** {rec['rationale']}")

                clicked = st.button("Apply Recommendation", key=f"apply_rec_{idx}")
                if clicked:
                    self.apply_recommendation(df, rec)
                    
    def generate_analysis_recommendations(self, df: pd.DataFrame) -> List[Dict[str, str]]:
        """Generate smart analysis recommendations"""
        recommendations = []
        
        numeric_cols = df.select_dtypes(include=[np.number]).columns
        categorical_cols = df.select_dtypes(include=['object', 'category']).columns
        
        # Missing data recommendation
        missing_data = df.isnull().sum()
        high_missing = missing_data[missing_data > len(df) * 0.1]
        
        if len(high_missing) > 0:
            recommendations.append({
                'title': 'Missing Data Analysis',
                'description': f'Analyze missing data patterns in {len(high_missing)} columns with >10% missing values',
                'rationale': 'Understanding missing data patterns can reveal data collection issues or systematic biases',
                'action': 'missing_analysis'
            })
            
        # Correlation analysis recommendation
        if len(numeric_cols) > 2:
            recommendations.append({
                'title': 'Correlation Deep Dive',
                'description': 'Perform comprehensive correlation analysis with feature selection recommendations',
                'rationale': 'Identifying highly correlated features can improve model performance and interpretability',
                'action': 'correlation_analysis'
            })
            
        # Outlier detection recommendation
        if len(numeric_cols) > 0:
            recommendations.append({
                'title': 'Outlier Detection & Treatment',
                'description': 'Identify and analyze outliers using multiple statistical methods',
                'rationale': 'Outliers can significantly impact analysis results and model performance',
                'action': 'outlier_analysis'
            })
            
        # Segmentation recommendation
        if len(categorical_cols) > 0 and len(numeric_cols) > 0:
            recommendations.append({
                'title': 'Customer/Data Segmentation',
                'description': 'Perform clustering analysis to identify natural data segments',
                'rationale': 'Segmentation can reveal hidden patterns and improve targeted strategies',
                'action': 'segmentation_analysis'
            })
            
        # Time series recommendation
        date_cols = df.select_dtypes(include=['datetime64']).columns
        if len(date_cols) > 0:
            recommendations.append({
                'title': 'Time Series Analysis',
                'description': 'Analyze temporal patterns and trends in your data',
                'rationale': 'Time-based analysis can reveal seasonality, trends, and forecasting opportunities',
                'action': 'time_series_analysis'
            })
            
        return recommendations
        
    def apply_recommendation(self, df: pd.DataFrame, recommendation: Dict[str, str]):
        """Apply a smart recommendation"""
        action = recommendation.get('action')
        
        if action == 'missing_analysis':
            self.perform_missing_analysis(df)
        elif action == 'correlation_analysis':
            self.perform_correlation_analysis(df)
        elif action == 'outlier_analysis':
            self.perform_outlier_analysis(df)
        elif action == 'segmentation_analysis':
            self.perform_segmentation_analysis(df)
        elif action == 'time_series_analysis':
            self.perform_time_series_analysis(df)
            
    def perform_missing_analysis(self, df: pd.DataFrame):
        """Chart and tabulate the columns of ``df`` that contain nulls.

        Columns are ranked by percentage of missing values, highest first.
        When no column has missing values a success message is shown instead.
        """
        st.markdown("#### 🔍 Missing Data Analysis Results")

        null_counts = df.isnull().sum()
        null_share = (null_counts / len(df)) * 100

        table = pd.DataFrame({
            'Column': null_counts.index,
            'Missing_Count': null_counts.values,
            'Missing_Percentage': null_share.values
        })
        has_missing = table['Missing_Count'] > 0
        table = table[has_missing].sort_values('Missing_Percentage', ascending=False)

        if len(table) == 0:
            st.success("✅ No missing data found in the dataset!")
            return

        chart = px.bar(table, x='Column', y='Missing_Percentage',
                       title='Missing Data by Column (%)')
        chart.update_layout(height=400)
        st.plotly_chart(chart, use_container_width=True)

        st.dataframe(table, use_container_width=True)
            
    def perform_correlation_analysis(self, df: pd.DataFrame):
        """Cluster the numeric features by absolute correlation and plot a dendrogram.

        Features are grouped with average-linkage hierarchical clustering on
        the distance ``1 - |corr|``. Features whose correlations are all
        undefined (for example constant columns) are left out; any remaining
        undefined pair is treated as uncorrelated. An info message is shown
        when fewer than two usable numeric features remain.

        Args:
            df: Dataset to analyse; only numeric columns are used.
        """
        st.markdown("#### 🔗 Advanced Correlation Analysis")

        numeric_df = df.select_dtypes(include=[np.number])
        if len(numeric_df.columns) < 2:
            st.info("At least two numeric columns are required for correlation analysis.")
            return

        corr_matrix = numeric_df.corr()

        # linkage() rejects non-finite distances, so drop features with no
        # defined correlation at all and neutralise leftover NaN pairs.
        usable = corr_matrix.columns[corr_matrix.notna().any(axis=0)]
        corr_matrix = corr_matrix.loc[usable, usable].fillna(0.0)
        if len(usable) < 2:
            st.info("At least two numeric columns are required for correlation analysis.")
            return

        # Hierarchical clustering of correlations
        from scipy.cluster.hierarchy import linkage, dendrogram
        from scipy.spatial.distance import squareform

        distance_matrix = 1 - np.abs(corr_matrix.to_numpy())
        # checks=False: the diagonal may differ from 0 by float rounding.
        condensed_distances = squareform(distance_matrix, checks=False)
        linkage_matrix = linkage(condensed_distances, method='average')

        labels = [str(name) for name in corr_matrix.columns]
        dendro = dendrogram(linkage_matrix, labels=labels, no_plot=True)

        fig = go.Figure()
        # Each (icoord, dcoord) pair is one U-shaped link of the tree.
        for xs, ys in zip(dendro['icoord'], dendro['dcoord']):
            fig.add_trace(go.Scatter(x=xs, y=ys, mode='lines',
                                     line=dict(color='gold', width=2),
                                     showlegend=False))

        # scipy places the leaves at x = 5, 15, 25, ...; 'ivl' holds their
        # labels in that order, so the axis can show the feature names.
        leaf_positions = [5 + 10 * i for i in range(len(dendro['ivl']))]
        fig.update_layout(
            title="Feature Clustering Dendrogram",
            xaxis_title="Features",
            yaxis_title="Distance",
            height=400,
            xaxis=dict(tickmode='array', tickvals=leaf_positions, ticktext=dendro['ivl'])
        )

        st.plotly_chart(fig, use_container_width=True)
            
    def render_trend_analysis(self, df: pd.DataFrame):
        """Render the trend-analysis controls for ``df``.

        When ``df`` has no datetime column, columns whose name contains
        "date" or "time" are offered for conversion. A successful conversion
        is written back into ``df`` in place and the script is rerun.
        Otherwise the user picks a date column and a numeric column and can
        start the analysis.

        Args:
            df: Dataset to analyse; may be mutated by the conversion above.
        """
        st.markdown("### 📈 Trend Analysis")

        # 'datetimetz' is needed as well: 'datetime64' skips tz-aware columns.
        date_cols = df.select_dtypes(include=['datetime64', 'datetimetz']).columns
        numeric_cols = df.select_dtypes(include=[np.number]).columns

        if len(date_cols) == 0:
            st.warning("No datetime columns found. Try converting date columns to datetime format.")

            # Offer to convert columns; str() keeps non-string labels from failing.
            potential_date_cols = [
                col for col in df.columns
                if 'date' in str(col).lower() or 'time' in str(col).lower()
            ]
            if potential_date_cols:
                date_col = st.selectbox("Select date column to convert:", potential_date_cols)
                if st.button("Convert to DateTime"):
                    try:
                        converted = pd.to_datetime(df[date_col])
                    except Exception as e:
                        st.error(f"Conversion failed: {str(e)}")
                    else:
                        df[date_col] = converted
                        st.success(f"Converted {date_col} to datetime!")
                        # st.experimental_rerun is gone from current Streamlit;
                        # prefer st.rerun and fall back for old versions.
                        rerun = getattr(st, "rerun", None) or getattr(st, "experimental_rerun")
                        rerun()
            return

        if len(numeric_cols) == 0:
            st.warning("No numeric columns found for trend analysis.")
            return

        col1, col2 = st.columns(2)
        with col1:
            date_col = st.selectbox("Date Column", date_cols)
        with col2:
            value_col = st.selectbox("Value Column", numeric_cols)

        if st.button("🔍 Analyze Trends"):
            self.perform_trend_analysis(df, date_col, value_col)
            
    def perform_trend_analysis(self, df: pd.DataFrame, date_col: str, value_col: str):
        """Plot ``value_col`` over ``date_col`` plus two moving averages.

        The data is sorted by ``date_col``. The averages use windows of 7 and
        30 rows (not calendar days), each with ``min_periods=1``.
        """
        st.markdown("#### 📊 Trend Analysis Results")

        # Plain time series
        ordered = df.sort_values(date_col)
        raw_fig = px.line(ordered, x=date_col, y=value_col,
                          title=f'{value_col} Over Time')
        raw_fig.update_layout(height=400)
        st.plotly_chart(raw_fig, use_container_width=True)

        # Rolling means are added to a copy so the caller's frame is untouched.
        smoothed = ordered.copy()
        for column, window in (('7_day_avg', 7), ('30_day_avg', 30)):
            smoothed[column] = smoothed[value_col].rolling(window=window, min_periods=1).mean()

        overlay = go.Figure()
        series_to_draw = (
            (value_col, 'Original'),
            ('7_day_avg', '7-Day Average'),
            ('30_day_avg', '30-Day Average'),
        )
        for column, label in series_to_draw:
            overlay.add_trace(go.Scatter(x=smoothed[date_col], y=smoothed[column],
                                         name=label, mode='lines'))

        overlay.update_layout(title="Trend with Moving Averages", height=400)
        st.plotly_chart(overlay, use_container_width=True)
        
    def render_anomaly_detection(self, df: pd.DataFrame):
        """Render the anomaly-detection controls and run detection on request.

        A warning is shown, and nothing else rendered, when ``df`` has no
        numeric columns.
        """
        st.markdown("### 🔍 Anomaly Detection")

        numeric_names = df.select_dtypes(include=[np.number]).columns
        if len(numeric_names) == 0:
            st.warning("No numeric columns found for anomaly detection.")
            return

        methods = ["IQR", "Z-Score", "Isolation Forest", "Local Outlier Factor"]
        target_slot, method_slot = st.columns(2)
        with target_slot:
            target_col = st.selectbox("Target Column", numeric_names)
        with method_slot:
            method = st.selectbox("Detection Method", methods)

        if not st.button("🎯 Detect Anomalies"):
            return
        self.perform_anomaly_detection(df, target_col, method)
            
    def perform_anomaly_detection(self, df: pd.DataFrame, target_col: str, method: str):
        """Detect anomalies in one numeric column and display the result.

        Missing values in ``target_col`` are excluded from detection and are
        never reported as anomalies. The result is shown as a scatter plot
        (normal vs. anomalous points by index), three summary metrics and,
        when anomalies exist, a table of the anomalous values.

        Args:
            df: Dataset containing ``target_col``.
            target_col: Name of the numeric column to analyse.
            method: "IQR", "Z-Score", "Isolation Forest" or
                "Local Outlier Factor". Any other value shows an error.
        """
        st.markdown("#### 🎯 Anomaly Detection Results")

        present = df[target_col].notna().to_numpy()
        data = df[target_col][present]
        if data.empty:
            st.warning(f"Column '{target_col}' has no non-missing values to analyse.")
            return

        flags = self._flag_anomalies(data, method)
        if flags is None:
            st.error(f"Unknown detection method: {method}")
            return

        # Detection ran on the non-null values only; expand the result to a
        # row-position mask over the whole frame so that missing values and
        # duplicate index labels cannot misalign the selection.
        anomaly_mask = np.zeros(len(df), dtype=bool)
        anomaly_mask[present] = flags
        anomalies = df[anomaly_mask]
        normal_data = df[~anomaly_mask]

        # Visualization
        fig = go.Figure()

        # Normal data points
        fig.add_trace(go.Scatter(
            x=normal_data.index,
            y=normal_data[target_col],
            mode='markers',
            name='Normal',
            marker=dict(color='blue', size=6)
        ))

        # Anomalies
        fig.add_trace(go.Scatter(
            x=anomalies.index,
            y=anomalies[target_col],
            mode='markers',
            name='Anomalies',
            marker=dict(color='red', size=10, symbol='x')
        ))

        fig.update_layout(
            title=f'Anomaly Detection: {target_col} ({method})',
            xaxis_title='Index',
            yaxis_title=target_col,
            height=500
        )

        st.plotly_chart(fig, use_container_width=True)

        # Summary (len(df) > 0 here because at least one value is present)
        col1, col2, col3 = st.columns(3)
        with col1:
            st.metric("Total Data Points", len(df))
        with col2:
            st.metric("Anomalies Found", len(anomalies))
        with col3:
            st.metric("Anomaly Rate", f"{len(anomalies)/len(df)*100:.2f}%")

        if len(anomalies) > 0:
            with st.expander("🔍 Anomaly Details"):
                st.dataframe(anomalies[[target_col]], use_container_width=True)

    def _flag_anomalies(self, data: pd.Series, method: str):
        """Return a boolean array marking the anomalous entries of ``data``.

        ``data`` must not contain nulls. Returns None for an unknown method.
        """
        if method == "IQR":
            q1 = data.quantile(0.25)
            q3 = data.quantile(0.75)
            iqr = q3 - q1
            return ((data < q1 - 1.5 * iqr) | (data > q3 + 1.5 * iqr)).to_numpy()

        if method == "Z-Score":
            spread = data.std()
            # Zero or undefined spread (constant data, single value): no outliers.
            if not spread > 0:
                return np.zeros(len(data), dtype=bool)
            return (np.abs((data - data.mean()) / spread) > 3).to_numpy()

        if method == "Isolation Forest":
            from sklearn.ensemble import IsolationForest
            iso_forest = IsolationForest(contamination=0.1, random_state=42)
            labels = iso_forest.fit_predict(data.to_numpy().reshape(-1, 1))
            return labels == -1

        if method == "Local Outlier Factor":
            # The neighbour count must stay below the sample count.
            if len(data) < 2:
                return np.zeros(len(data), dtype=bool)
            from sklearn.neighbors import LocalOutlierFactor
            lof = LocalOutlierFactor(n_neighbors=min(20, len(data) - 1), contamination=0.1)
            labels = lof.fit_predict(data.to_numpy().reshape(-1, 1))
            return labels == -1

        return None
                
    def render_report_generator(self, df: pd.DataFrame):
        """Render the report options and, on request, the generated report.

        After the report is generated it is displayed as markdown and offered
        for download under a timestamped ``.md`` filename.
        """
        st.markdown("### 📋 Automated Report Generator")

        report_kinds = ["Executive Summary", "Technical Analysis", "Data Quality Report", "Custom Report"]
        report_type = st.selectbox("Report Type", report_kinds)

        charts_slot, recs_slot = st.columns(2)
        with charts_slot:
            include_charts = st.checkbox("Include Charts", value=True)
        with recs_slot:
            include_recommendations = st.checkbox("Include Recommendations", value=True)

        if not st.button("📄 Generate Report"):
            return

        report_content = self.generate_report(df, report_type, include_charts, include_recommendations)

        # Show the report on the page
        st.markdown("#### 📊 Generated Report")
        st.markdown(report_content)

        # Offer it as a file
        stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        self.create_download_link(report_content, f"neural_analyst_report_{stamp}.md")
            
    def generate_report(self, df: pd.DataFrame, report_type: str, include_charts: bool, include_recommendations: bool) -> str:
        """Generate automated report"""
        report = f"""
# Neural Data Analyst Report
**Generated on:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
**Report Type:** {report_type}

## Dataset Overview
- **Total Rows:** {len(df):,}
- **Total Columns:** {len(df.columns)}
- **Memory Usage:** {df.memory_usage(deep=True).sum() / 1024**2:.2f} MB
- **Missing Values:** {df.isnull().sum().sum():,} ({df.isnull().sum().sum() / df.size * 100:.1f}%)

## Column Information
"""
        
        # Column details
        for col in df.columns:
            dtype = str(df[col].dtype)
            null_count = df[col].isnull().sum()
            unique_count = df[col].nunique()
            
            report += f"- **{col}** ({dtype}): {null_count} missing, {unique_count} unique values\n"
            
        # Numeric summary
        numeric_cols = df.select_dtypes(include=[np.number]).columns
        if len(numeric_cols) > 0:
            report += "\n## Numeric Summary\n"
            summary_stats = df[numeric_cols].describe()
            report += summary_stats.to_markdown()
            
        # Key insights
        if include_recommendations:
            report += "\n## Key Insights & Recommendations\n"
            recommendations = self.generate_analysis_recommendations(df)
            for i, rec in enumerate(recommendations[:5], 1):
                report += f"{i}. **{rec['title']}:** {rec['description']}\n"
                
        return report
        
    def create_download_link(self, content: str, filename: str):
        """Render an HTML link that downloads ``content`` as ``filename``.

        The text is embedded in the link as a base64 ``data:`` URI, so no
        file is written on the server.
        """
        encoded = base64.b64encode(content.encode())
        b64 = encoded.decode()
        st.markdown(
            f'<a href="data:text/markdown;base64,{b64}" download="{filename}">📥 Download Report</a>',
            unsafe_allow_html=True,
        )
        
    def render_data_comparison_tool(self):
        """Let the user upload several CSV files and compare them.

        The comparison runs once at least two of the uploaded files have been
        read successfully. A file that fails to load is reported and skipped.
        Datasets are keyed by file name.
        """
        st.markdown("## ⚖️ Data Comparison Tool")

        st.markdown("Upload multiple datasets to compare their characteristics:")

        uploaded_files = st.file_uploader(
            "Choose CSV files for comparison",
            type=['csv'],
            accept_multiple_files=True
        )

        if len(uploaded_files) < 2:
            return

        datasets = {}
        for upload in uploaded_files:
            try:
                datasets[upload.name] = pd.read_csv(upload)
            except Exception as e:
                st.error(f"Error loading {upload.name}: {str(e)}")

        if len(datasets) >= 2:
            self.perform_dataset_comparison(datasets)
                
    def perform_dataset_comparison(self, datasets: Dict[str, pd.DataFrame]):
        """Show a summary table and a 2x2 bar-chart grid comparing datasets.

        Args:
            datasets: Mapping of dataset name to its DataFrame.
        """
        st.markdown("### 📊 Dataset Comparison Results")

        # One summary row per dataset
        comparison_df = pd.DataFrame([
            {
                'Dataset': name,
                'Rows': len(frame),
                'Columns': len(frame.columns),
                'Numeric Columns': len(frame.select_dtypes(include=[np.number]).columns),
                'Text Columns': len(frame.select_dtypes(include=['object']).columns),
                'Missing Values': frame.isnull().sum().sum(),
                'Memory (MB)': f"{frame.memory_usage(deep=True).sum() / 1024**2:.2f}"
            }
            for name, frame in datasets.items()
        ])
        st.dataframe(comparison_df, use_container_width=True)

        # Chart grid: one bar trace per metric, one panel per trace
        fig = make_subplots(
            rows=2, cols=2,
            subplot_titles=['Rows Comparison', 'Columns Comparison',
                            'Missing Values', 'Memory Usage'],
            specs=[[{"type": "bar"}, {"type": "bar"}],
                   [{"type": "bar"}, {"type": "bar"}]]
        )

        names = list(datasets.keys())
        panels = [
            ("Rows", lambda frame: len(frame), 1, 1),
            ("Columns", lambda frame: len(frame.columns), 1, 2),
            ("Missing", lambda frame: frame.isnull().sum().sum(), 2, 1),
            ("Memory", lambda frame: frame.memory_usage(deep=True).sum() / 1024**2, 2, 2),
        ]
        for trace_name, measure, grid_row, grid_col in panels:
            heights = [measure(datasets[name]) for name in names]
            fig.add_trace(go.Bar(x=names, y=heights, name=trace_name),
                          row=grid_row, col=grid_col)

        fig.update_layout(height=600, showlegend=False, title_text="Dataset Comparison Dashboard")
        st.plotly_chart(fig, use_container_width=True)
        
    def render_data_profiling_tool(self, df: pd.DataFrame):
        """Render the profiling section; build and show the profile on click."""
        st.markdown("## 🔬 Data Profiling Tool")

        if not st.button("🚀 Generate Complete Data Profile"):
            return

        # The spinner stays visible while the profile is computed and drawn.
        with st.spinner("Generating comprehensive data profile..."):
            self.display_data_profile(self.generate_data_profile(df))
                
    def generate_data_profile(self, df: pd.DataFrame) -> Dict[str, Any]:
        """Generate comprehensive data profile"""
        profile = {
            'overview': {},
            'column_profiles': {},
            'data_quality': {},
            'relationships': {},
            'recommendations': []
        }
        
        # Overview
        profile['overview'] = {
            'shape': df.shape,
            'memory_usage': df.memory_usage(deep=True).sum(),
            'dtypes': df.dtypes.value_counts().to_dict(),
            'missing_cells': df.isnull().sum().sum(),
            'duplicate_rows': df.duplicated().sum()
        }
        
        # Column profiles
        for col in df.columns:
            col_profile = {
                'dtype': str(df[col].dtype),
                'null_count': df[col].isnull().sum(),
                'null_percentage': df[col].isnull().sum() / len(df) * 100,
                'unique_count': df[col].nunique(),
                'unique_percentage': df[col].nunique() / len(df) * 100
            }
            
            if df[col].dtype in ['int64', 'float64']:
                col_profile.update({
                    'min': df[col].min(),
                    'max': df[col].max(),
                    'mean': df[col].mean(),
                    'std': df[col].std(),
                    'skewness': df[col].skew(),
                    'kurtosis': df[col].kurtosis()
                })
            else:
                col_profile.update({
                    'most_frequent': df[col].mode().iloc[0] if len(df[col].mode()) > 0 else None,
                    'most_frequent_count': df[col].value_counts().iloc[0] if len(df[col].value_counts()) > 0 else 0
                })
                
            profile['column_profiles'][col] = col_profile
            
        return profile
        
    def display_data_profile(self, profile: Dict[str, Any]):
        """Show a profile from ``generate_data_profile`` as metrics and a table.

        Args:
            profile: Dict with at least the ``overview`` and
                ``column_profiles`` entries.
        """
        st.markdown("### 📊 Complete Data Profile")

        # Headline numbers, one per column of the metric row
        overview = profile['overview']
        headline = [
            ("Rows", f"{overview['shape'][0]:,}"),
            ("Columns", overview['shape'][1]),
            ("Missing Cells", f"{overview['missing_cells']:,}"),
            ("Duplicates", f"{overview['duplicate_rows']:,}"),
        ]
        for slot, (label, value) in zip(st.columns(4), headline):
            with slot:
                st.metric(label, value)

        st.markdown("#### 📋 Column Details")

        def summarise(details):
            # Numeric profiles carry 'min'/'max'; others carry 'most_frequent'.
            if 'min' in details:
                return f"Min: {details.get('min', 'N/A')}, Max: {details.get('max', 'N/A')}"
            return f"Most Frequent: {details.get('most_frequent', 'N/A')}"

        rows = [
            {
                'Column': name,
                'Type': details['dtype'],
                'Missing %': f"{details['null_percentage']:.1f}%",
                'Unique %': f"{details['unique_percentage']:.1f}%",
                'Details': summarise(details)
            }
            for name, details in profile['column_profiles'].items()
        ]

        st.dataframe(pd.DataFrame(rows), use_container_width=True)

# Usage in main app
def integrate_advanced_features():
    """Return an example snippet showing how to wire AdvancedFeatures into the main app.

    The snippet is returned as plain text and is never executed here. It
    shows the import, the construction inside the host class, and the
    buttons that call the dashboard, profiling and comparison entry points.

    Returns:
        str: The integration instructions as a multi-line string.
    """
    # The literal below is documentation for the reader, not code run by this module.
    return """
    # Add this to your main.py file:
    
    from advanced_features import AdvancedFeatures
    
    # In your NeuralDataAnalyst class:
    def __init__(self):
        # ... existing code ...
        self.advanced_features = AdvancedFeatures(self.db_manager)
    
    # Add this after your existing data upload section:
    if st.session_state.uploaded_data is not None:
        if st.button("🔬 Advanced Analytics", key="advanced_analytics"):
            self.advanced_features.render_advanced_analytics_dashboard(st.session_state.uploaded_data)
            
        if st.button("🔍 Data Profiling", key="data_profiling"):
            self.advanced_features.render_data_profiling_tool(st.session_state.uploaded_data)
    
    # Add dataset comparison in sidebar:
    with st.sidebar:
        st.markdown("---")
        if st.button("⚖️ Compare Datasets"):
            self.advanced_features.render_data_comparison_tool()
    """