import math

import numpy as np
from typing import Dict, List, Any


class MockTrainer:
    def __init__(self):
        # Base (non-private) MNIST performance; published DP-SGD results reach
        # roughly 85-98% accuracy, so the mock starts from this baseline.
        self.base_accuracy = 0.98  # Non-private MNIST accuracy
        self.base_loss = 0.08      # Corresponding base loss

    def train(self, params: Dict[str, Any]) -> Dict[str, Any]:
        """
        Simulate DP-SGD training with the given parameters using realistic privacy trade-offs.

        Args:
            params: Dictionary containing training parameters:
                - clipping_norm: float
                - noise_multiplier: float
                - batch_size: int
                - learning_rate: float
                - epochs: int

        Returns:
            Dictionary containing training results and metrics.
        """
        # Extract parameters
        clipping_norm = params['clipping_norm']
        noise_multiplier = params['noise_multiplier']
        batch_size = params['batch_size']
        learning_rate = params['learning_rate']
        epochs = params['epochs']

        # Calculate realistic privacy impact on performance
        privacy_factor = self._calculate_realistic_privacy_factor(
            clipping_norm, noise_multiplier, batch_size, epochs)

        # Generate epoch-wise data
        epochs_data = self._generate_epoch_data(epochs, privacy_factor)

        # Generate iteration-wise data (mock version for consistency)
        iterations_data = self._generate_iteration_data(epochs, privacy_factor, batch_size)

        # Calculate final metrics (must be consistent with epoch data)
        final_metrics = self._calculate_final_metrics(epochs_data, privacy_factor)

        # Generate recommendations
        recommendations = self._generate_recommendations(params, final_metrics)

        # Generate gradient information
        gradient_info = {
            'before_clipping': self.generate_gradient_norms(clipping_norm),
            'after_clipping': self.generate_clipped_gradients(clipping_norm)
        }

        # Calculate realistic privacy budget
        privacy_budget = self._calculate_mock_privacy_budget(params)

        return {
            'epochs_data': epochs_data,
            'iterations_data': iterations_data,
            'final_metrics': final_metrics,
            'recommendations': recommendations,
            'gradient_info': gradient_info,
            'privacy_budget': privacy_budget
        }

    def _calculate_mock_privacy_budget(self, params: Dict[str, Any]) -> float:
        """Calculate a realistic mock privacy budget based on DP-SGD theory."""
        noise_multiplier = params['noise_multiplier']
        epochs = params['epochs']
        batch_size = params['batch_size']

        q = batch_size / 60000  # Sampling rate for MNIST
        steps = epochs * (60000 // batch_size)

        # Simplified RDP-style estimate from the DP-SGD literature:
        # ε ≈ q * sqrt(steps * log(1/δ)) / σ for large σ
        delta = 1e-5
        epsilon = (q * math.sqrt(steps * math.log(1 / delta))) / noise_multiplier

        # Add some realistic variation
        epsilon *= (1 + np.random.normal(0, 0.1))

        return max(0.1, min(50.0, epsilon))
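
    # Optional cross-check (illustrative sketch, not used by train()): the simplified
    # epsilon formula above can be compared against a numerical RDP accountant.
    # This assumes the third-party `opacus` package and its RDPAccountant API
    # (step(noise_multiplier=..., sample_rate=...) and get_epsilon(delta=...));
    # the helper name `_reference_privacy_budget` is ours, not part of the original class.
    @staticmethod
    def _reference_privacy_budget(noise_multiplier: float, batch_size: int,
                                  epochs: int, delta: float = 1e-5) -> float:
        """Estimate epsilon with an RDP accountant (sketch; assumes opacus is installed)."""
        from opacus.accountants import RDPAccountant  # assumed dependency

        sample_rate = batch_size / 60000  # Same MNIST sampling rate as above
        steps = epochs * (60000 // batch_size)

        accountant = RDPAccountant()
        for _ in range(steps):
            accountant.step(noise_multiplier=noise_multiplier, sample_rate=sample_rate)
        return accountant.get_epsilon(delta=delta)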

    def _calculate_realistic_privacy_factor(self, clipping_norm: float, noise_multiplier: float,
                                            batch_size: int, epochs: int) -> float:
        """Calculate realistic privacy impact based on DP-SGD research."""
        # Research shows DP-SGD can achieve 85-98% accuracy with proper parameters,
        # so the privacy impact is modeled as much less severe than previously.

        # Base degradation from noise (much less severe)
        if noise_multiplier <= 0.5:
            noise_degradation = 0.02  # Very little impact with low noise
        elif noise_multiplier <= 1.0:
            noise_degradation = 0.05  # Small impact with medium noise
        elif noise_multiplier <= 1.5:
            noise_degradation = 0.12  # Moderate impact
        else:
            noise_degradation = min(0.25, 0.1 + 0.05 * noise_multiplier)  # Higher impact with very high noise

        # Clipping degradation (much less severe)
        if clipping_norm >= 2.0:
            clipping_degradation = 0.01  # Minimal impact with good clipping
        elif clipping_norm >= 1.0:
            clipping_degradation = 0.03  # Small impact
        else:
            clipping_degradation = min(0.15, 0.2 / clipping_norm)  # More impact with very low clipping

        # Batch size effect (larger batches help significantly)
        if batch_size >= 256:
            batch_factor = -0.02  # Bonus for large batches
        elif batch_size >= 128:
            batch_factor = 0.01  # Small penalty
        else:
            batch_factor = min(0.08, 0.001 * (128 - batch_size))  # Penalty grows as batches shrink

        # Epochs effect (more training helps overcome noise)
        if epochs >= 10:
            epoch_factor = -0.03  # Bonus for sufficient training
        elif epochs >= 5:
            epoch_factor = 0.01  # Small penalty
        else:
            epoch_factor = 0.05  # Penalty for insufficient training

        total_degradation = noise_degradation + clipping_degradation + batch_factor + epoch_factor
        privacy_factor = 1.0 - max(0, total_degradation)  # Much less degradation overall

        # Ensure at least 70% of non-private performance (85%+ is achievable with good parameters)
        return max(0.7, privacy_factor)

    def _generate_iteration_data(self, epochs: int, privacy_factor: float,
                                 batch_size: int) -> List[Dict[str, float]]:
        """Generate realistic iteration-wise training metrics."""
        iterations_data = []

        # Simulate ~60,000 training samples, so iterations_per_epoch = 60000 / batch_size
        dataset_size = 60000
        iterations_per_epoch = dataset_size // batch_size

        # Realistic base learning curve parameters
        base_accuracy = self.base_accuracy * privacy_factor
        base_loss = self.base_loss / privacy_factor

        current_iteration = 0

        for epoch in range(1, epochs + 1):
            for iteration_in_epoch in range(0, iterations_per_epoch, 10):  # Sample every 10th iteration
                current_iteration += 10

                # Overall progress through all training
                total_iterations = epochs * iterations_per_epoch
                overall_progress = current_iteration / total_iterations

                # More realistic learning curve: slower start, plateau effect
                learning_progress = 1 - np.exp(-3 * overall_progress)  # Exponential approach to target

                # Add realistic variation (DP-SGD is noisier; privacy_factor is floored
                # at 0.7, so anything below 0.8 counts as the high-privacy regime)
                noise_std = 0.08 if privacy_factor < 0.8 else 0.04
                noise = np.random.normal(0, noise_std)

                # Calculate realistic accuracy progression
                target_accuracy = base_accuracy * (0.4 + 0.6 * learning_progress)
                accuracy = target_accuracy + noise

                # Calculate corresponding loss
                target_loss = base_loss * (1.5 - 0.5 * learning_progress)
                loss = target_loss - noise * 0.3  # Loss inversely correlated with accuracy

                # Add some iteration-level oscillations (typical of SGD)
                oscillation = 0.015 * np.sin(current_iteration * 0.05)
                accuracy += oscillation
                loss -= oscillation * 0.5

                iterations_data.append({
                    'iteration': current_iteration,
                    'epoch': epoch,
                    'accuracy': max(5, min(95, accuracy * 100)),  # Realistic bounds (percent)
                    'loss': max(0.05, loss),
                    'train_accuracy': max(5, min(95, (accuracy + np.random.normal(0, 0.02)) * 100)),
                    'train_loss': max(0.05, loss + np.random.normal(0, 0.1))
                })

        return iterations_data
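
    # Visualization sketch (illustrative only, not called by train()): the epoch
    # dictionaries produced by _generate_epoch_data are already shaped for charting.
    # This assumes matplotlib is available; the helper name `_plot_epoch_curves` is ours.
    @staticmethod
    def _plot_epoch_curves(epochs_data: List[Dict[str, float]]) -> None:
        """Plot mock accuracy/loss curves from a train() result (sketch; assumes matplotlib)."""
        import matplotlib.pyplot as plt  # assumed dependency

        epoch_numbers = [d['epoch'] for d in epochs_data]
        fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(10, 4))

        ax_acc.plot(epoch_numbers, [d['train_accuracy'] for d in epochs_data], label='train')
        ax_acc.plot(epoch_numbers, [d['accuracy'] for d in epochs_data], label='test')
        ax_acc.set_xlabel('Epoch')
        ax_acc.set_ylabel('Accuracy (%)')
        ax_acc.legend()

        ax_loss.plot(epoch_numbers, [d['train_loss'] for d in epochs_data], label='train')
        ax_loss.plot(epoch_numbers, [d['loss'] for d in epochs_data], label='test')
        ax_loss.set_xlabel('Epoch')
        ax_loss.set_ylabel('Loss')
        ax_loss.legend()

        fig.tight_layout()
        plt.show()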

    def _generate_epoch_data(self, epochs: int, privacy_factor: float) -> List[Dict[str, float]]:
        """Generate realistic training metrics for each epoch."""
        epochs_data = []

        # Realistic base learning curve parameters
        base_accuracy = self.base_accuracy * privacy_factor
        base_loss = self.base_loss / privacy_factor

        for epoch in range(1, epochs + 1):
            # Realistic learning curve: fast early improvement, then plateau
            progress = epoch / epochs
            learning_factor = 1 - np.exp(-2.5 * progress)  # Exponential learning curve

            # Add realistic epoch-to-epoch variation (below 0.8 counts as the high-privacy regime)
            noise_std = 0.03 if privacy_factor < 0.8 else 0.015
            noise = np.random.normal(0, noise_std)

            # Calculate realistic metrics
            accuracy = base_accuracy * (0.4 + 0.6 * learning_factor) + noise
            loss = base_loss * (1.4 - 0.4 * learning_factor) - noise * 0.3

            epochs_data.append({
                'epoch': epoch,
                'accuracy': max(5, min(95, accuracy * 100)),  # Convert to percentage with bounds
                'loss': max(0.05, loss),
                'train_accuracy': max(5, min(95, (accuracy + np.random.normal(0, 0.01)) * 100)),
                'train_loss': max(0.05, loss + np.random.normal(0, 0.05))
            })

        return epochs_data

    def _calculate_final_metrics(self, epochs_data: List[Dict[str, float]],
                                 privacy_factor: float) -> Dict[str, float]:
        """Calculate final training metrics that are consistent with the epoch data."""
        if not epochs_data:
            return {'accuracy': 50.0, 'loss': 1.0, 'training_time': 1.0}

        # Use the last epoch's results as final metrics so they match the training curve
        final_epoch = epochs_data[-1]

        # Training time should be realistic for DP-SGD (slower than non-private training)
        base_time = len(epochs_data) * 0.8  # Base time per epoch
        privacy_slowdown = 2.0 - privacy_factor  # DP-SGD is slower
        time_variation = 1.0 + np.random.normal(0, 0.1)

        return {
            'accuracy': final_epoch['accuracy'],  # Consistent with training progress
            'loss': final_epoch['loss'],
            'training_time': base_time * privacy_slowdown * time_variation
        }

    def _generate_recommendations(self, params: Dict[str, Any],
                                  metrics: Dict[str, float]) -> List[Dict[str, str]]:
        """Generate realistic recommendations based on DP-SGD best practices."""
        recommendations = []

        # Noise multiplier recommendations (critical for DP-SGD)
        if params['noise_multiplier'] < 0.5:
            recommendations.append({
                'icon': '🔒',
                'text': 'Very low noise provides minimal privacy. Consider σ ≥ 0.8 for meaningful privacy.'
            })
        elif params['noise_multiplier'] > 2.0:
            recommendations.append({
                'icon': '⚠️',
                'text': 'High noise (σ > 2.0) significantly degrades accuracy. Try reducing to 0.8-1.5.'
            })
        elif params['noise_multiplier'] > 1.5:
            recommendations.append({
                'icon': '💡',
                'text': 'Consider reducing the noise multiplier to 0.8-1.2 for a better utility-privacy trade-off.'
            })

        # Clipping norm recommendations
        if params['clipping_norm'] < 0.5:
            recommendations.append({
                'icon': '⚠️',
                'text': 'Very low clipping norm can prevent learning. Try C = 1.0-2.0.'
            })
        elif params['clipping_norm'] > 3.0:
            recommendations.append({
                'icon': '🔒',
                'text': 'Large clipping norm reduces privacy protection. Consider C ≤ 2.0.'
            })

        # Batch size recommendations (important for DP-SGD)
        if params['batch_size'] < 64:
            recommendations.append({
                'icon': '⚡',
                'text': 'Small batch sizes amplify noise effects. Try batch size ≥ 128 for better stability.'
            })
        elif params['batch_size'] > 512:
            recommendations.append({
                'icon': '💾',
                'text': 'Very large batch sizes may require more memory and longer training time.'
            })

        # Learning rate recommendations
        if params['learning_rate'] > 0.05:
            recommendations.append({
                'icon': '⚠️',
                'text': 'High learning rate with noise can destabilize training. Try ≤ 0.02.'
            })
        elif params['learning_rate'] < 0.005:
            recommendations.append({
                'icon': '⏳',
                'text': 'Very low learning rate may require more epochs for convergence.'
            })

        # Epochs recommendations
        if params['epochs'] < 5:
            recommendations.append({
                'icon': '📈',
                'text': 'Few epochs may not be enough to overcome noise. Try 8-15 epochs.'
            })
        elif params['epochs'] > 20:
            recommendations.append({
                'icon': '🔒',
                'text': 'Many epochs increase privacy cost. Consider early stopping around 10-15 epochs.'
            })

        # Accuracy-based recommendations
        if metrics['accuracy'] < 60:
            recommendations.append({
                'icon': '📉',
                'text': 'Low accuracy suggests too much noise. Reduce σ or increase C for better utility.'
            })
        elif metrics['accuracy'] > 85:
            recommendations.append({
                'icon': '🎯',
                'text': 'Good accuracy! This is a well-balanced privacy-utility trade-off.'
            })

        return recommendations

    def generate_gradient_norms(self, clipping_norm: float) -> List[Dict[str, float]]:
        """Generate realistic gradient norms following a log-normal distribution."""
        num_points = 100
        gradients = []

        # Parameters of the log-normal distribution
        mu = np.log(clipping_norm) - 0.5
        sigma = 0.8

        for _ in range(num_points):
            # Sample a log-normally distributed gradient norm
            # (Box-Muller transform for a standard normal, then exponentiate)
            u1, u2 = np.random.random(2)
            z = np.sqrt(-2.0 * np.log(u1)) * np.cos(2.0 * np.pi * u2)
            norm = np.exp(mu + sigma * z)

            # Evaluate the log-normal probability density at this norm
            density = np.exp(-(np.power(np.log(norm) - mu, 2) / (2 * sigma * sigma))) / (norm * sigma * np.sqrt(2 * np.pi))
            # Shift the density for display purposes and add slight jitter
            density = 0.2 + density + 0.1 * (np.random.random() - 0.5)

            gradients.append({'x': float(norm), 'y': float(density)})

        return sorted(gradients, key=lambda point: point['x'])

    def generate_clipped_gradients(self, clipping_norm: float) -> List[Dict[str, float]]:
        """Generate clipped versions of the gradient norms."""
        original_gradients = self.generate_gradient_norms(clipping_norm)
        return [{'x': min(g['x'], clipping_norm), 'y': g['y']} for g in original_gradients]
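

# Minimal usage sketch. The parameter keys match what train() expects above;
# the specific values are illustrative, not tuned recommendations.
if __name__ == "__main__":
    trainer = MockTrainer()
    example_params = {
        'clipping_norm': 1.0,
        'noise_multiplier': 1.1,
        'batch_size': 256,
        'learning_rate': 0.01,
        'epochs': 10,
    }
    results = trainer.train(example_params)

    print(f"Final accuracy: {results['final_metrics']['accuracy']:.1f}%")
    print(f"Final loss: {results['final_metrics']['loss']:.3f}")
    print(f"Estimated privacy budget (epsilon): {results['privacy_budget']:.2f}")
    for rec in results['recommendations']:
        print(f"{rec['icon']} {rec['text']}")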