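"""Mock trainer that simulates DP-SGD training runs and their privacy/utility trade-offs."""
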
import math
import time
from typing import Any, Dict, List

import numpy as np


class MockTrainer:

    def __init__(self):
        # Baseline (non-private) performance the simulated runs degrade from
        self.base_accuracy = 0.98
        self.base_loss = 0.08

    def train(self, params: Dict[str, Any]) -> Dict[str, Any]:
        """
        Simulate DP-SGD training with the given parameters using realistic privacy trade-offs.

        Args:
            params: Dictionary containing training parameters:
                - clipping_norm: float
                - noise_multiplier: float
                - batch_size: int
                - learning_rate: float
                - epochs: int

        Returns:
            Dictionary containing training results and metrics.
        """
        clipping_norm = params['clipping_norm']
        noise_multiplier = params['noise_multiplier']
        batch_size = params['batch_size']
        learning_rate = params['learning_rate']
        epochs = params['epochs']

        # Overall utility penalty implied by the chosen privacy parameters
        privacy_factor = self._calculate_realistic_privacy_factor(
            clipping_norm, noise_multiplier, batch_size, epochs
        )

        # Simulated per-epoch and per-iteration training curves
        epochs_data = self._generate_epoch_data(epochs, privacy_factor)
        iterations_data = self._generate_iteration_data(epochs, privacy_factor, batch_size)

        # Final metrics derived from the last epoch, plus tuning recommendations
        final_metrics = self._calculate_final_metrics(epochs_data, privacy_factor)
        recommendations = self._generate_recommendations(params, final_metrics)

        # Gradient norm distributions before and after per-example clipping
        gradient_info = {
            'before_clipping': self.generate_gradient_norms(clipping_norm),
            'after_clipping': self.generate_clipped_gradients(clipping_norm)
        }

        # Approximate privacy budget (epsilon) spent by this configuration
        privacy_budget = self._calculate_mock_privacy_budget(params)

        return {
            'epochs_data': epochs_data,
            'iterations_data': iterations_data,
            'final_metrics': final_metrics,
            'recommendations': recommendations,
            'gradient_info': gradient_info,
            'privacy_budget': privacy_budget
        }

    def _calculate_mock_privacy_budget(self, params: Dict[str, Any]) -> float:
        """Calculate a realistic mock privacy budget based on DP-SGD theory."""
        noise_multiplier = params['noise_multiplier']
        epochs = params['epochs']
        batch_size = params['batch_size']

        # Sampling rate and step count for a 60,000-example (MNIST-sized) dataset
        q = batch_size / 60000
        steps = epochs * (60000 // batch_size)

        # Simplified epsilon approximation (not a full DP accountant)
        delta = 1e-5
        epsilon = (q * math.sqrt(steps * math.log(1 / delta))) / noise_multiplier

        # Small run-to-run variation so repeated runs do not return identical budgets
        epsilon *= (1 + np.random.normal(0, 0.1))

        return max(0.1, min(50.0, epsilon))

    def _calculate_realistic_privacy_factor(self, clipping_norm: float, noise_multiplier: float,
                                            batch_size: int, epochs: int) -> float:
        """Calculate realistic privacy impact based on DP-SGD research."""
        # Higher noise multipliers degrade accuracy more
        if noise_multiplier <= 0.5:
            noise_degradation = 0.02
        elif noise_multiplier <= 1.0:
            noise_degradation = 0.05
        elif noise_multiplier <= 1.5:
            noise_degradation = 0.12
        else:
            noise_degradation = min(0.25, 0.1 + 0.05 * noise_multiplier)

        # Aggressive clipping (small norms) biases gradients and slows learning
        if clipping_norm >= 2.0:
            clipping_degradation = 0.01
        elif clipping_norm >= 1.0:
            clipping_degradation = 0.03
        else:
            clipping_degradation = min(0.15, 0.2 / clipping_norm)

        # Larger batches average out the injected noise
        if batch_size >= 256:
            batch_factor = -0.02
        elif batch_size >= 128:
            batch_factor = 0.01
        else:
            batch_factor = min(0.08, 0.001 * (128 - batch_size))

        # More epochs give the model time to recover from the noise
        if epochs >= 10:
            epoch_factor = -0.03
        elif epochs >= 5:
            epoch_factor = 0.01
        else:
            epoch_factor = 0.05

        total_degradation = noise_degradation + clipping_degradation + batch_factor + epoch_factor
        privacy_factor = 1.0 - max(0, total_degradation)

        return max(0.7, privacy_factor)

    def _generate_iteration_data(self, epochs: int, privacy_factor: float,
                                 batch_size: int) -> List[Dict[str, float]]:
        """Generate realistic iteration-wise training metrics."""
        iterations_data = []

        # Assume an MNIST-sized dataset of 60,000 examples
        dataset_size = 60000
        iterations_per_epoch = dataset_size // batch_size

        # Privacy-adjusted targets the curves converge towards
        base_accuracy = self.base_accuracy * privacy_factor
        base_loss = self.base_loss / privacy_factor

        current_iteration = 0
        for epoch in range(1, epochs + 1):
            # Record a point every 10 iterations to keep the series compact
            for iteration_in_epoch in range(0, iterations_per_epoch, 10):
                current_iteration += 10

                total_iterations = epochs * iterations_per_epoch
                overall_progress = current_iteration / total_iterations

                # Exponential saturation mimics a typical learning curve
                learning_progress = 1 - np.exp(-3 * overall_progress)

                # Noisier curves when the privacy parameters hurt utility more
                noise_std = 0.08 if privacy_factor <= 0.7 else 0.04
                noise = np.random.normal(0, noise_std)

                # Accuracy rises towards the privacy-adjusted ceiling
                target_accuracy = base_accuracy * (0.4 + 0.6 * learning_progress)
                accuracy = target_accuracy + noise

                # Loss decays towards the privacy-adjusted floor
                target_loss = base_loss * (1.5 - 0.5 * learning_progress)
                loss = target_loss - noise * 0.3

                # Mild oscillation to mimic mini-batch variance
                oscillation = 0.015 * np.sin(current_iteration * 0.05)
                accuracy += oscillation
                loss -= oscillation * 0.5

                iterations_data.append({
                    'iteration': current_iteration,
                    'epoch': epoch,
                    'accuracy': max(5, min(95, accuracy * 100)),
                    'loss': max(0.05, loss),
                    'train_accuracy': max(5, min(95, (accuracy + np.random.normal(0, 0.02)) * 100)),
                    'train_loss': max(0.05, loss + np.random.normal(0, 0.1))
                })

        return iterations_data

    def _generate_epoch_data(self, epochs: int, privacy_factor: float) -> List[Dict[str, float]]:
        """Generate realistic training metrics for each epoch."""
        epochs_data = []

        # Privacy-adjusted targets the curves converge towards
        base_accuracy = self.base_accuracy * privacy_factor
        base_loss = self.base_loss / privacy_factor

        for epoch in range(1, epochs + 1):
            # Exponential saturation mimics a typical learning curve
            progress = epoch / epochs
            learning_factor = 1 - np.exp(-2.5 * progress)

            # Noisier curves when the privacy parameters hurt utility more
            noise_std = 0.03 if privacy_factor <= 0.7 else 0.015
            noise = np.random.normal(0, noise_std)

            # Accuracy rises and loss falls towards their privacy-adjusted limits
            accuracy = base_accuracy * (0.4 + 0.6 * learning_factor) + noise
            loss = base_loss * (1.4 - 0.4 * learning_factor) - noise * 0.3

            epochs_data.append({
                'epoch': epoch,
                'accuracy': max(5, min(95, accuracy * 100)),
                'loss': max(0.05, loss),
                'train_accuracy': max(5, min(95, (accuracy + np.random.normal(0, 0.01)) * 100)),
                'train_loss': max(0.05, loss + np.random.normal(0, 0.05))
            })

        return epochs_data

    def _calculate_final_metrics(self, epochs_data: List[Dict[str, float]],
                                 privacy_factor: float) -> Dict[str, float]:
        """Calculate final training metrics that are consistent with the epoch data."""
        if not epochs_data:
            return {'accuracy': 50.0, 'loss': 1.0, 'training_time': 1.0}

        # Report the last epoch's metrics so the summary matches the curves
        final_epoch = epochs_data[-1]

        # Simulated wall-clock time: DP-SGD overhead grows as the privacy factor drops
        base_time = len(epochs_data) * 0.8
        privacy_slowdown = (2.0 - privacy_factor)
        time_variation = 1.0 + np.random.normal(0, 0.1)

        return {
            'accuracy': final_epoch['accuracy'],
            'loss': final_epoch['loss'],
            'training_time': base_time * privacy_slowdown * time_variation
        }

    def _generate_recommendations(self, params: Dict[str, Any],
                                  metrics: Dict[str, float]) -> List[Dict[str, str]]:
        """Generate realistic recommendations based on DP-SGD best practices."""
        recommendations = []

        # Noise multiplier (sigma)
        if params['noise_multiplier'] < 0.5:
            recommendations.append({
                'icon': '🔒',
                'text': 'Very low noise provides minimal privacy. Consider σ ≥ 0.8 for meaningful privacy.'
            })
        elif params['noise_multiplier'] > 2.0:
            recommendations.append({
                'icon': '⚠️',
                'text': 'High noise (σ > 2.0) significantly degrades accuracy. Try reducing to 0.8-1.5.'
            })
        elif params['noise_multiplier'] > 1.5:
            recommendations.append({
                'icon': '💡',
                'text': 'Consider reducing the noise multiplier to 0.8-1.2 for a better utility-privacy trade-off.'
            })

        # Clipping norm (C)
        if params['clipping_norm'] < 0.5:
            recommendations.append({
                'icon': '⚠️',
                'text': 'A very low clipping norm can prevent learning. Try C = 1.0-2.0.'
            })
        elif params['clipping_norm'] > 3.0:
            recommendations.append({
                'icon': '🔒',
                'text': 'A large clipping norm reduces privacy protection. Consider C ≤ 2.0.'
            })

        # Batch size
        if params['batch_size'] < 64:
            recommendations.append({
                'icon': '⚡',
                'text': 'Small batch sizes amplify noise effects. Try batch size ≥ 128 for better stability.'
            })
        elif params['batch_size'] > 512:
            recommendations.append({
                'icon': '💾',
                'text': 'Very large batch sizes may require more memory and longer training time.'
            })

        # Learning rate
        if params['learning_rate'] > 0.05:
            recommendations.append({
                'icon': '⚠️',
                'text': 'A high learning rate combined with noise can destabilize training. Try ≤ 0.02.'
            })
        elif params['learning_rate'] < 0.005:
            recommendations.append({
                'icon': '⏳',
                'text': 'A very low learning rate may require more epochs for convergence.'
            })

        # Number of epochs
        if params['epochs'] < 5:
            recommendations.append({
                'icon': '📈',
                'text': 'Few epochs may not be enough to overcome the noise. Try 8-15 epochs.'
            })
        elif params['epochs'] > 20:
            recommendations.append({
                'icon': '🔒',
                'text': 'Many epochs increase the privacy cost. Consider early stopping around 10-15 epochs.'
            })

        # Overall accuracy
        if metrics['accuracy'] < 60:
            recommendations.append({
                'icon': '📉',
                'text': 'Low accuracy suggests too much noise. Reduce σ or increase C for better utility.'
            })
        elif metrics['accuracy'] > 85:
            recommendations.append({
                'icon': '🎯',
                'text': 'Good accuracy! This is a well-balanced privacy-utility trade-off.'
            })

        return recommendations

    def generate_gradient_norms(self, clipping_norm: float) -> List[Dict[str, float]]:
        """Generate realistic gradient norms following a log-normal distribution."""
        num_points = 100
        gradients = []

        # Log-normal parameters centred slightly below the clipping norm
        mu = np.log(clipping_norm) - 0.5
        sigma = 0.8

        for _ in range(num_points):
            # Box-Muller transform: draw a standard normal, then exponentiate
            u1, u2 = np.random.random(2)
            z = np.sqrt(-2.0 * np.log(u1)) * np.cos(2.0 * np.pi * u2)
            norm = np.exp(mu + sigma * z)

            # Log-normal density at the sampled norm, rescaled and jittered for plotting
            density = np.exp(-(np.power(np.log(norm) - mu, 2) / (2 * sigma * sigma))) / (norm * sigma * np.sqrt(2 * np.pi))
            density = 0.2 + 0.8 * (density / 0.8) + 0.1 * (np.random.random() - 0.5)

            gradients.append({'x': float(norm), 'y': float(density)})

        return sorted(gradients, key=lambda g: g['x'])

    def generate_clipped_gradients(self, clipping_norm: float) -> List[Dict[str, float]]:
        """Generate clipped versions of the gradient norms."""
        original_gradients = self.generate_gradient_norms(clipping_norm)
        # Per-example clipping caps every gradient norm at the clipping threshold
        return [{'x': min(g['x'], clipping_norm), 'y': g['y']} for g in original_gradients]
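

# Example usage (illustrative only): the parameter values below are assumptions
# chosen to show the expected structure of the params dict, not project defaults.
if __name__ == '__main__':
    trainer = MockTrainer()
    results = trainer.train({
        'clipping_norm': 1.0,
        'noise_multiplier': 1.1,
        'batch_size': 256,
        'learning_rate': 0.01,
        'epochs': 10,
    })
    print(f"Final accuracy: {results['final_metrics']['accuracy']:.1f}%")
    print(f"Estimated privacy budget (epsilon): {results['privacy_budget']:.2f}")
    for rec in results['recommendations']:
        print(f"{rec['icon']} {rec['text']}")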