# DPSGDTool/app/training/privacy_calculator.py
# Author: Shuya Feng
# Commit 6640531 - feat: Implement DP-SGD Explorer web application with Flask
# backend, interactive frontend, and easy deployment script
import numpy as np
from typing import Dict, Any
class PrivacyCalculator:
    """Heuristic (ε, δ) privacy estimates for DP-SGD training runs.

    The formulas below are a simplified stand-in for the moments accountant:
    they are meant to reproduce qualitative trends for an interactive demo
    and are NOT a rigorous privacy accountant.

    Attributes:
        delta: The δ of the (ε, δ)-DP guarantee.
        dataset_size: Number of training examples; used to turn a batch
            size into a sampling rate.
        jitter_std: Standard deviation of the relative Gaussian jitter
            multiplied into every result. Set to 0 for deterministic output.
    """

    # Orders (α) at which the simplified moment bound is evaluated.
    _ORDERS = (1.25, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0)

    def __init__(self, delta: float = 1e-5, dataset_size: int = 60000,
                 jitter_std: float = 0.05):
        """
        Create a calculator.

        Args:
            delta: Target δ (defaults to the previously hard-coded 1e-5).
            dataset_size: Training-set size (defaults to MNIST's 60,000).
            jitter_std: Relative jitter applied to results (defaults to the
                previously hard-coded 5%); 0 disables the jitter.
        """
        self.delta = delta
        self.dataset_size = dataset_size
        self.jitter_std = jitter_std

    def calculate_epsilon(self, params: Dict[str, Any]) -> float:
        """
        Calculate the privacy budget (ε) using a simplified moment accountant.

        Args:
            params: Dictionary containing training parameters:
                - noise_multiplier: float (required)
                - batch_size: int (required, must be positive)
                - clipping_norm: float (accepted, not used by this model)
                - epochs: int (accepted, not used by this model)

        Returns:
            The calculated privacy budget (ε), never below 0.1.

        Raises:
            KeyError: If 'noise_multiplier' or 'batch_size' is missing.
            ValueError: If 'batch_size' is not positive.
        """
        noise_multiplier = params['noise_multiplier']
        sampling_rate = self._sampling_rate(params['batch_size'])

        # NOTE(review): the simplified bound is not composed over training
        # steps, so the result does not depend on 'epochs'.
        log_inv_delta = np.log(1 / self.delta)

        # Convert each moment bound to an (ε, δ)-DP bound using that moment's
        # OWN order, then keep the tightest (smallest) ε.
        epsilon = float('inf')
        for order in self._ORDERS:
            moment = self._calculate_moment(order, sampling_rate, noise_multiplier)
            epsilon = min(epsilon, moment + log_inv_delta / (order - 1))

        epsilon = self._apply_jitter(epsilon)
        return float(max(0.1, epsilon))  # Ensure ε is at least 0.1

    def _calculate_moment(self, order: float, sampling_rate: float, noise_multiplier: float) -> float:
        """
        Calculate the simplified moment bound for a given order.

        Args:
            order: The moment order
            sampling_rate: The probability of sampling each example
            noise_multiplier: The noise multiplier used in DP-SGD

        Returns:
            The calculated moment bound
        """
        # Heuristic: grows linearly with the order and the sampling rate and
        # shrinks with the noise multiplier.
        moment = (order * sampling_rate * self._gaussian_constant()) / noise_multiplier
        # Small order-dependent modulation (+/-10%) so the curve is not
        # perfectly linear in the order.
        moment *= (1 + 0.1 * np.sin(order))
        return moment

    def calculate_optimal_noise(self, target_epsilon: float, params: Dict[str, Any]) -> float:
        """
        Calculate the noise multiplier for a target privacy budget.

        Args:
            target_epsilon: The desired privacy budget
            params: Dictionary containing training parameters:
                - batch_size: int (required, must be positive)
                - epochs: int (required)
                - clipping_norm: float (accepted, not used by this model)

        Returns:
            The calculated noise multiplier, never below 0.1.

        Raises:
            KeyError: If 'batch_size' or 'epochs' is missing.
            ValueError: If 'batch_size' is not positive.
        """
        sampling_rate = self._sampling_rate(params['batch_size'])
        # Total optimizer steps: epochs times batches per epoch.
        steps = params['epochs'] * (1 / sampling_rate)

        # Classical Gaussian-mechanism constant scaled by the sampling rate
        # and by sqrt(steps).
        optimal_noise = (
            self._gaussian_constant() * sampling_rate * np.sqrt(steps)
        ) / target_epsilon

        optimal_noise = self._apply_jitter(optimal_noise)
        return float(max(0.1, optimal_noise))  # Ensure noise is at least 0.1

    def _sampling_rate(self, batch_size: int) -> float:
        """Return batch_size / dataset_size, rejecting non-positive batch sizes."""
        if batch_size <= 0:
            raise ValueError(f"batch_size must be positive, got {batch_size!r}")
        return batch_size / self.dataset_size

    def _gaussian_constant(self) -> float:
        """Return sqrt(2 * ln(1.25 / δ)), the classical Gaussian-mechanism constant."""
        return np.sqrt(2 * np.log(1.25 / self.delta))

    def _apply_jitter(self, value: float) -> float:
        """Multiply value by (1 + N(0, jitter_std)); no-op when jitter_std is 0."""
        if not self.jitter_std:
            return value
        return value * (1 + np.random.normal(0, self.jitter_std))