Emily committed
Commit: f826c3b
Parent(s): 0e859f8
Add multi-dataset and ResNet-18 architecture support
- Fix dataset selection: now properly switches between MNIST, CIFAR-10, and Fashion-MNIST
- Add ResNet-18 architecture option alongside existing MLP and CNN models
- Implement dynamic data preprocessing based on model architecture (flatten for MLPs, keep 2D/3D for CNNs)
- Add model architecture parameter to frontend and backend
- Cache trainers by dataset+architecture combination for efficiency
- Update privacy budget calculations to use correct dataset sizes
- Support all architecture combinations across datasets
- app/routes.py +49 -17
- app/static/js/main.js +20 -2
- app/templates/index.html +1 -0
- app/training/simplified_real_trainer.py +206 -10
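
The dataset/architecture matrix introduced here can be exercised with a short loop — a minimal sketch, assuming the route module is importable as app.routes and that TensorFlow plus the Keras datasets are available locally (the import path and environment are assumptions, not part of the commit):

# Sketch: warm the per-(dataset, architecture) trainer cache added in this commit.
# Assumes app.routes is importable and REAL_TRAINER_AVAILABLE is True there.
from app.routes import get_or_create_trainer

datasets = ['mnist', 'fashion-mnist', 'cifar10']
architectures = ['simple-mlp', 'simple-cnn', 'advanced-cnn', 'resnet18']

for ds in datasets:
    for arch in architectures:
        trainer = get_or_create_trainer(ds, arch)
        if trainer is not None:
            # Repeated calls with the same pair should hit the cache,
            # not rebuild the trainer.
            assert trainer is get_or_create_trainer(ds, arch)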
app/routes.py
CHANGED

@@ -23,17 +23,27 @@ main = Blueprint('main', __name__)
 mock_trainer = MockTrainer()
 privacy_calculator = PrivacyCalculator()

+# We'll create trainers dynamically based on dataset selection
+real_trainers = {}  # Cache trainers by dataset to avoid reloading
+
+def get_or_create_trainer(dataset, model_architecture='simple-mlp'):
+    """Get or create a trainer for the specified dataset and architecture."""
+    if not REAL_TRAINER_AVAILABLE:
+        return None
+
+    # Create a unique key for dataset + architecture combination
+    trainer_key = f"{dataset}_{model_architecture}"
+
+    if trainer_key not in real_trainers:
+        try:
+            print(f"Creating new trainer for dataset: {dataset}, architecture: {model_architecture}")
+            real_trainers[trainer_key] = RealTrainer(dataset=dataset, model_architecture=model_architecture)
+            print(f"Trainer for {dataset} with {model_architecture} initialized successfully")
+        except Exception as e:
+            print(f"Failed to initialize trainer for {dataset} with {model_architecture}: {e}")
+            return None
+
+    return real_trainers[trainer_key]

 @main.route('/')
 def index():

@@ -62,24 +72,46 @@ def train():
         'epochs': int(data.get('epochs', 5))
     }

+    # Get dataset and model architecture selection
+    dataset = data.get('dataset', 'mnist')
+    model_architecture = data.get('model_architecture', 'simple-mlp')
+
     # Check if user wants to force mock training
     use_mock = data.get('use_mock', False)

     # Use real trainer if available and not forced to use mock
+    if REAL_TRAINER_AVAILABLE and not use_mock:
+        real_trainer = get_or_create_trainer(dataset, model_architecture)
+        if real_trainer:
+            print(f"Using real trainer with {dataset.upper()} dataset and {model_architecture} architecture")
+            results = real_trainer.train(params)
+            results['trainer_type'] = 'real'
+            results['dataset'] = dataset.upper()
+            results['model_architecture'] = model_architecture
+        else:
+            print("Failed to create real trainer, falling back to mock trainer")
+            results = mock_trainer.train(params)
+            results['trainer_type'] = 'mock'
+            results['dataset'] = 'synthetic'
+            results['model_architecture'] = 'mock'
     else:
         print("Using mock trainer with synthetic data")
         results = mock_trainer.train(params)
         results['trainer_type'] = 'mock'
         results['dataset'] = 'synthetic'
+        results['model_architecture'] = 'mock'

     # Add gradient information for visualization (if not already included)
     if 'gradient_info' not in results:
+        if REAL_TRAINER_AVAILABLE and not use_mock:
+            current_trainer = get_or_create_trainer(dataset, model_architecture)
+            if current_trainer:
+                trainer = current_trainer
+            else:
+                trainer = mock_trainer
+        else:
+            trainer = mock_trainer
+
         results['gradient_info'] = {
             'before_clipping': trainer.generate_gradient_norms(params['clipping_norm']),
             'after_clipping': trainer.generate_clipped_gradients(params['clipping_norm'])
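
With these changes the train() view reads two new fields, dataset and model_architecture, from the JSON body. A request could look like the sketch below; the route path, host, and numeric values are assumptions for illustration (they are not shown in this hunk), and only the field names come from the diff:

# Hypothetical client call against the updated train() view.
# '/train', the host/port, and the numeric values are assumed; only the
# JSON field names are taken from the diff above.
import requests

payload = {
    'epochs': 5,
    'batch_size': 256,
    'learning_rate': 0.15,
    'noise_multiplier': 1.1,
    'clipping_norm': 1.0,
    'dataset': 'cifar10',              # 'mnist', 'cifar10', or 'fashion-mnist'
    'model_architecture': 'resnet18',  # 'simple-mlp', 'simple-cnn', 'advanced-cnn', 'resnet18'
    'use_mock': False,
}

resp = requests.post('http://localhost:5000/train', json=payload)
results = resp.json()
print(results['trainer_type'], results['dataset'], results['model_architecture'])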
app/static/js/main.js
CHANGED

@@ -697,7 +697,9 @@ class DPSGDExplorer {
             noise_multiplier: parseFloat(document.getElementById('noise-multiplier').value),
             batch_size: parseInt(document.getElementById('batch-size').value),
             learning_rate: parseFloat(document.getElementById('learning-rate').value),
-            epochs: parseInt(document.getElementById('epochs').value)
+            epochs: parseInt(document.getElementById('epochs').value),
+            dataset: document.getElementById('dataset-select').value,
+            model_architecture: document.getElementById('model-select').value
         };
     }

@@ -720,7 +722,23 @@ class DPSGDExplorer {

     calculateEpochPrivacy(epoch) {
         const params = this.getParameters();
+
+        // Get dataset size based on selection
+        let datasetSize;
+        switch (params.dataset) {
+            case 'cifar10':
+                datasetSize = 50000; // CIFAR-10 training set size
+                break;
+            case 'fashion-mnist':
+                datasetSize = 60000; // Fashion-MNIST training set size
+                break;
+            case 'mnist':
+            default:
+                datasetSize = 60000; // MNIST training set size
+                break;
+        }
+
+        const samplingRate = params.batch_size / datasetSize;
         const steps = epoch * (1 / samplingRate);
         const delta = 1e-5;
         const c = Math.sqrt(2 * Math.log(1.25 / delta));
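
As a quick sanity check on the new per-dataset sizes, here is the same sampling-rate arithmetic mirrored in Python for an assumed batch size of 256 (the batch size is an example value; the dataset sizes are the constants hard-coded above):

# Worked example of the quantities calculateEpochPrivacy now derives.
# batch_size = 256 is an assumed example; dataset sizes match the JS constants.
import math

dataset_sizes = {'mnist': 60000, 'fashion-mnist': 60000, 'cifar10': 50000}
batch_size = 256
delta = 1e-5
c = math.sqrt(2 * math.log(1.25 / delta))  # ~4.84, same constant as in the JS

for name, n in dataset_sizes.items():
    sampling_rate = batch_size / n        # q = batch_size / dataset size
    steps_per_epoch = 1 / sampling_rate   # steps = epoch * (1 / q), shown for one epoch
    print(f"{name}: q = {sampling_rate:.5f}, steps/epoch ~ {steps_per_epoch:.0f}")

# mnist / fashion-mnist: q ~ 0.00427, ~234 steps per epoch
# cifar10:               q = 0.00512, ~195 steps per epoch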
app/templates/index.html
CHANGED

@@ -39,6 +39,7 @@
                 <option value="simple-mlp">Simple MLP</option>
                 <option value="simple-cnn">Simple CNN</option>
                 <option value="advanced-cnn">Advanced CNN</option>
+                <option value="resnet18">ResNet-18</option>
             </select>
         </div>

app/training/simplified_real_trainer.py
CHANGED

@@ -8,15 +8,32 @@ import logging
 logging.getLogger('tensorflow').setLevel(logging.ERROR)

 class SimplifiedRealTrainer:
-    def __init__(self):
+    def __init__(self, dataset='mnist', model_architecture='simple-mlp'):
         # Set random seeds for reproducibility
         tf.random.set_seed(42)
         np.random.seed(42)

+        self.dataset = dataset
+        self.model_architecture = model_architecture
+        self.input_shape = None
+        self.original_shape = None  # For CNNs that need 2D/3D inputs
+        self.num_classes = 10
+
+        # Load and preprocess the specified dataset
+        self.x_train, self.y_train, self.x_test, self.y_test = self._load_dataset(dataset)
         self.model = None

+    def _load_dataset(self, dataset):
+        """Load and preprocess the specified dataset."""
+        if dataset == 'mnist':
+            return self._load_mnist()
+        elif dataset == 'cifar10':
+            return self._load_cifar10()
+        elif dataset == 'fashion-mnist':
+            return self._load_fashion_mnist()
+        else:
+            raise ValueError(f"Unsupported dataset: {dataset}")
+
     def _load_mnist(self):
         """Load and preprocess MNIST dataset."""
         print("Loading MNIST dataset...")

@@ -28,9 +45,90 @@ class SimplifiedRealTrainer:
         x_train = x_train.astype('float32') / 255.0
         x_test = x_test.astype('float32') / 255.0

+        # Store original shape for CNNs (add channel dimension)
+        self.original_shape = (28, 28, 1)
+
+        # For MLPs, flatten the images; for CNNs, keep 2D shape
+        if self.model_architecture in ['simple-cnn', 'advanced-cnn', 'resnet18']:
+            x_train = x_train.reshape(-1, 28, 28, 1)
+            x_test = x_test.reshape(-1, 28, 28, 1)
+            self.input_shape = (28, 28, 1)
+        else:
+            x_train = x_train.reshape(-1, 28 * 28)
+            x_test = x_test.reshape(-1, 28 * 28)
+            self.input_shape = (784,)
+
+        self.num_classes = 10
+
+        # Convert labels to categorical
+        y_train = keras.utils.to_categorical(y_train, 10)
+        y_test = keras.utils.to_categorical(y_test, 10)
+
+        print(f"Training data shape: {x_train.shape}")
+        print(f"Test data shape: {x_test.shape}")
+
+        return x_train, y_train, x_test, y_test
+
+    def _load_cifar10(self):
+        """Load and preprocess CIFAR-10 dataset."""
+        print("Loading CIFAR-10 dataset...")
+
+        # Load CIFAR-10 data
+        (x_train, y_train), (x_test, y_test) = keras.datasets.cifar10.load_data()
+
+        # Normalize pixel values to [0, 1]
+        x_train = x_train.astype('float32') / 255.0
+        x_test = x_test.astype('float32') / 255.0
+
+        # Store original shape for CNNs
+        self.original_shape = (32, 32, 3)
+
+        # For MLPs, flatten the images; for CNNs, keep 3D shape
+        if self.model_architecture in ['simple-cnn', 'advanced-cnn', 'resnet18']:
+            # Keep original shape for CNNs
+            self.input_shape = (32, 32, 3)
+        else:
+            # Flatten for MLPs
+            x_train = x_train.reshape(-1, 32 * 32 * 3)
+            x_test = x_test.reshape(-1, 32 * 32 * 3)
+            self.input_shape = (3072,)
+
+        self.num_classes = 10
+
+        # Convert labels to categorical
+        y_train = keras.utils.to_categorical(y_train, 10)
+        y_test = keras.utils.to_categorical(y_test, 10)
+
+        print(f"Training data shape: {x_train.shape}")
+        print(f"Test data shape: {x_test.shape}")
+
+        return x_train, y_train, x_test, y_test
+
+    def _load_fashion_mnist(self):
+        """Load and preprocess Fashion-MNIST dataset."""
+        print("Loading Fashion-MNIST dataset...")
+
+        # Load Fashion-MNIST data
+        (x_train, y_train), (x_test, y_test) = keras.datasets.fashion_mnist.load_data()
+
+        # Normalize pixel values to [0, 1]
+        x_train = x_train.astype('float32') / 255.0
+        x_test = x_test.astype('float32') / 255.0
+
+        # Store original shape for CNNs (add channel dimension)
+        self.original_shape = (28, 28, 1)
+
+        # For MLPs, flatten the images; for CNNs, keep 2D shape
+        if self.model_architecture in ['simple-cnn', 'advanced-cnn', 'resnet18']:
+            x_train = x_train.reshape(-1, 28, 28, 1)
+            x_test = x_test.reshape(-1, 28, 28, 1)
+            self.input_shape = (28, 28, 1)
+        else:
+            x_train = x_train.reshape(-1, 28 * 28)
+            x_test = x_test.reshape(-1, 28 * 28)
+            self.input_shape = (784,)
+
+        self.num_classes = 10

         # Convert labels to categorical
         y_train = keras.utils.to_categorical(y_train, 10)

@@ -42,15 +140,113 @@
         return x_train, y_train, x_test, y_test

     def _create_model(self):
+        """Create a model based on the specified architecture."""
+        if self.model_architecture == 'simple-mlp':
+            return self._create_simple_mlp()
+        elif self.model_architecture == 'simple-cnn':
+            return self._create_simple_cnn()
+        elif self.model_architecture == 'advanced-cnn':
+            return self._create_advanced_cnn()
+        elif self.model_architecture == 'resnet18':
+            return self._create_resnet18()
+        else:
+            raise ValueError(f"Unsupported model architecture: {self.model_architecture}")
+
+    def _create_simple_mlp(self):
+        """Create a simple MLP model optimized for DP-SGD."""
         model = keras.Sequential([
+            keras.layers.Dense(256, activation='tanh', input_shape=self.input_shape),  # tanh works better with DP-SGD
             keras.layers.Dense(128, activation='tanh'),
+            keras.layers.Dense(self.num_classes, activation='softmax')
         ])
         return model

+    def _create_simple_cnn(self):
+        """Create a simple CNN model optimized for DP-SGD."""
+        model = keras.Sequential([
+            keras.layers.Conv2D(32, (3, 3), activation='tanh', input_shape=self.input_shape),
+            keras.layers.MaxPooling2D((2, 2)),
+            keras.layers.Conv2D(64, (3, 3), activation='tanh'),
+            keras.layers.MaxPooling2D((2, 2)),
+            keras.layers.Flatten(),
+            keras.layers.Dense(128, activation='tanh'),
+            keras.layers.Dense(self.num_classes, activation='softmax')
+        ])
+        return model
+
+    def _create_advanced_cnn(self):
+        """Create an advanced CNN model optimized for DP-SGD."""
+        model = keras.Sequential([
+            keras.layers.Conv2D(32, (3, 3), activation='tanh', input_shape=self.input_shape),
+            keras.layers.BatchNormalization(),
+            keras.layers.Conv2D(32, (3, 3), activation='tanh'),
+            keras.layers.MaxPooling2D((2, 2)),
+            keras.layers.Dropout(0.25),
+
+            keras.layers.Conv2D(64, (3, 3), activation='tanh'),
+            keras.layers.BatchNormalization(),
+            keras.layers.Conv2D(64, (3, 3), activation='tanh'),
+            keras.layers.MaxPooling2D((2, 2)),
+            keras.layers.Dropout(0.25),
+
+            keras.layers.Flatten(),
+            keras.layers.Dense(256, activation='tanh'),
+            keras.layers.Dropout(0.5),
+            keras.layers.Dense(128, activation='tanh'),
+            keras.layers.Dense(self.num_classes, activation='softmax')
+        ])
+        return model
+
+    def _create_resnet18(self):
+        """Create a ResNet-18 model optimized for DP-SGD."""
+        def residual_block(x, filters, kernel_size=3, stride=1, conv_shortcut=False):
+            """A residual block for ResNet."""
+            if conv_shortcut:
+                shortcut = keras.layers.Conv2D(filters, 1, strides=stride, padding='same')(x)
+                shortcut = keras.layers.BatchNormalization()(shortcut)
+            else:
+                shortcut = x
+
+            x = keras.layers.Conv2D(filters, kernel_size, strides=stride, padding='same')(x)
+            x = keras.layers.BatchNormalization()(x)
+            x = keras.layers.Activation('tanh')(x)  # Use tanh for DP-SGD
+
+            x = keras.layers.Conv2D(filters, kernel_size, padding='same')(x)
+            x = keras.layers.BatchNormalization()(x)
+
+            x = keras.layers.Add()([shortcut, x])
+            x = keras.layers.Activation('tanh')(x)
+            return x
+
+        def resnet_block(x, filters, num_blocks, stride=1):
+            """A stack of residual blocks."""
+            x = residual_block(x, filters, stride=stride, conv_shortcut=True)
+            for _ in range(num_blocks - 1):
+                x = residual_block(x, filters)
+            return x
+
+        # Input layer
+        inputs = keras.layers.Input(shape=self.input_shape)
+
+        # Initial convolution
+        x = keras.layers.Conv2D(64, 7, strides=2, padding='same')(inputs)
+        x = keras.layers.BatchNormalization()(x)
+        x = keras.layers.Activation('tanh')(x)
+        x = keras.layers.MaxPooling2D(3, strides=2, padding='same')(x)
+
+        # ResNet blocks
+        x = resnet_block(x, 64, 2)
+        x = resnet_block(x, 128, 2, stride=2)
+        x = resnet_block(x, 256, 2, stride=2)
+        x = resnet_block(x, 512, 2, stride=2)
+
+        # Global average pooling and output
+        x = keras.layers.GlobalAveragePooling2D()(x)
+        x = keras.layers.Dense(self.num_classes, activation='softmax')(x)
+
+        model = keras.Model(inputs, x)
+        return model
+
     def _clip_gradients(self, gradients, clipping_norm):
         """Clip gradients to a maximum L2 norm globally across all parameters."""
         # Calculate global L2 norm across all gradients