"""
Mask R-CNN
Multi-GPU Support for Keras.

Copyright (c) 2017 Matterport, Inc.
Licensed under the MIT License (see LICENSE for details)
Written by Waleed Abdulla

Ideas and a small code snippets from these sources:
https://github.com/fchollet/keras/issues/2436
https://medium.com/@kuza55/transparent-multi-gpu-training-on-tensorflow-with-keras-8b0016fd9012
https://github.com/avolkov1/keras_experiments/blob/master/keras_exp/multigpu/
https://github.com/fchollet/keras/blob/master/keras/utils/training_utils.py
"""
import keras.backend as K
import keras.layers as KL
import keras.models as KM
import tensorflow as tf
class ParallelModel(KM.Model):
    """Subclasses the standard Keras Model and adds multi-GPU support.

    It works by creating a copy of the model on each GPU. Then it slices
    the inputs and sends a slice to each copy of the model, and then
    merges the outputs together and applies the loss on the combined
    outputs.
    """

    def __init__(self, keras_model, gpu_count):
        """Class constructor.

        keras_model: The Keras model to parallelize.
        gpu_count: Number of GPUs. Must be > 1.
        """
        self.inner_model = keras_model
        self.gpu_count = gpu_count
        # Build the replicated graph first, then initialize the wrapper
        # Model with the inner model's inputs and the merged outputs.
        merged_outputs = self.make_parallel()
        super(ParallelModel, self).__init__(
            inputs=self.inner_model.inputs, outputs=merged_outputs
        )

    def __getattribute__(self, attrname):
        """Redirect loading and saving methods to the inner model. That's where
        the weights are stored."""
        # Any attribute whose name contains "load" or "save" (e.g.
        # save_weights, load_weights) is served by the inner model so
        # checkpoints contain the single-GPU weights, not the wrapper.
        if "load" in attrname or "save" in attrname:
            return getattr(self.inner_model, attrname)
        return super(ParallelModel, self).__getattribute__(attrname)

    def summary(self, *args, **kwargs):
        """Override summary() to display summaries of both, the wrapper
        and inner models."""
        super(ParallelModel, self).summary(*args, **kwargs)
        self.inner_model.summary(*args, **kwargs)

    def make_parallel(self):
        """Creates a new wrapper model that consists of multiple replicas of
        the original model placed on different GPUs.

        Returns the list of merged output tensors, one per inner-model
        output, in the inner model's output order.
        """
        # Slice inputs. Slice inputs on the CPU to avoid sending a copy
        # of the full inputs to all GPUs. Saves on bandwidth and memory.
        # NOTE: tf.split requires the batch size to be divisible by
        # gpu_count.
        input_slices = {
            name: tf.split(x, self.gpu_count)
            for name, x in zip(self.inner_model.input_names, self.inner_model.inputs)
        }

        output_names = self.inner_model.output_names
        # One bucket per inner-model output; each collects that output's
        # tensor from every GPU replica.
        outputs_all = [[] for _ in self.inner_model.outputs]

        # Run the model call() on each GPU to place the ops there
        for i in range(self.gpu_count):
            with tf.device("/gpu:%d" % i):
                with tf.name_scope("tower_%d" % i):
                    # Run a slice of inputs through this replica.
                    # Bind `name` and `i` as default arguments: a plain
                    # closure would late-bind them, so a lambda invoked
                    # after the loops finish (e.g. when Keras rebuilds or
                    # deserializes the Lambda layer) would resolve every
                    # slice to the last name/GPU instead of its own.
                    zipped_inputs = zip(
                        self.inner_model.input_names, self.inner_model.inputs
                    )
                    inputs = [
                        KL.Lambda(
                            lambda s, name=name, i=i: input_slices[name][i],
                            output_shape=lambda s: (None,) + s[1:],
                        )(tensor)
                        for name, tensor in zipped_inputs
                    ]
                    # Create the model replica and get the outputs
                    outputs = self.inner_model(inputs)
                    if not isinstance(outputs, list):
                        outputs = [outputs]
                    # Save the outputs for merging back together later
                    for l, o in enumerate(outputs):
                        outputs_all[l].append(o)

        # Merge outputs on CPU
        with tf.device("/cpu:0"):
            merged = []
            for outputs, name in zip(outputs_all, output_names):
                # Concatenate or average outputs?
                # Outputs usually have a batch dimension and we concatenate
                # across it. If they don't, then the output is likely a loss
                # or a metric value that gets averaged across the batch.
                # Keras expects losses and metrics to be scalars.
                if K.int_shape(outputs[0]) == ():
                    # Average
                    m = KL.Lambda(lambda o: tf.add_n(o) / len(outputs), name=name)(
                        outputs
                    )
                else:
                    # Concatenate
                    m = KL.Concatenate(axis=0, name=name)(outputs)
                merged.append(m)
        return merged
if __name__ == "__main__":
    # Testing code below. It creates a simple model to train on MNIST and
    # tries to run it on 2 GPUs. It saves the graph so it can be viewed
    # in TensorBoard. Run it as:
    #
    # python3 parallel_model.py

    import os

    import keras.optimizers
    import numpy as np
    from keras.datasets import mnist
    from keras.preprocessing.image import ImageDataGenerator

    GPU_COUNT = 2

    # Root directory of the project
    ROOT_DIR = os.path.abspath("../")

    # Directory to save logs and trained model
    MODEL_DIR = os.path.join(ROOT_DIR, "logs")

    def build_model(x_train, num_classes):
        """Return a small CNN classifier sized to x_train's image shape."""
        # Reset default graph. Keras leaves old ops in the graph,
        # which are ignored for execution but clutter graph
        # visualization in TensorBoard.
        tf.reset_default_graph()
        image = KL.Input(shape=x_train.shape[1:], name="input_image")
        t = KL.Conv2D(
            32, (3, 3), activation="relu", padding="same", name="conv1"
        )(image)
        t = KL.Conv2D(
            64, (3, 3), activation="relu", padding="same", name="conv2"
        )(t)
        t = KL.MaxPooling2D(pool_size=(2, 2), name="pool1")(t)
        t = KL.Flatten(name="flat1")(t)
        t = KL.Dense(128, activation="relu", name="dense1")(t)
        t = KL.Dense(num_classes, activation="softmax", name="dense2")(t)
        return KM.Model(image, t, "digit_classifier_model")

    # Load MNIST Data and scale pixel values into [0, 1] with a
    # trailing channel axis.
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    x_train = np.expand_dims(x_train, -1).astype("float32") / 255
    x_test = np.expand_dims(x_test, -1).astype("float32") / 255

    print("x_train shape:", x_train.shape)
    print("x_test shape:", x_test.shape)

    # Build data generator and model
    datagen = ImageDataGenerator()
    model = build_model(x_train, 10)

    # Add multi-GPU support.
    model = ParallelModel(model, GPU_COUNT)

    optimizer = keras.optimizers.SGD(lr=0.01, momentum=0.9, clipnorm=5.0)
    model.compile(
        loss="sparse_categorical_crossentropy",
        optimizer=optimizer,
        metrics=["accuracy"],
    )

    model.summary()

    # Train, logging the graph for TensorBoard inspection.
    model.fit_generator(
        datagen.flow(x_train, y_train, batch_size=64),
        steps_per_epoch=50,
        epochs=10,
        verbose=1,
        validation_data=(x_test, y_test),
        callbacks=[keras.callbacks.TensorBoard(log_dir=MODEL_DIR, write_graph=True)],
    )