# Copyright 2023 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains definitions of dense prediction heads."""
from typing import Any, Dict, List, Mapping, Optional, Union
# Import libraries
import numpy as np
import tensorflow as tf, tf_keras
from official.modeling import tf_utils
@tf_keras.utils.register_keras_serializable(package='Vision')
class RetinaNetHead(tf_keras.layers.Layer):
  """Creates a RetinaNet head.

  Builds the classification and box-regression towers (and optional attribute
  towers) applied to each level of a multilevel feature pyramid. Convs may be
  shared across levels (`share_level_convs=True`) while batch-norm layers are
  always created per level.
  """

  def __init__(
      self,
      min_level: int,
      max_level: int,
      num_classes: int,
      num_anchors_per_location: int,
      num_convs: int = 4,
      num_filters: int = 256,
      attribute_heads: Optional[List[Dict[str, Any]]] = None,
      share_classification_heads: bool = False,
      use_separable_conv: bool = False,
      activation: str = 'relu',
      use_sync_bn: bool = False,
      norm_momentum: float = 0.99,
      norm_epsilon: float = 0.001,
      kernel_regularizer: Optional[tf_keras.regularizers.Regularizer] = None,
      bias_regularizer: Optional[tf_keras.regularizers.Regularizer] = None,
      num_params_per_anchor: int = 4,
      share_level_convs: bool = True,
      **kwargs,
  ):
    """Initializes a RetinaNet head.

    Args:
      min_level: An `int` number of minimum feature level.
      max_level: An `int` number of maximum feature level.
      num_classes: An `int` number of classes to predict.
      num_anchors_per_location: An `int` number of anchors per pixel location.
      num_convs: An `int` number that represents the number of the intermediate
        conv layers before the prediction.
      num_filters: An `int` number that represents the number of filters of the
        intermediate conv layers.
      attribute_heads: If not None, a list that contains a dict for each
        additional attribute head. Each dict consists of 4 key-value pairs:
        `name`, `type` ('regression' or 'classification'), `size` (number of
        predicted values for each instance), and `prediction_tower_name`
        (optional, specifies shared prediction towers.)
      share_classification_heads: A `bool` that indicates whether sharing
        weights among the main and attribute classification heads.
      use_separable_conv: A `bool` that indicates whether the separable
        convolution layers is used.
      activation: A `str` that indicates which activation is used, e.g. 'relu',
        'swish', etc.
      use_sync_bn: A `bool` that indicates whether to use synchronized batch
        normalization across different replicas.
      norm_momentum: A `float` of normalization momentum for the moving average.
      norm_epsilon: A `float` added to variance to avoid dividing by zero.
      kernel_regularizer: A `tf_keras.regularizers.Regularizer` object for
        Conv2D. Default is None.
      bias_regularizer: A `tf_keras.regularizers.Regularizer` object for Conv2D.
      num_params_per_anchor: Number of parameters required to specify an anchor
        box. For example, `num_params_per_anchor` would be 4 for axis-aligned
        anchor boxes specified by their y-centers, x-centers, heights, and
        widths.
      share_level_convs: An optional bool to enable sharing convs
        across levels for classnet, boxnet, classifier and box regressor.
        If True, convs will be shared across all levels.
      **kwargs: Additional keyword arguments to be passed.
    """
    super().__init__(**kwargs)
    # Stored verbatim so `get_config`/`from_config` can round-trip this layer
    # through Keras serialization.
    self._config_dict = {
        'min_level': min_level,
        'max_level': max_level,
        'num_classes': num_classes,
        'num_anchors_per_location': num_anchors_per_location,
        'num_convs': num_convs,
        'num_filters': num_filters,
        'attribute_heads': attribute_heads,
        'share_classification_heads': share_classification_heads,
        'use_separable_conv': use_separable_conv,
        'activation': activation,
        'use_sync_bn': use_sync_bn,
        'norm_momentum': norm_momentum,
        'norm_epsilon': norm_epsilon,
        'kernel_regularizer': kernel_regularizer,
        'bias_regularizer': bias_regularizer,
        'num_params_per_anchor': num_params_per_anchor,
        'share_level_convs': share_level_convs,
    }
    # Batch norm acts over the channel axis, whose position depends on the
    # image data format.
    if tf_keras.backend.image_data_format() == 'channels_last':
      self._bn_axis = -1
    else:
      self._bn_axis = 1
    self._activation = tf_utils.get_activation(activation)
    # Shared kwargs for the intermediate 3x3 convs in the prediction towers.
    self._conv_kwargs = {
        'filters': self._config_dict['num_filters'],
        'kernel_size': 3,
        'padding': 'same',
        'bias_initializer': tf.zeros_initializer(),
        'bias_regularizer': self._config_dict['bias_regularizer'],
    }
    # SeparableConv2D keeps its own default initializers; plain Conv2D gets an
    # explicit random-normal kernel init and the configured regularizer.
    if not self._config_dict['use_separable_conv']:
      self._conv_kwargs.update({
          'kernel_initializer': tf_keras.initializers.RandomNormal(stddev=0.01),
          'kernel_regularizer': self._config_dict['kernel_regularizer'],
      })
    self._bn_kwargs = {
        'axis': self._bn_axis,
        'momentum': self._config_dict['norm_momentum'],
        'epsilon': self._config_dict['norm_epsilon'],
    }
    # Final classification conv. The bias is initialized to
    # -log((1 - 0.01) / 0.01) so that sigmoid(bias) == 0.01, i.e. every anchor
    # starts with a low foreground probability.
    self._classifier_kwargs = {
        'filters': (
            self._config_dict['num_classes']
            * self._config_dict['num_anchors_per_location']
        ),
        'kernel_size': 3,
        'padding': 'same',
        'bias_initializer': tf.constant_initializer(-np.log((1 - 0.01) / 0.01)),
        'bias_regularizer': self._config_dict['bias_regularizer'],
    }
    if not self._config_dict['use_separable_conv']:
      self._classifier_kwargs.update({
          'kernel_initializer': tf_keras.initializers.RandomNormal(stddev=1e-5),
          'kernel_regularizer': self._config_dict['kernel_regularizer'],
      })
    # Final box-regression conv: num_params_per_anchor values per anchor.
    self._box_regressor_kwargs = {
        'filters': (
            self._config_dict['num_params_per_anchor']
            * self._config_dict['num_anchors_per_location']
        ),
        'kernel_size': 3,
        'padding': 'same',
        'bias_initializer': tf.zeros_initializer(),
        'bias_regularizer': self._config_dict['bias_regularizer'],
    }
    if not self._config_dict['use_separable_conv']:
      self._box_regressor_kwargs.update({
          'kernel_initializer': tf_keras.initializers.RandomNormal(stddev=1e-5),
          'kernel_regularizer': self._config_dict['kernel_regularizer'],
      })
    if self._config_dict['attribute_heads']:
      self._init_attribute_kwargs()

  def _conv_kwargs_new_kernel_init(self, conv_kwargs):
    """Replaces any kernel initializers in `conv_kwargs` with fresh clones.

    Cloning ensures each conv layer created from the shared kwargs dict gets
    an independent initializer instance. Note: this mutates `conv_kwargs` in
    place and also returns it.

    Args:
      conv_kwargs: A `dict` of Conv2D/SeparableConv2D constructor kwargs.

    Returns:
      The same `dict`, with initializer entries cloned.
    """
    if 'kernel_initializer' in conv_kwargs:
      conv_kwargs['kernel_initializer'] = tf_utils.clone_initializer(
          conv_kwargs['kernel_initializer']
      )
    if 'pointwise_initializer' in conv_kwargs:
      conv_kwargs['pointwise_initializer'] = tf_utils.clone_initializer(
          conv_kwargs['pointwise_initializer']
      )
    if 'depthwise_initializer' in conv_kwargs:
      conv_kwargs['depthwise_initializer'] = tf_utils.clone_initializer(
          conv_kwargs['depthwise_initializer']
      )
    return conv_kwargs

  def _init_attribute_kwargs(self):
    """Builds the predictor-conv kwargs for each configured attribute head.

    Populates `self._attribute_kwargs`, one kwargs dict per entry in
    `attribute_heads`, in the same order.

    Raises:
      ValueError: If an attribute head has an unsupported `type`, or if
        `share_classification_heads` is combined with a
        `prediction_tower_name`.
    """
    self._attribute_kwargs = []
    for att_config in self._config_dict['attribute_heads']:
      att_type = att_config['type']
      att_size = att_config['size']
      # NOTE(review): indexes 'prediction_tower_name' directly although the
      # class docstring calls it optional — an absent key would raise
      # KeyError. Presumably the config always carries the key; confirm.
      att_prediction_tower_name = att_config['prediction_tower_name']
      att_predictor_kwargs = {
          'filters': att_size * self._config_dict['num_anchors_per_location'],
          'kernel_size': 3,
          'padding': 'same',
          'bias_initializer': tf.zeros_initializer(),
          'bias_regularizer': self._config_dict['bias_regularizer'],
      }
      if att_type == 'regression':
        att_predictor_kwargs.update(
            {'bias_initializer': tf.zeros_initializer()}
        )
      elif att_type == 'classification':
        # Same low-prior bias as the main classifier (sigmoid(bias) == 0.01).
        att_predictor_kwargs.update(
            {
                'bias_initializer': tf.constant_initializer(
                    -np.log((1 - 0.01) / 0.01)
                )
            }
        )
      else:
        raise ValueError(
            'Attribute head type {} not supported.'.format(att_type)
        )
      if (
          att_prediction_tower_name
          and self._config_dict['share_classification_heads']
      ):
        raise ValueError(
            'share_classification_heads cannot be set as True when'
            ' att_prediction_tower_name is specified.'
        )
      if not self._config_dict['use_separable_conv']:
        att_predictor_kwargs.update({
            'kernel_initializer': tf_keras.initializers.RandomNormal(
                stddev=1e-5
            ),
            'kernel_regularizer': self._config_dict['kernel_regularizer'],
        })
      self._attribute_kwargs.append(att_predictor_kwargs)

  def _apply_prediction_tower(self, features, convs, norms) -> tf.Tensor:
    """Applies a conv -> norm -> activation tower to `features`."""
    x = features
    for conv, norm in zip(convs, norms):
      x = conv(x)
      x = norm(x)
      x = self._activation(x)
    return x

  def _apply_attribute_net(
      self, attributes, level, level_idx, this_level_features, classnet_x
  ):
    """Runs all attribute heads for one feature level.

    Writes predictions into `attributes[att_name][str(level)]` in place.
    Classification heads reuse the classnet tower output when
    `share_classification_heads` is set; heads that declare the same
    `prediction_tower_name` share one tower evaluation per level.

    Args:
      attributes: Output `dict` of per-attribute, per-level predictions,
        mutated in place.
      level: The `int` pyramid level being processed.
      level_idx: The 0-based index of `level` (for per-level norm lists).
      this_level_features: The feature map tensor for this level.
      classnet_x: The classnet tower output for this level, reused when
        classification heads are shared.
    """
    prediction_tower_output = {}
    for att_config in self._config_dict['attribute_heads']:
      att_name = att_config['name']
      att_type = att_config['type']
      if (
          self._config_dict['share_classification_heads']
          and att_type == 'classification'
      ):
        attributes[att_name][str(level)] = self._att_predictors[att_name](
            classnet_x
        )
      else:

        def _apply_attribute_prediction_tower(
            atttribute_name, features, feature_level
        ):
          # Convs are shared across levels; norms are per level.
          return self._apply_prediction_tower(
              features,
              self._att_convs[atttribute_name],
              self._att_norms[atttribute_name][feature_level],
          )

        prediction_tower_name = att_config['prediction_tower_name']
        if not prediction_tower_name:
          attributes[att_name][str(level)] = self._att_predictors[att_name](
              _apply_attribute_prediction_tower(
                  att_name, this_level_features, level_idx
              )
          )
        else:
          # Evaluate each named shared tower at most once per level and
          # reuse its output for every head that references it.
          if prediction_tower_name not in prediction_tower_output:
            prediction_tower_output[prediction_tower_name] = (
                _apply_attribute_prediction_tower(
                    att_name, this_level_features, level_idx
                )
            )
          attributes[att_name][str(level)] = self._att_predictors[att_name](
              prediction_tower_output[prediction_tower_name]
          )

  def _build_prediction_tower(
      self, net_name, predictor_name, conv_op, bn_op, predictor_kwargs
  ):
    """Builds the prediction tower. Convs across levels can be shared or not.

    Args:
      net_name: A `str` prefix used for the tower's conv/norm layer names.
      predictor_name: A `str` name for the final prediction conv.
      conv_op: The conv layer class (Conv2D or SeparableConv2D).
      bn_op: The batch-norm layer class.
      predictor_kwargs: Constructor kwargs for the final prediction conv.

    Returns:
      A tuple of (convs, norms, predictors). With shared level convs, `convs`
      is a flat list (one per conv index) and `predictors` is a single layer;
      otherwise both are per-level lists. `norms` is always a per-level list
      of lists.
    """
    convs = []
    norms = []
    for level in range(
        self._config_dict['min_level'], self._config_dict['max_level'] + 1
    ):
      if not self._config_dict['share_level_convs']:
        this_level_convs = []
      this_level_norms = []
      for i in range(self._config_dict['num_convs']):
        # Fresh initializer clones per created layer.
        conv_kwargs = self._conv_kwargs_new_kernel_init(self._conv_kwargs)
        if not self._config_dict['share_level_convs']:
          # Do not share convs.
          this_level_convs.append(
              conv_op(name=f'{net_name}-conv_{level}_{i}', **conv_kwargs)
          )
        elif level == self._config_dict['min_level']:
          # Shared convs are created only once, at the first level.
          convs.append(conv_op(name=f'{net_name}-conv_{i}', **conv_kwargs))
        this_level_norms.append(
            bn_op(name=f'{net_name}-conv-norm_{level}_{i}', **self._bn_kwargs)
        )
      norms.append(this_level_norms)
      if not self._config_dict['share_level_convs']:
        convs.append(this_level_convs)
    # Create predictors after additional convs.
    if self._config_dict['share_level_convs']:
      predictors = conv_op(name=predictor_name, **predictor_kwargs)
    else:
      predictors = []
      for level in range(
          self._config_dict['min_level'], self._config_dict['max_level'] + 1
      ):
        predictor_kwargs = self._conv_kwargs_new_kernel_init(predictor_kwargs)
        predictors.append(
            conv_op(name=f'{predictor_name}-{level}', **predictor_kwargs)
        )
    return convs, norms, predictors

  def _build_attribute_net(self, conv_op, bn_op):
    """Builds convs, norms and predictors for every attribute head.

    Attribute convs are shared across levels (created at `min_level` only);
    norms are created per level. Results are stored on
    `self._att_convs`, `self._att_norms` and `self._att_predictors`,
    keyed by attribute name.

    Args:
      conv_op: The conv layer class (Conv2D or SeparableConv2D).
      bn_op: The batch-norm layer class.

    Raises:
      ValueError: If an attribute head configures a negative `num_convs` or
        `num_filters`.
    """
    self._att_predictors = {}
    self._att_convs = {}
    self._att_norms = {}
    for att_config, att_predictor_kwargs in zip(
        self._config_dict['attribute_heads'], self._attribute_kwargs
    ):
      att_name = att_config['name']
      # Per-attribute overrides fall back to the head-wide settings.
      att_num_convs = (
          att_config.get('num_convs') or self._config_dict['num_convs']
      )
      att_num_filters = (
          att_config.get('num_filters') or self._config_dict['num_filters']
      )
      if att_num_convs < 0:
        raise ValueError(f'Invalid `num_convs` {att_num_convs} for {att_name}.')
      if att_num_filters < 0:
        raise ValueError(
            f'Invalid `num_filters` {att_num_filters} for {att_name}.'
        )
      att_conv_kwargs = self._conv_kwargs.copy()
      att_conv_kwargs['filters'] = att_num_filters
      att_convs_i = []
      att_norms_i = []
      # Build conv and norm layers.
      for level in range(
          self._config_dict['min_level'], self._config_dict['max_level'] + 1
      ):
        this_level_att_norms = []
        for i in range(att_num_convs):
          if level == self._config_dict['min_level']:
            att_conv_name = '{}-conv_{}'.format(att_name, i)
            att_convs_i.append(conv_op(name=att_conv_name, **att_conv_kwargs))
          att_norm_name = '{}-conv-norm_{}_{}'.format(att_name, level, i)
          this_level_att_norms.append(
              bn_op(name=att_norm_name, **self._bn_kwargs)
          )
        att_norms_i.append(this_level_att_norms)
      self._att_convs[att_name] = att_convs_i
      self._att_norms[att_name] = att_norms_i
      # Build the final prediction layer.
      self._att_predictors[att_name] = conv_op(
          name='{}_attributes'.format(att_name), **att_predictor_kwargs
      )

  def build(self, input_shape: Union[tf.TensorShape, List[tf.TensorShape]]):
    """Creates the variables of the head."""
    conv_op = (
        tf_keras.layers.SeparableConv2D
        if self._config_dict['use_separable_conv']
        else tf_keras.layers.Conv2D
    )
    bn_op = (
        tf_keras.layers.experimental.SyncBatchNormalization
        if self._config_dict['use_sync_bn']
        else tf_keras.layers.BatchNormalization
    )
    # Class net.
    self._cls_convs, self._cls_norms, self._classifier = (
        self._build_prediction_tower(
            'classnet', 'scores', conv_op, bn_op, self._classifier_kwargs
        )
    )
    # Box net.
    self._box_convs, self._box_norms, self._box_regressor = (
        self._build_prediction_tower(
            'boxnet', 'boxes', conv_op, bn_op, self._box_regressor_kwargs
        )
    )
    # Attribute learning nets.
    if self._config_dict['attribute_heads']:
      self._build_attribute_net(conv_op, bn_op)
    super().build(input_shape)

  def call(self, features: Mapping[str, tf.Tensor]):
    """Forward pass of the RetinaNet head.

    Args:
      features: A `dict` of `tf.Tensor` where
        - key: A `str` of the level of the multilevel features.
        - values: A `tf.Tensor`, the feature map tensors, whose shape is
            [batch, height_l, width_l, channels].

    Returns:
      scores: A `dict` of `tf.Tensor` which includes scores of the predictions.
        - key: A `str` of the level of the multilevel predictions.
        - values: A `tf.Tensor` of the box scores predicted from a particular
            feature level, whose shape is
            [batch, height_l, width_l, num_classes * num_anchors_per_location].
      boxes: A `dict` of `tf.Tensor` which includes coordinates of the
        predictions.
        - key: A `str` of the level of the multilevel predictions.
        - values: A `tf.Tensor` of the box scores predicted from a particular
            feature level, whose shape is
            [batch, height_l, width_l,
            num_params_per_anchor * num_anchors_per_location].
      attributes: a dict of (attribute_name, attribute_prediction). Each
        `attribute_prediction` is a dict of:
        - key: `str`, the level of the multilevel predictions.
        - values: `Tensor`, the box scores predicted from a particular feature
            level, whose shape is
            [batch, height_l, width_l,
            attribute_size * num_anchors_per_location].
        Can be an empty dictionary if no attribute learning is required.
    """
    scores = {}
    boxes = {}
    if self._config_dict['attribute_heads']:
      attributes = {
          att_config['name']: {}
          for att_config in self._config_dict['attribute_heads']
      }
    else:
      attributes = {}
    for i, level in enumerate(
        range(self._config_dict['min_level'],
              self._config_dict['max_level'] + 1)):
      this_level_features = features[str(level)]
      # Pick the shared layers, or this level's own layers.
      if self._config_dict['share_level_convs']:
        cls_convs = self._cls_convs
        box_convs = self._box_convs
        classifier = self._classifier
        box_regressor = self._box_regressor
      else:
        cls_convs = self._cls_convs[i]
        box_convs = self._box_convs[i]
        classifier = self._classifier[i]
        box_regressor = self._box_regressor[i]
      # Apply class net.
      x = self._apply_prediction_tower(
          this_level_features, cls_convs, self._cls_norms[i]
      )
      scores[str(level)] = classifier(x)
      # Kept so shared attribute classification heads can reuse the tower
      # output in `_apply_attribute_net`.
      classnet_x = x
      # Apply box net.
      x = self._apply_prediction_tower(
          this_level_features, box_convs, self._box_norms[i]
      )
      boxes[str(level)] = box_regressor(x)
      # Apply attribute nets.
      if self._config_dict['attribute_heads']:
        self._apply_attribute_net(
            attributes, level, i, this_level_features, classnet_x
        )
    return scores, boxes, attributes

  def get_config(self):
    """Returns the layer config for Keras serialization."""
    return self._config_dict

  @classmethod
  def from_config(cls, config):
    """Recreates the layer from a `get_config` dict."""
    return cls(**config)
@tf_keras.utils.register_keras_serializable(package='Vision')
class RPNHead(tf_keras.layers.Layer):
  """Creates a Region Proposal Network (RPN) head.

  Applies a small conv tower (convs shared across levels, norms per level)
  to each pyramid level, followed by 1x1 objectness and box-delta convs that
  are shared across all levels.
  """

  def __init__(
      self,
      min_level: int,
      max_level: int,
      num_anchors_per_location: int,
      num_convs: int = 1,
      num_filters: int = 256,
      use_separable_conv: bool = False,
      activation: str = 'relu',
      use_sync_bn: bool = False,
      norm_momentum: float = 0.99,
      norm_epsilon: float = 0.001,
      kernel_regularizer: Optional[tf_keras.regularizers.Regularizer] = None,
      bias_regularizer: Optional[tf_keras.regularizers.Regularizer] = None,
      **kwargs):
    """Initializes a Region Proposal Network head.

    Args:
      min_level: An `int` number of minimum feature level.
      max_level: An `int` number of maximum feature level.
      num_anchors_per_location: An `int` number of number of anchors per pixel
        location.
      num_convs: An `int` number that represents the number of the intermediate
        convolution layers before the prediction.
      num_filters: An `int` number that represents the number of filters of the
        intermediate convolution layers.
      use_separable_conv: A `bool` that indicates whether the separable
        convolution layers is used.
      activation: A `str` that indicates which activation is used, e.g. 'relu',
        'swish', etc.
      use_sync_bn: A `bool` that indicates whether to use synchronized batch
        normalization across different replicas.
      norm_momentum: A `float` of normalization momentum for the moving average.
      norm_epsilon: A `float` added to variance to avoid dividing by zero.
      kernel_regularizer: A `tf_keras.regularizers.Regularizer` object for
        Conv2D. Default is None.
      bias_regularizer: A `tf_keras.regularizers.Regularizer` object for Conv2D.
      **kwargs: Additional keyword arguments to be passed.
    """
    super(RPNHead, self).__init__(**kwargs)
    # Stored verbatim so `get_config`/`from_config` can round-trip this layer.
    self._config_dict = {
        'min_level': min_level,
        'max_level': max_level,
        'num_anchors_per_location': num_anchors_per_location,
        'num_convs': num_convs,
        'num_filters': num_filters,
        'use_separable_conv': use_separable_conv,
        'activation': activation,
        'use_sync_bn': use_sync_bn,
        'norm_momentum': norm_momentum,
        'norm_epsilon': norm_epsilon,
        'kernel_regularizer': kernel_regularizer,
        'bias_regularizer': bias_regularizer,
    }
    # Batch norm acts over the channel axis, whose position depends on the
    # image data format.
    if tf_keras.backend.image_data_format() == 'channels_last':
      self._bn_axis = -1
    else:
      self._bn_axis = 1
    self._activation = tf_utils.get_activation(activation)

  def build(self, input_shape):
    """Creates the variables of the head."""
    conv_op = (tf_keras.layers.SeparableConv2D
               if self._config_dict['use_separable_conv']
               else tf_keras.layers.Conv2D)
    conv_kwargs = {
        'filters': self._config_dict['num_filters'],
        'kernel_size': 3,
        'padding': 'same',
        'bias_initializer': tf.zeros_initializer(),
        'bias_regularizer': self._config_dict['bias_regularizer'],
    }
    # SeparableConv2D keeps its own default initializers; plain Conv2D gets an
    # explicit random-normal kernel init and the configured regularizer.
    if not self._config_dict['use_separable_conv']:
      conv_kwargs.update({
          'kernel_initializer': tf_keras.initializers.RandomNormal(
              stddev=0.01),
          'kernel_regularizer': self._config_dict['kernel_regularizer'],
      })
    bn_op = (tf_keras.layers.experimental.SyncBatchNormalization
             if self._config_dict['use_sync_bn']
             else tf_keras.layers.BatchNormalization)
    bn_kwargs = {
        'axis': self._bn_axis,
        'momentum': self._config_dict['norm_momentum'],
        'epsilon': self._config_dict['norm_epsilon'],
    }
    # Convs are shared across levels (created at min_level only); norms are
    # created per level.
    self._convs = []
    self._norms = []
    for level in range(
        self._config_dict['min_level'], self._config_dict['max_level'] + 1):
      this_level_norms = []
      for i in range(self._config_dict['num_convs']):
        if level == self._config_dict['min_level']:
          conv_name = 'rpn-conv_{}'.format(i)
          # Fresh initializer clone per created layer.
          if 'kernel_initializer' in conv_kwargs:
            conv_kwargs['kernel_initializer'] = tf_utils.clone_initializer(
                conv_kwargs['kernel_initializer'])
          self._convs.append(conv_op(name=conv_name, **conv_kwargs))
        norm_name = 'rpn-conv-norm_{}_{}'.format(level, i)
        this_level_norms.append(bn_op(name=norm_name, **bn_kwargs))
      self._norms.append(this_level_norms)
    # 1x1 objectness conv: one score per anchor.
    classifier_kwargs = {
        'filters': self._config_dict['num_anchors_per_location'],
        'kernel_size': 1,
        'padding': 'valid',
        'bias_initializer': tf.zeros_initializer(),
        'bias_regularizer': self._config_dict['bias_regularizer'],
    }
    if not self._config_dict['use_separable_conv']:
      classifier_kwargs.update({
          'kernel_initializer': tf_keras.initializers.RandomNormal(
              stddev=1e-5),
          'kernel_regularizer': self._config_dict['kernel_regularizer'],
      })
    self._classifier = conv_op(name='rpn-scores', **classifier_kwargs)
    # 1x1 box conv: 4 box parameters per anchor.
    box_regressor_kwargs = {
        'filters': 4 * self._config_dict['num_anchors_per_location'],
        'kernel_size': 1,
        'padding': 'valid',
        'bias_initializer': tf.zeros_initializer(),
        'bias_regularizer': self._config_dict['bias_regularizer'],
    }
    if not self._config_dict['use_separable_conv']:
      box_regressor_kwargs.update({
          'kernel_initializer': tf_keras.initializers.RandomNormal(
              stddev=1e-5),
          'kernel_regularizer': self._config_dict['kernel_regularizer'],
      })
    self._box_regressor = conv_op(name='rpn-boxes', **box_regressor_kwargs)
    super(RPNHead, self).build(input_shape)

  def call(self, features: Mapping[str, tf.Tensor]):
    """Forward pass of the RPN head.

    Args:
      features: A `dict` of `tf.Tensor` where
        - key: A `str` of the level of the multilevel features.
        - values: A `tf.Tensor`, the feature map tensors, whose shape is [batch,
          height_l, width_l, channels].

    Returns:
      scores: A `dict` of `tf.Tensor` which includes scores of the predictions.
        - key: A `str` of the level of the multilevel predictions.
        - values: A `tf.Tensor` of the box scores predicted from a particular
            feature level, whose shape is
            [batch, height_l, width_l, num_classes * num_anchors_per_location].
      boxes: A `dict` of `tf.Tensor` which includes coordinates of the
        predictions.
        - key: A `str` of the level of the multilevel predictions.
        - values: A `tf.Tensor` of the box scores predicted from a particular
            feature level, whose shape is
            [batch, height_l, width_l, 4 * num_anchors_per_location].
    """
    scores = {}
    boxes = {}
    for i, level in enumerate(
        range(self._config_dict['min_level'],
              self._config_dict['max_level'] + 1)):
      x = features[str(level)]
      # Shared convs, per-level norms.
      for conv, norm in zip(self._convs, self._norms[i]):
        x = conv(x)
        x = norm(x)
        x = self._activation(x)
      scores[str(level)] = self._classifier(x)
      boxes[str(level)] = self._box_regressor(x)
    return scores, boxes

  def get_config(self):
    """Returns the layer config for Keras serialization."""
    return self._config_dict

  @classmethod
  def from_config(cls, config):
    """Recreates the layer from a `get_config` dict."""
    return cls(**config)