File size: 4,289 Bytes
9383bb8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 |
from typing import Callable, Optional

import torch
import torch.nn as nn
class RBFLayer(nn.Module):
    """Radial basis function (RBF) layer.

    For every kernel ``k`` the layer evaluates::

        phi_k(x) = radial_function(exp(log_shapes[k]) * norm_function(x - kernels_centers[k]))

    optionally divides the responses by their sum over kernels, and returns
    the linear combination ``sum_k weights[:, k] * phi_k(x)``.

    Args:
        in_features_dim: Size of the last dimension of the input.
        num_kernels: Number of RBF kernels.
        out_features_dim: Size of the last dimension of the output.
        radial_function: Callable applied element-wise to the scaled
            distances.
        norm_function: Callable applied to the ``x - center`` differences
            (last dimension of size ``in_features_dim``); it is expected to
            reduce that last dimension so one distance per kernel remains.
        normalization: If true, kernel responses are divided by
            ``1e-9 + sum_k phi_k`` before being combined.
        initial_shape_parameter: Optional starting value for ``log_shapes``.
        initial_centers_parameter: Optional starting value for
            ``kernels_centers``.
        initial_weights_parameters: Optional starting value for ``weights``.
        constant_shape_parameter: If true, ``log_shapes`` is frozen
            (``requires_grad=False``); requires ``initial_shape_parameter``.
        constant_centers_parameter: If true, ``kernels_centers`` is frozen;
            requires ``initial_centers_parameter``.
        constant_weights_parameters: If true, ``weights`` is frozen;
            requires ``initial_weights_parameters``.

    Raises:
        ValueError: If a ``constant_*`` flag is set while the matching
            ``initial_*`` tensor is ``None``.
    """

    def __init__(self,
                 in_features_dim: int,
                 num_kernels: int,
                 out_features_dim: int,
                 radial_function: Callable[[torch.Tensor], torch.Tensor],
                 norm_function: Callable[[torch.Tensor], torch.Tensor],
                 normalization: bool = True,
                 initial_shape_parameter: Optional[torch.Tensor] = None,
                 initial_centers_parameter: Optional[torch.Tensor] = None,
                 initial_weights_parameters: Optional[torch.Tensor] = None,
                 constant_shape_parameter: bool = False,
                 constant_centers_parameter: bool = False,
                 constant_weights_parameters: bool = False):
        super().__init__()

        self.in_features_dim = in_features_dim
        self.num_kernels = num_kernels
        self.out_features_dim = out_features_dim
        self.radial_function = radial_function
        self.norm_function = norm_function
        self.normalization = normalization

        self.initial_shape_parameter = initial_shape_parameter
        self.constant_shape_parameter = constant_shape_parameter

        self.initial_centers_parameter = initial_centers_parameter
        self.constant_centers_parameter = constant_centers_parameter

        self.initial_weights_parameters = initial_weights_parameters
        self.constant_weights_parameters = constant_weights_parameters

        self._make_parameters()

    @staticmethod
    def _build_parameter(initial: Optional[torch.Tensor],
                         constant: bool,
                         default_shape: tuple,
                         initial_name: str,
                         constant_name: str) -> nn.Parameter:
        """Create one parameter from an optional initial value and a freeze flag."""
        if initial is None:
            if constant:
                raise ValueError(
                    f"{constant_name}=True requires {initial_name} to be provided")
            # Placeholder values; reset() fills them with random ones.
            return nn.Parameter(torch.zeros(default_shape, dtype=torch.float32))
        # Clone so that in-place updates (optimizer steps) never write into
        # the tensor owned by the caller.
        return nn.Parameter(initial.detach().clone(), requires_grad=not constant)

    def _make_parameters(self) -> None:
        """Create ``weights``, ``kernels_centers`` and ``log_shapes``.

        A supplied initial tensor is always used as the starting value; the
        matching ``constant_*`` flag only decides whether it is trainable.
        """
        # Linear combination weights: [out_features_dim, num_kernels]
        self.weights = self._build_parameter(
            self.initial_weights_parameters,
            self.constant_weights_parameters,
            (self.out_features_dim, self.num_kernels),
            "initial_weights_parameters",
            "constant_weights_parameters",
        )

        # Kernels' centers: [num_kernels, in_features_dim]
        self.kernels_centers = self._build_parameter(
            self.initial_centers_parameter,
            self.constant_centers_parameter,
            (self.num_kernels, self.in_features_dim),
            "initial_centers_parameter",
            "constant_centers_parameter",
        )

        # Log of the shape parameters: [num_kernels]
        self.log_shapes = self._build_parameter(
            self.initial_shape_parameter,
            self.constant_shape_parameter,
            (self.num_kernels,),
            "initial_shape_parameter",
            "constant_shape_parameter",
        )

        self.reset()

    def reset(self,
              upper_bound_kernels: float = 1.0,
              std_shapes: float = 0.1,
              gain_weights: float = 1.0) -> None:
        """Randomly re-initialise every parameter that has no user-supplied initial value.

        Parameters created from an ``initial_*`` tensor are left untouched.

        Args:
            upper_bound_kernels: Centers are drawn uniformly from
                ``[-upper_bound_kernels, upper_bound_kernels]``.
            std_shapes: Standard deviation of the zero-mean normal used for
                ``log_shapes``.
            gain_weights: Gain passed to Xavier-uniform initialisation of
                ``weights``.
        """
        if self.initial_centers_parameter is None:
            nn.init.uniform_(
                self.kernels_centers,
                a=-upper_bound_kernels,
                b=upper_bound_kernels)

        if self.initial_shape_parameter is None:
            nn.init.normal_(self.log_shapes, mean=0.0, std=std_shapes)

        if self.initial_weights_parameters is None:
            nn.init.xavier_uniform_(self.weights, gain=gain_weights)

    def forward(self, input: torch.Tensor) -> torch.Tensor:
        """Compute the output of the RBF layer.

        Args:
            input: Tensor of size ``[..., in_features_dim]``, typically
                ``[batch_size, sequence_length, in_features_dim]``.

        Returns:
            Tensor of size ``[..., out_features_dim]``.
        """
        # Broadcasting pairs every input vector with every center:
        # [..., 1, in_features_dim] - [num_kernels, in_features_dim]
        #   -> [..., num_kernels, in_features_dim]
        diff = input.unsqueeze(-2) - self.kernels_centers

        # Distances to each center: [..., num_kernels]
        r = self.norm_function(diff)

        # Scale distances by the (positive) per-kernel shape parameters
        eps_r = self.log_shapes.exp() * r

        # Apply the radial basis function (e.g. Gaussian)
        rbfs = self.radial_function(eps_r)

        if self.normalization:
            # The small constant guards against division by zero.
            rbfs = rbfs / (1e-9 + rbfs.sum(dim=-1, keepdim=True))

        # Weighted combination of the kernel responses:
        # [out, num_kernels] * [..., 1, num_kernels] summed over kernels
        out = (self.weights * rbfs.unsqueeze(-2)).sum(dim=-1)
        return out
|