|
from typing import Optional, Tuple |
|
|
|
import numpy as np |
|
import torch |
|
from diffusers.utils.torch_utils import randn_tensor |
|
|
|
|
|
class DiagonalGaussianDistribution(object):
    """Gaussian with diagonal covariance, built from stacked mean/log-variance.

    ``parameters`` is split into two equal chunks along ``_dim``: the first
    chunk is the mean, the second the log-variance. The log-variance is
    clamped to ``[-30, 20]`` before the standard deviation and variance are
    derived from it.
    """

    def __init__(self, parameters: torch.Tensor, deterministic: bool = False, _dim: int = 1):
        """Split ``parameters`` and precompute ``std`` / ``var``.

        Args:
            parameters: Tensor holding mean and log-variance concatenated
                along ``_dim``.
            deterministic: When ``True``, ``std`` and ``var`` are replaced by
                zeros, so sampling returns the mean and ``kl`` / ``nll``
                return zero.
            _dim: Dimension along which ``parameters`` is chunked in two.
        """
        self.parameters = parameters
        self.mean, self.logvar = torch.chunk(parameters, 2, dim=_dim)
        # Clamp before exponentiating so std/var stay finite.
        self.logvar = torch.clamp(self.logvar, -30.0, 20.0)
        self.deterministic = deterministic
        self.std = torch.exp(0.5 * self.logvar)
        self.var = torch.exp(self.logvar)
        if self.deterministic:
            self.var = self.std = torch.zeros_like(
                self.mean, device=self.parameters.device, dtype=self.parameters.dtype
            )

    def _zero(self) -> torch.Tensor:
        """Return a one-element zero tensor on the parameters' device and dtype."""
        # torch.Tensor([0.0]) would always be a CPU float32 tensor, which
        # cannot be combined with tensors living on another device.
        return torch.zeros(1, device=self.parameters.device, dtype=self.parameters.dtype)

    def sample(self, generator: Optional[torch.Generator] = None) -> torch.Tensor:
        """Draw ``mean + std * noise`` with noise shaped like ``mean``.

        Args:
            generator: Optional generator forwarded to ``randn_tensor``.

        Returns:
            A tensor with the same shape as ``mean``.
        """
        sample = randn_tensor(
            self.mean.shape,
            generator=generator,
            device=self.parameters.device,
            dtype=self.parameters.dtype,
        )
        x = self.mean + self.std * sample
        return x

    def kl(self, other: Optional["DiagonalGaussianDistribution"] = None) -> torch.Tensor:
        """Compute the KL divergence from this distribution to ``other``.

        Args:
            other: Distribution to compare against. When ``None`` the
                standard-normal closed form (zero mean, unit variance) is used.

        Returns:
            The divergence summed over dimensions 1, 2 and 3 (so the operands
            need at least four dimensions), or a one-element zero tensor when
            the distribution is deterministic.
        """
        if self.deterministic:
            return self._zero()
        if other is None:
            return 0.5 * torch.sum(
                torch.pow(self.mean, 2) + self.var - 1.0 - self.logvar,
                dim=[1, 2, 3],
            )
        return 0.5 * torch.sum(
            torch.pow(self.mean - other.mean, 2) / other.var
            + self.var / other.var
            - 1.0
            - self.logvar
            + other.logvar,
            dim=[1, 2, 3],
        )

    def nll(self, sample: torch.Tensor, dims: Tuple[int, ...] = (1, 2, 3)) -> torch.Tensor:
        """Compute the negative log-likelihood of ``sample``.

        Args:
            sample: Tensor broadcastable against ``mean``.
            dims: Dimensions to sum over. The default is an immutable tuple
                so it cannot be mutated between calls.

        Returns:
            The summed negative log-likelihood, or a one-element zero tensor
            when the distribution is deterministic.
        """
        if self.deterministic:
            return self._zero()
        logtwopi = np.log(2.0 * np.pi)
        return 0.5 * torch.sum(
            logtwopi + self.logvar + torch.pow(sample - self.mean, 2) / self.var,
            dim=dims,
        )

    def mode(self) -> torch.Tensor:
        """Return the mean, which is the mode of a Gaussian."""
        return self.mean
|
|
|
|
|
@torch.no_grad()
def _expand_linear_with_zeroed_weights(
    module: torch.nn.Linear, new_in_features: Optional[int] = None, new_out_features: Optional[int] = None
) -> torch.nn.Linear:
    """Build a ``Linear`` with new feature sizes that embeds ``module``'s weights.

    The new layer is zero-filled, then the original weight is copied into its
    leading rows/columns and the original bias (if any) into the leading bias
    entries.

    Args:
        module: Layer whose weight and bias are copied.
        new_in_features: Input size of the new layer; defaults to
            ``module.in_features``.
        new_out_features: Output size of the new layer; defaults to
            ``module.out_features``.

    Returns:
        A new ``torch.nn.Linear`` on the same device and dtype as
        ``module.weight``.
    """
    in_features = module.in_features if new_in_features is None else new_in_features
    out_features = module.out_features if new_out_features is None else new_out_features

    source_bias = getattr(module, "bias", None)
    has_bias = source_bias is not None

    expanded = torch.nn.Linear(in_features, out_features, bias=has_bias)
    expanded.to(device=module.weight.device, dtype=module.weight.dtype)

    # Zero everything first so entries outside the copied region stay at zero.
    source_shape = module.weight.data.shape
    expanded.weight.zero_()
    expanded.weight.data[: source_shape[0], : source_shape[1]].copy_(module.weight.data)

    if has_bias:
        expanded.bias.zero_()
        expanded.bias.data[: source_bias.data.shape[0]].copy_(source_bias.data)

    return expanded
|
|
|
|
|
@torch.no_grad()
def _expand_conv3d_with_zeroed_weights(
    module: torch.nn.Conv3d, new_in_channels: Optional[int] = None, new_out_channels: Optional[int] = None
) -> torch.nn.Conv3d:
    """Build a ``Conv3d`` with new channel counts that embeds ``module``'s weights.

    The new convolution copies ``module``'s kernel size, stride, padding,
    padding mode, dilation and groups. Its weight and bias are zero-filled,
    then the original weight is copied into the leading output/input channel
    slice and the original bias (if any) into the leading bias entries.

    Args:
        module: Convolution whose configuration, weight and bias are copied.
        new_in_channels: Input channels of the new layer; defaults to
            ``module.in_channels``.
        new_out_channels: Output channels of the new layer; defaults to
            ``module.out_channels``.

    Returns:
        A new ``torch.nn.Conv3d`` on the same device and dtype as
        ``module.weight``.
    """
    if new_in_channels is None:
        new_in_channels = module.in_channels
    if new_out_channels is None:
        new_out_channels = module.out_channels
    bias = getattr(module, "bias", None)

    new_module = torch.nn.Conv3d(
        new_in_channels,
        new_out_channels,
        kernel_size=module.kernel_size,
        stride=module.stride,
        padding=module.padding,
        dilation=module.dilation,
        groups=module.groups,
        bias=bias is not None,
        # Without this the copy falls back to zero padding and no longer
        # reproduces the source module's outputs at the borders.
        padding_mode=module.padding_mode,
    )
    new_module.to(device=module.weight.device, dtype=module.weight.dtype)

    # Zero everything first so entries outside the copied region stay at zero.
    new_module.weight.zero_()
    new_module.weight.data[: module.weight.data.shape[0], : module.weight.data.shape[1]].copy_(module.weight.data)
    if bias is not None:
        new_module.bias.zero_()
        new_module.bias.data[: bias.data.shape[0]].copy_(bias.data)
    return new_module
|
|