"""
MIT License

Copyright (c) 2023 Shivam Mehta

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
"""

import math
from typing import Optional

import torch
import torch.nn as nn
import torch.nn.functional as F

from diffusers.models.activations import get_activation


class SinusoidalPosEmb(torch.nn.Module):
    """Sinusoidal positional embedding used to encode diffusion timesteps."""

    def __init__(self, dim):
        super().__init__()
        self.dim = dim
        assert self.dim % 2 == 0, "SinusoidalPosEmb requires dim to be even"

    def forward(self, x, scale=1000):
        if x.ndim < 1:
            x = x.unsqueeze(0)
        device = x.device
        half_dim = self.dim // 2
        # Geometrically spaced frequencies, half for sines and half for cosines.
        emb = math.log(10000) / (half_dim - 1)
        emb = torch.exp(torch.arange(half_dim, device=device).float() * -emb)
        emb = scale * x.unsqueeze(1) * emb.unsqueeze(0)
        emb = torch.cat((emb.sin(), emb.cos()), dim=-1)
        return emb
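
# Example (illustrative shapes, not part of the original module):
# SinusoidalPosEmb(128)(torch.rand(16)) returns a (16, 128) tensor,
# 64 sine channels followed by 64 cosine channels.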


class Block1D(torch.nn.Module):
    """Conv1d -> GroupNorm -> Mish, applied under a binary sequence mask."""

    def __init__(self, dim, dim_out, groups=8):
        super().__init__()
        self.block = torch.nn.Sequential(
            torch.nn.Conv1d(dim, dim_out, 3, padding=1),
            torch.nn.GroupNorm(groups, dim_out),
            nn.Mish(),
        )

    def forward(self, x, mask):
        # Mask padded positions before the convolution and re-mask the output.
        output = self.block(x * mask)
        return output * mask


class ResnetBlock1D(torch.nn.Module):
    """Two masked Block1D stages with additive time conditioning and a 1x1
    residual projection."""

    def __init__(self, dim, dim_out, time_emb_dim, groups=8):
        super().__init__()
        self.mlp = torch.nn.Sequential(nn.Mish(), torch.nn.Linear(time_emb_dim, dim_out))

        self.block1 = Block1D(dim, dim_out, groups=groups)
        self.block2 = Block1D(dim_out, dim_out, groups=groups)

        self.res_conv = torch.nn.Conv1d(dim, dim_out, 1)

    def forward(self, x, mask, time_emb):
        h = self.block1(x, mask)
        # Broadcast the projected time embedding across the temporal axis.
        h += self.mlp(time_emb).unsqueeze(-1)
        h = self.block2(h, mask)
        output = h + self.res_conv(x * mask)
        return output
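
# Example (illustrative shapes, not part of the original module): with x of
# shape (B, 80, T), mask of shape (B, 1, T), and time_emb of shape (B, 256),
# ResnetBlock1D(80, 128, 256) returns a (B, 128, T) tensor.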


class Downsample1D(nn.Module):
    """Halves the temporal resolution with a stride-2 convolution."""

    def __init__(self, dim):
        super().__init__()
        self.conv = torch.nn.Conv1d(dim, dim, 3, 2, 1)

    def forward(self, x):
        return self.conv(x)
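
# Note: for input length T, kernel 3 / stride 2 / padding 1 gives output
# length ceil(T / 2), so an even-length input is halved exactly and can be
# restored by the factor-2 Upsample1D below.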


class TimestepEmbedding(nn.Module):
    """MLP that maps a timestep embedding to the model's conditioning width."""

    def __init__(
        self,
        in_channels: int,
        time_embed_dim: int,
        act_fn: str = "silu",
        out_dim: Optional[int] = None,
        post_act_fn: Optional[str] = None,
        cond_proj_dim=None,
    ):
        super().__init__()

        self.linear_1 = nn.Linear(in_channels, time_embed_dim)

        if cond_proj_dim is not None:
            self.cond_proj = nn.Linear(cond_proj_dim, in_channels, bias=False)
        else:
            self.cond_proj = None

        self.act = get_activation(act_fn)

        if out_dim is not None:
            time_embed_dim_out = out_dim
        else:
            time_embed_dim_out = time_embed_dim
        self.linear_2 = nn.Linear(time_embed_dim, time_embed_dim_out)

        if post_act_fn is None:
            self.post_act = None
        else:
            self.post_act = get_activation(post_act_fn)

    def forward(self, sample, condition=None):
        if condition is not None:
            # Passing a condition requires cond_proj_dim to be set in __init__.
            sample = sample + self.cond_proj(condition)
        sample = self.linear_1(sample)

        if self.act is not None:
            sample = self.act(sample)

        sample = self.linear_2(sample)

        if self.post_act is not None:
            sample = self.post_act(sample)
        return sample
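
# Example (illustrative shapes, not part of the original module):
# TimestepEmbedding(80, 256) maps a (B, 80) sinusoidal embedding to a
# (B, 256) conditioning vector.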


class Upsample1D(nn.Module):
    """A 1D upsampling layer with an optional convolution.

    Parameters:
        channels (`int`):
            number of channels in the inputs and outputs.
        use_conv (`bool`, default `False`):
            option to use a convolution.
        use_conv_transpose (`bool`, default `True`):
            option to use a transposed convolution.
        out_channels (`int`, optional):
            number of output channels. Defaults to `channels`.
    """

    def __init__(
        self,
        channels,
        use_conv=False,
        use_conv_transpose=True,
        out_channels=None,
        name="conv",
    ):
        super().__init__()
        self.channels = channels
        self.out_channels = out_channels or channels
        self.use_conv = use_conv
        self.use_conv_transpose = use_conv_transpose
        self.name = name

        self.conv = None
        if use_conv_transpose:
            # Kernel 4 / stride 2 / padding 1 exactly doubles the input length.
            self.conv = nn.ConvTranspose1d(channels, self.out_channels, 4, 2, 1)
        elif use_conv:
            self.conv = nn.Conv1d(self.channels, self.out_channels, 3, padding=1)

    def forward(self, inputs):
        assert inputs.shape[1] == self.channels
        if self.use_conv_transpose:
            return self.conv(inputs)

        outputs = F.interpolate(inputs, scale_factor=2.0, mode="nearest")

        if self.use_conv:
            outputs = self.conv(outputs)

        return outputs
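

if __name__ == "__main__":
    # Minimal smoke test chaining the modules above; a sketch only, with
    # illustrative sizes that are not part of the original module.
    batch, channels, length = 2, 80, 100
    x = torch.randn(batch, channels, length)
    mask = torch.ones(batch, 1, length)
    t = torch.rand(batch)

    pos_emb = SinusoidalPosEmb(channels)(t)                               # (2, 80)
    t_emb = TimestepEmbedding(channels, 256)(pos_emb)                     # (2, 256)
    res = ResnetBlock1D(channels, 128, time_emb_dim=256)(x, mask, t_emb)  # (2, 128, 100)
    down = Downsample1D(128)(res)                                         # (2, 128, 50)
    up = Upsample1D(128)(down)                                            # (2, 128, 100)
    print(pos_emb.shape, t_emb.shape, res.shape, down.shape, up.shape)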