Update attentions.py
attentions.py  +12 -12  CHANGED
@@ -3,7 +3,7 @@ import torch
 from torch import nn
 from torch.nn import functional as F

-from . import
+from .commons import fused_add_tanh_sigmoid_multiply, subsequent_mask,convert_pad_shape
 from .modules import LayerNorm


@@ -74,7 +74,7 @@ class Encoder(nn.Module):
                 x = self.cond_pre(x)
                 cond_offset = i * 2 * self.hidden_channels
                 g_l = g[:, cond_offset : cond_offset + 2 * self.hidden_channels, :]
-                x =
+                x = fused_add_tanh_sigmoid_multiply(
                     x, g_l, torch.IntTensor([self.hidden_channels])
                 )
             y = self.attn_layers[i](x, x, attn_mask)
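The conditioning branch now calls fused_add_tanh_sigmoid_multiply directly instead of through a module prefix. Its definition lives in .commons, which this diff does not show; the sketch below is a hedged stand-in inferred from the call site (a WaveNet-style gated activation that adds the two inputs and splits the sum into a tanh half and a sigmoid half along the channel axis), not necessarily the repository's exact code.

import torch

def fused_add_tanh_sigmoid_multiply(input_a, input_b, n_channels):
    # n_channels arrives as a one-element IntTensor at the call site above.
    n = int(n_channels[0])
    in_act = input_a + input_b                 # add conditioning to the activations
    t_act = torch.tanh(in_act[:, :n, :])       # first n channels: tanh gate
    s_act = torch.sigmoid(in_act[:, n:, :])    # remaining channels: sigmoid gate
    return t_act * s_act                       # gated output with n channels

At this call site both x (after cond_pre) and g_l presumably carry 2 * hidden_channels channels, so the gated result comes back with hidden_channels channels.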
@@ -153,7 +153,7 @@ class Decoder(nn.Module):
         x: decoder input
         h: encoder output
         """
-        self_attn_mask =
+        self_attn_mask = subsequent_mask(x_mask.size(2)).to(
             device=x.device, dtype=x.dtype
         )
         encdec_attn_mask = h_mask.unsqueeze(2) * x_mask.unsqueeze(-1)
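subsequent_mask is likewise imported from .commons, which is outside this diff. From its use here, a causal self-attention mask built from the time dimension of x_mask, a minimal sketch would be a lower-triangular mask with broadcastable batch and head dimensions:

import torch

def subsequent_mask(length):
    # 1 where query position i may attend to key position j (j <= i), 0 elsewhere;
    # shaped (1, 1, length, length) so it broadcasts over batch and heads.
    return torch.tril(torch.ones(length, length)).unsqueeze(0).unsqueeze(0)

# subsequent_mask(3)[0, 0] ->
# tensor([[1., 0., 0.],
#         [1., 1., 0.],
#         [1., 1., 1.]])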
@@ -316,7 +316,7 @@ class MultiHeadAttention(nn.Module):
         if pad_length > 0:
             padded_relative_embeddings = F.pad(
                 relative_embeddings,
-
+                convert_pad_shape([[0, 0], [pad_length, pad_length], [0, 0]]),
             )
         else:
             padded_relative_embeddings = relative_embeddings
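convert_pad_shape is the third helper imported at the top of the file; its body is also not part of this diff. Judging by how it is called, it translates a per-dimension [[before, after], ...] list into the flat, last-dimension-first sequence that F.pad expects. A plausible sketch, plus the mapping for the call in this hunk:

def convert_pad_shape(pad_shape):
    # Reverse the per-dimension pairs (F.pad pads the last dimension first),
    # then flatten the nested list into the flat form F.pad takes.
    return [amount for pair in pad_shape[::-1] for amount in pair]

# The call above pads only the middle (relative-position) dimension:
# convert_pad_shape([[0, 0], [pad_length, pad_length], [0, 0]])
#   -> [0, 0, pad_length, pad_length, 0, 0]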
@@ -332,12 +332,12 @@ class MultiHeadAttention(nn.Module):
         """
         batch, heads, length, _ = x.size()
         # Concat columns of pad to shift from relative to absolute indexing.
-        x = F.pad(x,
+        x = F.pad(x, convert_pad_shape([[0, 0], [0, 0], [0, 0], [0, 1]]))

         # Concat extra elements so to add up to shape (len+1, 2*len-1).
         x_flat = x.view([batch, heads, length * 2 * length])
         x_flat = F.pad(
-            x_flat,
+            x_flat, convert_pad_shape([[0, 0], [0, 0], [0, length - 1]])
         )

         # Reshape and slice out the padded elements.
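For orientation, _relative_position_to_absolute_position takes relative-position scores of shape (batch, heads, length, 2*length - 1) and, through the pad/flatten/pad steps in this hunk plus a reshape-and-slice that falls outside it, realigns them into absolute-position scores of shape (batch, heads, length, length). A small shape check, written with F.pad's flat argument form and with the final reshape following the standard relative-attention construction (an assumption, since those lines are not shown):

import torch
import torch.nn.functional as F

batch, heads, length = 2, 4, 5
x = torch.randn(batch, heads, length, 2 * length - 1)   # relative scores

x = F.pad(x, [0, 1])                      # same effect as the first pad in the hunk
x_flat = x.view([batch, heads, length * 2 * length])
x_flat = F.pad(x_flat, [0, length - 1])   # same effect as the second pad in the hunk

# Reshape and slice out the padded elements (outside the hunk; assumed standard form):
x_final = x_flat.view([batch, heads, length + 1, 2 * length - 1])[:, :, :length, length - 1:]
print(x_final.shape)  # torch.Size([2, 4, 5, 5])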
@@ -354,11 +354,11 @@ class MultiHeadAttention(nn.Module):
         batch, heads, length, _ = x.size()
         # padd along column
         x = F.pad(
-            x,
+            x, convert_pad_shape([[0, 0], [0, 0], [0, 0], [0, length - 1]])
         )
         x_flat = x.view([batch, heads, length**2 + length * (length - 1)])
         # add 0's in the beginning that will skew the elements after reshape
-        x_flat = F.pad(x_flat,
+        x_flat = F.pad(x_flat, convert_pad_shape([[0, 0], [0, 0], [length, 0]]))
         x_final = x_flat.view([batch, heads, length, 2 * length])[:, :, :, 1:]
         return x_final

@@ -419,7 +419,7 @@ class FFN(nn.Module):
         pad_l = self.kernel_size - 1
         pad_r = 0
         padding = [[0, 0], [0, 0], [pad_l, pad_r]]
-        x = F.pad(x,
+        x = F.pad(x, convert_pad_shape(padding))
         return x

     def _same_padding(self, x):
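In _causal_padding the whole pad budget of kernel_size - 1 goes on the left of the time axis, so a following Conv1d with that kernel size keeps the sequence length unchanged while never reading future frames. A quick sketch with hypothetical sizes, using the same flat-list equivalence as above:

import torch
import torch.nn.functional as F

kernel_size = 3
x = torch.randn(1, 8, 10)             # (batch, channels, time)

pad_l, pad_r = kernel_size - 1, 0     # causal: pad on the left only
x_causal = F.pad(x, [pad_l, pad_r])   # matches convert_pad_shape([[0, 0], [0, 0], [pad_l, pad_r]])
y = torch.nn.Conv1d(8, 8, kernel_size)(x_causal)
print(y.shape)  # torch.Size([1, 8, 10]) - length preserved, output at t sees only inputs up to t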
@@ -428,7 +428,7 @@ class FFN(nn.Module):
         pad_l = (self.kernel_size - 1) // 2
         pad_r = self.kernel_size // 2
         padding = [[0, 0], [0, 0], [pad_l, pad_r]]
-        x = F.pad(x,
+        x = F.pad(x, convert_pad_shape(padding))
         return x


@@ -622,7 +622,7 @@ class FFT(nn.Module):
         if g is not None:
             g = self.cond_layer(g)

-        self_attn_mask =
+        self_attn_mask = subsequent_mask(x_mask.size(2)).to(
             device=x.device, dtype=x.dtype
         )
         x = x * x_mask
@@ -631,7 +631,7 @@ class FFT(nn.Module):
                 x = self.cond_pre(x)
                 cond_offset = i * 2 * self.hidden_channels
                 g_l = g[:, cond_offset : cond_offset + 2 * self.hidden_channels, :]
-                x =
+                x = fused_add_tanh_sigmoid_multiply(
                     x, g_l, torch.IntTensor([self.hidden_channels])
                 )
             y = self.self_attn_layers[i](x, x, self_attn_mask)