InstaDeepAI
/

segment_enformer

@@ -1,13 +1,345 @@
-from typing import Any, Dict, List
 import torch
 from einops import rearrange
 from enformer_pytorch import Enformer
 from transformers import PretrainedConfig, PreTrainedModel
-from genomics_research.segmentnt.porting_to_pytorch.layers.segmentation_head import (
-    TorchUNetHead,
-)
 FEATURES = [
     "protein_coding_gene",
@@ -35,7 +367,7 @@ class SegmentEnformerConfig(PretrainedConfig):
         features: List[str] = FEATURES,
         embed_dim: int = 1536,
         dim_divisible_by: int = 128,
-        **kwargs: Dict[str, Any]
     ) -> None:
         self.features = features
         self.embed_dim = embed_dim

+from typing import Any, Callable, Dict, List, Optional, Tuple
 import torch
+import torch.nn as nn
 from einops import rearrange
 from enformer_pytorch import Enformer
 from transformers import PretrainedConfig, PreTrainedModel
+def get_activation_fn(activation_name: str) -> Callable:
+    """
+    Returns torch activation function
+    Args:
+        activation_name (str): Name of the activation function. Possible values are
+            'swish', 'relu', 'gelu', 'sin'
+    Raises:
+        ValueError: If activation_name is not supported
+    Returns:
+        Callable: Activation function
+    """
+    if activation_name == "swish":
+        return nn.functional.silu  # type: ignore
+    elif activation_name == "relu":
+        return nn.functional.relu  # type: ignore
+    elif activation_name == "gelu":
+        return nn.functional.gelu  # type: ignore
+    elif activation_name == "sin":
+        return torch.sin  # type: ignore
+    else:
+        raise ValueError(f"Unsupported activation function: {activation_name}")
+class TorchDownSample1D(nn.Module):
+    """
+    Torch adaptation of DownSample1D in trix.layers.heads.unet_segmentation_head.py
+    """
+    def __init__(
+        self,
+        input_channels: int,
+        output_channels: int,
+        activation_fn: str = "swish",
+        num_layers: int = 2,
+    ):
+        """
+        Args:
+            input_channels: number of input channels
+            output_channels: number of output channels.
+            activation_fn: name of the activation function to use.
+                Should be one of "gelu",
+                "gelu-no-approx", "relu", "swish", "silu", "sin".
+            num_layers: number of convolution layers.
+        """
+        super().__init__()
+        self.conv_layers = nn.ModuleList(
+            [
+                nn.Conv1d(
+                    in_channels=input_channels if i == 0 else output_channels,
+                    out_channels=output_channels,
+                    kernel_size=3,
+                    stride=1,
+                    padding=1,
+                )
+                for i in range(num_layers)
+            ]
+        )
+        self.avg_pool = nn.AvgPool1d(kernel_size=2, stride=2, padding=0)
+        self.activation_fn: Callable = get_activation_fn(activation_fn)
+    def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
+        for conv_layer in self.conv_layers:
+            x = self.activation_fn(conv_layer(x))
+        hidden = x
+        x = self.avg_pool(hidden)
+        return x, hidden
+class TorchUpSample1D(nn.Module):
+    """
+    Torch adaptation of UpSample1D in trix.layers.heads.unet_segmentation_head.py
+    """
+    def __init__(
+        self,
+        input_channels: int,
+        output_channels: int,
+        activation_fn: str = "swish",
+        num_layers: int = 2,
+        interpolation_method: str = "nearest",
+    ):
+        """
+        Args:
+            input_channels: number of input channels.
+            output_channels: number of output channels.
+            activation_fn: name of the activation function to use.
+                Should be one of "gelu",
+                "gelu-no-approx", "relu", "swish", "silu", "sin".
+            interpolation_method: Method to be used for upsampling interpolation.
+                Should be one of "nearest", "linear", "cubic", "lanczos3", "lanczos5".
+            num_layers: number of convolution layers.
+        """
+        super().__init__()
+        self.conv_transpose_layers = nn.ModuleList(
+            [
+                nn.ConvTranspose1d(
+                    in_channels=input_channels if i == 0 else output_channels,
+                    out_channels=output_channels,
+                    kernel_size=3,
+                    stride=1,
+                    padding=1,
+                )
+                for i in range(num_layers)
+            ]
+        )
+        self.interpolation_mode = interpolation_method
+        self.activation_fn: Callable = get_activation_fn(activation_fn)
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        for conv_layer in self.conv_transpose_layers:
+            x = self.activation_fn(conv_layer(x))
+        x = nn.functional.interpolate(
+            x,
+            scale_factor=2,
+            mode=self.interpolation_mode,
+            align_corners=False if self.interpolation_mode != "nearest" else None,
+        )
+        return x
+class TorchFinalConv1D(nn.Module):
+    """
+    Torch adaptation of FinalConv1D in trix.layers.heads.unet_segmentation_head.py
+    """
+    def __init__(
+        self,
+        input_channels: int,
+        output_channels: int,
+        activation_fn: str = "swish",
+        num_layers: int = 2,
+    ):
+        """
+        Args:
+            input_channels: number of input channels
+            output_channels: number of output channels.
+            activation_fn: name of the activation function to use.
+                Should be one of "gelu",
+                "gelu-no-approx", "relu", "swish", "silu", "sin".
+            num_layers: number of convolution layers.
+            name: module name.
+        """
+        super().__init__()
+        self.conv_layers = nn.ModuleList(
+            [
+                nn.Conv1d(
+                    in_channels=input_channels if i == 0 else output_channels,
+                    out_channels=output_channels,
+                    kernel_size=3,
+                    stride=1,
+                    padding=1,
+                )
+                for i in range(num_layers)
+            ]
+        )
+        self.activation_fn: Callable = get_activation_fn(activation_fn)
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        for i, conv_layer in enumerate(self.conv_layers):
+            x = conv_layer(x)
+            if i < len(self.conv_layers) - 1:
+                x = self.activation_fn(x)
+        return x
+class TorchUNET1DSegmentationHead(nn.Module):
+    """
+    Torch adaptation of UNET1DSegmentationHead in
+    trix.layers.heads.unet_segmentation_head.py
+    """
+    def __init__(
+        self,
+        num_classes: int,
+        input_embed_dim: int,
+        output_channels_list: Tuple[int, ...] = (64, 128, 256),
+        activation_fn: str = "swish",
+        num_conv_layers_per_block: int = 2,
+        upsampling_interpolation_method: str = "nearest",
+    ):
+        """
+        Args:
+            num_classes: number of classes to segment
+            output_channels_list: list of the number of output channel at each level of
+                the UNET
+            activation_fn: name of the activation function to use.
+                Should be one of "gelu",
+                "gelu-no-approx", "relu", "swish", "silu", "sin".
+            num_conv_layers_per_block: number of convolution layers per block.
+            upsampling_interpolation_method: Method to be used for
+                interpolation in upsampling blocks. Should be one of "nearest",
+                "linear", "cubic", "lanczos3", "lanczos5".
+        """
+        super().__init__()
+        input_channels_list = (input_embed_dim,) + output_channels_list[:-1]
+        self.num_pooling_layers = len(output_channels_list)
+        self.downsample_blocks = nn.ModuleList(
+            [
+                TorchDownSample1D(
+                    input_channels=input_channels,
+                    output_channels=output_channels,
+                    activation_fn=activation_fn,
+                    num_layers=num_conv_layers_per_block,
+                )
+                for input_channels, output_channels in zip(
+                    input_channels_list, output_channels_list
+                )
+            ]
+        )
+        input_channels_list = (output_channels_list[-1],) + tuple(
+            list(reversed(output_channels_list))[:-1]
+        )
+        self.upsample_blocks = nn.ModuleList(
+            [
+                TorchUpSample1D(
+                    input_channels=input_channels,
+                    output_channels=output_channels,
+                    activation_fn=activation_fn,
+                    num_layers=num_conv_layers_per_block,
+                    interpolation_method=upsampling_interpolation_method,
+                )
+                for input_channels, output_channels in zip(
+                    input_channels_list, reversed(output_channels_list)
+                )
+            ]
+        )
+        self.final_block = TorchFinalConv1D(
+            activation_fn=activation_fn,
+            input_channels=output_channels_list[0],
+            output_channels=num_classes * 2,
+            num_layers=num_conv_layers_per_block,
+        )
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        if x.shape[-1] % 2**self.num_pooling_layers:
+            raise ValueError(
+                "Input length must be divisible by 2 to the power of "
+                "the number of pooling layers."
+            )
+        hiddens = []
+        for downsample_block in self.downsample_blocks:
+            x, hidden = downsample_block(x)
+            hiddens.append(hidden)
+        for upsample_block, hidden in zip(self.upsample_blocks, reversed(hiddens)):
+            x = upsample_block(x) + hidden
+        x = self.final_block(x)
+        return x
+class TorchUNetHead(nn.Module):
+    """
+    Torch adaptation of UNetHead in
+    genomics_research/segmentnt/layers/segmentation_head.py
+    """
+    def __init__(
+        self,
+        features: List[str],
+        num_classes: int = 2,
+        embed_dimension: int = 1024,
+        nucl_per_token: int = 6,
+        num_layers: int = 2,
+        remove_cls_token: bool = True,
+    ):
+        """
+        Args:
+            features (List[str]): List of features names.
+            num_classes (int): Number of classes.
+            embed_dimension (int): Embedding dimension.
+            nucl_per_token (int): Number of nucleotides per token.
+            num_layers (int): Number of layers.
+            remove_cls_token (bool): Whether to remove the CLS token.
+            name: Name  the layer. Defaults to None.
+        """
+        super().__init__()
+        self._num_features = len(features)
+        self._num_classes = num_classes
+        self.nucl_per_token = nucl_per_token
+        self.remove_cls_token = remove_cls_token
+        self.unet = TorchUNET1DSegmentationHead(
+            num_classes=embed_dimension // 2,
+            output_channels_list=tuple(
+                embed_dimension * (2**i) for i in range(num_layers)
+            ),
+            input_embed_dim=embed_dimension,
+        )
+        self.fc = nn.Linear(
+            embed_dimension,
+            self.nucl_per_token * self._num_classes * self._num_features,
+        )
+    def forward(
+        self, x: torch.Tensor, sequence_mask: Optional[torch.Tensor] = None
+    ) -> Dict[str, torch.Tensor]:
+        if self.remove_cls_token:
+            x = x[:, 1:]
+        x = self.unet(x)
+        x = nn.functional.silu(x)
+        x = x.transpose(2, 1)
+        logits = self.fc(x)
+        batch_size, seq_len, _ = x.shape
+        logits = logits.view(  # noqa
+            batch_size,
+            seq_len * self.nucl_per_token,
+            self._num_features,
+            self._num_classes,
+        )
+        return {"logits": logits}
 FEATURES = [
     "protein_coding_gene",
         features: List[str] = FEATURES,
         embed_dim: int = 1536,
         dim_divisible_by: int = 128,
+        **kwargs: Dict[str, Any],
     ) -> None:
         self.features = features
         self.embed_dim = embed_dim