Spaces:

ilhamap
/

seminar-demo

Paused

App Files Files Community

ilhamap committed on Mar 14, 2024

Commit

cee5099

verified ·

1 Parent(s): 18c4e17

Upload 4 files

Browse files

Files changed (4) hide show

app.py +361 -0
module.py +275 -0
requirements.txt +9 -0
sstvit.py +94 -0

app.py ADDED Viewed

	@@ -0,0 +1,361 @@

+import streamlit as st
+import io
+import collections
+from scipy.io import loadmat
+import matplotlib.pyplot as plt
+from PIL import Image
+import numpy as np
+import torch
+import argparse
+import torch.nn as nn
+import torch.utils.data as Data
+import torch.backends.cudnn as cudnn
+from scipy.io import loadmat
+from scipy.io import savemat
+from torch import optim
+from torch.autograd import Variable
+from sstvit import SSTViT
+from sklearn.metrics import confusion_matrix
+import matplotlib.pyplot as plt
+from matplotlib import colors
+import numpy as np
+from patchify import patchify, unpatchify
+import time
+from matplotlib import colors as mcolors
+import base64
+import pandas as pd
+import st_aggrid
+import os
+import json
+import plotly.express as px
+# Inject a small stylesheet that caps the width of Streamlit's main column.
+css = "\n<style>\n    section.main > div {max-width:60rem}\n</style>\n"
+st.markdown(css, unsafe_allow_html=True)
+class Args(dict):
+    """Dictionary whose items can also be read and written as attributes.
+
+    ``args.seed`` is equivalent to ``args['seed']`` and ``args.seed = 1`` to
+    ``args['seed'] = 1``. A missing key raises ``AttributeError`` (not
+    ``KeyError``) on attribute access, so ``hasattr`` and
+    ``getattr(obj, name, default)`` behave as they do for ordinary objects.
+    """
+    __setattr__ = dict.__setitem__
+    def __getattr__(self, name):
+        # Only reached when normal attribute lookup fails, so dict methods
+        # such as ``copy`` or ``items`` are never shadowed by keys.
+        try:
+            return self[name]
+        except KeyError:
+            raise AttributeError(name) from None
+# Hyper-parameters carried over as one attribute-accessible mapping. Within
+# this file only `band_patches`, `seed` and `gpu_id` are read.
+args = Args(
+    dataset='mg',
+    flag_test='train',
+    gpu_id=0,
+    seed=0,
+    batch_size=64,
+    test_freq=10,
+    patches=5,
+    band_patches=1,
+    epoches=2000,
+    learning_rate=5e-4,
+    gamma=0.9,
+    weight_decay=0.0,
+    train_number=500,
+)
+# Plain-dict snapshot of the settings.
+obj = dict(args)
+# Restrict CUDA to the configured device index.
+os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu_id)
+def test_epoch(model, test_loader):
+    """Run `model` over every batch and return the predicted class indices.
+
+    Args:
+        model: callable taking ``(batch_t1, batch_t2)`` and returning a
+            2-D tensor of per-class scores (one row per sample).
+        test_loader: iterable yielding ``(batch_t1, batch_t2)`` pairs.
+
+    Returns:
+        1-D ``float64`` NumPy array with the index of the highest score for
+        each sample, in loader order. Empty if the loader yields nothing.
+    """
+    per_batch = []
+    # Pure inference: disable autograd so no graph is kept per batch.
+    with torch.no_grad():
+        for batch_data_t1, batch_data_t2 in test_loader:
+            batch_pred = model(batch_data_t1, batch_data_t2)
+            _, pred = batch_pred.topk(1, 1, True, True)
+            # reshape(-1) also copes with a final batch of a single sample.
+            per_batch.append(pred.reshape(-1).cpu().numpy())
+    if not per_batch:
+        return np.array([])
+    # One concatenation instead of a quadratic np.append per batch; float64
+    # keeps the dtype the previous implementation returned.
+    return np.concatenate(per_batch).astype(np.float64)
+mdic = ['Before', 'After', 'Before', 'After']
+# One hex colour per predicted class index (0, 1, 2); the RGB equivalents
+# are used again in encode_masks_to_rgb.
+colors = ['#3b68f8', '#ff0201', '#23fe01']
+cmap = mcolors.ListedColormap(colors)
+# Seed every random generator with the configured seed and make cuDNN
+# deterministic.
+for _seed_fn in (np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
+    _seed_fn(args.seed)
+cudnn.deterministic = True
+cudnn.benchmark = False
+def encode_masks_to_rgb(masks):
+    """Convert a 2-D class-index map into an RGB ``uint8`` image.
+
+    Class 0 becomes blue, 1 red and 2 green; any other value stays black.
+
+    Args:
+        masks: 2-D array of class indices.
+
+    Returns:
+        Array of shape ``(height, width, 3)`` and dtype ``uint8``.
+    """
+    palette = ((0, 0, 255), (255, 0, 0), (0, 255, 0))
+    n_rows, n_cols = masks.shape
+    canvas = np.zeros((n_rows, n_cols, 3), dtype=np.uint8)
+    for class_id, rgb in enumerate(palette):
+        canvas[masks == class_id] = rgb
+    return canvas
+def _count_pixels(pred, mangrove_label):
+    """Count the pixels of each class colour in an RGB image.
+
+    Args:
+        pred: array whose last axis holds the three RGB components.
+        mangrove_label: class name reported for green pixels.
+
+    Returns:
+        DataFrame with columns ``class``, ``percentage`` and ``pixel_count``
+        and one row per class, always in the order blue, red, green. Classes
+        that do not occur get 0; pixels of any other colour count towards
+        the total but are not reported.
+    """
+    color2label = {
+        (0, 0, 255): "Non Mangrove",
+        (255, 0, 0): "Mangrove Loss",
+        (0, 255, 0): mangrove_label,
+    }
+    flat = np.asarray(pred).reshape(-1, 3)
+    total_pixels = len(flat)
+    table = {"class": [], "percentage": [], "pixel_count": []}
+    for color, label in color2label.items():
+        count = int(np.all(flat == color, axis=1).sum())
+        table["class"].append(label)
+        # Guard the division so an empty image yields zeros, not an error.
+        table["percentage"].append((count / total_pixels) * 100 if total_pixels else 0.0)
+        table["pixel_count"].append(count)
+    return pd.DataFrame(table)
+def count_pixel(pred):
+    """Per-class pixel statistics for the "before" map (green = "Mangrove Before")."""
+    return _count_pixels(pred, "Mangrove Before")
+def count_pixel1(pred):
+    """Per-class pixel statistics for the "after" map (green = "Mangrove After")."""
+    return _count_pixels(pred, "Mangrove After")
+# Height limits (in pixels) shared by the two AgGrid tables.
+MIN_HEIGHT = 100
+MAX_HEIGHT = 180
+ROW_HEIGHT = 50
+def _grid_height(frame):
+    """Table height that grows with the row count up to MAX_HEIGHT."""
+    return min(MIN_HEIGHT + len(frame) * ROW_HEIGHT, MAX_HEIGHT)
+def _rescale_label(label):
+    """Return `label` as float64 with 0 mapped to -0.8 and 1 mapped to -0.2."""
+    label = np.double(label)
+    return np.where(label == 0, -0.8, np.where(label == 1, -0.2, label))
+def _stack_bands(spectral, label, rows, cols):
+    """Crop `spectral` to rows x cols and append `label` as the 11th channel."""
+    cube = np.zeros(shape=(rows, cols, 11), dtype=np.float32)
+    cube[:, :, :10] = spectral[:rows, :cols, :]
+    cube[:, :, 10] = label
+    return cube
+def _to_patches(cube):
+    """Slide a 5x5 window (stride 1) over `cube`; one (25, 11) row block per window."""
+    return patchify(cube, (5, 5, 11), step=1).reshape(-1, 5 * 5, 11)
+def _predict_map(model, first, second, rows, cols):
+    """Classify every window pair and reshape the predictions to (rows, cols)."""
+    loader = Data.DataLoader(Data.TensorDataset(first, second), batch_size=100, shuffle=False)
+    return test_epoch(model, loader).reshape(rows, cols)
+def _side_by_side(before, after, figsize, cmap=None, **title_kwargs):
+    """Build a tick-less two-panel figure titled 'Before' and 'After'."""
+    fig, axes = plt.subplots(1, 2, figsize=figsize)
+    for axis, image, title in zip(axes, (before, after), ('Before', 'After')):
+        axis.imshow(image, cmap=cmap)
+        axis.set_title(title, **title_kwargs)
+        axis.set_xticks([])
+        axis.set_yticks([])
+    return fig
+file = st.file_uploader("Upload file", type=['mat'])
+if file:
+    # Parse the upload once per script run; every array below comes from it.
+    mat = loadmat(file)
+    data_img1 = mat['data_img1']
+    data_img2 = mat['data_img2']
+    st.subheader("Preview Dataset")
+    col1, col2 = st.columns(2)
+    with col1:
+        fig = _side_by_side(data_img1, data_img2, figsize=(5, 5), fontweight='bold')
+        st.pyplot(fig)
+        # Streamlit reruns the script on every interaction; close the figure
+        # so pyplot does not accumulate open figures.
+        plt.close(fig)
+    holder = st.empty()
+    if holder.button("Start Prediction"):
+        start = time.time()
+        holder.empty()
+        with st.spinner("Processing, please wait around 7-15 minute"):
+            L_post = _rescale_label(mat['L_post'])
+            L_pre = _rescale_label(mat['L_pre'])
+            rows, cols = L_post.shape[0], L_post.shape[1]
+            data_cb1 = _stack_bands(mat['data_t1'], L_pre, rows, cols)
+            data_cb2 = _stack_bands(mat['data_t2'], L_post, rows, cols)
+            # A 5x5 window at stride 1 yields 4 fewer positions per axis.
+            height, width = rows - 4, cols - 4
+            x1 = _to_patches(data_cb1)
+            x2 = _to_patches(data_cb2)
+            # create model
+            model = SSTViT(
+                image_size = 5,
+                near_band = args.band_patches,
+                num_patches = 11,
+                num_classes = 3,
+                dim = 32,
+                depth = 2,
+                heads = 4,
+                dim_head=16,
+                mlp_dim = 8,
+                b_dim = 512,
+                b_depth = 3,
+                b_heads = 8,
+                b_dim_head= 32,
+                b_mlp_head = 8,
+                dropout = 0.2,
+                emb_dropout = 0.1,
+            )
+            model.load_state_dict(torch.load("model/lsstformer.pth",map_location=torch.device("cpu")))
+            model.eval()
+            # Move the 11 channels in front of the 25 window pixels.
+            x1_true_band = torch.from_numpy(x1.transpose(0, 2, 1)).type(torch.FloatTensor)
+            x2_true_band = torch.from_numpy(x2.transpose(0, 2, 1)).type(torch.FloatTensor)
+            # The "Before" map pairs the first date with itself, the "After"
+            # map pairs the first date with the second.
+            # NOTE(review): the self-pairing is kept from the original code
+            # and is presumably intentional - confirm.
+            prediction_matrix = _predict_map(model, x1_true_band, x1_true_band, height, width)
+            prediction_matrix2 = _predict_map(model, x1_true_band, x2_true_band, height, width)
+            # Class 2 is reported as mangrove, class 1 as mangrove loss.
+            mangrove_before = np.count_nonzero(prediction_matrix == 2)
+            mangrove_after = np.count_nonzero(prediction_matrix2 == 2)
+            mangrove_loss = np.count_nonzero(prediction_matrix2 == 1)
+            class_counts = count_pixel(encode_masks_to_rgb(prediction_matrix))
+            class_counts1 = count_pixel1(encode_masks_to_rgb(prediction_matrix2))
+            with st.container():
+                st.subheader("Prediction Result")
+                col1, col2 = st.columns(2)
+                with col1:
+                    with st.container():
+                        fig = _side_by_side(prediction_matrix, prediction_matrix2, figsize=(10, 10),
+                                            cmap=cmap, fontsize=25, fontweight='bold')
+                        st.pyplot(fig)
+                        plt.close(fig)
+                with col2:
+                    with st.container():
+                        # Each predicted pixel is reported as 100 m².
+                        table_data = {
+                            "Total mangrove before":f"{mangrove_before*100} m\u00B2",
+                            "Total mangrove after":f"{mangrove_after*100} m\u00B2",
+                            "Total mangrove loss":f"{mangrove_loss*100} m\u00B2",
+                        }
+                        df = pd.DataFrame(list(table_data.items()), columns=['Key', 'Value'])
+                        st_aggrid.AgGrid(df,fit_columns_on_grid_load=True, height=_grid_height(df))
+            with st.container():
+                st.subheader("Pixel Distribution")
+                # Keep "Mangrove Before" from the first table plus "Mangrove
+                # Loss" and "Mangrove After" from the second, then order the
+                # rows Before, After, Loss.
+                vertical_concat = pd.concat([class_counts.drop([0, 1]), class_counts1.drop(0)], axis=0)
+                vertical_concat = vertical_concat.iloc[[0,2,1],:]
+                st_aggrid.AgGrid(vertical_concat,fit_columns_on_grid_load=True, height=_grid_height(vertical_concat))
+                fig = px.bar(vertical_concat, x='percentage', y='class', color='class', orientation='h',
+                                color_discrete_sequence=["green","green", "red", "blue"],
+                                    category_orders={"class": ["Mangrove Before","Mangrove After", "Mangrove Loss", "Non Mangrove",]}
+                                )
+                st.plotly_chart(fig,use_container_width=False)
+        end = time.time()
+        process = end-start
+        st.write('process',process)
+show_file = st.empty()
+if not file:
+    url = "https://drive.usercontent.google.com/download?id=1u48pMzRWQ2Etfjaq5A0CUjRtGKZaJoJy&export=download&authuser=2&confirm=t&uuid=52b0e01e-377f-42cb-8412-c84aa38a1740&at=APZUnTXslmuCCV1drJ2WWtkZr9BR%3A1710357675310"
+    show_file.info("""
+    The model was trained using Sentinel-2 imagery, users can upload MAT files to perform LSST-Former for mangrove loss detection models that have been trained in this research. Tool for generate from Sentinel-2 to MAT file i will create later, please download demo dataset bellow. for better in mobile phone, se desktop mode.
+    """)
+    st.write("download demo datasets this [link](%s)" % url)

module.py ADDED Viewed

	@@ -0,0 +1,275 @@

+import torch
+import torch.nn as nn
+import numpy as np
+from einops import rearrange, repeat
+class Residual(nn.Module):
+    """Skip connection around `fn`: the output is ``fn(x, **kwargs) + x``."""
+    def __init__(self, fn):
+        super().__init__()
+        self.fn = fn
+    def forward(self, x, **kwargs):
+        branch = self.fn(x, **kwargs)
+        return branch + x
+class PreNorm(nn.Module):
+    """Apply ``LayerNorm(dim)`` to the input before handing it to `fn`."""
+    def __init__(self, dim, fn):
+        super().__init__()
+        self.norm = nn.LayerNorm(dim)
+        self.fn = fn
+    def forward(self, x, **kwargs):
+        normalized = self.norm(x)
+        return self.fn(normalized, **kwargs)
+class FeedForward(nn.Module):
+    """Position-wise MLP: ``dim -> hidden_dim -> dim`` with GELU and dropout."""
+    def __init__(self, dim, hidden_dim, dropout = 0.):
+        super().__init__()
+        expand = nn.Linear(dim, hidden_dim)
+        project = nn.Linear(hidden_dim, dim)
+        # Layer positions 0..4 are kept so checkpoint keys (net.0, net.3) match.
+        self.net = nn.Sequential(expand, nn.GELU(), nn.Dropout(dropout), project, nn.Dropout(dropout))
+    def forward(self, x):
+        return self.net(x)
+class Attention(nn.Module):
+    """Multi-head self-attention over a token sequence of shape [b, n, dim]."""
+    def __init__(self, dim, heads, dim_head, dropout):
+        super().__init__()
+        inner_dim = dim_head * heads
+        self.heads = heads
+        self.scale = dim_head ** -0.5
+        self.to_qkv = nn.Linear(dim, inner_dim * 3, bias = False)
+        self.to_out = nn.Sequential(
+            nn.Linear(inner_dim, dim),
+            nn.Dropout(dropout)
+        )
+    def forward(self, x, mask = None):
+        """Attend over `x`.
+
+        Args:
+            x: tensor of shape [b, n, dim].
+            mask: optional boolean tensor that flattens to [b, n - 1]; True
+                marks tokens that may take part in attention. A True entry
+                is prepended for the first token of `x`.
+
+        Returns:
+            Tensor of shape [b, n, dim].
+        """
+        # x:[b,n,dim]
+        b, n, _, h = *x.shape, self.heads
+        # get qkv tuple:([b,n,head_num*head_dim],[...],[...])
+        qkv = self.to_qkv(x).chunk(3, dim = -1)
+        # split q,k,v from [b,n,head_num*head_dim] -> [b,head_num,n,head_dim]
+        q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> b h n d', h = h), qkv)
+        # q . k^T / sqrt(head_dim) -> [b,head_num,n,n]
+        dots = torch.einsum('bhid,bhjd->bhij', q, k) * self.scale
+        if mask is not None:
+            # Use nn.functional directly: this module never imports `F`.
+            mask = nn.functional.pad(mask.flatten(1), (1, 0), value = True)
+            assert mask.shape[-1] == dots.shape[-1], 'mask has incorrect dimensions'
+            # [b,1,n,n] so the pair mask broadcasts over the head axis.
+            pair_mask = mask[:, None, :, None] & mask[:, None, None, :]
+            # Most negative finite value, so softmax gives ~0 weight.
+            mask_value = -torch.finfo(dots.dtype).max
+            dots.masked_fill_(~pair_mask, mask_value)
+        # softmax normalization -> attention matrix
+        attn = dots.softmax(dim=-1)
+        # attention matrix * value -> output
+        out = torch.einsum('bhij,bhjd->bhid', attn, v)
+        # cat all heads -> [b, n, head_num*head_dim]
+        out = rearrange(out, 'b h n d -> b n (h d)')
+        out = self.to_out(out)
+        return out
+class CrossAttention(nn.Module):
+    """Attention in which only the first token queries the whole sequence.
+
+    Given `x_qkv` of shape [b, n, dim], keys and values are computed from all
+    n tokens and the query from token 0 alone, so the result is [b, 1, dim].
+    """
+    def __init__(self, dim, heads, dim_head, dropout):
+        super().__init__()
+        inner_dim = dim_head * heads
+        self.heads = heads
+        self.scale = dim_head ** -0.5
+        self.to_k = nn.Linear(dim, inner_dim, bias=False)
+        self.to_v = nn.Linear(dim, inner_dim, bias=False)
+        self.to_q = nn.Linear(dim, inner_dim, bias=False)
+        # A single head that already has width `dim` needs no projection.
+        if heads == 1 and dim_head == dim:
+            self.to_out = nn.Identity()
+        else:
+            self.to_out = nn.Sequential(
+                nn.Linear(inner_dim, dim),
+                nn.Dropout(dropout)
+            )
+    def forward(self, x_qkv):
+        h = self.heads
+        first_token = x_qkv[:, 0].unsqueeze(1)
+        k = rearrange(self.to_k(x_qkv), 'b n (h d) -> b h n d', h = h)
+        v = rearrange(self.to_v(x_qkv), 'b n (h d) -> b h n d', h = h)
+        q = rearrange(self.to_q(first_token), 'b n (h d) -> b h n d', h = h)
+        scores = torch.einsum('b h i d, b h j d -> b h i j', q, k) * self.scale
+        weights = scores.softmax(dim=-1)
+        mixed = torch.einsum('b h i j, b h j d -> b h i d', weights, v)
+        merged = rearrange(mixed, 'b h n d -> b n (h d)')
+        return self.to_out(merged)
+class Transformer(nn.Module):
+    """Stack of `depth` pre-norm residual (attention, feed-forward) pairs."""
+    def __init__(self, dim, depth, heads, dim_head, mlp_head, dropout, num_channel):
+        super().__init__()
+        self.layers = nn.ModuleList()
+        for _ in range(depth):
+            attention = Residual(PreNorm(dim, Attention(dim, heads = heads, dim_head = dim_head, dropout = dropout)))
+            feed_forward = Residual(PreNorm(dim, FeedForward(dim, mlp_head, dropout = dropout)))
+            self.layers.append(nn.ModuleList([attention, feed_forward]))
+        # Created for depth > 2 but never used by forward(); kept because the
+        # convolutions contribute parameters to the state dict.
+        self.skipcat = nn.ModuleList(
+            [nn.Conv2d(num_channel+1, num_channel+1, [1, 2], 1, 0) for _ in range(depth-2)]
+        )
+    def forward(self, x, mask = None):
+        for attention, feed_forward in self.layers:
+            x = feed_forward(attention(x, mask = mask))
+        return x
+class SSTransformer(nn.Module):
+    """Two-stage encoder.
+
+    Stage one runs `depth` (attention, feed-forward) pairs of width `dim`.
+    The last two axes are then swapped and a linear layer maps the token
+    axis (size `num_patches`) to `b_dim`. A learned class token is prepended
+    and stage two runs `b_depth` pairs of width `b_dim`.
+    """
+    def __init__(self, dim, depth, heads, dim_head, mlp_head, b_dim, b_depth, b_heads, b_dim_head, b_mlp_head, num_patches, dropout):
+        super().__init__()
+        # Registration order is kept: layers, k_layers, embedding, class token.
+        self.layers = nn.ModuleList()
+        self.k_layers = nn.ModuleList()
+        self.channels_to_embedding = nn.Linear(num_patches, b_dim)
+        self.cls_token = nn.Parameter(torch.randn(1, 1, b_dim))
+        self.layers.extend([self._stage(dim, heads, dim_head, mlp_head, dropout) for _ in range(depth)])
+        self.k_layers.extend([self._stage(b_dim, b_heads, b_dim_head, b_mlp_head, dropout) for _ in range(b_depth)])
+    @staticmethod
+    def _stage(width, heads, dim_head, hidden, dropout):
+        """One pre-norm residual attention block followed by a feed-forward block."""
+        attention = Residual(PreNorm(width, Attention(dim=width, heads=heads, dim_head=dim_head, dropout = dropout)))
+        feed_forward = Residual(PreNorm(width, FeedForward(width, hidden, dropout = dropout)))
+        return nn.ModuleList([attention, feed_forward])
+    def forward(self, x, mask = None):
+        for attention, feed_forward in self.layers:
+            x = feed_forward(attention(x, mask = mask))
+        x = self.channels_to_embedding(rearrange(x, 'b n d -> b d n'))
+        batch = x.shape[0]
+        cls_tokens = repeat(self.cls_token, '() n d -> b n d', b = batch)
+        x = torch.cat((cls_tokens, x), dim = 1)
+        for attention, feed_forward in self.k_layers:
+            x = feed_forward(attention(x, mask = mask))
+        return x
+class SSTransformer_pyramid(nn.Module):
+    """Two-stage encoder that also returns the stage-one features.
+
+    Stage one runs `depth` (attention, feed-forward) pairs of width `dim`;
+    its output is kept as `out_feature`. The last two axes are then swapped,
+    the token axis (size `num_patches`) is mapped to `b_dim`, a learned class
+    token is prepended and `b_depth` pairs of width `b_dim` follow.
+    """
+    def __init__(self, dim, depth, heads, dim_head, mlp_head, b_dim, b_depth, b_heads, b_dim_head, b_mlp_head, num_patches, dropout):
+        super().__init__()
+        # Registration order is kept: layers, k_layers, embedding, class token.
+        self.layers = nn.ModuleList()
+        self.k_layers = nn.ModuleList()
+        self.channels_to_embedding = nn.Linear(num_patches, b_dim)
+        self.cls_token = nn.Parameter(torch.randn(1, 1, b_dim))
+        self.layers.extend([self._stage(dim, heads, dim_head, mlp_head, dropout) for _ in range(depth)])
+        self.k_layers.extend([self._stage(b_dim, b_heads, b_dim_head, b_mlp_head, dropout) for _ in range(b_depth)])
+    @staticmethod
+    def _stage(width, heads, dim_head, hidden, dropout):
+        """One pre-norm residual attention block followed by a feed-forward block."""
+        attention = Residual(PreNorm(width, Attention(dim=width, heads=heads, dim_head=dim_head, dropout = dropout)))
+        feed_forward = Residual(PreNorm(width, FeedForward(width, hidden, dropout = dropout)))
+        return nn.ModuleList([attention, feed_forward])
+    def forward(self, x, mask = None):
+        """Return ``(stage_two_output, stage_one_output)``."""
+        for attention, feed_forward in self.layers:
+            x = feed_forward(attention(x, mask = mask))
+        out_feature = x
+        x = self.channels_to_embedding(rearrange(x, 'b n d -> b d n'))
+        batch = x.shape[0]
+        cls_tokens = repeat(self.cls_token, '() n d -> b n d', b = batch)
+        x = torch.cat((cls_tokens, x), dim = 1)
+        for attention, feed_forward in self.k_layers:
+            x = feed_forward(attention(x, mask = mask))
+        return x, out_feature
+class ViT(nn.Module):
+    """Transformer classifier that reads the class-token output.
+
+    Each input token of width `channel_dim` is linearly embedded to `dim`, a
+    learned class token is prepended, a learned position embedding is added,
+    and the class-token output of the transformer is classified by a
+    LayerNorm + Linear head.
+
+    `image_size`, `near_band` and `pool` do not influence the computation;
+    `pool` and `mode` are only stored on the instance.
+    """
+    def __init__(self, image_size, near_band, num_patches, num_classes, dim, depth, heads, mlp_dim, pool='cls', channel_dim=1, dim_head = 16, dropout=0., emb_dropout=0., mode='ViT'):
+        super().__init__()
+        self.pos_embedding = nn.Parameter(torch.randn(1, num_patches + 1, dim))
+        self.patch_to_embedding = nn.Linear(channel_dim, dim)
+        self.cls_token = nn.Parameter(torch.randn(1, 1, dim))
+        self.dropout = nn.Dropout(emb_dropout)
+        # Transformer takes exactly seven arguments; passing `mode` as an
+        # eighth made construction fail with TypeError.
+        self.transformer = Transformer(dim, depth, heads, dim_head, mlp_dim, dropout, num_patches)
+        self.mode = mode
+        self.pool = pool
+        self.to_latent = nn.Identity()
+        self.mlp_head = nn.Sequential(
+            nn.LayerNorm(dim),
+            nn.Linear(dim, num_classes)
+        )
+    def forward(self, x, mask = None):
+        """Return class scores of shape [b, num_classes] for `x` of shape [b, n, channel_dim]."""
+        # embed every token: [b, n, channel_dim] -> [b, n, dim]
+        x = self.patch_to_embedding(x)
+        b, n, _ = x.shape
+        # prepend the class token -> [b, n + 1, dim]
+        cls_tokens = repeat(self.cls_token, '() n d -> b n d', b = b)
+        x = torch.cat((cls_tokens, x), dim = 1)
+        # add position embedding
+        x += self.pos_embedding[:, :(n + 1)]
+        x = self.dropout(x)
+        # transformer: x[b,n + 1,dim] -> x[b,n + 1,dim]
+        x = self.transformer(x, mask)
+        # classification: using cls_token output
+        x = self.to_latent(x[:,0])
+        # MLP classification layer
+        return self.mlp_head(x)
+class SSFormer_v4(nn.Module):
+    """Two-stage encoder whose stage-two class token is supplied by the caller.
+
+    The structure matches the other two-stage encoders in this module, but
+    forward() prepends the argument `c` (repeated over the batch) instead of
+    the module's own `cls_token` parameter, which is created but not read in
+    forward().
+    """
+    def __init__(self, dim, depth, heads, dim_head, mlp_head, b_dim, b_depth, b_heads, b_dim_head, b_mlp_head, num_patches, dropout, mode):
+        super().__init__()
+        # Registration order is kept: layers, k_layers, embedding, class token.
+        self.layers = nn.ModuleList()
+        self.k_layers = nn.ModuleList()
+        self.channels_to_embedding = nn.Linear(num_patches, b_dim)
+        self.cls_token = nn.Parameter(torch.randn(1, 1, b_dim))
+        self.layers.extend([self._stage(dim, heads, dim_head, mlp_head, dropout) for _ in range(depth)])
+        self.k_layers.extend([self._stage(b_dim, b_heads, b_dim_head, b_mlp_head, dropout) for _ in range(b_depth)])
+        self.mode = mode
+    @staticmethod
+    def _stage(width, heads, dim_head, hidden, dropout):
+        """One pre-norm residual attention block followed by a feed-forward block."""
+        attention = Residual(PreNorm(width, Attention(dim=width, heads=heads, dim_head=dim_head, dropout = dropout)))
+        feed_forward = Residual(PreNorm(width, FeedForward(width, hidden, dropout = dropout)))
+        return nn.ModuleList([attention, feed_forward])
+    def forward(self, x, c, mask = None):
+        for attention, feed_forward in self.layers:
+            x = feed_forward(attention(x, mask = mask))
+        x = self.channels_to_embedding(rearrange(x, 'b n d -> b d n'))
+        batch = x.shape[0]
+        # `c` must have a leading axis of size 1 for this repeat pattern.
+        cls_tokens = repeat(c, '() n d -> b n d', b = batch)
+        x = torch.cat((cls_tokens, x), dim = 1)
+        for attention, feed_forward in self.k_layers:
+            x = feed_forward(attention(x, mask = mask))
+        return x

requirements.txt ADDED Viewed

	@@ -0,0 +1,9 @@

+einops
+patchify
+argparse
+scipy
+scikit-learn
+torch
+streamlit-aggrid
+plotly
+collection

sstvit.py ADDED Viewed

	@@ -0,0 +1,94 @@

+import torch
+from torch import nn, einsum
+import torch.nn.functional as F
+from einops import rearrange, repeat
+from einops.layers.torch import Rearrange
+from module import Attention, PreNorm, FeedForward, CrossAttention, SSTransformer
+import numpy as np
+class SSTTransformerEncoder(nn.Module):
+    """Encode two inputs with one shared SSTransformer, then exchange
+    information between them through cross-attention on their class tokens.
+
+    forward() returns the two class tokens, each of shape [b, 1, b_dim],
+    produced by the last cross-attention layer.
+    """
+    def __init__(self, dim, depth, heads, dim_head, mlp_dim, b_dim, b_depth, b_heads, b_dim_head, b_mlp_head, num_patches, cross_attn_depth=3, cross_attn_heads=8, dropout = 0):
+        super().__init__()
+        self.transformer = SSTransformer(dim, depth, heads, dim_head, mlp_dim, b_dim, b_depth, b_heads, b_dim_head, b_mlp_head, num_patches, dropout)
+        self.cross_attn_layers = nn.ModuleList()
+        for _ in range(cross_attn_depth):
+            # Cross-attention dropout is fixed at 0 regardless of `dropout`.
+            cross = CrossAttention(b_dim, heads = cross_attn_heads, dim_head=dim_head, dropout=0)
+            self.cross_attn_layers.append(PreNorm(b_dim, cross))
+    def forward(self, x1, x2):
+        x1 = self.transformer(x1)
+        x2 = self.transformer(x2)
+        for cross_attn in self.cross_attn_layers:
+            cls1, tokens1 = x1[:, :1], x1[:, 1:]
+            cls2, tokens2 = x2[:, :1], x2[:, 1:]
+            # Class token of x1 attends over the tokens of x2.
+            cat1_out = cls1 + cross_attn(torch.cat((cls1, tokens2), dim=1))
+            x1 = torch.cat((cat1_out, tokens1), dim=1)
+            # Class token of x2 attends over x1, which at this point already
+            # includes the refreshed class token of x1.
+            # NOTE(review): kept exactly as in the original - confirm intent.
+            cat2_out = cls2 + cross_attn(torch.cat((cls2, x1), dim=1))
+            x2 = torch.cat((cat2_out, tokens2), dim=1)
+        return cat1_out, cat2_out
+class SSTViT(nn.Module):
+    """Bi-temporal classifier.
+
+    Both inputs are embedded with a shared linear layer, given their own
+    learned class token and a shared position embedding, passed through the
+    encoder(s), and the sum of the two resulting class tokens is classified
+    by a LayerNorm + Linear head.
+    """
+    def __init__(self, image_size, near_band, num_patches, num_classes, dim, depth, heads, mlp_dim, b_dim, b_depth, b_heads, b_dim_head, b_mlp_head, pool='cls', channels=1, dim_head = 16, dropout=0., emb_dropout=0., multi_scale_enc_depth=1):
+        super().__init__()
+        patch_dim = image_size ** 2 * near_band
+        # One extra position for the class token.
+        self.num_patches = num_patches+1
+        self.pos_embedding = nn.Parameter(torch.randn(1, self.num_patches, dim))
+        self.patch_to_embedding = nn.Linear(patch_dim, dim)
+        self.cls_token_t1 = nn.Parameter(torch.randn(1, 1, dim))
+        self.cls_token_t2 = nn.Parameter(torch.randn(1, 1, dim))
+        self.dropout = nn.Dropout(emb_dropout)
+        self.multi_scale_transformers = nn.ModuleList()
+        for _ in range(multi_scale_enc_depth):
+            # The encoder is built with dropout 0; `dropout` is not forwarded.
+            encoder = SSTTransformerEncoder(dim, depth, heads, dim_head, mlp_dim,b_dim, b_depth, b_heads, b_dim_head, b_mlp_head, self.num_patches,
+                                            dropout = 0.)
+            self.multi_scale_transformers.append(encoder)
+        self.pool = pool
+        self.to_latent = nn.Identity()
+        self.mlp_head = nn.Sequential(
+            nn.LayerNorm(b_dim),
+            nn.Linear(b_dim, num_classes)
+        )
+    def forward(self, x1, x2):
+        """Return class scores of shape [b, num_classes] for the pair (x1, x2)."""
+        # embed every token vector: [b, n, patch_dim] -> [b, n, dim]
+        x1 = self.patch_to_embedding(x1)
+        x2 = self.patch_to_embedding(x2)
+        b, n, _ = x1.shape
+        positions = self.pos_embedding[:, :(n + 1)]
+        # prepend each date's class token, add positions, apply dropout
+        cls_tokens_t1 = repeat(self.cls_token_t1, '() n d -> b n d', b = b)
+        cls_tokens_t2 = repeat(self.cls_token_t2, '() n d -> b n d', b = b)
+        x1 = self.dropout(torch.cat((cls_tokens_t1, x1), dim = 1) + positions)
+        x2 = self.dropout(torch.cat((cls_tokens_t2, x2), dim = 1) + positions)
+        # every encoder receives the same embedded inputs; the last one's
+        # outputs are used
+        for multi_scale_transformer in self.multi_scale_transformers:
+            out1, out2 = multi_scale_transformer(x1, x2)
+        # classification: sum of both class-token outputs
+        out = self.to_latent(out1[:,0]) + self.to_latent(out2[:,0])
+        # MLP classification layer
+        return self.mlp_head(out)