update

Files changed (7) hide show

models/cifm.py +2 -4
models/egnn_void_invariant.py +0 -9
models/layers/__init__.py +0 -0
models/layers/__pycache__/__init__.cpython-311.pyc +0 -0
models/layers/__pycache__/egnn_layer_void_invariant.cpython-311.pyc +0 -0
models/layers/egnn_layer_void_invariant.py +0 -48
test.ipynb +51 -6

models/cifm.py CHANGED Viewed

@@ -32,8 +32,8 @@ class CIFM(
         self.hidden_dim = args.hidden_dim
         self.radius_spatial_graph = args.radius_spatial_graph
-    def channel_matching(self, adata, channel2ensembl_ids_source, zero_init_for_unmatched_genes=True):
-        channel2ensembl_ids_target = [[i] for i in adata.var.index.tolist()]
         linear_in = nn.Linear(len(channel2ensembl_ids_target), self.hidden_dim, bias=False)
         linear_out1 = nn.Linear(self.hidden_dim, len(channel2ensembl_ids_target), bias=False)
@@ -97,8 +97,6 @@ class CIFM(
         expressions_dec = self.relu(self.mask_cell_expression(embeddings_dec))
         dropouts_dec = self.sigmoid(self.mask_cell_dropout(embeddings_dec))
-        # import pdb ; pdb.set_trace()
         expressions_dec[dropouts_dec<=0.5] = 0
         return expressions_dec

         self.hidden_dim = args.hidden_dim
         self.radius_spatial_graph = args.radius_spatial_graph
+    def channel_matching(self, channel2ensembl_ids_target, channel2ensembl_ids_source, zero_init_for_unmatched_genes=True):
+        # channel2ensembl_ids_target = [[i] for i in adata.var.index.tolist()]
         linear_in = nn.Linear(len(channel2ensembl_ids_target), self.hidden_dim, bias=False)
         linear_out1 = nn.Linear(self.hidden_dim, len(channel2ensembl_ids_target), bias=False)
         expressions_dec = self.relu(self.mask_cell_expression(embeddings_dec))
         dropouts_dec = self.sigmoid(self.mask_cell_dropout(embeddings_dec))
         expressions_dec[dropouts_dec<=0.5] = 0
         return expressions_dec

models/egnn_void_invariant.py CHANGED Viewed

@@ -48,15 +48,6 @@ class VIEGNNModel(torch.nn.Module):
         self.convs = torch.nn.ModuleList()
         for _ in range(num_layers):
             self.convs.append(EGNNLayer(emb_dim, num_mlp_layers_in_module, aggr))
-        # MLP predictor for invariant tasks using only scalar features
-        # self.pred = torch.nn.Sequential(
-        #     torch.nn.Linear(emb_dim, emb_dim, bias=False),
-        #     torch.nn.ReLU(),
-        #     torch.nn.Linear(emb_dim, out_dim, bias=False)
-        # )
-        # layers = [torch.nn.Linear(emb_dim, emb_dim, bias=False), torch.nn.ReLU()] * (num_mlp_layers_in_module-1) + [torch.nn.Linear(emb_dim, out_dim, bias=False)]
-        # self.pred = torch.nn.Sequential(*layers)
         self.pred = MLPBiasFree(in_dim=emb_dim, out_dim=out_dim, hidden_dim=emb_dim, num_layer=num_mlp_layers_in_module)
     # unroll the batch argments and comment out the pooling operation

         self.convs = torch.nn.ModuleList()
         for _ in range(num_layers):
             self.convs.append(EGNNLayer(emb_dim, num_mlp_layers_in_module, aggr))
         self.pred = MLPBiasFree(in_dim=emb_dim, out_dim=out_dim, hidden_dim=emb_dim, num_layer=num_mlp_layers_in_module)
     # unroll the batch argments and comment out the pooling operation

models/layers/__init__.py DELETED Viewed

File without changes

models/layers/__pycache__/__init__.cpython-311.pyc DELETED Viewed

Binary file (170 Bytes)

models/layers/__pycache__/egnn_layer_void_invariant.cpython-311.pyc DELETED Viewed

Binary file (4.8 kB)

models/layers/egnn_layer_void_invariant.py CHANGED Viewed

@@ -23,49 +23,11 @@ class EGNNLayer(MessagePassing):
         super().__init__(aggr=aggr)
         self.emb_dim = emb_dim
-        # self.activation = ReLU()
         self.dist_embedding = Linear(1, emb_dim, bias=False)
         self.innerprod_embedding = MLPBiasFree(in_dim=1, out_dim=1, hidden_dim=emb_dim, num_layer=num_mlp_layers)
-        # MLP `\psi_h` for computing messages `m_ij`
-        # self.mlp_msg = Sequential(
-        #     Linear(2 * emb_dim + 1, emb_dim, bias=False),
-        #     torch.nn.LayerNorm(emb_dim, bias=False),
-        #     self.activation,
-        #     Linear(emb_dim, emb_dim, bias=False),
-        #     torch.nn.LayerNorm(emb_dim, bias=False),
-        #     self.activation,
-        # )
-        # layers = [Linear(2 * emb_dim + 1, emb_dim, bias=False), torch.nn.LayerNorm(emb_dim, bias=False), self.activation] \
-        #         + [Linear(emb_dim, emb_dim, bias=False), torch.nn.LayerNorm(emb_dim, bias=False), self.activation] * (num_mlp_layers-1)
-        # layers = [Linear(3 * emb_dim, emb_dim, bias=False)] \
-        #         + [self.activation, Linear(emb_dim, emb_dim, bias=False)] * (num_mlp_layers-1) \
-        #         + [torch.nn.LayerNorm(emb_dim, bias=False)]
-        # self.mlp_msg = Sequential(*layers)
         self.mlp_msg = MLPBiasFree(in_dim=3*emb_dim, out_dim=emb_dim, hidden_dim=emb_dim, num_layer=num_mlp_layers)
-        # MLP `\psi_x` for computing messages `\overrightarrow{m}_ij`
-        # self.mlp_pos = Sequential(
-        #     Linear(emb_dim, emb_dim), torch.nn.LayerNorm(emb_dim), self.activation, Linear(emb_dim, 1)
-        # )
-        # layers = [Linear(emb_dim, emb_dim, bias=False), torch.nn.LayerNorm(emb_dim, bias=False), self.activation] * (num_mlp_layers-1) + [Linear(emb_dim, 1, bias=False)]
-        # layers = [Linear(emb_dim, emb_dim, bias=False), self.activation] * (num_mlp_layers-1) + [Linear(emb_dim, 1, bias=False)]
-        # self.mlp_pos = Sequential(*layers)
         self.mlp_pos = MLPBiasFree(in_dim=emb_dim, out_dim=1, hidden_dim=emb_dim, num_layer=num_mlp_layers)
-        # MLP `\phi` for computing updated node features `h_i^{l+1}`
-        # self.mlp_upd = Sequential(
-        #     Linear(2 * emb_dim, emb_dim, bias=False),
-        #     torch.nn.LayerNorm(emb_dim, bias=False),
-        #     self.activation,
-        #     Linear(emb_dim, emb_dim, bias=False),
-        #     torch.nn.LayerNorm(emb_dim, bias=False),
-        #     self.activation,
-        # )
-        # layers = [Linear(emb_dim, emb_dim, bias=False), torch.nn.LayerNorm(emb_dim, bias=False), self.activation] * num_mlp_layers
-        # layers = [Linear(emb_dim, emb_dim, bias=False)] + [self.activation, Linear(emb_dim, emb_dim, bias=False)] * (num_mlp_layers-1)
-        # self.mlp_upd = Sequential(*layers)
         self.mlp_upd = MLPBiasFree(in_dim=emb_dim, out_dim=emb_dim, hidden_dim=emb_dim, num_layer=num_mlp_layers)
     def forward(self, h, pos, edge_index):
@@ -83,7 +45,6 @@ class EGNNLayer(MessagePassing):
     def message(self, h_i, h_j, pos_i, pos_j):
         # Compute messages
         pos_diff = pos_i - pos_j
-        # dists = torch.norm(pos_diff, dim=-1).unsqueeze(1)
         dists = torch.exp(- torch.norm(pos_diff, dim=-1).unsqueeze(1) / 30 ) # reference distances: 30um
         inner_prod = torch.mean(h_i * h_j, dim=-1).unsqueeze(1)
         msg = torch.cat([h_i, h_j, self.dist_embedding(dists)], dim=-1) * self.innerprod_embedding(inner_prod)
@@ -91,10 +52,6 @@ class EGNNLayer(MessagePassing):
         # Scale magnitude of displacement vector
         pos_diff = pos_diff * self.mlp_pos(msg)
         # NOTE: some papers divide pos_diff by (dists + 1) to stabilise model.
-        # NOTE: lucidrains clamps pos_diff between some [-n, +n], also for stability.
-        # print(torch.cat([h_i, h_j, self.dist_embedding(dists)], dim=-1))
-        # print(msg)
-        # import pdb; pdb.set_trace()
         return msg, pos_diff, inner_prod
     def aggregate(self, inputs, index):
@@ -109,17 +66,12 @@ class EGNNLayer(MessagePassing):
         counts = scatter(counts, index, dim=0, reduce="add")
         counts[counts==0] = 1
         pos_aggr = pos_aggr / counts
-        # print(msgs)
-        # print(msg_aggr)
-        # import pdb; pdb.set_trace()
         return msg_aggr, pos_aggr
     def update(self, aggr_out, h, pos):
         msg_aggr, pos_aggr = aggr_out
-        # upd_out = self.mlp_upd(torch.cat([h, msg_aggr], dim=-1))
         upd_out = self.mlp_upd(msg_aggr)
         upd_pos = pos + pos_aggr
-        # import pdb; pdb.set_trace()
         return upd_out, upd_pos
     def __repr__(self) -> str:

         super().__init__(aggr=aggr)
         self.emb_dim = emb_dim
         self.dist_embedding = Linear(1, emb_dim, bias=False)
         self.innerprod_embedding = MLPBiasFree(in_dim=1, out_dim=1, hidden_dim=emb_dim, num_layer=num_mlp_layers)
         self.mlp_msg = MLPBiasFree(in_dim=3*emb_dim, out_dim=emb_dim, hidden_dim=emb_dim, num_layer=num_mlp_layers)
         self.mlp_pos = MLPBiasFree(in_dim=emb_dim, out_dim=1, hidden_dim=emb_dim, num_layer=num_mlp_layers)
         self.mlp_upd = MLPBiasFree(in_dim=emb_dim, out_dim=emb_dim, hidden_dim=emb_dim, num_layer=num_mlp_layers)
     def forward(self, h, pos, edge_index):
     def message(self, h_i, h_j, pos_i, pos_j):
         # Compute messages
         pos_diff = pos_i - pos_j
         dists = torch.exp(- torch.norm(pos_diff, dim=-1).unsqueeze(1) / 30 ) # reference distances: 30um
         inner_prod = torch.mean(h_i * h_j, dim=-1).unsqueeze(1)
         msg = torch.cat([h_i, h_j, self.dist_embedding(dists)], dim=-1) * self.innerprod_embedding(inner_prod)
         # Scale magnitude of displacement vector
         pos_diff = pos_diff * self.mlp_pos(msg)
         # NOTE: some papers divide pos_diff by (dists + 1) to stabilise model.
         return msg, pos_diff, inner_prod
     def aggregate(self, inputs, index):
         counts = scatter(counts, index, dim=0, reduce="add")
         counts[counts==0] = 1
         pos_aggr = pos_aggr / counts
         return msg_aggr, pos_aggr
     def update(self, aggr_out, h, pos):
         msg_aggr, pos_aggr = aggr_out
         upd_out = self.mlp_upd(msg_aggr)
         upd_pos = pos + pos_aggr
         return upd_out, upd_pos
     def __repr__(self) -> str:

test.ipynb CHANGED Viewed

@@ -12,9 +12,16 @@
     "import scanpy as sc"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
    "metadata": {},
    "outputs": [
     {
@@ -94,13 +101,25 @@
    ],
    "source": [
     "args_model = torch.load('./model_files/args.pt')\n",
-    "model = CIFM.from_pretrained('ynyou/CIFM', args=args_model)\n",
     "model.eval()"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
    "metadata": {},
    "outputs": [
     {
@@ -120,7 +139,6 @@
     }
    ],
    "source": [
-    "channel2ensembl = torch.load('./model_files/channel2ensembl.pt')\n",
     "adata = sc.read_h5ad('./adata.h5ad')\n",
     "adata.layers['counts'] = adata.X.copy()\n",
     "sc.pp.normalize_total(adata)\n",
@@ -128,9 +146,20 @@
     "adata"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
    "metadata": {},
    "outputs": [
     {
@@ -142,7 +171,16 @@
     }
    ],
    "source": [
-    "model.channel_matching(adata, channel2ensembl)"
    ]
   },
   {
@@ -174,6 +212,13 @@
     "embeddings, embeddings.shape"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": 5,

     "import scanpy as sc"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 1. load model"
+   ]
+  },
   {
    "cell_type": "code",
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
    ],
    "source": [
     "args_model = torch.load('./model_files/args.pt')\n",
+    "device = 'cpu' # or 'cuda'\n",
+    "model = CIFM.from_pretrained('ynyou/CIFM', args=args_model).to(device)\n",
     "model.eval()"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 2. load and preprocess sample adata\n",
+    "- some requirements for adata:\n",
+    "- ```adata.X```: needs to be the raw counts\n",
+    "- ```adata.obsm['spatial']```: the coordinates of cells, in micrometers\n",
+    "- if the coordinates are in a different unit, the geometric graph may be distorted: the model uses a radius of 20 (micrometers) to construct the geometric graph, so a different unit might result in an overly sparse or overly dense graph"
+   ]
+  },
   {
    "cell_type": "code",
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
     }
    ],
    "source": [
     "adata = sc.read_h5ad('./adata.h5ad')\n",
     "adata.layers['counts'] = adata.X.copy()\n",
     "sc.pp.normalize_total(adata)\n",
     "adata"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 3. match feature channels\n",
+    "- we need a list which maps feature channels to Ensembl IDs: ```channel2ensembl_ids_target```\n",
+    "- format: ```channel2ensembl_ids_target = [[ensemblid1_for_channel1, ensemblid2_for_channel1, ...], [ensemblid1_for_channel2, ensemblid2_for_channel2, ...], ...]```\n",
+    "- one channel could correspond to multiple Ensembl IDs, e.g., when the channels in your original data are annotated with gene names\n",
+    "- you can use BioMart to map each gene name to one or multiple Ensembl IDs"
+   ]
+  },
   {
    "cell_type": "code",
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
     }
    ],
    "source": [
+    "channel2ensembl_ids_target = [[i] for i in adata.var.index.tolist()]\n",
+    "channel2ensembl_ids_source = torch.load('./model_files/channel2ensembl.pt')\n",
+    "model.channel_matching(channel2ensembl_ids_target, channel2ensembl_ids_source)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 4. embed the microenvironments centered at each cell"
    ]
   },
   {
     "embeddings, embeddings.shape"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 5. infer the potential gene expressions at certain locations"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 5,