update

Files changed (6) hide show

.gitattributes +1 -0
README.md +3 -3
figures/autoregressive.gif +0 -0
figures/cifm.png +0 -0
models_cifm/cifm.py +2 -1
test.ipynb +47 -89

.gitattributes CHANGED Viewed

@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 adata.h5ad filter=lfs diff=lfs merge=lfs -text

 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 adata.h5ad filter=lfs diff=lfs merge=lfs -text
+figures/cifm.png filter=lfs diff=lfs merge=lfs -text

README.md CHANGED Viewed

@@ -10,11 +10,11 @@ tags:
 - Library: ynyou/CIFM
 - Docs: [More Information Needed] -->
-# CI-FM: Cellular Interaction Foundation Model
 ## Overview
-This is the PyTorch implementation of the CI-FM model -- an AI model that can simulate the activities within a living tissue (AI virtual tissue).
-The current version of CI-FM has 138M parameters and is trained on around 23M cells of spatial genomics. The signature functions of CI-FM are:
 - **Embedding** of cellular microenvironments via ```embeddings = model.embed(adata)``` (the 1st Figure below panel D top);
 - **Inference/simulation** of cellular gene expressions within a certain microenvironment via ```expressions = model.predict_cells_at_locations(adata, target_locs)``` (the 1st Figure below panel D bottom, and the 2nd Figure below).

 - Library: ynyou/CIFM
 - Docs: [More Information Needed] -->
+# CIFM: Cellular Interaction Foundation Model
 ## Overview
+This is the PyTorch implementation of the CIFM model -- an AI model that can simulate the activities within a living tissue (AI virtual tissue).
+The current version of CIFM has 138M parameters and is trained on around 23M cells of spatial genomics. The signature functions of CIFM are:
 - **Embedding** of cellular microenvironments via ```embeddings = model.embed(adata)``` (the 1st Figure below panel D top);
 - **Inference/simulation** of cellular gene expressions within a certain microenvironment via ```expressions = model.predict_cells_at_locations(adata, target_locs)``` (the 1st Figure below panel D bottom, and the 2nd Figure below).

figures/autoregressive.gif CHANGED Viewed

figures/cifm.png CHANGED Viewed

Git LFS Details

SHA256: 57927c560bd4a3625814dbead09178ea660637c07f9e1a3ab5e7787678f5998f
Pointer size: 132 Bytes
Size of remote file: 1.64 MB

models_cifm/cifm.py CHANGED Viewed

@@ -19,12 +19,13 @@ class CIFM(
         super().__init__()
         self.gene_encoder = MLPBiasFree(in_dim=args.in_dim, out_dim=args.hidden_dim, hidden_dim=args.hidden_dim, num_layer=args.num_mlp_layers_in_module)
         self.model = VIEGNNModel(num_layers=args.num_layer, num_mlp_layers_in_module=args.num_mlp_layers_in_module,
-                emb_dim=args.hidden_dim, in_dim=args.hidden_dim, out_dim=args.hidden_dim, residual=True)
         self.mask_cell_decoder = VIEGNNModel(num_layers=args.num_layer, num_mlp_layers_in_module=args.num_mlp_layers_in_module,
                 emb_dim=args.hidden_dim, in_dim=args.hidden_dim, out_dim=args.hidden_dim, residual=False)
         self.mask_cell_expression = MLPBiasFree(in_dim=args.hidden_dim, out_dim=args.in_dim, hidden_dim=args.hidden_dim, num_layer=args.num_mlp_layers_in_module)
         self.mask_cell_dropout = MLPBiasFree(in_dim=args.hidden_dim, out_dim=args.in_dim, hidden_dim=args.hidden_dim, num_layer=args.num_mlp_layers_in_module)
         self.mask_embedding = nn.Embedding(1, args.hidden_dim)
         self.relu = nn.ReLU()
         self.sigmoid = nn.Sigmoid()

         super().__init__()
         self.gene_encoder = MLPBiasFree(in_dim=args.in_dim, out_dim=args.hidden_dim, hidden_dim=args.hidden_dim, num_layer=args.num_mlp_layers_in_module)
         self.model = VIEGNNModel(num_layers=args.num_layer, num_mlp_layers_in_module=args.num_mlp_layers_in_module,
+                emb_dim=args.hidden_dim, in_dim=args.hidden_dim, out_dim=args.hidden_dim, residual=False)
         self.mask_cell_decoder = VIEGNNModel(num_layers=args.num_layer, num_mlp_layers_in_module=args.num_mlp_layers_in_module,
                 emb_dim=args.hidden_dim, in_dim=args.hidden_dim, out_dim=args.hidden_dim, residual=False)
         self.mask_cell_expression = MLPBiasFree(in_dim=args.hidden_dim, out_dim=args.in_dim, hidden_dim=args.hidden_dim, num_layer=args.num_mlp_layers_in_module)
         self.mask_cell_dropout = MLPBiasFree(in_dim=args.hidden_dim, out_dim=args.in_dim, hidden_dim=args.hidden_dim, num_layer=args.num_mlp_layers_in_module)
         self.mask_embedding = nn.Embedding(1, args.hidden_dim)
+        self.proj = MLPBiasFree(in_dim=args.hidden_dim, out_dim=1, hidden_dim=args.hidden_dim, num_layer=4)
         self.relu = nn.ReLU()
         self.sigmoid = nn.Sigmoid()

test.ipynb CHANGED Viewed

@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -21,82 +21,22 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "CIFM(\n",
-       "  (gene_encoder): MLPBiasFree(\n",
-       "    (layers): ModuleList(\n",
-       "      (0): Linear(in_features=18289, out_features=1024, bias=False)\n",
-       "      (1-3): 3 x Linear(in_features=1024, out_features=1024, bias=False)\n",
-       "    )\n",
-       "    (layernorms): ModuleList(\n",
-       "      (0-2): 3 x LayerNorm((1024,), eps=1e-05, elementwise_affine=False)\n",
-       "    )\n",
-       "    (activation): ReLU()\n",
-       "  )\n",
-       "  (model): VIEGNNModel(\n",
-       "    (emb_in): Linear(in_features=1024, out_features=1024, bias=False)\n",
-       "    (convs): ModuleList(\n",
-       "      (0-1): 2 x EGNNLayer(emb_dim=1024, aggr=sum)\n",
-       "    )\n",
-       "    (pred): MLPBiasFree(\n",
-       "      (layers): ModuleList(\n",
-       "        (0-3): 4 x Linear(in_features=1024, out_features=1024, bias=False)\n",
-       "      )\n",
-       "      (layernorms): ModuleList(\n",
-       "        (0-2): 3 x LayerNorm((1024,), eps=1e-05, elementwise_affine=False)\n",
-       "      )\n",
-       "      (activation): ReLU()\n",
-       "    )\n",
-       "  )\n",
-       "  (mask_cell_decoder): VIEGNNModel(\n",
-       "    (emb_in): Linear(in_features=1024, out_features=1024, bias=False)\n",
-       "    (convs): ModuleList(\n",
-       "      (0-1): 2 x EGNNLayer(emb_dim=1024, aggr=sum)\n",
-       "    )\n",
-       "    (pred): MLPBiasFree(\n",
-       "      (layers): ModuleList(\n",
-       "        (0-3): 4 x Linear(in_features=1024, out_features=1024, bias=False)\n",
-       "      )\n",
-       "      (layernorms): ModuleList(\n",
-       "        (0-2): 3 x LayerNorm((1024,), eps=1e-05, elementwise_affine=False)\n",
-       "      )\n",
-       "      (activation): ReLU()\n",
-       "    )\n",
-       "  )\n",
-       "  (mask_cell_expression): MLPBiasFree(\n",
-       "    (layers): ModuleList(\n",
-       "      (0-2): 3 x Linear(in_features=1024, out_features=1024, bias=False)\n",
-       "      (3): Linear(in_features=1024, out_features=18289, bias=False)\n",
-       "    )\n",
-       "    (layernorms): ModuleList(\n",
-       "      (0-2): 3 x LayerNorm((1024,), eps=1e-05, elementwise_affine=False)\n",
-       "    )\n",
-       "    (activation): ReLU()\n",
-       "  )\n",
-       "  (mask_cell_dropout): MLPBiasFree(\n",
-       "    (layers): ModuleList(\n",
-       "      (0-2): 3 x Linear(in_features=1024, out_features=1024, bias=False)\n",
-       "      (3): Linear(in_features=1024, out_features=18289, bias=False)\n",
-       "    )\n",
-       "    (layernorms): ModuleList(\n",
-       "      (0-2): 3 x LayerNorm((1024,), eps=1e-05, elementwise_affine=False)\n",
-       "    )\n",
-       "    (activation): ReLU()\n",
-       "  )\n",
-       "  (mask_embedding): Embedding(1, 1024)\n",
-       "  (relu): ReLU()\n",
-       "  (sigmoid): Sigmoid()\n",
-       ")"
       ]
      },
-     "execution_count": 2,
      "metadata": {},
-     "output_type": "execute_result"
     }
    ],
    "source": [
@@ -123,7 +63,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -145,7 +85,7 @@
    "source": [
     "adata = sc.read_h5ad('./adata.h5ad')\n",
     "adata.layers['counts'] = adata.X.copy()\n",
-    "sc.pp.normalize_total(adata)\n",
     "sc.pp.log1p(adata)\n",
     "adata"
    ]
@@ -163,14 +103,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "matching 18289 gene channels out of 18289 unmatched channels: []\n"
      ]
     }
    ],
@@ -194,14 +134,14 @@
     {
      "data": {
       "text/plain": [
-       "(tensor([[-0.4132, -0.9847,  0.1647,  ..., -0.8351, -0.8177, -1.3235],\n",
-       "         [ 0.8701,  0.0967, -0.3676,  ...,  0.2687, -1.4821,  0.1605],\n",
-       "         [-0.5178, -0.4442, -0.0862,  ..., -0.7446, -0.5761, -0.5571],\n",
        "         ...,\n",
-       "         [ 1.2264,  1.2326,  0.2791,  ...,  0.8018, -1.4069,  1.4567],\n",
-       "         [ 0.6699, -0.6107,  0.2450,  ..., -0.1975, -0.6034, -0.6608],\n",
-       "         [-1.9240, -1.8125, -0.0766,  ..., -0.2799, -0.0217, -2.2051]]),\n",
-       " torch.Size([13898, 1024]))"
       ]
      },
      "execution_count": 5,
@@ -224,23 +164,23 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "(tensor([[0.0000, 0.0000, 0.8603,  ..., 0.0000, 0.0000, 0.0000],\n",
-       "         [0.0000, 0.0000, 0.6644,  ..., 0.0000, 0.0000, 0.0000],\n",
        "         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],\n",
        "         ...,\n",
-       "         [0.0000, 0.0000, 0.9809,  ..., 0.0000, 0.0000, 0.0000],\n",
-       "         [0.6641, 0.0000, 0.6858,  ..., 0.0000, 0.0000, 0.0000],\n",
-       "         [0.4999, 0.0000, 0.5311,  ..., 0.0000, 0.0000, 0.0000]]),\n",
        " torch.Size([10, 18289]))"
       ]
      },
-     "execution_count": 5,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -260,9 +200,27 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
    "metadata": {},
-   "outputs": [],
    "source": [
     "# you can convert it into normalize counts\n",
     "counts_normalized = np.exp(expressions) - 1\n",

  "cells": [
   {
    "cell_type": "code",
+   "execution_count": 1,
    "metadata": {},
    "outputs": [],
    "source": [
   },
   {
    "cell_type": "code",
+   "execution_count": 2,
    "metadata": {},
    "outputs": [
     {
      "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "18d58ba0049e4560b7bd0916fbd6ea33",
+       "version_major": 2,
+       "version_minor": 0
+      },
       "text/plain": [
+       "model.safetensors:   0%|          | 0.00/569M [00:00<?, ?B/s]"
       ]
      },
      "metadata": {},
+     "output_type": "display_data"
     }
    ],
    "source": [
   },
   {
    "cell_type": "code",
+   "execution_count": 3,
    "metadata": {},
    "outputs": [
     {
    "source": [
     "adata = sc.read_h5ad('./adata.h5ad')\n",
     "adata.layers['counts'] = adata.X.copy()\n",
+    "sc.pp.normalize_total(adata, target_sum=1e4)\n",
     "sc.pp.log1p(adata)\n",
     "adata"
    ]
   },
   {
    "cell_type": "code",
+   "execution_count": 4,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
+      "matching 18289 gene channels out of 18289 ; unmatched channels: []\n"
      ]
     }
    ],
     {
      "data": {
       "text/plain": [
+       "(tensor([[-0.4326, -0.8625,  0.1121,  ...,  0.4980,  0.3855, -0.1965],\n",
+       "         [-0.6833, -0.9950,  0.1927,  ..., -0.2064,  0.6193,  0.0387],\n",
+       "         [-0.2099, -0.9877,  0.3462,  ...,  0.2102,  0.6807, -0.2155],\n",
        "         ...,\n",
+       "         [-0.0187, -0.8444,  0.3058,  ...,  0.1030,  0.8362, -0.1859],\n",
+       "         [-0.5535, -0.8201,  0.7805,  ..., -0.1402,  0.5221, -0.3520],\n",
+       "         [-0.9339, -0.8467,  0.0600,  ...,  0.0406,  0.3608,  0.3418]]),\n",
+       " torch.Size([24844, 1024]))"
       ]
      },
      "execution_count": 5,
   },
   {
    "cell_type": "code",
+   "execution_count": 6,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
+       "(tensor([[0.0000, 0.0000, 2.8781,  ..., 0.0000, 0.0000, 0.0000],\n",
+       "         [0.0000, 0.0000, 2.9699,  ..., 0.0000, 0.0000, 0.0000],\n",
        "         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],\n",
        "         ...,\n",
+       "         [0.0000, 0.0000, 3.2570,  ..., 0.0000, 0.0000, 0.0000],\n",
+       "         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],\n",
+       "         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000]]),\n",
        " torch.Size([10, 18289]))"
       ]
      },
+     "execution_count": 6,
      "metadata": {},
      "output_type": "execute_result"
     }
   },
   {
    "cell_type": "code",
+   "execution_count": 7,
    "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(tensor([[0.0000, 0.0000, 0.0002,  ..., 0.0000, 0.0000, 0.0000],\n",
+       "         [0.0000, 0.0000, 0.0002,  ..., 0.0000, 0.0000, 0.0000],\n",
+       "         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],\n",
+       "         ...,\n",
+       "         [0.0000, 0.0000, 0.0003,  ..., 0.0000, 0.0000, 0.0000],\n",
+       "         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],\n",
+       "         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000]]),\n",
+       " torch.Size([10, 18289]))"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "# you can convert it into normalize counts\n",
     "counts_normalized = np.exp(expressions) - 1\n",