ljw20180420 committed
Commit 4dc7091 (verified)
Parent(s): 66376de

Upload folder using huggingface_hub

inDelphi_model/README.md ADDED
@@ -0,0 +1,88 @@
+ ---
+ library_name: transformers
+ tags:
+ - generated_from_trainer
+ datasets:
+ - crispr_data
+ model-index:
+ - name: SX_ispymac_inDelphi
+   results: []
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ # SX_ispymac_inDelphi
+
+ This model is a fine-tuned version of [](https://huggingface.co/) on the crispr_data dataset.
+ It achieves the following results on the evaluation set:
+ - Loss: -100.9903
+
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 0.001
+ - train_batch_size: 100
+ - eval_batch_size: 100
+ - seed: 63036
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+ - lr_scheduler_type: linear
+ - lr_scheduler_warmup_ratio: 0.05
+ - num_epochs: 30.0
+
+ ### Training results
+
+ | Training Loss | Epoch | Step | Validation Loss |
+ |:-------------:|:-----:|:----:|:---------------:|
+ | -79.5476 | 1.0 | 326 | -92.0552 |
+ | -99.7202 | 2.0 | 652 | -100.0530 |
+ | -101.9133 | 3.0 | 978 | -100.5825 |
+ | -102.2183 | 4.0 | 1304 | -100.7356 |
+ | -102.3128 | 5.0 | 1630 | -100.6958 |
+ | -102.3754 | 6.0 | 1956 | -100.9292 |
+ | -102.4144 | 7.0 | 2282 | -100.9071 |
+ | -102.422 | 8.0 | 2608 | -100.9715 |
+ | -102.4247 | 9.0 | 2934 | -100.7776 |
+ | -102.4267 | 10.0 | 3260 | -100.8441 |
+ | -102.4171 | 11.0 | 3586 | -100.9532 |
+ | -102.4438 | 12.0 | 3912 | -100.9826 |
+ | -102.4623 | 13.0 | 4238 | -100.9485 |
+ | -102.4658 | 14.0 | 4564 | -100.9664 |
+ | -102.4692 | 15.0 | 4890 | -100.9808 |
+ | -102.4732 | 16.0 | 5216 | -100.9686 |
+ | -102.468 | 17.0 | 5542 | -100.9455 |
+ | -102.4662 | 18.0 | 5868 | -100.9530 |
+ | -102.4805 | 19.0 | 6194 | -100.9872 |
+ | -102.4865 | 20.0 | 6520 | -100.8664 |
+ | -102.4719 | 21.0 | 6846 | -100.9966 |
+ | -102.4959 | 22.0 | 7172 | -100.9938 |
+ | -102.4963 | 23.0 | 7498 | -100.9695 |
+ | -102.4953 | 24.0 | 7824 | -100.9951 |
+ | -102.5011 | 25.0 | 8150 | -100.9904 |
+ | -102.5003 | 26.0 | 8476 | -100.9612 |
+ | -102.5097 | 27.0 | 8802 | -100.9850 |
+ | -102.5167 | 28.0 | 9128 | -100.9925 |
+ | -102.5139 | 29.0 | 9454 | -100.9887 |
+ | -102.517 | 30.0 | 9780 | -100.9903 |
+
+
+ ### Framework versions
+
+ - Transformers 4.44.2
+ - Pytorch 2.4.0+cu124
+ - Datasets 2.21.0
+ - Tokenizers 0.19.1
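
As a rough sketch (not part of this upload), the hyperparameters listed in the model card above correspond approximately to the following `TrainingArguments`; the `output_dir` is a placeholder, the batch sizes are interpreted as per-device values on a single GPU, and the dataset/Trainer wiring is omitted:

```python
# Sketch only: TrainingArguments matching the hyperparameters in the model card above.
# output_dir is a placeholder, not taken from the commit.
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="SX_ispymac_inDelphi",   # placeholder
    learning_rate=1e-3,
    per_device_train_batch_size=100,
    per_device_eval_batch_size=100,
    seed=63036,
    lr_scheduler_type="linear",
    warmup_ratio=0.05,
    num_train_epochs=30.0,
    # Adam settings below are the transformers defaults, matching the card:
    adam_beta1=0.9,
    adam_beta2=0.999,
    adam_epsilon=1e-8,
)
```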
inDelphi_model/config.json CHANGED
@@ -1,6 +1,5 @@
  {
    "DELLEN_LIMIT": 60,
-   "_name_or_path": "/home/ljw/sdc1/CRISPR_results/inDelphi/SX_ispymac_inDelphi",
    "architectures": [
      "inDelphiModel"
    ],
inDelphi_model/insertion_model.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:83f2cf9363b2a32d6e8267bf5df06ac9bbc7e6a231996bcbbd243eaa3768d3d3
+ size 2868606
inDelphi_model/model.py ADDED
@@ -0,0 +1,104 @@
+ from transformers import PretrainedConfig, PreTrainedModel
+ import torch
+ import torch.nn.functional as F
+ import torch.nn as nn
+
+ class inDelphiConfig(PretrainedConfig):
+     model_type = "inDelphi"
+     label_names = ["genotype_count", "total_del_len_count"]
+
+     def __init__(
+         self,
+         DELLEN_LIMIT = 60, # the upper limit of deletion length (strictly less than DELLEN_LIMIT)
+         mid_dim = 16, # the size of the middle layer of the MLP
+         seed = 63036, # random seed for initialization
+         **kwargs,
+     ):
+         self.DELLEN_LIMIT = DELLEN_LIMIT
+         self.mid_dim = mid_dim
+         self.seed = seed
+         super().__init__(**kwargs)
+
+ class inDelphiModel(PreTrainedModel):
+     config_class = inDelphiConfig
+
+     def __init__(self, config):
+         super().__init__(config)
+         # In recent versions of PyTorch, explicit register_parameter calls are no longer needed; assigning an nn.Parameter to an attribute of an nn.Module is enough for it to be treated as a trainable parameter (https://stackoverflow.com/questions/59234238/how-to-add-parameters-in-module-class-in-pytorch-custom-model).
+         self.generator = torch.Generator().manual_seed(config.seed)
+         self.DELLEN_LIMIT = config.DELLEN_LIMIT
+         self.register_buffer('del_lens', torch.arange(1, config.DELLEN_LIMIT, dtype=torch.float32))
+         self.mh_in_layer = nn.Linear(in_features=2, out_features=config.mid_dim)
+         self.mh_mid_layer = nn.Linear(in_features=config.mid_dim, out_features=config.mid_dim)
+         self.mh_out_layer = nn.Linear(in_features=config.mid_dim, out_features=1)
+         self.mhless_in_layer = nn.Linear(in_features=1, out_features=config.mid_dim)
+         self.mhless_mid_layer = nn.Linear(in_features=config.mid_dim, out_features=config.mid_dim)
+         self.mhless_out_layer = nn.Linear(in_features=config.mid_dim, out_features=1)
+         self.mid_active = self.sigmoid
+         self.out_active = self.logit_to_weight
+         self.initialize_weights()
+
+     def initialize_weights(self):
+         for m in self.modules():
+             if isinstance(m, nn.Linear):
+                 nn.init.normal_(m.weight, mean=0, std=1, generator=self.generator)
+                 if m.bias is not None:
+                     nn.init.constant_(m.bias, 0)
+
+     def forward(self, mh_input, mh_del_len, genotype_count=None, total_del_len_count=None):
+         batch_size = mh_input.shape[0]
+         mh_weight = self.mh_in_layer(mh_input)
+         mh_weight = self.mid_active(mh_weight)
+         mh_weight = self.mh_mid_layer(mh_weight)
+         mh_weight = self.mid_active(mh_weight)
+         mh_weight = self.mh_out_layer(mh_weight)
+         mh_weight = self.out_active(mh_weight, mh_del_len)
+
+         mhless_weight = self.mhless_in_layer(self.del_lens[:, None])
+         mhless_weight = self.mid_active(mhless_weight)
+         mhless_weight = self.mhless_mid_layer(mhless_weight)
+         mhless_weight = self.mid_active(mhless_weight)
+         mhless_weight = self.mhless_out_layer(mhless_weight)
+         mhless_weight = self.out_active(mhless_weight, self.del_lens)
+
+         total_del_len_weight = torch.zeros(batch_size, mhless_weight.shape[0] + 1, dtype=mh_weight.dtype, device=mh_weight.device).scatter_add(dim=1, index=mh_del_len - 1, src=mh_weight)[:, :-1] + mhless_weight
+         if genotype_count is not None and total_del_len_count is not None:
+             loss = self.negative_correlation(mh_weight, mhless_weight, total_del_len_weight, genotype_count, total_del_len_count)
+             return {
+                 "mh_weight": mh_weight,
+                 "mhless_weight": mhless_weight,
+                 "total_del_len_weight": total_del_len_weight,
+                 "loss": loss
+             }
+         return {
+             "mh_weight": mh_weight,
+             "mhless_weight": mhless_weight,
+             "total_del_len_weight": total_del_len_weight
+         }
+
+     def logit_to_weight(self, logits, del_lens):
+         return torch.exp(logits.squeeze() - 0.25 * del_lens) * (del_lens < self.DELLEN_LIMIT)
+
+     def sigmoid(self, x):
+         return 0.5 * (F.tanh(x) + 1)
+
+     def negative_correlation(self, mh_weight, mhless_weight, total_del_len_weight, genotype_count, total_del_len_count):
+         batch_size = mh_weight.shape[0]
+         genotype_pearson = (
+             F.normalize(
+                 torch.cat(
+                     (mh_weight, mhless_weight.expand(batch_size, -1)),
+                     dim = 1
+                 ),
+                 p=2.0,
+                 dim=1
+             ) *
+             F.normalize(genotype_count, p=2.0, dim=1)
+         ).sum()
+
+         total_del_len_pearson = (
+             F.normalize(total_del_len_weight, p=2.0, dim=1) *
+             F.normalize(total_del_len_count, p=2.0, dim=1)
+         ).sum()
+
+         return -genotype_pearson - total_del_len_pearson
inDelphi_model/runs/Nov20_09-57-19_ljw-System-Product-Name/events.out.tfevents.1732067840.ljw-System-Product-Name.1186644.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0b607194c1fbfaf19b662444c2a98ed15bc1e83486249d67666d55ea1ba1362d
+ size 19414
inDelphi_model/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b8b1e3b72b95617cf6e0f5e9723ac37039c4f7f61f2f73f8cf461633a17b0a4b
+ size 5304
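
For reference (also not part of this upload), a minimal usage sketch for the model defined in `inDelphi_model/model.py`, assuming it is run from a local copy of that folder; the input shapes for `mh_input` and `mh_del_len` are inferred from the `forward` signature rather than documented in the commit:

```python
# Minimal sketch, assuming the working directory is a local copy of inDelphi_model/.
# The shapes for mh_input / mh_del_len are inferred from forward(), not documented in the commit.
import torch
from model import inDelphiConfig, inDelphiModel

config = inDelphiConfig(DELLEN_LIMIT=60, mid_dim=16, seed=63036)
model = inDelphiModel(config)

batch_size, num_mh = 4, 8
# Two features per candidate microhomology deletion (fed to a Linear(2, mid_dim) layer).
mh_input = torch.rand(batch_size, num_mh, 2)
# Deletion length of each candidate, in [1, DELLEN_LIMIT - 1]; used as a scatter index in forward().
mh_del_len = torch.randint(1, config.DELLEN_LIMIT, (batch_size, num_mh))

out = model(mh_input=mh_input, mh_del_len=mh_del_len)
print(out["mh_weight"].shape)             # (batch_size, num_mh)
print(out["total_del_len_weight"].shape)  # (batch_size, DELLEN_LIMIT - 1)
```

The forward pass returns per-microhomology deletion weights (`mh_weight`), microhomology-less weights for each deletion length (`mhless_weight`), and their combination per total deletion length (`total_del_len_weight`); when `genotype_count` and `total_del_len_count` are also supplied, the returned dict additionally contains the negative-correlation loss used during training.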