Upload folder using huggingface_hub
Browse files
FOREcasT_model/README.md
ADDED
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
library_name: transformers
|
3 |
+
tags:
|
4 |
+
- generated_from_trainer
|
5 |
+
datasets:
|
6 |
+
- crispr_data
|
7 |
+
model-index:
|
8 |
+
- name: SX_ispymac_FOREcasT
|
9 |
+
results: []
|
10 |
+
---
|
11 |
+
|
12 |
+
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
13 |
+
should probably proofread and complete it, then remove this comment. -->
|
14 |
+
|
15 |
+
# SX_ispymac_FOREcasT
|
16 |
+
|
17 |
+
This model is a fine-tuned version of an unspecified base model on the crispr_data dataset.
|
18 |
+
It achieves the following results on the evaluation set:
|
19 |
+
- Loss: 50.9107
|
20 |
+
|
21 |
+
## Model description
|
22 |
+
|
23 |
+
More information needed
|
24 |
+
|
25 |
+
## Intended uses & limitations
|
26 |
+
|
27 |
+
More information needed
|
28 |
+
|
29 |
+
## Training and evaluation data
|
30 |
+
|
31 |
+
More information needed
|
32 |
+
|
33 |
+
## Training procedure
|
34 |
+
|
35 |
+
### Training hyperparameters
|
36 |
+
|
37 |
+
The following hyperparameters were used during training:
|
38 |
+
- learning_rate: 0.001
|
39 |
+
- train_batch_size: 100
|
40 |
+
- eval_batch_size: 100
|
41 |
+
- seed: 63036
|
42 |
+
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
43 |
+
- lr_scheduler_type: linear
|
44 |
+
- lr_scheduler_warmup_ratio: 0.05
|
45 |
+
- num_epochs: 30.0
|
46 |
+
|
47 |
+
### Training results
|
48 |
+
|
49 |
+
| Training Loss | Epoch | Step | Validation Loss |
|
50 |
+
|:-------------:|:-----:|:----:|:---------------:|
|
51 |
+
| 5348.4985 | 1.0 | 326 | 4421.2725 |
|
52 |
+
| 3199.7485 | 2.0 | 652 | 2104.5654 |
|
53 |
+
| 1490.7015 | 3.0 | 978 | 1002.8325 |
|
54 |
+
| 713.0995 | 4.0 | 1304 | 480.3265 |
|
55 |
+
| 344.2885 | 5.0 | 1630 | 238.0577 |
|
56 |
+
| 178.0734 | 6.0 | 1956 | 131.9894 |
|
57 |
+
| 106.8135 | 7.0 | 2282 | 87.4150 |
|
58 |
+
| 76.4049 | 8.0 | 2608 | 67.8651 |
|
59 |
+
| 62.8691 | 9.0 | 2934 | 58.8372 |
|
60 |
+
| 56.6853 | 10.0 | 3260 | 54.8777 |
|
61 |
+
| 53.8423 | 11.0 | 3586 | 53.1428 |
|
62 |
+
| 52.4774 | 12.0 | 3912 | 51.9998 |
|
63 |
+
| 51.8337 | 13.0 | 4238 | 51.5273 |
|
64 |
+
| 51.4675 | 14.0 | 4564 | 51.2515 |
|
65 |
+
| 51.3089 | 15.0 | 4890 | 51.1025 |
|
66 |
+
| 51.2186 | 16.0 | 5216 | 51.0217 |
|
67 |
+
| 51.1556 | 17.0 | 5542 | 51.0496 |
|
68 |
+
| 51.1167 | 18.0 | 5868 | 50.9584 |
|
69 |
+
| 51.1001 | 19.0 | 6194 | 50.9669 |
|
70 |
+
| 51.0948 | 20.0 | 6520 | 50.9908 |
|
71 |
+
| 51.0668 | 21.0 | 6846 | 50.9851 |
|
72 |
+
| 51.0647 | 22.0 | 7172 | 50.9715 |
|
73 |
+
| 51.0496 | 23.0 | 7498 | 50.9685 |
|
74 |
+
| 51.0475 | 24.0 | 7824 | 50.9484 |
|
75 |
+
| 51.0267 | 25.0 | 8150 | 50.9452 |
|
76 |
+
| 51.0162 | 26.0 | 8476 | 50.9290 |
|
77 |
+
| 51.0045 | 27.0 | 8802 | 50.9395 |
|
78 |
+
| 50.9944 | 28.0 | 9128 | 50.9163 |
|
79 |
+
| 50.9849 | 29.0 | 9454 | 50.9197 |
|
80 |
+
| 50.9731 | 30.0 | 9780 | 50.9107 |
|
81 |
+
|
82 |
+
|
83 |
+
### Framework versions
|
84 |
+
|
85 |
+
- Transformers 4.44.2
|
86 |
+
- Pytorch 2.4.0+cu124
|
87 |
+
- Datasets 2.21.0
|
88 |
+
- Tokenizers 0.19.1
|
FOREcasT_model/config.json
CHANGED
@@ -1,5 +1,4 @@
|
|
1 |
{
|
2 |
-
"_name_or_path": "/home/ljw/sdc1/CRISPR_results/FOREcasT/SX_ispymac_FOREcasT",
|
3 |
"architectures": [
|
4 |
"FOREcasTModel"
|
5 |
],
|
|
|
1 |
{
|
|
|
2 |
"architectures": [
|
3 |
"FOREcasTModel"
|
4 |
],
|
FOREcasT_model/model.py
ADDED
@@ -0,0 +1,110 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from transformers import PretrainedConfig, PreTrainedModel
|
2 |
+
import torch.nn as nn
|
3 |
+
import torch
|
4 |
+
import torch.nn.functional as F
|
5 |
+
|
6 |
+
class FOREcasTConfig(PretrainedConfig):
    """Configuration for :class:`FOREcasTModel`.

    Args:
        reg_const: L2 regularization coefficient applied to the weights of
            features whose label does not contain ``"I"`` (the deletion
            features, see ``FOREcasTModel.__init__``).
        i1_reg_const: L2 regularization coefficient applied to the weights
            of features whose label contains ``"I"`` (the insertion
            features).
        seed: Seed of the random generator used to initialize the weights.
        **kwargs: Forwarded unchanged to ``PretrainedConfig.__init__``.
    """

    model_type = "FOREcasT"
    label_names = ["count"]

    def __init__(
        self,
        reg_const: float = 0.01,  # regularization coefficient for deletion features
        i1_reg_const: float = 0.01,  # regularization coefficient for insertion features
        seed: int = 63036,  # random seed for weight initialization
        **kwargs
    ):
        self.reg_const = reg_const
        self.i1_reg_const = i1_reg_const
        self.seed = seed
        super().__init__(**kwargs)
|
21 |
+
|
22 |
+
class FOREcasTModel(PreTrainedModel):
    """Bias-free linear model scoring each feature vector with one logit.

    The training loss is the KL divergence between the softmax of the logits
    (taken over the last dimension) and the L1-normalized, pseudo-counted
    observed counts, plus a per-feature weighted L2 penalty on the weights.
    """

    config_class = FOREcasTConfig

    @staticmethod
    def get_feature_label():
        """Return the ordered list of feature names.

        The length of this list defines the input width of the linear layer
        and its order defines which regularization coefficient is attached to
        which weight, so the order must not be changed.
        """
        def features_pairwise_label(features1_label, features2_label):
            # Cartesian product, first list varying slowest.
            return [
                f'PW_{label1}_vs_{label2}'
                for label1 in features1_label
                for label2 in features2_label
            ]

        nucleotides = ["A", "G", "C", "T"]

        feature_DelSize_label = ["Any Deletion", "D1", "D2-3", "D4-7", "D8-12", "D>12"]
        feature_InsSize_label = ["Any Insertion", "I1", "I2"]
        feature_DelLoc_label = ['DL-1--1', 'DL-2--2', 'DL-3--3', 'DL-4--6', 'DL-7--10', 'DL-11--15', 'DL-16--30', 'DL<-30', 'DL>=0', 'DR0-0', 'DR1-1', 'DR2-2', 'DR3-5', 'DR6-9', 'DR10-14', 'DR15-29', 'DR<0', 'DR>=30']
        feature_InsSeq_label = ["I1_A", "I1_C", "I1_G", "I1_T", "I2_AA", "I2_AC", "I2_AG", "I2_AT", "I2_CA", "I2_CC", "I2_CG", "I2_CT", "I2_GA", "I2_GC", "I2_GG", "I2_GT", "I2_TA", "I2_TC", "I2_TG", "I2_TT"]
        feature_InsLoc_label = ["IL-1--1", "IL-2--2", "IL-3--3", "IL<-3", "IL>=0"]

        feature_LocalCutSiteSequence_label = []
        for offset in range(-5, 4):
            for nt in nucleotides:
                feature_LocalCutSiteSequence_label.append(f"CS{offset}_NT={nt}")

        feature_LocalCutSiteSeqMatches_label = []
        for offset1 in range(-3, 2):
            # Only pairs with offset2 strictly below offset1 are enumerated.
            for offset2 in range(-3, offset1):
                for nt in nucleotides:
                    feature_LocalCutSiteSeqMatches_label.append(f"M_CS{offset1}_{offset2}_NT={nt}")

        # All "L" labels come first, followed by all "R" labels.
        feature_LocalRelativeSequence_label = []
        for offset in range(-3, 3):
            for nt in nucleotides:
                feature_LocalRelativeSequence_label.append(f'L{offset}_NT={nt}')
        for offset in range(-3, 3):
            for nt in nucleotides:
                feature_LocalRelativeSequence_label.append(f'R{offset}_NT={nt}')

        # For every (left, right) offset pair an "X" label is followed by its "M" label.
        feature_SeqMatches_label = []
        for loffset in range(-3, 3):
            for roffset in range(-3, 3):
                feature_SeqMatches_label.append(f'X_L{loffset}_R{roffset}')
                feature_SeqMatches_label.append(f'M_L{loffset}_R{roffset}')

        feature_I1or2Rpt_label = ['I1Rpt', 'I1NonRpt', 'I2Rpt', 'I2NonRpt']
        feature_microhomology_label = ['L_MH1-1', 'R_MH1-1', 'L_MH2-2', 'R_MH2-2', 'L_MH3-3', 'R_MH3-3', 'L_MM1_MH3-3', 'R_MM1_MH3-3', 'L_MH4-6', 'R_MH4-6', 'L_MM1_MH4-6', 'R_MM1_MH4-6', 'L_MH7-10', 'R_MH7-10', 'L_MM1_MH7-10', 'R_MM1_MH7-10', 'L_MH11-15', 'R_MH11-15', 'L_MM1_MH11-15', 'R_MM1_MH11-15', 'No MH']

        return (
            features_pairwise_label(feature_DelSize_label, feature_DelLoc_label) +
            feature_InsSize_label +
            feature_DelSize_label +
            feature_DelLoc_label +
            feature_InsLoc_label +
            feature_InsSeq_label +
            features_pairwise_label(feature_LocalCutSiteSequence_label, feature_InsSize_label + feature_DelSize_label) +
            features_pairwise_label(feature_microhomology_label + feature_LocalRelativeSequence_label, feature_DelSize_label + feature_DelLoc_label) +
            features_pairwise_label(feature_LocalCutSiteSeqMatches_label + feature_SeqMatches_label, feature_DelSize_label) +
            features_pairwise_label(feature_InsSeq_label + feature_LocalCutSiteSequence_label + feature_LocalCutSiteSeqMatches_label, feature_I1or2Rpt_label) +
            feature_I1or2Rpt_label +
            feature_LocalCutSiteSequence_label +
            feature_LocalCutSiteSeqMatches_label +
            feature_LocalRelativeSequence_label +
            feature_SeqMatches_label +
            feature_microhomology_label
        )

    def __init__(self, config) -> None:
        """Build the linear layer and the per-feature regularization vector.

        Args:
            config: A ``FOREcasTConfig``; ``seed``, ``reg_const`` and
                ``i1_reg_const`` are read from it.
        """
        super().__init__(config)
        self.generator = torch.Generator().manual_seed(config.seed)
        # A feature counts as a deletion feature when its label has no capital "I".
        is_delete = torch.tensor(['I' not in label for label in FOREcasTModel.get_feature_label()])
        # Deletion features get reg_const, all others get i1_reg_const.
        # Registered as a buffer so it follows the module across devices
        # without becoming a trainable parameter.
        self.register_buffer('reg_coff', (is_delete * config.reg_const + ~is_delete * config.i1_reg_const))
        self.linear = nn.Linear(in_features=len(self.reg_coff), out_features=1, bias=False)
        self.initialize_weights()

    def initialize_weights(self):
        """Draw every linear weight from N(0, 1) using the seeded generator; zero any bias."""
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, mean=0, std=1, generator=self.generator)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

    def forward(self, feature, count=None) -> dict:
        """Score the features and, when counts are given, compute the loss.

        Args:
            feature: Tensor whose last dimension has one entry per feature
                label. NOTE(review): presumably shaped
                (batch, candidates, features) -- confirm against the caller.
            count: Optional observed counts matching the logits' shape.

        Returns:
            A dict with key ``"logit"`` and, if ``count`` is not None, also
            ``"loss"``.
        """
        # Drop only the trailing singleton produced by the linear layer. A
        # bare squeeze() would also remove a batch (or candidate) axis of
        # size 1, which changes the output shape and makes logit.shape[0] in
        # kl_divergence refer to the wrong axis.
        logit = self.linear(feature).squeeze(-1)
        if count is not None:
            return {
                "logit": logit,
                "loss": self.kl_divergence(logit, count)
            }
        return {"logit": logit}

    def kl_divergence(self, logit, count):
        """Return the summed KL divergence plus the weighted L2 penalty.

        The target distribution is ``count + 0.5`` L1-normalized over the
        last dimension; the penalty is scaled by ``logit.shape[0]``.
        """
        return F.kl_div(
            F.log_softmax(logit, dim=-1),
            F.normalize(count + 0.5, p=1.0, dim=-1),  # add 0.5 to prevent log(0), see loadOligoFeaturesAndReadCounts
            reduction='sum'
        ) + logit.shape[0] * (self.reg_coff * (self.linear.weight ** 2)).sum()
|
FOREcasT_model/runs/Nov22_09-23-55_ljw-System-Product-Name/events.out.tfevents.1732238639.ljw-System-Product-Name.7114.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ed325373d7b951d73e184d2af92c077947a22c7e715472ab8f68d3f946c22c35
|
3 |
+
size 19361
|
FOREcasT_model/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f40f824eac3f47aa7f2c23d3f36d4ec4956c4b4b4149c8d4f1f110f5d33d1396
|
3 |
+
size 5304
|