Add model files
modeling_custom.py +61 -0
modeling_custom.py
ADDED
@@ -0,0 +1,61 @@
from dataclasses import dataclass
from typing import Optional

import torch
import torch.nn as nn
from transformers import PreTrainedModel, PretrainedConfig
from transformers.modeling_outputs import ModelOutput

from models.model.transformer import Transformer
from models.model.sparse_autoencoder import SparseAutoencoder


@dataclass
class CustomModelOutput(ModelOutput):
    """Output carrying the two classification heads and the autoencoder's KL term.

    BaseModelOutput has no act/emotion/kl_div fields, so a custom ModelOutput
    subclass is needed to return them without raising a TypeError.
    """
    last_hidden_state: torch.FloatTensor = None
    act_output: Optional[torch.FloatTensor] = None
    emotion_output: Optional[torch.FloatTensor] = None
    kl_div: Optional[torch.FloatTensor] = None


class CustomConfig(PretrainedConfig):
    model_type = "custom_model"

    def __init__(self, hidden_size=768, num_attention_heads=12, num_hidden_layers=12,
                 intermediate_size=3072, hidden_dropout_prob=0.1,
                 num_act_classes=5, num_emotion_classes=7, **kwargs):
        super().__init__(**kwargs)
        self.hidden_size = hidden_size
        self.num_attention_heads = num_attention_heads
        self.num_hidden_layers = num_hidden_layers
        self.intermediate_size = intermediate_size
        self.hidden_dropout_prob = hidden_dropout_prob
        self.num_act_classes = num_act_classes
        self.num_emotion_classes = num_emotion_classes


class CustomModel(PreTrainedModel):
    config_class = CustomConfig

    def __init__(self, config):
        super().__init__(config)
        # Custom encoder-decoder Transformer; only the encoder is used in forward().
        # Vocab size and SOS index are hard-coded to BERT's WordPiece vocab
        # (30522 tokens, [CLS] = 101).
        self.transformer = Transformer(
            src_pad_idx=0,
            trg_pad_idx=0,
            trg_sos_idx=101,
            enc_voc_size=30522,
            dec_voc_size=30522,
            d_model=config.hidden_size,
            max_len=128,
            ffn_hidden=config.intermediate_size,
            n_head=config.num_attention_heads,
            n_layers=config.num_hidden_layers,
            drop_prob=config.hidden_dropout_prob,
            device='cuda' if torch.cuda.is_available() else 'cpu'
        )

        self.batch_norm = nn.BatchNorm1d(config.hidden_size)
        self.dropout = nn.Dropout(p=config.hidden_dropout_prob)
        self.act_classifier = nn.Linear(config.hidden_size, config.num_act_classes)
        self.emotion_classifier = nn.Linear(config.hidden_size, config.num_emotion_classes)
        self.sparse_autoencoder = SparseAutoencoder(
            input_size=config.hidden_size,
            hidden_size=config.hidden_size // 2,
            sparsity_param=0.05,
            beta=3
        )
        self.init_weights()

    def forward(self, input_ids=None, attention_mask=None, **kwargs):
        # Encoder output: (batch, seq_len, hidden)
        transformer_output = self.transformer.encoder(input_ids, attention_mask)
        batch_size, seq_len, hidden = transformer_output.size()

        # BatchNorm1d expects (N, C), so flatten batch and sequence dims,
        # normalize, apply dropout, then run the sparse autoencoder per token.
        flat = transformer_output.view(-1, hidden)
        flat = self.dropout(self.batch_norm(flat))
        reconstructed, kl_div, encoded = self.sparse_autoencoder(flat)

        # Restore (batch, seq_len, hidden) before indexing the [CLS] position;
        # without this reshape the flattened 2-D tensor cannot be sliced per sequence.
        reconstructed = reconstructed.view(batch_size, seq_len, hidden)
        cls_output = reconstructed[:, 0, :]

        act_output = self.act_classifier(cls_output)
        emotion_output = self.emotion_classifier(cls_output)

        return CustomModelOutput(
            last_hidden_state=cls_output,
            act_output=act_output,
            emotion_output=emotion_output,
            kl_div=kl_div,
        )
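
Since CustomConfig declares model_type = "custom_model", the pair can be wired into the Auto* loaders. A minimal registration sketch, assuming modeling_custom.py is importable from the working directory (the register calls are standard transformers API; the import path is this note's assumption, not part of the commit):

from transformers import AutoConfig, AutoModel

from modeling_custom import CustomConfig, CustomModel  # assumed import path

# Map the "custom_model" model_type to these classes so that
# AutoConfig.from_pretrained / AutoModel.from_pretrained can resolve them.
AutoConfig.register("custom_model", CustomConfig)
AutoModel.register(CustomConfig, CustomModel)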
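
And a quick forward-pass sketch. It assumes a bert-base-uncased tokenizer (matching the hard-coded enc_voc_size=30522 and trg_sos_idx=101, BERT's [CLS] id), that the models.model.* modules are on the path, and that the repo's Transformer encoder accepts the flat attention mask exactly as forward() passes it:

import torch
from transformers import AutoTokenizer

from modeling_custom import CustomConfig, CustomModel  # assumed import path

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = CustomModel(CustomConfig()).eval()  # eval() keeps BatchNorm on running stats

# max_len=128 is baked into the encoder's positional encoding, so truncate there.
batch = tokenizer("that sounds great, let's do it!",
                  max_length=128, truncation=True, return_tensors="pt")

with torch.no_grad():
    out = model(input_ids=batch["input_ids"], attention_mask=batch["attention_mask"])

print(out.act_output.shape)      # torch.Size([1, 5])  -> num_act_classes logits
print(out.emotion_output.shape)  # torch.Size([1, 7])  -> num_emotion_classes logits

One caveat: the Transformer picks its own device at construction time ('cuda' if available), so on a GPU machine the model and inputs may need an explicit .to(device) to line up.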