from typing import List, Optional

from transformers import PretrainedConfig


class transformerConfig(PretrainedConfig):
    """Configuration for a small custom encoder-decoder Transformer."""

    model_type = "custom_transformer"

    def __init__(
        self,
        src_vocab_len: int = 184,
        tgt_vocab: int = 201,
        num_hiddens: int = 32,
        num_layers: int = 2,
        dropout: float = 0.1,
        batch_size: int = 64,
        num_steps: int = 10,
        lr: float = 0.005,
        num_epochs: int = 200,
        ffn_num_input: int = 32,
        ffn_num_hiddens: int = 64,
        num_heads: int = 4,
        key_size: int = 32,
        query_size: int = 32,
        value_size: int = 32,
        norm_shape: Optional[List[int]] = None,
        **kwargs,
    ):
        # Vocabulary sizes and core model dimensions.
        self.src_vocab_len = src_vocab_len
        self.tgt_vocab = tgt_vocab
        self.num_hiddens = num_hiddens
        self.num_layers = num_layers
        self.dropout = dropout
        # Training hyperparameters.
        self.batch_size = batch_size
        self.num_steps = num_steps
        self.lr = lr
        self.num_epochs = num_epochs
        # Feed-forward network and attention dimensions.
        self.ffn_num_input = ffn_num_input
        self.ffn_num_hiddens = ffn_num_hiddens
        self.num_heads = num_heads
        self.key_size = key_size
        self.query_size = query_size
        self.value_size = value_size
        # The default lives here rather than in the signature to avoid a
        # shared mutable default argument; [32] preserves the original value.
        self.norm_shape = norm_shape if norm_shape is not None else [32]
        super().__init__(**kwargs)
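

# Minimal usage sketch (illustrative, not part of the original class).
# PretrainedConfig supplies save_pretrained()/from_pretrained(), so instances
# round-trip through a config.json on disk; the directory name below is a
# hypothetical example. For AutoConfig dispatch, one could additionally call
# AutoConfig.register("custom_transformer", transformerConfig).
if __name__ == "__main__":
    config = transformerConfig(num_hiddens=64, num_heads=8, norm_shape=[64])
    config.save_pretrained("custom_transformer_config")  # writes config.json
    reloaded = transformerConfig.from_pretrained("custom_transformer_config")
    assert reloaded.num_hiddens == 64 and reloaded.norm_shape == [64]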