# custom-transformer/configuration_transformer.py
from typing import List, Optional

from transformers import PretrainedConfig

class transformerConfig(PretrainedConfig):
    """Configuration for a small sequence-to-sequence Transformer.

    Stores both the architecture hyperparameters (vocabulary sizes, hidden
    width, attention heads, feed-forward sizes) and the training settings
    (batch size, learning rate, epochs) used for this checkpoint. The
    defaults mirror the compact Transformer from the d2l.ai course material.

    ``model_type`` is the identifier under which this config can be
    registered with ``AutoConfig``.
    """

    model_type = "custom_transformer"
    def __init__(
        self,
        src_vocab_len: int = 184,
        tgt_vocab: int = 201,
        num_hiddens: int = 32,
        num_layers: int = 2,
        dropout: float = 0.1,
        batch_size: int = 64,
        num_steps: int = 10,
        lr: float = 0.005,
        num_epochs: int = 200,
        # Device selection (originally d2l.try_gpu()) happens at runtime in
        # the model/training code and is not stored in the config.
        ffn_num_input: int = 32,
        ffn_num_hiddens: int = 64,
        num_heads: int = 4,
        key_size: int = 32,
        query_size: int = 32,
        value_size: int = 32,
        norm_shape: Optional[List[int]] = None,
        **kwargs,
    ):
        # Architecture hyperparameters.
        self.src_vocab_len = src_vocab_len
        self.tgt_vocab = tgt_vocab
        self.num_hiddens = num_hiddens
        self.num_layers = num_layers
        self.dropout = dropout
        self.ffn_num_input = ffn_num_input
        self.ffn_num_hiddens = ffn_num_hiddens
        self.num_heads = num_heads
        self.key_size = key_size
        self.query_size = query_size
        self.value_size = value_size
        # Avoid a shared mutable default argument: fall back to [32] here.
        self.norm_shape = norm_shape if norm_shape is not None else [32]
        # Training settings recorded alongside the architecture.
        self.batch_size = batch_size
        self.num_steps = num_steps
        self.lr = lr
        self.num_epochs = num_epochs
        super().__init__(**kwargs)
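

# --- Usage sketch (illustrative, not part of the checkpoint) ----------------
# A minimal example of round-tripping this config through the standard
# Hugging Face serialization API and registering it with AutoConfig. The
# directory name "custom-transformer" below is a local path chosen for
# illustration, not a requirement of the class.
if __name__ == "__main__":
    from transformers import AutoConfig

    config = transformerConfig(num_hiddens=32, num_layers=2, num_heads=4)
    config.save_pretrained("custom-transformer")  # writes config.json
    reloaded = transformerConfig.from_pretrained("custom-transformer")
    assert reloaded.num_hiddens == config.num_hiddens

    # Registration lets AutoConfig resolve model_type="custom_transformer"
    # back to this class when loading from a directory or the Hub.
    AutoConfig.register("custom_transformer", transformerConfig)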