# ylwt's picture
# Update config.yaml
# a28fd84 verified
# Model architecture description. Every component is written as a
# `type` (component name) plus `args` (its arguments); an argument whose
# name ends in `_config` holds another component spec or null.
#
# NOTE(review): the nesting below was reconstructed from the repeating
# type/args pattern because the flattened text carried no indentation.
# Confirm the placement of the trailing scalar keys after each nested
# `*_config` block against the consumer's schema.
arch:
  type: TransformerLMHeadModel
  args:
    transformer_config:
      type: TransformerDecoderOnlyModel
      args:
        embed_config:
          type: TransformerEmbeddingBlock
          args:
            token_embed_config:
              type: TokenEmbedding
              args:
                n_embed: 1024
                n_vocab: 151936
            # Only the token embedding is configured; the positional and
            # type embeddings and the embedding layer norm are null.
            pos_embed_config: null
            type_embed_config: null
            ln_config: null
            p_drop_embed: 0.0
            concat_strategy: id_first
        decoder_config:
          type: TransformerDecoderBlock
          args:
            attn_config:
              type: LlamaAttention
              args:
                # n_head (16) * head_size (64) equals n_embed (1024).
                n_embed: 1024
                n_pos: 32768
                n_head: 16
                # Same value as n_head.
                n_key_value_head: 16
                head_size: 64
                p_drop_attn: 0.0
                p_drop_resid: 0.0
                bias_attn: true
                bias_proj: false
                cross_attn: false
                scale_dot_product: true
                scale_layer_wise: false
                layer_idx: null
                rope_config:
                  type: MistralRotaryEmbedding
                  args:
                    # Matches head_size and n_pos of the attention above.
                    rotary_head_size: 64
                    n_pos: 32768
                    # The exponent sign is explicit on purpose: YAML 1.1
                    # loaders (e.g. PyYAML) resolve a float only when the
                    # exponent is signed, so `1.0e06` loads as a string.
                    base: 1.0e+06
                    scaling_type: null
                    scaling_factor: null
            mlp_config:
              type: LlamaMLP
              args:
                n_embed: 1024
                n_inner: 2816
                act_fn_config:
                  type: SiLUActivation
                  args: {}
            ln_config:
              type: LlamaRMSNorm
              args:
                n_embed: 1024
                ln_eps: 1.0e-06
            n_embed: 1024
            post_norm: false
            add_cross_attn: false
        n_embed: 1024
        n_layer: 24
        n_head: 16
        # Same norm type and arguments as the decoder block's ln_config.
        ln_config:
          type: LlamaRMSNorm
          args:
            n_embed: 1024
            ln_eps: 1.0e-06
        perform_linear_bias: false
        attn_window_size_loop_unit: null
    lm_head_config:
      type: TransformerLMHead
      args:
        # Same n_vocab and n_embed as the token embedding.
        n_vocab: 151936
        n_embed: 1024
        perform_transform: false
        act_fn_config: null
        ln_config: null