AvivBick committed (verified)
Commit 5398256 · 1 Parent(s): fbf6cf9

Update config.json

Files changed (1):
  1. config.json +25 -1
config.json CHANGED
@@ -1 +1,25 @@
- {"name": "LayeredMambaLM", "input": {"vocab_size": 128256, "tie_embeddings": true, "pad_vocab_size_multiple": 8, "lm_head_bias": false}, "MixerModel": {"name": "MixerModel", "input": {"d_model": 3072, "n_layer": 28, "lm_head_prenorm": "rms"}, "Blocks": [{"name": "LlamaBlock", "n_layers": 28, "input": {"resid_dropout": 0.0, "mlp_intermediate_size": 8192, "mlp_act_fn": "silu"}, "Layer": {"name": "DiscreteMamba2", "input": {"d_state": 64, "n_qk_heads": 32, "n_v_heads": 32, "expand": 1, "chunk_size": 128, "activation": "identity", "use_ref_impl": false, "bias": false, "norm_cls": "none", "initializer": {"a_log": "default", "x": "default", "B": "default", "C": "default", "D": "default", "z": "identity", "out": "default", "convolution": "identity"}}}}]}}
+ {
+   "model_type": "llamba",
+   "vocab_size": 128256,
+   "tie_embeddings": true,
+   "pad_vocab_size_multiple": 8,
+   "lm_head_bias": false,
+   "d_model": 3072,
+   "n_layer": 28,
+   "resid_dropout": 0.0,
+   "norm_epsilon": 1e-5,
+   "mlp_cfg": {
+     "intermediate_size": 8192,
+     "bias": false,
+     "act_fn": "silu"
+   },
+   "ssm_cfg": {
+     "d_state": 64,
+     "n_v_heads": 32,
+     "n_qk_heads": 32,
+     "expand": 1,
+     "chunk_size": 128,
+     "activation": "identity",
+     "bias": false
+   }
+ }
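
The update flattens the earlier nested LayeredMambaLM layout into a single "llamba" dictionary with mlp_cfg and ssm_cfg sub-blocks. Below is a minimal sketch of reading the new file with Python's standard json module: the field names are taken directly from the diff above, while the derived quantities (padded vocabulary size, inner SSM width, per-head width) are illustrative assumptions about how such fields are typically combined, not this repository's actual loading code.

import json

# Read the updated flat config (assumes config.json is in the working directory).
with open("config.json") as f:
    cfg = json.load(f)

assert cfg["model_type"] == "llamba"

# Round the vocabulary up to the nearest multiple, as pad_vocab_size_multiple suggests.
multiple = cfg["pad_vocab_size_multiple"]
padded_vocab = -(-cfg["vocab_size"] // multiple) * multiple  # ceiling division

# Assumption: inner SSM width follows the usual Mamba convention d_inner = expand * d_model.
d_inner = cfg["ssm_cfg"]["expand"] * cfg["d_model"]
head_dim = d_inner // cfg["ssm_cfg"]["n_v_heads"]

print(f"layers={cfg['n_layer']}, d_model={cfg['d_model']}")
print(f"padded vocab={padded_vocab}, d_inner={d_inner}, head_dim={head_dim}")

With the values in this config, padded_vocab stays at 128256 (already a multiple of 8), d_inner is 3072 (expand = 1), and head_dim works out to 96 across the 32 value heads.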