{
  "LanguageModel": {
    "type": "LayeredMambaLM",
    "input": {
      "vocab_size": 51200,
      "pad_vocab_size_multiple": 8
    }
  },
  "MixerModel": {
    "type": "LayerMixerModel",
    "input": {
      "d_model": 2048,
      "n_layer": 24,
      "lm_head_prenorm": "layer"
    }
  },
  "Block1": {
    "n_layers": 4,
    "BlockType": "modules.phi_block",
    "block_input": {
      "resid_dropout": 0.0
    },
    "CoreType": "modules.mixers.discrete_mamba2",
    "core_input": {
      "d_state": 64,
      "n_v_heads": 32,
      "n_qk_heads": 32,
      "d_conv": 4,
      "conv_bias": true,
      "expand": 1,
      "chunk_size": 128,
      "activation": "identity",
      "bias": false
    }
  },
  "Block2": {
    "n_layers": 1,
    "BlockType": "modules.phi_block",
    "block_input": {
      "resid_dropout": 0.0
    },
    "CoreType": "modules.mixers.phi_attention",
    "core_input": {}
  },
  "Block3": {
    "n_layers": 1,
    "BlockType": "modules.phi_block",
    "block_input": {
      "resid_dropout": 0.0
    },
    "CoreType": "modules.mixers.discrete_mamba2",
    "core_input": {
      "d_state": 64,
      "n_v_heads": 32,
      "n_qk_heads": 32,
      "d_conv": 4,
      "conv_bias": true,
      "expand": 1,
      "chunk_size": 128,
      "activation": "identity",
      "bias": false
    }
  },
  "Block4": {
    "n_layers": 4,
    "BlockType": "modules.phi_block",
    "block_input": {
      "resid_dropout": 0.0
    },
    "CoreType": "modules.mixers.discrete_mamba2",
    "core_input": {
      "d_state": 64,
      "n_v_heads": 32,
      "n_qk_heads": 32,
      "d_conv": 4,
      "conv_bias": true,
      "expand": 1,
      "chunk_size": 128,
      "activation": "identity",
      "bias": false
    }
  },
  "Block5": {
    "n_layers": 1,
    "BlockType": "modules.phi_block",
    "block_input": {
      "resid_dropout": 0.0
    },
    "CoreType": "modules.mixers.phi_attention",
    "core_input": {}
  },
  "Block6": {
    "n_layers": 1,
    "BlockType": "modules.phi_block",
    "block_input": {
      "resid_dropout": 0.0
    },
    "CoreType": "modules.mixers.discrete_mamba2",
    "core_input": {
      "d_state": 64,
      "n_v_heads": 32,
      "n_qk_heads": 32,
      "d_conv": 4,
      "conv_bias": true,
      "expand": 1,
      "chunk_size": 128,
      "activation": "identity",
      "bias": false
    }
  },
  "Block7": {
    "n_layers": 4,
    "BlockType": "modules.phi_block",
    "block_input": {
      "resid_dropout": 0.0
    },
    "CoreType": "modules.mixers.discrete_mamba2",
    "core_input": {
      "d_state": 64,
      "n_v_heads": 32,
      "n_qk_heads": 32,
      "d_conv": 4,
      "conv_bias": true,
      "expand": 1,
      "chunk_size": 128,
      "activation": "identity",
      "bias": false
    }
  },
  "Block8": {
    "n_layers": 1,
    "BlockType": "modules.phi_block",
    "block_input": {
      "resid_dropout": 0.0
    },
    "CoreType": "modules.mixers.phi_attention",
    "core_input": {}
  },
  "Block9": {
    "n_layers": 1,
    "BlockType": "modules.phi_block",
    "block_input": {
      "resid_dropout": 0.0
    },
    "CoreType": "modules.mixers.discrete_mamba2",
    "core_input": {
      "d_state": 64,
      "n_v_heads": 32,
      "n_qk_heads": 32,
      "d_conv": 4,
      "conv_bias": true,
      "expand": 1,
      "chunk_size": 128,
      "activation": "identity",
      "bias": false
    }
  },
  "Block10": {
    "n_layers": 4,
    "BlockType": "modules.phi_block",
    "block_input": {
      "resid_dropout": 0.0
    },
    "CoreType": "modules.mixers.discrete_mamba2",
    "core_input": {
      "d_state": 64,
      "n_v_heads": 32,
      "n_qk_heads": 32,
      "d_conv": 4,
      "conv_bias": true,
      "expand": 1,
      "chunk_size": 128,
      "activation": "identity",
      "bias": false
    }
  },
  "Block11": {
    "n_layers": 1,
    "BlockType": "modules.phi_block",
    "block_input": {
      "resid_dropout": 0.0
    },
    "CoreType": "modules.mixers.phi_attention",
    "core_input": {}
  },
  "Block12": {
    "n_layers": 1,
    "BlockType": "modules.phi_block",
    "block_input": {
      "resid_dropout": 0.0
    },
    "CoreType": "modules.mixers.discrete_mamba2",
    "core_input": {
      "d_state": 64,
      "n_v_heads": 32,
      "n_qk_heads": 32,
      "d_conv": 4,
      "conv_bias": true,
      "expand": 1,
      "chunk_size": 128,
      "activation": "identity",
      "bias": false
    }
  }
}