|
{
  "architectures": [
    "HrwkvHybridForCausalLM"
  ],
  "description": "Hybrid-RWKV Strategically Interleaved RWKV-Attention",
  "base_model": "RekaAI/reka-flash-3.1",
  "model_revision": "",
  "transformer_layers": [8, 15, 22, 29, 36, 43],
  "rwkv_layers": [0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 13, 14, 16, 17, 18, 19, 20, 21, 23, 24, 25, 26, 27, 28, 30, 31, 32, 33, 34, 35, 37, 38, 39, 40, 41, 42],
  "rwkv_architecture": "hxa079",
  "enable_qk_norm": false,
  "nope_in_transformer": true,
  "nope_in_rwkv": false,
  "lora_rank_decay": 320,
  "lora_rank_iclr": 128,
  "lora_rank_value_residual_mix": 96,
  "lora_rank_key_residual_mix": 96,
  "lora_rank_gate": 384,
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 100257,
  "classifier_dropout": 0.0,
  "eos_token_id": 100257,
  "head_dim": 96,
  "hidden_act": "silu",
  "hidden_size": 6144,
  "id2label": {
    "0": "LABEL_0"
  },
  "initializer_range": 0.006,
  "intermediate_size": 19648,
  "label2id": {
    "LABEL_0": 0
  },
  "max_position_embeddings": 98304,
  "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 64,
  "num_hidden_layers": 44,
  "num_key_value_heads": 8,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-05,
  "rope_scaling": null,
  "rope_theta": 8000000,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.50.3",
  "use_cache": true,
  "vocab_size": 100352
}
|
|
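Since "HrwkvHybridForCausalLM" is not a stock transformers architecture, loading a checkpoint that uses this config presumably requires the repository's own modeling code via trust_remote_code. The sketch below shows one plausible way to load it; the repository id is a placeholder, and the availability of the custom config attributes (transformer_layers, rwkv_layers) at runtime is an assumption based on the fields above, not something the config itself guarantees.

```python
# Minimal loading sketch, assuming the checkpoint repo ships the custom
# HrwkvHybridForCausalLM modeling code (hence trust_remote_code=True).
# "path/to/hybrid-rwkv-checkpoint" is a placeholder repo id, not the real one.
import torch
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer

repo_id = "path/to/hybrid-rwkv-checkpoint"  # placeholder

config = AutoConfig.from_pretrained(repo_id, trust_remote_code=True)
# Per the config above: 6 attention layers interleaved among 38 RWKV (hxa079) layers,
# 44 hidden layers in total. Attribute names assumed to match the JSON keys.
print(config.transformer_layers)  # expected: [8, 15, 22, 29, 36, 43]
print(config.rwkv_layers)         # expected: the remaining 38 layer indices

model = AutoModelForCausalLM.from_pretrained(
    repo_id,
    torch_dtype=torch.bfloat16,   # matches "torch_dtype": "bfloat16" in the config
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(repo_id, trust_remote_code=True)
```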