# mergekit configuration: SLERP merge of two Qwen2.5-14B-based checkpoints
# over the full 48-layer range, using the YOYO-V3 model as the base.
base_model: YOYO-AI/Qwen2.5-14B-1M-YOYO-V3
dtype: bfloat16
merge_method: slerp
parameters:
  # Interpolation factor `t`, selected per tensor by name filter.
  # NOTE(review): per the mergekit docs, t=0 yields base_model and t=1 the
  # other model, and a list value is a gradient spread across the layers —
  # confirm against the mergekit version in use.
  t:
    - filter: self_attn
      value: [0.7, 0.6, 0.5, 0.4, 0.3]
    - filter: mlp
      value: [0.3, 0.4, 0.5, 0.6, 0.7]
    # The layernorm filters are listed separately rather than joined with "|"
    # so that each one matches whether filters are treated as plain substrings
    # or as patterns. NOTE(review): verify against mergekit's filter matching.
    - filter: input_layernorm
      value: 0.6
    - filter: post_attention_layernorm
      value: 0.6
    # No filter: applies to every tensor not matched by an entry above.
    - value: 0.5
slices:
  - sources:
      - layer_range: [0, 48]
        model: YOYO-AI/Qwen2.5-14B-1M-YOYO-V3
      - layer_range: [0, 48]
        model: Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.7