---
# Mixture-of-experts merge configuration.
# NOTE(review): the key set (base_model / gate_mode / shared_experts / experts)
# looks like a mergekit-moe config -- confirm against the consuming tool.
# Every model reference in this file points at the same checkpoint.

base_model: Qwen/Qwen2-1.5B
gate_mode: random
dtype: bfloat16

# A single shared-expert entry.
shared_experts:
  - source_model: Qwen/Qwen2-1.5B

# Two expert entries, both taken from the same source model.
experts:
  - source_model: Qwen/Qwen2-1.5B
  - source_model: Qwen/Qwen2-1.5B