# File size: 478 Bytes
# 7d37d64
---
# Mixture-of-experts merge configuration: eight entries under `experts` plus
# one entry under `shared_experts`, all built from the same source checkpoint.
# NOTE(review): the key names look like mergekit's MoE merge schema; confirm
# against the tool that actually consumes this file.

# Scalar settings.
base_model: Qwen/QwQ-32B
architecture: qwen
dtype: bfloat16
# NOTE(review): presumably selects randomly initialized gates, which would
# explain why no expert below carries prompts. Verify in the tool's docs.
gate_mode: random

# Expert list: eight entries, each one pointing at the same source model.
experts:
  - source_model: Qwen/QwQ-32B
  - source_model: Qwen/QwQ-32B
  - source_model: Qwen/QwQ-32B
  - source_model: Qwen/QwQ-32B
  - source_model: Qwen/QwQ-32B
  - source_model: Qwen/QwQ-32B
  - source_model: Qwen/QwQ-32B
  - source_model: Qwen/QwQ-32B

# Shared expert list: a single entry, again the same source model.
shared_experts:
  - source_model: Qwen/QwQ-32B
    # Downweight output from the shared expert to prevent overcooking the
    # model.
    residual_scale: 0.1