kewang2 committed on
Commit
84b299a
·
verified ·
1 Parent(s): 6cc0914

update safetensors (#4)

Browse files

- update safetensors (a9a066a1b6adde77ae4311f5c6ff8215ae0cc43d)

model-00001-of-00031.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0ec367513a1319878a3fb158343a1f427e431abc1d794897c766f59afa35fb31
3
  size 4580626288
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdf57efc8312d2b40047468e02fcfa5dea83e093324d6e3ae8c605585b35e865
3
  size 4580626288
model-00002-of-00031.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:214e4f99fd8fb48c5a89d56af1f8013620fd7c8b79f50492c55dd18613711be0
3
  size 4316163132
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c89b697ba20329c0817407347be3bdc30dc9b9b12109ac14d1bbcb1242a2858
3
  size 4316163132
model-00003-of-00031.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0b186f92f960c9234defab1c6792719d26b3bc7b93279037832e89fd6d2ad7c9
3
  size 4316163132
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4f271827bbe627d55e3e55ee2faf2537dde193e0ad5bbdc253326b7d62a4945
3
  size 4316163132
model-00004-of-00031.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4230d8e2a948534fcd8a804a31fe15bac0a1ac493d99f5522214c02dc5c4d7fb
3
  size 4404465896
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d38a85f21294a9ec8d8a3d21bb8829a236eccb9012c6f06e6488cfa1e0e5317c
3
  size 4404465896
model-00005-of-00031.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4e10c728188282b4282663a310d1c730807dddacad8eb75b27de2484fe292693
3
  size 4316163132
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c468d4604dfb310dd9ef00b5ae9af3fb92c272665e2adbb89166f026acebaf5
3
  size 4316163132
model-00006-of-00031.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3b36cd359b17e2cf6359e93fa481b650a8f06129bbaa03abe3b677b4bb1894e5
3
  size 4316163132
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e95489aee501efa03690232c580e076abe7ecaf8e418dd748370a69a9bdd67dd
3
  size 4316163132
model-00007-of-00031.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a5444da9522151cf4fb9c70d654c0b2a5a10d82531e7a7d93dae3eaf7bd1588e
3
  size 4404465896
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51f69ec461af84d18a47b929f32c96155394118aad2cc41458a13604d307e7e5
3
  size 4404465896
model-00008-of-00031.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c3436de4ac57ec20d06ffb43ae8770390bae134ee4ecee760529bede40f1c154
3
  size 4316163140
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23956280445176270e10a468aa01df6806e803856a5b8742212803a1201c108f
3
  size 4316163140
model-00009-of-00031.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:86288e09e95668acba93a9aa4bd50e0e901f6589f1ad708d4c77952f40b7eb7b
3
  size 4316163148
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ddba3f0439acd2d9e9df455013074d9a26b05dc2ddf768295b2bbf49da3b2b6
3
  size 4316163148
model-00010-of-00031.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d180831fa09b54b07255865cd11f7a8ff45b765db558170c5e95c6eb3798130a
3
  size 4404465928
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc568763d6d779f8178201a9dee494b8b46cb77fa2bea5a563182f0091a2f6d4
3
  size 4404465928
model-00011-of-00031.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7b9d43e791bf9249e1a6bfcffb35d3d1961c9a3cf01e60146bb94657a44c952b
3
  size 4316163148
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7bc951e498a80a690b2363074f47b6fe0a6ff88c63ee552eebc7cd707eef6a82
3
  size 4316163148
model-00012-of-00031.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:95e586bcbf6f60a3d2af9ff0bebf3892bdc0ac30d6266273953c06569b369c7e
3
  size 4316163148
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f36d087f5727324284c1c42956e383c8f5d739193a5fd2e3b8ee454c9c9edd7
3
  size 4316163148
model-00013-of-00031.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f218ab25afb5decd2c70dcc9e371a9b27551de621eb54394120d30ba27bcdff9
3
  size 4404465928
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:372550c9341bc79e72ca877aa52357ac4af6fabd9c7341ea316139574f881b8c
3
  size 4404465928
model-00014-of-00031.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:81675d1662d35ead898c8214b1e9189dd405756cc3021ffe09ad1697308f4f8e
3
  size 4316163148
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e3deb1415d30dd21eadb6a8d5e227a975e1cdb734edf60c1b69260dd66b2440
3
  size 4316163148
model-00015-of-00031.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:de0afd542a68df60b2c8694e000c0bd1f341d7b2b037e0b22a2c252fa34e6327
3
  size 4316163148
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f15d7267d65cd7ace9948c3827dbc69cdc37ddaee8ed7121e42960e44980c786
3
  size 4316163148
model-00016-of-00031.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0c462b983ba00eea8298b3529d3dbddbdca2a1710dcdd7e145bdfee590f9f216
3
  size 4404465928
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c1812b2649b4fe477d850ee55ec11f3513a410319ce646ba5ee7ef9bae64b0b
3
  size 4404465928
model-00017-of-00031.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1762e1763fdd25635f90ea14b4a2ac3b4f0e9bdea3ba169464b9fcf5df5e640b
3
  size 4316163148
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:237480227d0d80a219b5554a0d52f866d0cab4651e91e1873498344fb5c5b77d
3
  size 4316163148
model-00018-of-00031.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a3a7b858cf5dd2c4a0443c12c3f7934695c0d4c165896210092a445410960450
3
  size 4316163148
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8acda466f31ac834c7a3e14e7588ae21fcd25fbc2b2c1c949787252d802c3bbd
3
  size 4316163148
model-00019-of-00031.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a83957fc4d108717308a4722bc93ce6079874e47d4662f7c5ee840e809d971e6
3
  size 4404465928
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e18a26e43e65f754485d21bb457910f000b5a5a3e5dfab94ed221d9575a064a6
3
  size 4404465928
model-00020-of-00031.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:28bf912a14f4213e0d352f0cadf52a9bf5e15ede36b6c00f4d7ba7584e29d9ab
3
  size 4316163148
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:275d49469f1c6fe973f55f3070bb3042bf9f803f156bf6079dbff0d507a84d1e
3
  size 4316163148
model-00021-of-00031.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4f4be79a690a53de7ce9af3dfc0bfa0b10972e0561561eedee6acff441715563
3
  size 4316163148
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70147bb9e6137f46380caf2d9e3f5851a4028824aef1e7539a4819cacdcc60f9
3
  size 4316163148
model-00022-of-00031.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1b06b77c9c1d56b4578f9e2d31f99b82cd20c07628d130fee469c0e8fdb72958
3
  size 4404465928
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a994f2c89910d0e03ba9a0436092451475612c00985e78eeb5cd097c3325ccec
3
  size 4404465928
model-00023-of-00031.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1ed89844123fe82011483e19f7e030e6d689d4c7b28c5e1be76d0b12a3f606d1
3
  size 4316163148
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63698e7f206c8b46c33f50c0bb501f46814846eca8b4131828b37b983e23c198
3
  size 4316163148
model-00024-of-00031.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f8350b59292272e1a93e6938d0806a13f7a2c0614124430b3f119d19b071e9da
3
  size 4316163148
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e237058b37e165b52de38125b67e327ff1b2e148eeecf4bfcb174b4971e90832
3
  size 4316163148
model-00025-of-00031.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8d36547baac359bc6778a0319f6557ed95d491a8c0803b8e8f07a98bf21d83e6
3
  size 4404465928
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e46bb3504ff644b7c175072b52d9c976953c1b28516b1648ee31cc031275483b
3
  size 4404465928
model-00026-of-00031.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:49c3ee65cb30d135da801f00f5d096066d5df3d91355486ea2d55f8606ba27d8
3
  size 4316163148
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69f70ecb615a00beaf1e388c7186685ed7a6e60562590e8440beba4ab77dd7db
3
  size 4316163148
model-00027-of-00031.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bf5e4ce8ef7170c1834079e5fdf355d653a16ed97ed4125730f04191c2a7eb03
3
  size 4316163148
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc46949078ee225020630210cb4cfadba32e411fb77fb4eb3061d39e22952dba
3
  size 4316163148
model-00028-of-00031.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6d58f4bb5b36cf5bc14593a428e875fb41105cd5be772e9d0d054501f78177bd
3
  size 4404465928
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76ed68f8ecee354bb6ae0c9b4220011f237be526c94e4d00d1ffd941e6ac3482
3
  size 4404465928
model-00029-of-00031.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:da1e00a52cec0e2030cd496c6acfbdc5aa1856ed5159c45f82920c491f2078fe
3
  size 4316163148
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cda502f90445769a3234e6691429639af26c9d14b17119755c59195182684aa7
3
  size 4316163148
model-00030-of-00031.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:acaefde94e3e4f4348b82e8625528308f565428f7c38d21c0fe71c0c0c19570d
3
  size 4316163148
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a514517f20ac8e3fe06f5954c2f7332719d5c9774a472a3a3bc3f96a86c824d
3
  size 4316163148
model-00031-of-00031.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:492b5a38c9d4ecadf247290f0f34ba7a46f5b20e2afdb53537782d6205ab6b83
3
  size 2290102976
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6bc0b65cc4b5bc75d20a0fb6e20a3ec1264033fc1682928cae9b3acca9db32f7
3
  size 2290102976
model.safetensors.index.json CHANGED
@@ -4,15 +4,15 @@
4
  },
5
  "weight_map": {
6
  "lm_head.weight": "model-00031-of-00031.safetensors",
7
- "transformer.blocks.0.ffn.experts.mlp.v1.input_scale": "model-00001-of-00031.safetensors",
8
- "transformer.blocks.0.ffn.experts.mlp.v1.weight": "model-00001-of-00031.safetensors",
9
- "transformer.blocks.0.ffn.experts.mlp.v1.weight_scale": "model-00001-of-00031.safetensors",
10
- "transformer.blocks.0.ffn.experts.mlp.w1.input_scale": "model-00001-of-00031.safetensors",
11
- "transformer.blocks.0.ffn.experts.mlp.w1.weight": "model-00001-of-00031.safetensors",
12
- "transformer.blocks.0.ffn.experts.mlp.w1.weight_scale": "model-00001-of-00031.safetensors",
13
- "transformer.blocks.0.ffn.experts.mlp.w2.input_scale": "model-00001-of-00031.safetensors",
14
- "transformer.blocks.0.ffn.experts.mlp.w2.weight": "model-00001-of-00031.safetensors",
15
- "transformer.blocks.0.ffn.experts.mlp.w2.weight_scale": "model-00001-of-00031.safetensors",
16
  "transformer.blocks.0.ffn.router.layer.weight": "model-00001-of-00031.safetensors",
17
  "transformer.blocks.0.norm_attn_norm.attn.Wqkv.input_scale": "model-00001-of-00031.safetensors",
18
  "transformer.blocks.0.norm_attn_norm.attn.Wqkv.weight": "model-00001-of-00031.safetensors",
@@ -23,15 +23,15 @@
23
  "transformer.blocks.0.norm_attn_norm.attn.out_proj.weight_scale": "model-00001-of-00031.safetensors",
24
  "transformer.blocks.0.norm_attn_norm.norm_1.weight": "model-00001-of-00031.safetensors",
25
  "transformer.blocks.0.norm_attn_norm.norm_2.weight": "model-00001-of-00031.safetensors",
26
- "transformer.blocks.1.ffn.experts.mlp.v1.input_scale": "model-00002-of-00031.safetensors",
27
- "transformer.blocks.1.ffn.experts.mlp.v1.weight": "model-00002-of-00031.safetensors",
28
- "transformer.blocks.1.ffn.experts.mlp.v1.weight_scale": "model-00002-of-00031.safetensors",
29
- "transformer.blocks.1.ffn.experts.mlp.w1.input_scale": "model-00002-of-00031.safetensors",
30
- "transformer.blocks.1.ffn.experts.mlp.w1.weight": "model-00002-of-00031.safetensors",
31
- "transformer.blocks.1.ffn.experts.mlp.w1.weight_scale": "model-00002-of-00031.safetensors",
32
- "transformer.blocks.1.ffn.experts.mlp.w2.input_scale": "model-00002-of-00031.safetensors",
33
- "transformer.blocks.1.ffn.experts.mlp.w2.weight": "model-00002-of-00031.safetensors",
34
- "transformer.blocks.1.ffn.experts.mlp.w2.weight_scale": "model-00002-of-00031.safetensors",
35
  "transformer.blocks.1.ffn.router.layer.weight": "model-00001-of-00031.safetensors",
36
  "transformer.blocks.1.norm_attn_norm.attn.Wqkv.input_scale": "model-00001-of-00031.safetensors",
37
  "transformer.blocks.1.norm_attn_norm.attn.Wqkv.weight": "model-00001-of-00031.safetensors",
@@ -42,15 +42,15 @@
42
  "transformer.blocks.1.norm_attn_norm.attn.out_proj.weight_scale": "model-00001-of-00031.safetensors",
43
  "transformer.blocks.1.norm_attn_norm.norm_1.weight": "model-00001-of-00031.safetensors",
44
  "transformer.blocks.1.norm_attn_norm.norm_2.weight": "model-00001-of-00031.safetensors",
45
- "transformer.blocks.10.ffn.experts.mlp.v1.input_scale": "model-00009-of-00031.safetensors",
46
- "transformer.blocks.10.ffn.experts.mlp.v1.weight": "model-00009-of-00031.safetensors",
47
- "transformer.blocks.10.ffn.experts.mlp.v1.weight_scale": "model-00009-of-00031.safetensors",
48
- "transformer.blocks.10.ffn.experts.mlp.w1.input_scale": "model-00008-of-00031.safetensors",
49
- "transformer.blocks.10.ffn.experts.mlp.w1.weight": "model-00008-of-00031.safetensors",
50
- "transformer.blocks.10.ffn.experts.mlp.w1.weight_scale": "model-00008-of-00031.safetensors",
51
- "transformer.blocks.10.ffn.experts.mlp.w2.input_scale": "model-00009-of-00031.safetensors",
52
- "transformer.blocks.10.ffn.experts.mlp.w2.weight": "model-00009-of-00031.safetensors",
53
- "transformer.blocks.10.ffn.experts.mlp.w2.weight_scale": "model-00009-of-00031.safetensors",
54
  "transformer.blocks.10.ffn.router.layer.weight": "model-00008-of-00031.safetensors",
55
  "transformer.blocks.10.norm_attn_norm.attn.Wqkv.input_scale": "model-00008-of-00031.safetensors",
56
  "transformer.blocks.10.norm_attn_norm.attn.Wqkv.weight": "model-00008-of-00031.safetensors",
@@ -61,15 +61,15 @@
61
  "transformer.blocks.10.norm_attn_norm.attn.out_proj.weight_scale": "model-00008-of-00031.safetensors",
62
  "transformer.blocks.10.norm_attn_norm.norm_1.weight": "model-00008-of-00031.safetensors",
63
  "transformer.blocks.10.norm_attn_norm.norm_2.weight": "model-00008-of-00031.safetensors",
64
- "transformer.blocks.11.ffn.experts.mlp.v1.input_scale": "model-00009-of-00031.safetensors",
65
- "transformer.blocks.11.ffn.experts.mlp.v1.weight": "model-00009-of-00031.safetensors",
66
- "transformer.blocks.11.ffn.experts.mlp.v1.weight_scale": "model-00009-of-00031.safetensors",
67
- "transformer.blocks.11.ffn.experts.mlp.w1.input_scale": "model-00009-of-00031.safetensors",
68
- "transformer.blocks.11.ffn.experts.mlp.w1.weight": "model-00009-of-00031.safetensors",
69
- "transformer.blocks.11.ffn.experts.mlp.w1.weight_scale": "model-00009-of-00031.safetensors",
70
- "transformer.blocks.11.ffn.experts.mlp.w2.input_scale": "model-00010-of-00031.safetensors",
71
- "transformer.blocks.11.ffn.experts.mlp.w2.weight": "model-00010-of-00031.safetensors",
72
- "transformer.blocks.11.ffn.experts.mlp.w2.weight_scale": "model-00010-of-00031.safetensors",
73
  "transformer.blocks.11.ffn.router.layer.weight": "model-00009-of-00031.safetensors",
74
  "transformer.blocks.11.norm_attn_norm.attn.Wqkv.input_scale": "model-00009-of-00031.safetensors",
75
  "transformer.blocks.11.norm_attn_norm.attn.Wqkv.weight": "model-00009-of-00031.safetensors",
@@ -80,15 +80,15 @@
80
  "transformer.blocks.11.norm_attn_norm.attn.out_proj.weight_scale": "model-00009-of-00031.safetensors",
81
  "transformer.blocks.11.norm_attn_norm.norm_1.weight": "model-00009-of-00031.safetensors",
82
  "transformer.blocks.11.norm_attn_norm.norm_2.weight": "model-00009-of-00031.safetensors",
83
- "transformer.blocks.12.ffn.experts.mlp.v1.input_scale": "model-00010-of-00031.safetensors",
84
- "transformer.blocks.12.ffn.experts.mlp.v1.weight": "model-00010-of-00031.safetensors",
85
- "transformer.blocks.12.ffn.experts.mlp.v1.weight_scale": "model-00010-of-00031.safetensors",
86
- "transformer.blocks.12.ffn.experts.mlp.w1.input_scale": "model-00010-of-00031.safetensors",
87
- "transformer.blocks.12.ffn.experts.mlp.w1.weight": "model-00010-of-00031.safetensors",
88
- "transformer.blocks.12.ffn.experts.mlp.w1.weight_scale": "model-00010-of-00031.safetensors",
89
- "transformer.blocks.12.ffn.experts.mlp.w2.input_scale": "model-00010-of-00031.safetensors",
90
- "transformer.blocks.12.ffn.experts.mlp.w2.weight": "model-00010-of-00031.safetensors",
91
- "transformer.blocks.12.ffn.experts.mlp.w2.weight_scale": "model-00010-of-00031.safetensors",
92
  "transformer.blocks.12.ffn.router.layer.weight": "model-00010-of-00031.safetensors",
93
  "transformer.blocks.12.norm_attn_norm.attn.Wqkv.input_scale": "model-00010-of-00031.safetensors",
94
  "transformer.blocks.12.norm_attn_norm.attn.Wqkv.weight": "model-00010-of-00031.safetensors",
@@ -99,15 +99,15 @@
99
  "transformer.blocks.12.norm_attn_norm.attn.out_proj.weight_scale": "model-00010-of-00031.safetensors",
100
  "transformer.blocks.12.norm_attn_norm.norm_1.weight": "model-00010-of-00031.safetensors",
101
  "transformer.blocks.12.norm_attn_norm.norm_2.weight": "model-00010-of-00031.safetensors",
102
- "transformer.blocks.13.ffn.experts.mlp.v1.input_scale": "model-00011-of-00031.safetensors",
103
- "transformer.blocks.13.ffn.experts.mlp.v1.weight": "model-00011-of-00031.safetensors",
104
- "transformer.blocks.13.ffn.experts.mlp.v1.weight_scale": "model-00011-of-00031.safetensors",
105
- "transformer.blocks.13.ffn.experts.mlp.w1.input_scale": "model-00011-of-00031.safetensors",
106
- "transformer.blocks.13.ffn.experts.mlp.w1.weight": "model-00011-of-00031.safetensors",
107
- "transformer.blocks.13.ffn.experts.mlp.w1.weight_scale": "model-00011-of-00031.safetensors",
108
- "transformer.blocks.13.ffn.experts.mlp.w2.input_scale": "model-00011-of-00031.safetensors",
109
- "transformer.blocks.13.ffn.experts.mlp.w2.weight": "model-00011-of-00031.safetensors",
110
- "transformer.blocks.13.ffn.experts.mlp.w2.weight_scale": "model-00011-of-00031.safetensors",
111
  "transformer.blocks.13.ffn.router.layer.weight": "model-00010-of-00031.safetensors",
112
  "transformer.blocks.13.norm_attn_norm.attn.Wqkv.input_scale": "model-00010-of-00031.safetensors",
113
  "transformer.blocks.13.norm_attn_norm.attn.Wqkv.weight": "model-00010-of-00031.safetensors",
@@ -118,15 +118,15 @@
118
  "transformer.blocks.13.norm_attn_norm.attn.out_proj.weight_scale": "model-00010-of-00031.safetensors",
119
  "transformer.blocks.13.norm_attn_norm.norm_1.weight": "model-00010-of-00031.safetensors",
120
  "transformer.blocks.13.norm_attn_norm.norm_2.weight": "model-00010-of-00031.safetensors",
121
- "transformer.blocks.14.ffn.experts.mlp.v1.input_scale": "model-00012-of-00031.safetensors",
122
- "transformer.blocks.14.ffn.experts.mlp.v1.weight": "model-00012-of-00031.safetensors",
123
- "transformer.blocks.14.ffn.experts.mlp.v1.weight_scale": "model-00012-of-00031.safetensors",
124
- "transformer.blocks.14.ffn.experts.mlp.w1.input_scale": "model-00011-of-00031.safetensors",
125
- "transformer.blocks.14.ffn.experts.mlp.w1.weight": "model-00011-of-00031.safetensors",
126
- "transformer.blocks.14.ffn.experts.mlp.w1.weight_scale": "model-00011-of-00031.safetensors",
127
- "transformer.blocks.14.ffn.experts.mlp.w2.input_scale": "model-00012-of-00031.safetensors",
128
- "transformer.blocks.14.ffn.experts.mlp.w2.weight": "model-00012-of-00031.safetensors",
129
- "transformer.blocks.14.ffn.experts.mlp.w2.weight_scale": "model-00012-of-00031.safetensors",
130
  "transformer.blocks.14.ffn.router.layer.weight": "model-00011-of-00031.safetensors",
131
  "transformer.blocks.14.norm_attn_norm.attn.Wqkv.input_scale": "model-00011-of-00031.safetensors",
132
  "transformer.blocks.14.norm_attn_norm.attn.Wqkv.weight": "model-00011-of-00031.safetensors",
@@ -137,15 +137,15 @@
137
  "transformer.blocks.14.norm_attn_norm.attn.out_proj.weight_scale": "model-00011-of-00031.safetensors",
138
  "transformer.blocks.14.norm_attn_norm.norm_1.weight": "model-00011-of-00031.safetensors",
139
  "transformer.blocks.14.norm_attn_norm.norm_2.weight": "model-00011-of-00031.safetensors",
140
- "transformer.blocks.15.ffn.experts.mlp.v1.input_scale": "model-00012-of-00031.safetensors",
141
- "transformer.blocks.15.ffn.experts.mlp.v1.weight": "model-00012-of-00031.safetensors",
142
- "transformer.blocks.15.ffn.experts.mlp.v1.weight_scale": "model-00012-of-00031.safetensors",
143
- "transformer.blocks.15.ffn.experts.mlp.w1.input_scale": "model-00012-of-00031.safetensors",
144
- "transformer.blocks.15.ffn.experts.mlp.w1.weight": "model-00012-of-00031.safetensors",
145
- "transformer.blocks.15.ffn.experts.mlp.w1.weight_scale": "model-00012-of-00031.safetensors",
146
- "transformer.blocks.15.ffn.experts.mlp.w2.input_scale": "model-00013-of-00031.safetensors",
147
- "transformer.blocks.15.ffn.experts.mlp.w2.weight": "model-00013-of-00031.safetensors",
148
- "transformer.blocks.15.ffn.experts.mlp.w2.weight_scale": "model-00013-of-00031.safetensors",
149
  "transformer.blocks.15.ffn.router.layer.weight": "model-00012-of-00031.safetensors",
150
  "transformer.blocks.15.norm_attn_norm.attn.Wqkv.input_scale": "model-00012-of-00031.safetensors",
151
  "transformer.blocks.15.norm_attn_norm.attn.Wqkv.weight": "model-00012-of-00031.safetensors",
@@ -156,15 +156,15 @@
156
  "transformer.blocks.15.norm_attn_norm.attn.out_proj.weight_scale": "model-00012-of-00031.safetensors",
157
  "transformer.blocks.15.norm_attn_norm.norm_1.weight": "model-00012-of-00031.safetensors",
158
  "transformer.blocks.15.norm_attn_norm.norm_2.weight": "model-00012-of-00031.safetensors",
159
- "transformer.blocks.16.ffn.experts.mlp.v1.input_scale": "model-00013-of-00031.safetensors",
160
- "transformer.blocks.16.ffn.experts.mlp.v1.weight": "model-00013-of-00031.safetensors",
161
- "transformer.blocks.16.ffn.experts.mlp.v1.weight_scale": "model-00013-of-00031.safetensors",
162
- "transformer.blocks.16.ffn.experts.mlp.w1.input_scale": "model-00013-of-00031.safetensors",
163
- "transformer.blocks.16.ffn.experts.mlp.w1.weight": "model-00013-of-00031.safetensors",
164
- "transformer.blocks.16.ffn.experts.mlp.w1.weight_scale": "model-00013-of-00031.safetensors",
165
- "transformer.blocks.16.ffn.experts.mlp.w2.input_scale": "model-00013-of-00031.safetensors",
166
- "transformer.blocks.16.ffn.experts.mlp.w2.weight": "model-00013-of-00031.safetensors",
167
- "transformer.blocks.16.ffn.experts.mlp.w2.weight_scale": "model-00013-of-00031.safetensors",
168
  "transformer.blocks.16.ffn.router.layer.weight": "model-00013-of-00031.safetensors",
169
  "transformer.blocks.16.norm_attn_norm.attn.Wqkv.input_scale": "model-00013-of-00031.safetensors",
170
  "transformer.blocks.16.norm_attn_norm.attn.Wqkv.weight": "model-00013-of-00031.safetensors",
@@ -175,15 +175,15 @@
175
  "transformer.blocks.16.norm_attn_norm.attn.out_proj.weight_scale": "model-00013-of-00031.safetensors",
176
  "transformer.blocks.16.norm_attn_norm.norm_1.weight": "model-00013-of-00031.safetensors",
177
  "transformer.blocks.16.norm_attn_norm.norm_2.weight": "model-00013-of-00031.safetensors",
178
- "transformer.blocks.17.ffn.experts.mlp.v1.input_scale": "model-00014-of-00031.safetensors",
179
- "transformer.blocks.17.ffn.experts.mlp.v1.weight": "model-00014-of-00031.safetensors",
180
- "transformer.blocks.17.ffn.experts.mlp.v1.weight_scale": "model-00014-of-00031.safetensors",
181
- "transformer.blocks.17.ffn.experts.mlp.w1.input_scale": "model-00014-of-00031.safetensors",
182
- "transformer.blocks.17.ffn.experts.mlp.w1.weight": "model-00014-of-00031.safetensors",
183
- "transformer.blocks.17.ffn.experts.mlp.w1.weight_scale": "model-00014-of-00031.safetensors",
184
- "transformer.blocks.17.ffn.experts.mlp.w2.input_scale": "model-00014-of-00031.safetensors",
185
- "transformer.blocks.17.ffn.experts.mlp.w2.weight": "model-00014-of-00031.safetensors",
186
- "transformer.blocks.17.ffn.experts.mlp.w2.weight_scale": "model-00014-of-00031.safetensors",
187
  "transformer.blocks.17.ffn.router.layer.weight": "model-00013-of-00031.safetensors",
188
  "transformer.blocks.17.norm_attn_norm.attn.Wqkv.input_scale": "model-00013-of-00031.safetensors",
189
  "transformer.blocks.17.norm_attn_norm.attn.Wqkv.weight": "model-00013-of-00031.safetensors",
@@ -194,15 +194,15 @@
194
  "transformer.blocks.17.norm_attn_norm.attn.out_proj.weight_scale": "model-00013-of-00031.safetensors",
195
  "transformer.blocks.17.norm_attn_norm.norm_1.weight": "model-00013-of-00031.safetensors",
196
  "transformer.blocks.17.norm_attn_norm.norm_2.weight": "model-00013-of-00031.safetensors",
197
- "transformer.blocks.18.ffn.experts.mlp.v1.input_scale": "model-00015-of-00031.safetensors",
198
- "transformer.blocks.18.ffn.experts.mlp.v1.weight": "model-00015-of-00031.safetensors",
199
- "transformer.blocks.18.ffn.experts.mlp.v1.weight_scale": "model-00015-of-00031.safetensors",
200
- "transformer.blocks.18.ffn.experts.mlp.w1.input_scale": "model-00014-of-00031.safetensors",
201
- "transformer.blocks.18.ffn.experts.mlp.w1.weight": "model-00014-of-00031.safetensors",
202
- "transformer.blocks.18.ffn.experts.mlp.w1.weight_scale": "model-00014-of-00031.safetensors",
203
- "transformer.blocks.18.ffn.experts.mlp.w2.input_scale": "model-00015-of-00031.safetensors",
204
- "transformer.blocks.18.ffn.experts.mlp.w2.weight": "model-00015-of-00031.safetensors",
205
- "transformer.blocks.18.ffn.experts.mlp.w2.weight_scale": "model-00015-of-00031.safetensors",
206
  "transformer.blocks.18.ffn.router.layer.weight": "model-00014-of-00031.safetensors",
207
  "transformer.blocks.18.norm_attn_norm.attn.Wqkv.input_scale": "model-00014-of-00031.safetensors",
208
  "transformer.blocks.18.norm_attn_norm.attn.Wqkv.weight": "model-00014-of-00031.safetensors",
@@ -213,15 +213,15 @@
213
  "transformer.blocks.18.norm_attn_norm.attn.out_proj.weight_scale": "model-00014-of-00031.safetensors",
214
  "transformer.blocks.18.norm_attn_norm.norm_1.weight": "model-00014-of-00031.safetensors",
215
  "transformer.blocks.18.norm_attn_norm.norm_2.weight": "model-00014-of-00031.safetensors",
216
- "transformer.blocks.19.ffn.experts.mlp.v1.input_scale": "model-00015-of-00031.safetensors",
217
- "transformer.blocks.19.ffn.experts.mlp.v1.weight": "model-00015-of-00031.safetensors",
218
- "transformer.blocks.19.ffn.experts.mlp.v1.weight_scale": "model-00015-of-00031.safetensors",
219
- "transformer.blocks.19.ffn.experts.mlp.w1.input_scale": "model-00015-of-00031.safetensors",
220
- "transformer.blocks.19.ffn.experts.mlp.w1.weight": "model-00015-of-00031.safetensors",
221
- "transformer.blocks.19.ffn.experts.mlp.w1.weight_scale": "model-00015-of-00031.safetensors",
222
- "transformer.blocks.19.ffn.experts.mlp.w2.input_scale": "model-00016-of-00031.safetensors",
223
- "transformer.blocks.19.ffn.experts.mlp.w2.weight": "model-00016-of-00031.safetensors",
224
- "transformer.blocks.19.ffn.experts.mlp.w2.weight_scale": "model-00016-of-00031.safetensors",
225
  "transformer.blocks.19.ffn.router.layer.weight": "model-00015-of-00031.safetensors",
226
  "transformer.blocks.19.norm_attn_norm.attn.Wqkv.input_scale": "model-00015-of-00031.safetensors",
227
  "transformer.blocks.19.norm_attn_norm.attn.Wqkv.weight": "model-00015-of-00031.safetensors",
@@ -232,15 +232,15 @@
232
  "transformer.blocks.19.norm_attn_norm.attn.out_proj.weight_scale": "model-00015-of-00031.safetensors",
233
  "transformer.blocks.19.norm_attn_norm.norm_1.weight": "model-00015-of-00031.safetensors",
234
  "transformer.blocks.19.norm_attn_norm.norm_2.weight": "model-00015-of-00031.safetensors",
235
- "transformer.blocks.2.ffn.experts.mlp.v1.input_scale": "model-00003-of-00031.safetensors",
236
- "transformer.blocks.2.ffn.experts.mlp.v1.weight": "model-00003-of-00031.safetensors",
237
- "transformer.blocks.2.ffn.experts.mlp.v1.weight_scale": "model-00003-of-00031.safetensors",
238
- "transformer.blocks.2.ffn.experts.mlp.w1.input_scale": "model-00002-of-00031.safetensors",
239
- "transformer.blocks.2.ffn.experts.mlp.w1.weight": "model-00002-of-00031.safetensors",
240
- "transformer.blocks.2.ffn.experts.mlp.w1.weight_scale": "model-00002-of-00031.safetensors",
241
- "transformer.blocks.2.ffn.experts.mlp.w2.input_scale": "model-00003-of-00031.safetensors",
242
- "transformer.blocks.2.ffn.experts.mlp.w2.weight": "model-00003-of-00031.safetensors",
243
- "transformer.blocks.2.ffn.experts.mlp.w2.weight_scale": "model-00003-of-00031.safetensors",
244
  "transformer.blocks.2.ffn.router.layer.weight": "model-00002-of-00031.safetensors",
245
  "transformer.blocks.2.norm_attn_norm.attn.Wqkv.input_scale": "model-00002-of-00031.safetensors",
246
  "transformer.blocks.2.norm_attn_norm.attn.Wqkv.weight": "model-00002-of-00031.safetensors",
@@ -251,15 +251,15 @@
251
  "transformer.blocks.2.norm_attn_norm.attn.out_proj.weight_scale": "model-00002-of-00031.safetensors",
252
  "transformer.blocks.2.norm_attn_norm.norm_1.weight": "model-00002-of-00031.safetensors",
253
  "transformer.blocks.2.norm_attn_norm.norm_2.weight": "model-00002-of-00031.safetensors",
254
- "transformer.blocks.20.ffn.experts.mlp.v1.input_scale": "model-00016-of-00031.safetensors",
255
- "transformer.blocks.20.ffn.experts.mlp.v1.weight": "model-00016-of-00031.safetensors",
256
- "transformer.blocks.20.ffn.experts.mlp.v1.weight_scale": "model-00016-of-00031.safetensors",
257
- "transformer.blocks.20.ffn.experts.mlp.w1.input_scale": "model-00016-of-00031.safetensors",
258
- "transformer.blocks.20.ffn.experts.mlp.w1.weight": "model-00016-of-00031.safetensors",
259
- "transformer.blocks.20.ffn.experts.mlp.w1.weight_scale": "model-00016-of-00031.safetensors",
260
- "transformer.blocks.20.ffn.experts.mlp.w2.input_scale": "model-00016-of-00031.safetensors",
261
- "transformer.blocks.20.ffn.experts.mlp.w2.weight": "model-00016-of-00031.safetensors",
262
- "transformer.blocks.20.ffn.experts.mlp.w2.weight_scale": "model-00016-of-00031.safetensors",
263
  "transformer.blocks.20.ffn.router.layer.weight": "model-00016-of-00031.safetensors",
264
  "transformer.blocks.20.norm_attn_norm.attn.Wqkv.input_scale": "model-00016-of-00031.safetensors",
265
  "transformer.blocks.20.norm_attn_norm.attn.Wqkv.weight": "model-00016-of-00031.safetensors",
@@ -270,15 +270,15 @@
270
  "transformer.blocks.20.norm_attn_norm.attn.out_proj.weight_scale": "model-00016-of-00031.safetensors",
271
  "transformer.blocks.20.norm_attn_norm.norm_1.weight": "model-00016-of-00031.safetensors",
272
  "transformer.blocks.20.norm_attn_norm.norm_2.weight": "model-00016-of-00031.safetensors",
273
- "transformer.blocks.21.ffn.experts.mlp.v1.input_scale": "model-00017-of-00031.safetensors",
274
- "transformer.blocks.21.ffn.experts.mlp.v1.weight": "model-00017-of-00031.safetensors",
275
- "transformer.blocks.21.ffn.experts.mlp.v1.weight_scale": "model-00017-of-00031.safetensors",
276
- "transformer.blocks.21.ffn.experts.mlp.w1.input_scale": "model-00017-of-00031.safetensors",
277
- "transformer.blocks.21.ffn.experts.mlp.w1.weight": "model-00017-of-00031.safetensors",
278
- "transformer.blocks.21.ffn.experts.mlp.w1.weight_scale": "model-00017-of-00031.safetensors",
279
- "transformer.blocks.21.ffn.experts.mlp.w2.input_scale": "model-00017-of-00031.safetensors",
280
- "transformer.blocks.21.ffn.experts.mlp.w2.weight": "model-00017-of-00031.safetensors",
281
- "transformer.blocks.21.ffn.experts.mlp.w2.weight_scale": "model-00017-of-00031.safetensors",
282
  "transformer.blocks.21.ffn.router.layer.weight": "model-00016-of-00031.safetensors",
283
  "transformer.blocks.21.norm_attn_norm.attn.Wqkv.input_scale": "model-00016-of-00031.safetensors",
284
  "transformer.blocks.21.norm_attn_norm.attn.Wqkv.weight": "model-00016-of-00031.safetensors",
@@ -289,15 +289,15 @@
289
  "transformer.blocks.21.norm_attn_norm.attn.out_proj.weight_scale": "model-00016-of-00031.safetensors",
290
  "transformer.blocks.21.norm_attn_norm.norm_1.weight": "model-00016-of-00031.safetensors",
291
  "transformer.blocks.21.norm_attn_norm.norm_2.weight": "model-00016-of-00031.safetensors",
292
- "transformer.blocks.22.ffn.experts.mlp.v1.input_scale": "model-00018-of-00031.safetensors",
293
- "transformer.blocks.22.ffn.experts.mlp.v1.weight": "model-00018-of-00031.safetensors",
294
- "transformer.blocks.22.ffn.experts.mlp.v1.weight_scale": "model-00018-of-00031.safetensors",
295
- "transformer.blocks.22.ffn.experts.mlp.w1.input_scale": "model-00017-of-00031.safetensors",
296
- "transformer.blocks.22.ffn.experts.mlp.w1.weight": "model-00017-of-00031.safetensors",
297
- "transformer.blocks.22.ffn.experts.mlp.w1.weight_scale": "model-00017-of-00031.safetensors",
298
- "transformer.blocks.22.ffn.experts.mlp.w2.input_scale": "model-00018-of-00031.safetensors",
299
- "transformer.blocks.22.ffn.experts.mlp.w2.weight": "model-00018-of-00031.safetensors",
300
- "transformer.blocks.22.ffn.experts.mlp.w2.weight_scale": "model-00018-of-00031.safetensors",
301
  "transformer.blocks.22.ffn.router.layer.weight": "model-00017-of-00031.safetensors",
302
  "transformer.blocks.22.norm_attn_norm.attn.Wqkv.input_scale": "model-00017-of-00031.safetensors",
303
  "transformer.blocks.22.norm_attn_norm.attn.Wqkv.weight": "model-00017-of-00031.safetensors",
@@ -308,15 +308,15 @@
308
  "transformer.blocks.22.norm_attn_norm.attn.out_proj.weight_scale": "model-00017-of-00031.safetensors",
309
  "transformer.blocks.22.norm_attn_norm.norm_1.weight": "model-00017-of-00031.safetensors",
310
  "transformer.blocks.22.norm_attn_norm.norm_2.weight": "model-00017-of-00031.safetensors",
311
- "transformer.blocks.23.ffn.experts.mlp.v1.input_scale": "model-00018-of-00031.safetensors",
312
- "transformer.blocks.23.ffn.experts.mlp.v1.weight": "model-00018-of-00031.safetensors",
313
- "transformer.blocks.23.ffn.experts.mlp.v1.weight_scale": "model-00018-of-00031.safetensors",
314
- "transformer.blocks.23.ffn.experts.mlp.w1.input_scale": "model-00018-of-00031.safetensors",
315
- "transformer.blocks.23.ffn.experts.mlp.w1.weight": "model-00018-of-00031.safetensors",
316
- "transformer.blocks.23.ffn.experts.mlp.w1.weight_scale": "model-00018-of-00031.safetensors",
317
- "transformer.blocks.23.ffn.experts.mlp.w2.input_scale": "model-00019-of-00031.safetensors",
318
- "transformer.blocks.23.ffn.experts.mlp.w2.weight": "model-00019-of-00031.safetensors",
319
- "transformer.blocks.23.ffn.experts.mlp.w2.weight_scale": "model-00019-of-00031.safetensors",
320
  "transformer.blocks.23.ffn.router.layer.weight": "model-00018-of-00031.safetensors",
321
  "transformer.blocks.23.norm_attn_norm.attn.Wqkv.input_scale": "model-00018-of-00031.safetensors",
322
  "transformer.blocks.23.norm_attn_norm.attn.Wqkv.weight": "model-00018-of-00031.safetensors",
@@ -327,15 +327,15 @@
327
  "transformer.blocks.23.norm_attn_norm.attn.out_proj.weight_scale": "model-00018-of-00031.safetensors",
328
  "transformer.blocks.23.norm_attn_norm.norm_1.weight": "model-00018-of-00031.safetensors",
329
  "transformer.blocks.23.norm_attn_norm.norm_2.weight": "model-00018-of-00031.safetensors",
330
- "transformer.blocks.24.ffn.experts.mlp.v1.input_scale": "model-00019-of-00031.safetensors",
331
- "transformer.blocks.24.ffn.experts.mlp.v1.weight": "model-00019-of-00031.safetensors",
332
- "transformer.blocks.24.ffn.experts.mlp.v1.weight_scale": "model-00019-of-00031.safetensors",
333
- "transformer.blocks.24.ffn.experts.mlp.w1.input_scale": "model-00019-of-00031.safetensors",
334
- "transformer.blocks.24.ffn.experts.mlp.w1.weight": "model-00019-of-00031.safetensors",
335
- "transformer.blocks.24.ffn.experts.mlp.w1.weight_scale": "model-00019-of-00031.safetensors",
336
- "transformer.blocks.24.ffn.experts.mlp.w2.input_scale": "model-00019-of-00031.safetensors",
337
- "transformer.blocks.24.ffn.experts.mlp.w2.weight": "model-00019-of-00031.safetensors",
338
- "transformer.blocks.24.ffn.experts.mlp.w2.weight_scale": "model-00019-of-00031.safetensors",
339
  "transformer.blocks.24.ffn.router.layer.weight": "model-00019-of-00031.safetensors",
340
  "transformer.blocks.24.norm_attn_norm.attn.Wqkv.input_scale": "model-00019-of-00031.safetensors",
341
  "transformer.blocks.24.norm_attn_norm.attn.Wqkv.weight": "model-00019-of-00031.safetensors",
@@ -346,15 +346,15 @@
346
  "transformer.blocks.24.norm_attn_norm.attn.out_proj.weight_scale": "model-00019-of-00031.safetensors",
347
  "transformer.blocks.24.norm_attn_norm.norm_1.weight": "model-00019-of-00031.safetensors",
348
  "transformer.blocks.24.norm_attn_norm.norm_2.weight": "model-00019-of-00031.safetensors",
349
- "transformer.blocks.25.ffn.experts.mlp.v1.input_scale": "model-00020-of-00031.safetensors",
350
- "transformer.blocks.25.ffn.experts.mlp.v1.weight": "model-00020-of-00031.safetensors",
351
- "transformer.blocks.25.ffn.experts.mlp.v1.weight_scale": "model-00020-of-00031.safetensors",
352
- "transformer.blocks.25.ffn.experts.mlp.w1.input_scale": "model-00020-of-00031.safetensors",
353
- "transformer.blocks.25.ffn.experts.mlp.w1.weight": "model-00020-of-00031.safetensors",
354
- "transformer.blocks.25.ffn.experts.mlp.w1.weight_scale": "model-00020-of-00031.safetensors",
355
- "transformer.blocks.25.ffn.experts.mlp.w2.input_scale": "model-00020-of-00031.safetensors",
356
- "transformer.blocks.25.ffn.experts.mlp.w2.weight": "model-00020-of-00031.safetensors",
357
- "transformer.blocks.25.ffn.experts.mlp.w2.weight_scale": "model-00020-of-00031.safetensors",
358
  "transformer.blocks.25.ffn.router.layer.weight": "model-00019-of-00031.safetensors",
359
  "transformer.blocks.25.norm_attn_norm.attn.Wqkv.input_scale": "model-00019-of-00031.safetensors",
360
  "transformer.blocks.25.norm_attn_norm.attn.Wqkv.weight": "model-00019-of-00031.safetensors",
@@ -365,15 +365,15 @@
365
  "transformer.blocks.25.norm_attn_norm.attn.out_proj.weight_scale": "model-00019-of-00031.safetensors",
366
  "transformer.blocks.25.norm_attn_norm.norm_1.weight": "model-00019-of-00031.safetensors",
367
  "transformer.blocks.25.norm_attn_norm.norm_2.weight": "model-00019-of-00031.safetensors",
368
- "transformer.blocks.26.ffn.experts.mlp.v1.input_scale": "model-00021-of-00031.safetensors",
369
- "transformer.blocks.26.ffn.experts.mlp.v1.weight": "model-00021-of-00031.safetensors",
370
- "transformer.blocks.26.ffn.experts.mlp.v1.weight_scale": "model-00021-of-00031.safetensors",
371
- "transformer.blocks.26.ffn.experts.mlp.w1.input_scale": "model-00020-of-00031.safetensors",
372
- "transformer.blocks.26.ffn.experts.mlp.w1.weight": "model-00020-of-00031.safetensors",
373
- "transformer.blocks.26.ffn.experts.mlp.w1.weight_scale": "model-00020-of-00031.safetensors",
374
- "transformer.blocks.26.ffn.experts.mlp.w2.input_scale": "model-00021-of-00031.safetensors",
375
- "transformer.blocks.26.ffn.experts.mlp.w2.weight": "model-00021-of-00031.safetensors",
376
- "transformer.blocks.26.ffn.experts.mlp.w2.weight_scale": "model-00021-of-00031.safetensors",
377
  "transformer.blocks.26.ffn.router.layer.weight": "model-00020-of-00031.safetensors",
378
  "transformer.blocks.26.norm_attn_norm.attn.Wqkv.input_scale": "model-00020-of-00031.safetensors",
379
  "transformer.blocks.26.norm_attn_norm.attn.Wqkv.weight": "model-00020-of-00031.safetensors",
@@ -384,15 +384,15 @@
384
  "transformer.blocks.26.norm_attn_norm.attn.out_proj.weight_scale": "model-00020-of-00031.safetensors",
385
  "transformer.blocks.26.norm_attn_norm.norm_1.weight": "model-00020-of-00031.safetensors",
386
  "transformer.blocks.26.norm_attn_norm.norm_2.weight": "model-00020-of-00031.safetensors",
387
- "transformer.blocks.27.ffn.experts.mlp.v1.input_scale": "model-00021-of-00031.safetensors",
388
- "transformer.blocks.27.ffn.experts.mlp.v1.weight": "model-00021-of-00031.safetensors",
389
- "transformer.blocks.27.ffn.experts.mlp.v1.weight_scale": "model-00021-of-00031.safetensors",
390
- "transformer.blocks.27.ffn.experts.mlp.w1.input_scale": "model-00021-of-00031.safetensors",
391
- "transformer.blocks.27.ffn.experts.mlp.w1.weight": "model-00021-of-00031.safetensors",
392
- "transformer.blocks.27.ffn.experts.mlp.w1.weight_scale": "model-00021-of-00031.safetensors",
393
- "transformer.blocks.27.ffn.experts.mlp.w2.input_scale": "model-00022-of-00031.safetensors",
394
- "transformer.blocks.27.ffn.experts.mlp.w2.weight": "model-00022-of-00031.safetensors",
395
- "transformer.blocks.27.ffn.experts.mlp.w2.weight_scale": "model-00022-of-00031.safetensors",
396
  "transformer.blocks.27.ffn.router.layer.weight": "model-00021-of-00031.safetensors",
397
  "transformer.blocks.27.norm_attn_norm.attn.Wqkv.input_scale": "model-00021-of-00031.safetensors",
398
  "transformer.blocks.27.norm_attn_norm.attn.Wqkv.weight": "model-00021-of-00031.safetensors",
@@ -403,15 +403,15 @@
403
  "transformer.blocks.27.norm_attn_norm.attn.out_proj.weight_scale": "model-00021-of-00031.safetensors",
404
  "transformer.blocks.27.norm_attn_norm.norm_1.weight": "model-00021-of-00031.safetensors",
405
  "transformer.blocks.27.norm_attn_norm.norm_2.weight": "model-00021-of-00031.safetensors",
406
- "transformer.blocks.28.ffn.experts.mlp.v1.input_scale": "model-00022-of-00031.safetensors",
407
- "transformer.blocks.28.ffn.experts.mlp.v1.weight": "model-00022-of-00031.safetensors",
408
- "transformer.blocks.28.ffn.experts.mlp.v1.weight_scale": "model-00022-of-00031.safetensors",
409
- "transformer.blocks.28.ffn.experts.mlp.w1.input_scale": "model-00022-of-00031.safetensors",
410
- "transformer.blocks.28.ffn.experts.mlp.w1.weight": "model-00022-of-00031.safetensors",
411
- "transformer.blocks.28.ffn.experts.mlp.w1.weight_scale": "model-00022-of-00031.safetensors",
412
- "transformer.blocks.28.ffn.experts.mlp.w2.input_scale": "model-00022-of-00031.safetensors",
413
- "transformer.blocks.28.ffn.experts.mlp.w2.weight": "model-00022-of-00031.safetensors",
414
- "transformer.blocks.28.ffn.experts.mlp.w2.weight_scale": "model-00022-of-00031.safetensors",
415
  "transformer.blocks.28.ffn.router.layer.weight": "model-00022-of-00031.safetensors",
416
  "transformer.blocks.28.norm_attn_norm.attn.Wqkv.input_scale": "model-00022-of-00031.safetensors",
417
  "transformer.blocks.28.norm_attn_norm.attn.Wqkv.weight": "model-00022-of-00031.safetensors",
@@ -422,15 +422,15 @@
422
  "transformer.blocks.28.norm_attn_norm.attn.out_proj.weight_scale": "model-00022-of-00031.safetensors",
423
  "transformer.blocks.28.norm_attn_norm.norm_1.weight": "model-00022-of-00031.safetensors",
424
  "transformer.blocks.28.norm_attn_norm.norm_2.weight": "model-00022-of-00031.safetensors",
425
- "transformer.blocks.29.ffn.experts.mlp.v1.input_scale": "model-00023-of-00031.safetensors",
426
- "transformer.blocks.29.ffn.experts.mlp.v1.weight": "model-00023-of-00031.safetensors",
427
- "transformer.blocks.29.ffn.experts.mlp.v1.weight_scale": "model-00023-of-00031.safetensors",
428
- "transformer.blocks.29.ffn.experts.mlp.w1.input_scale": "model-00023-of-00031.safetensors",
429
- "transformer.blocks.29.ffn.experts.mlp.w1.weight": "model-00023-of-00031.safetensors",
430
- "transformer.blocks.29.ffn.experts.mlp.w1.weight_scale": "model-00023-of-00031.safetensors",
431
- "transformer.blocks.29.ffn.experts.mlp.w2.input_scale": "model-00023-of-00031.safetensors",
432
- "transformer.blocks.29.ffn.experts.mlp.w2.weight": "model-00023-of-00031.safetensors",
433
- "transformer.blocks.29.ffn.experts.mlp.w2.weight_scale": "model-00023-of-00031.safetensors",
434
  "transformer.blocks.29.ffn.router.layer.weight": "model-00022-of-00031.safetensors",
435
  "transformer.blocks.29.norm_attn_norm.attn.Wqkv.input_scale": "model-00022-of-00031.safetensors",
436
  "transformer.blocks.29.norm_attn_norm.attn.Wqkv.weight": "model-00022-of-00031.safetensors",
@@ -441,15 +441,15 @@
441
  "transformer.blocks.29.norm_attn_norm.attn.out_proj.weight_scale": "model-00022-of-00031.safetensors",
442
  "transformer.blocks.29.norm_attn_norm.norm_1.weight": "model-00022-of-00031.safetensors",
443
  "transformer.blocks.29.norm_attn_norm.norm_2.weight": "model-00022-of-00031.safetensors",
444
- "transformer.blocks.3.ffn.experts.mlp.v1.input_scale": "model-00003-of-00031.safetensors",
445
- "transformer.blocks.3.ffn.experts.mlp.v1.weight": "model-00003-of-00031.safetensors",
446
- "transformer.blocks.3.ffn.experts.mlp.v1.weight_scale": "model-00003-of-00031.safetensors",
447
- "transformer.blocks.3.ffn.experts.mlp.w1.input_scale": "model-00003-of-00031.safetensors",
448
- "transformer.blocks.3.ffn.experts.mlp.w1.weight": "model-00003-of-00031.safetensors",
449
- "transformer.blocks.3.ffn.experts.mlp.w1.weight_scale": "model-00003-of-00031.safetensors",
450
- "transformer.blocks.3.ffn.experts.mlp.w2.input_scale": "model-00004-of-00031.safetensors",
451
- "transformer.blocks.3.ffn.experts.mlp.w2.weight": "model-00004-of-00031.safetensors",
452
- "transformer.blocks.3.ffn.experts.mlp.w2.weight_scale": "model-00004-of-00031.safetensors",
453
  "transformer.blocks.3.ffn.router.layer.weight": "model-00003-of-00031.safetensors",
454
  "transformer.blocks.3.norm_attn_norm.attn.Wqkv.input_scale": "model-00003-of-00031.safetensors",
455
  "transformer.blocks.3.norm_attn_norm.attn.Wqkv.weight": "model-00003-of-00031.safetensors",
@@ -460,15 +460,15 @@
460
  "transformer.blocks.3.norm_attn_norm.attn.out_proj.weight_scale": "model-00003-of-00031.safetensors",
461
  "transformer.blocks.3.norm_attn_norm.norm_1.weight": "model-00003-of-00031.safetensors",
462
  "transformer.blocks.3.norm_attn_norm.norm_2.weight": "model-00003-of-00031.safetensors",
463
- "transformer.blocks.30.ffn.experts.mlp.v1.input_scale": "model-00024-of-00031.safetensors",
464
- "transformer.blocks.30.ffn.experts.mlp.v1.weight": "model-00024-of-00031.safetensors",
465
- "transformer.blocks.30.ffn.experts.mlp.v1.weight_scale": "model-00024-of-00031.safetensors",
466
- "transformer.blocks.30.ffn.experts.mlp.w1.input_scale": "model-00023-of-00031.safetensors",
467
- "transformer.blocks.30.ffn.experts.mlp.w1.weight": "model-00023-of-00031.safetensors",
468
- "transformer.blocks.30.ffn.experts.mlp.w1.weight_scale": "model-00023-of-00031.safetensors",
469
- "transformer.blocks.30.ffn.experts.mlp.w2.input_scale": "model-00024-of-00031.safetensors",
470
- "transformer.blocks.30.ffn.experts.mlp.w2.weight": "model-00024-of-00031.safetensors",
471
- "transformer.blocks.30.ffn.experts.mlp.w2.weight_scale": "model-00024-of-00031.safetensors",
472
  "transformer.blocks.30.ffn.router.layer.weight": "model-00023-of-00031.safetensors",
473
  "transformer.blocks.30.norm_attn_norm.attn.Wqkv.input_scale": "model-00023-of-00031.safetensors",
474
  "transformer.blocks.30.norm_attn_norm.attn.Wqkv.weight": "model-00023-of-00031.safetensors",
@@ -479,15 +479,15 @@
479
  "transformer.blocks.30.norm_attn_norm.attn.out_proj.weight_scale": "model-00023-of-00031.safetensors",
480
  "transformer.blocks.30.norm_attn_norm.norm_1.weight": "model-00023-of-00031.safetensors",
481
  "transformer.blocks.30.norm_attn_norm.norm_2.weight": "model-00023-of-00031.safetensors",
482
- "transformer.blocks.31.ffn.experts.mlp.v1.input_scale": "model-00024-of-00031.safetensors",
483
- "transformer.blocks.31.ffn.experts.mlp.v1.weight": "model-00024-of-00031.safetensors",
484
- "transformer.blocks.31.ffn.experts.mlp.v1.weight_scale": "model-00024-of-00031.safetensors",
485
- "transformer.blocks.31.ffn.experts.mlp.w1.input_scale": "model-00024-of-00031.safetensors",
486
- "transformer.blocks.31.ffn.experts.mlp.w1.weight": "model-00024-of-00031.safetensors",
487
- "transformer.blocks.31.ffn.experts.mlp.w1.weight_scale": "model-00024-of-00031.safetensors",
488
- "transformer.blocks.31.ffn.experts.mlp.w2.input_scale": "model-00025-of-00031.safetensors",
489
- "transformer.blocks.31.ffn.experts.mlp.w2.weight": "model-00025-of-00031.safetensors",
490
- "transformer.blocks.31.ffn.experts.mlp.w2.weight_scale": "model-00025-of-00031.safetensors",
491
  "transformer.blocks.31.ffn.router.layer.weight": "model-00024-of-00031.safetensors",
492
  "transformer.blocks.31.norm_attn_norm.attn.Wqkv.input_scale": "model-00024-of-00031.safetensors",
493
  "transformer.blocks.31.norm_attn_norm.attn.Wqkv.weight": "model-00024-of-00031.safetensors",
@@ -498,15 +498,15 @@
498
  "transformer.blocks.31.norm_attn_norm.attn.out_proj.weight_scale": "model-00024-of-00031.safetensors",
499
  "transformer.blocks.31.norm_attn_norm.norm_1.weight": "model-00024-of-00031.safetensors",
500
  "transformer.blocks.31.norm_attn_norm.norm_2.weight": "model-00024-of-00031.safetensors",
501
- "transformer.blocks.32.ffn.experts.mlp.v1.input_scale": "model-00025-of-00031.safetensors",
502
- "transformer.blocks.32.ffn.experts.mlp.v1.weight": "model-00025-of-00031.safetensors",
503
- "transformer.blocks.32.ffn.experts.mlp.v1.weight_scale": "model-00025-of-00031.safetensors",
504
- "transformer.blocks.32.ffn.experts.mlp.w1.input_scale": "model-00025-of-00031.safetensors",
505
- "transformer.blocks.32.ffn.experts.mlp.w1.weight": "model-00025-of-00031.safetensors",
506
- "transformer.blocks.32.ffn.experts.mlp.w1.weight_scale": "model-00025-of-00031.safetensors",
507
- "transformer.blocks.32.ffn.experts.mlp.w2.input_scale": "model-00025-of-00031.safetensors",
508
- "transformer.blocks.32.ffn.experts.mlp.w2.weight": "model-00025-of-00031.safetensors",
509
- "transformer.blocks.32.ffn.experts.mlp.w2.weight_scale": "model-00025-of-00031.safetensors",
510
  "transformer.blocks.32.ffn.router.layer.weight": "model-00025-of-00031.safetensors",
511
  "transformer.blocks.32.norm_attn_norm.attn.Wqkv.input_scale": "model-00025-of-00031.safetensors",
512
  "transformer.blocks.32.norm_attn_norm.attn.Wqkv.weight": "model-00025-of-00031.safetensors",
@@ -517,15 +517,15 @@
517
  "transformer.blocks.32.norm_attn_norm.attn.out_proj.weight_scale": "model-00025-of-00031.safetensors",
518
  "transformer.blocks.32.norm_attn_norm.norm_1.weight": "model-00025-of-00031.safetensors",
519
  "transformer.blocks.32.norm_attn_norm.norm_2.weight": "model-00025-of-00031.safetensors",
520
- "transformer.blocks.33.ffn.experts.mlp.v1.input_scale": "model-00026-of-00031.safetensors",
521
- "transformer.blocks.33.ffn.experts.mlp.v1.weight": "model-00026-of-00031.safetensors",
522
- "transformer.blocks.33.ffn.experts.mlp.v1.weight_scale": "model-00026-of-00031.safetensors",
523
- "transformer.blocks.33.ffn.experts.mlp.w1.input_scale": "model-00026-of-00031.safetensors",
524
- "transformer.blocks.33.ffn.experts.mlp.w1.weight": "model-00026-of-00031.safetensors",
525
- "transformer.blocks.33.ffn.experts.mlp.w1.weight_scale": "model-00026-of-00031.safetensors",
526
- "transformer.blocks.33.ffn.experts.mlp.w2.input_scale": "model-00026-of-00031.safetensors",
527
- "transformer.blocks.33.ffn.experts.mlp.w2.weight": "model-00026-of-00031.safetensors",
528
- "transformer.blocks.33.ffn.experts.mlp.w2.weight_scale": "model-00026-of-00031.safetensors",
529
  "transformer.blocks.33.ffn.router.layer.weight": "model-00025-of-00031.safetensors",
530
  "transformer.blocks.33.norm_attn_norm.attn.Wqkv.input_scale": "model-00025-of-00031.safetensors",
531
  "transformer.blocks.33.norm_attn_norm.attn.Wqkv.weight": "model-00025-of-00031.safetensors",
@@ -536,15 +536,15 @@
536
  "transformer.blocks.33.norm_attn_norm.attn.out_proj.weight_scale": "model-00025-of-00031.safetensors",
537
  "transformer.blocks.33.norm_attn_norm.norm_1.weight": "model-00025-of-00031.safetensors",
538
  "transformer.blocks.33.norm_attn_norm.norm_2.weight": "model-00025-of-00031.safetensors",
539
- "transformer.blocks.34.ffn.experts.mlp.v1.input_scale": "model-00027-of-00031.safetensors",
540
- "transformer.blocks.34.ffn.experts.mlp.v1.weight": "model-00027-of-00031.safetensors",
541
- "transformer.blocks.34.ffn.experts.mlp.v1.weight_scale": "model-00027-of-00031.safetensors",
542
- "transformer.blocks.34.ffn.experts.mlp.w1.input_scale": "model-00026-of-00031.safetensors",
543
- "transformer.blocks.34.ffn.experts.mlp.w1.weight": "model-00026-of-00031.safetensors",
544
- "transformer.blocks.34.ffn.experts.mlp.w1.weight_scale": "model-00026-of-00031.safetensors",
545
- "transformer.blocks.34.ffn.experts.mlp.w2.input_scale": "model-00027-of-00031.safetensors",
546
- "transformer.blocks.34.ffn.experts.mlp.w2.weight": "model-00027-of-00031.safetensors",
547
- "transformer.blocks.34.ffn.experts.mlp.w2.weight_scale": "model-00027-of-00031.safetensors",
548
  "transformer.blocks.34.ffn.router.layer.weight": "model-00026-of-00031.safetensors",
549
  "transformer.blocks.34.norm_attn_norm.attn.Wqkv.input_scale": "model-00026-of-00031.safetensors",
550
  "transformer.blocks.34.norm_attn_norm.attn.Wqkv.weight": "model-00026-of-00031.safetensors",
@@ -555,15 +555,15 @@
555
  "transformer.blocks.34.norm_attn_norm.attn.out_proj.weight_scale": "model-00026-of-00031.safetensors",
556
  "transformer.blocks.34.norm_attn_norm.norm_1.weight": "model-00026-of-00031.safetensors",
557
  "transformer.blocks.34.norm_attn_norm.norm_2.weight": "model-00026-of-00031.safetensors",
558
- "transformer.blocks.35.ffn.experts.mlp.v1.input_scale": "model-00027-of-00031.safetensors",
559
- "transformer.blocks.35.ffn.experts.mlp.v1.weight": "model-00027-of-00031.safetensors",
560
- "transformer.blocks.35.ffn.experts.mlp.v1.weight_scale": "model-00027-of-00031.safetensors",
561
- "transformer.blocks.35.ffn.experts.mlp.w1.input_scale": "model-00027-of-00031.safetensors",
562
- "transformer.blocks.35.ffn.experts.mlp.w1.weight": "model-00027-of-00031.safetensors",
563
- "transformer.blocks.35.ffn.experts.mlp.w1.weight_scale": "model-00027-of-00031.safetensors",
564
- "transformer.blocks.35.ffn.experts.mlp.w2.input_scale": "model-00028-of-00031.safetensors",
565
- "transformer.blocks.35.ffn.experts.mlp.w2.weight": "model-00028-of-00031.safetensors",
566
- "transformer.blocks.35.ffn.experts.mlp.w2.weight_scale": "model-00028-of-00031.safetensors",
567
  "transformer.blocks.35.ffn.router.layer.weight": "model-00027-of-00031.safetensors",
568
  "transformer.blocks.35.norm_attn_norm.attn.Wqkv.input_scale": "model-00027-of-00031.safetensors",
569
  "transformer.blocks.35.norm_attn_norm.attn.Wqkv.weight": "model-00027-of-00031.safetensors",
@@ -574,15 +574,15 @@
574
  "transformer.blocks.35.norm_attn_norm.attn.out_proj.weight_scale": "model-00027-of-00031.safetensors",
575
  "transformer.blocks.35.norm_attn_norm.norm_1.weight": "model-00027-of-00031.safetensors",
576
  "transformer.blocks.35.norm_attn_norm.norm_2.weight": "model-00027-of-00031.safetensors",
577
- "transformer.blocks.36.ffn.experts.mlp.v1.input_scale": "model-00028-of-00031.safetensors",
578
- "transformer.blocks.36.ffn.experts.mlp.v1.weight": "model-00028-of-00031.safetensors",
579
- "transformer.blocks.36.ffn.experts.mlp.v1.weight_scale": "model-00028-of-00031.safetensors",
580
- "transformer.blocks.36.ffn.experts.mlp.w1.input_scale": "model-00028-of-00031.safetensors",
581
- "transformer.blocks.36.ffn.experts.mlp.w1.weight": "model-00028-of-00031.safetensors",
582
- "transformer.blocks.36.ffn.experts.mlp.w1.weight_scale": "model-00028-of-00031.safetensors",
583
- "transformer.blocks.36.ffn.experts.mlp.w2.input_scale": "model-00028-of-00031.safetensors",
584
- "transformer.blocks.36.ffn.experts.mlp.w2.weight": "model-00028-of-00031.safetensors",
585
- "transformer.blocks.36.ffn.experts.mlp.w2.weight_scale": "model-00028-of-00031.safetensors",
586
  "transformer.blocks.36.ffn.router.layer.weight": "model-00028-of-00031.safetensors",
587
  "transformer.blocks.36.norm_attn_norm.attn.Wqkv.input_scale": "model-00028-of-00031.safetensors",
588
  "transformer.blocks.36.norm_attn_norm.attn.Wqkv.weight": "model-00028-of-00031.safetensors",
@@ -593,15 +593,15 @@
593
  "transformer.blocks.36.norm_attn_norm.attn.out_proj.weight_scale": "model-00028-of-00031.safetensors",
594
  "transformer.blocks.36.norm_attn_norm.norm_1.weight": "model-00028-of-00031.safetensors",
595
  "transformer.blocks.36.norm_attn_norm.norm_2.weight": "model-00028-of-00031.safetensors",
596
- "transformer.blocks.37.ffn.experts.mlp.v1.input_scale": "model-00029-of-00031.safetensors",
597
- "transformer.blocks.37.ffn.experts.mlp.v1.weight": "model-00029-of-00031.safetensors",
598
- "transformer.blocks.37.ffn.experts.mlp.v1.weight_scale": "model-00029-of-00031.safetensors",
599
- "transformer.blocks.37.ffn.experts.mlp.w1.input_scale": "model-00029-of-00031.safetensors",
600
- "transformer.blocks.37.ffn.experts.mlp.w1.weight": "model-00029-of-00031.safetensors",
601
- "transformer.blocks.37.ffn.experts.mlp.w1.weight_scale": "model-00029-of-00031.safetensors",
602
- "transformer.blocks.37.ffn.experts.mlp.w2.input_scale": "model-00029-of-00031.safetensors",
603
- "transformer.blocks.37.ffn.experts.mlp.w2.weight": "model-00029-of-00031.safetensors",
604
- "transformer.blocks.37.ffn.experts.mlp.w2.weight_scale": "model-00029-of-00031.safetensors",
605
  "transformer.blocks.37.ffn.router.layer.weight": "model-00028-of-00031.safetensors",
606
  "transformer.blocks.37.norm_attn_norm.attn.Wqkv.input_scale": "model-00028-of-00031.safetensors",
607
  "transformer.blocks.37.norm_attn_norm.attn.Wqkv.weight": "model-00028-of-00031.safetensors",
@@ -612,15 +612,15 @@
612
  "transformer.blocks.37.norm_attn_norm.attn.out_proj.weight_scale": "model-00028-of-00031.safetensors",
613
  "transformer.blocks.37.norm_attn_norm.norm_1.weight": "model-00028-of-00031.safetensors",
614
  "transformer.blocks.37.norm_attn_norm.norm_2.weight": "model-00028-of-00031.safetensors",
615
- "transformer.blocks.38.ffn.experts.mlp.v1.input_scale": "model-00030-of-00031.safetensors",
616
- "transformer.blocks.38.ffn.experts.mlp.v1.weight": "model-00030-of-00031.safetensors",
617
- "transformer.blocks.38.ffn.experts.mlp.v1.weight_scale": "model-00030-of-00031.safetensors",
618
- "transformer.blocks.38.ffn.experts.mlp.w1.input_scale": "model-00029-of-00031.safetensors",
619
- "transformer.blocks.38.ffn.experts.mlp.w1.weight": "model-00029-of-00031.safetensors",
620
- "transformer.blocks.38.ffn.experts.mlp.w1.weight_scale": "model-00029-of-00031.safetensors",
621
- "transformer.blocks.38.ffn.experts.mlp.w2.input_scale": "model-00030-of-00031.safetensors",
622
- "transformer.blocks.38.ffn.experts.mlp.w2.weight": "model-00030-of-00031.safetensors",
623
- "transformer.blocks.38.ffn.experts.mlp.w2.weight_scale": "model-00030-of-00031.safetensors",
624
  "transformer.blocks.38.ffn.router.layer.weight": "model-00029-of-00031.safetensors",
625
  "transformer.blocks.38.norm_attn_norm.attn.Wqkv.input_scale": "model-00029-of-00031.safetensors",
626
  "transformer.blocks.38.norm_attn_norm.attn.Wqkv.weight": "model-00029-of-00031.safetensors",
@@ -631,15 +631,15 @@
631
  "transformer.blocks.38.norm_attn_norm.attn.out_proj.weight_scale": "model-00029-of-00031.safetensors",
632
  "transformer.blocks.38.norm_attn_norm.norm_1.weight": "model-00029-of-00031.safetensors",
633
  "transformer.blocks.38.norm_attn_norm.norm_2.weight": "model-00029-of-00031.safetensors",
634
- "transformer.blocks.39.ffn.experts.mlp.v1.input_scale": "model-00030-of-00031.safetensors",
635
- "transformer.blocks.39.ffn.experts.mlp.v1.weight": "model-00030-of-00031.safetensors",
636
- "transformer.blocks.39.ffn.experts.mlp.v1.weight_scale": "model-00030-of-00031.safetensors",
637
- "transformer.blocks.39.ffn.experts.mlp.w1.input_scale": "model-00030-of-00031.safetensors",
638
- "transformer.blocks.39.ffn.experts.mlp.w1.weight": "model-00030-of-00031.safetensors",
639
- "transformer.blocks.39.ffn.experts.mlp.w1.weight_scale": "model-00030-of-00031.safetensors",
640
- "transformer.blocks.39.ffn.experts.mlp.w2.input_scale": "model-00031-of-00031.safetensors",
641
- "transformer.blocks.39.ffn.experts.mlp.w2.weight": "model-00031-of-00031.safetensors",
642
- "transformer.blocks.39.ffn.experts.mlp.w2.weight_scale": "model-00031-of-00031.safetensors",
643
  "transformer.blocks.39.ffn.router.layer.weight": "model-00030-of-00031.safetensors",
644
  "transformer.blocks.39.norm_attn_norm.attn.Wqkv.input_scale": "model-00030-of-00031.safetensors",
645
  "transformer.blocks.39.norm_attn_norm.attn.Wqkv.weight": "model-00030-of-00031.safetensors",
@@ -650,15 +650,15 @@
650
  "transformer.blocks.39.norm_attn_norm.attn.out_proj.weight_scale": "model-00030-of-00031.safetensors",
651
  "transformer.blocks.39.norm_attn_norm.norm_1.weight": "model-00030-of-00031.safetensors",
652
  "transformer.blocks.39.norm_attn_norm.norm_2.weight": "model-00030-of-00031.safetensors",
653
- "transformer.blocks.4.ffn.experts.mlp.v1.input_scale": "model-00004-of-00031.safetensors",
654
- "transformer.blocks.4.ffn.experts.mlp.v1.weight": "model-00004-of-00031.safetensors",
655
- "transformer.blocks.4.ffn.experts.mlp.v1.weight_scale": "model-00004-of-00031.safetensors",
656
- "transformer.blocks.4.ffn.experts.mlp.w1.input_scale": "model-00004-of-00031.safetensors",
657
- "transformer.blocks.4.ffn.experts.mlp.w1.weight": "model-00004-of-00031.safetensors",
658
- "transformer.blocks.4.ffn.experts.mlp.w1.weight_scale": "model-00004-of-00031.safetensors",
659
- "transformer.blocks.4.ffn.experts.mlp.w2.input_scale": "model-00004-of-00031.safetensors",
660
- "transformer.blocks.4.ffn.experts.mlp.w2.weight": "model-00004-of-00031.safetensors",
661
- "transformer.blocks.4.ffn.experts.mlp.w2.weight_scale": "model-00004-of-00031.safetensors",
662
  "transformer.blocks.4.ffn.router.layer.weight": "model-00004-of-00031.safetensors",
663
  "transformer.blocks.4.norm_attn_norm.attn.Wqkv.input_scale": "model-00004-of-00031.safetensors",
664
  "transformer.blocks.4.norm_attn_norm.attn.Wqkv.weight": "model-00004-of-00031.safetensors",
@@ -669,15 +669,15 @@
669
  "transformer.blocks.4.norm_attn_norm.attn.out_proj.weight_scale": "model-00004-of-00031.safetensors",
670
  "transformer.blocks.4.norm_attn_norm.norm_1.weight": "model-00004-of-00031.safetensors",
671
  "transformer.blocks.4.norm_attn_norm.norm_2.weight": "model-00004-of-00031.safetensors",
672
- "transformer.blocks.5.ffn.experts.mlp.v1.input_scale": "model-00005-of-00031.safetensors",
673
- "transformer.blocks.5.ffn.experts.mlp.v1.weight": "model-00005-of-00031.safetensors",
674
- "transformer.blocks.5.ffn.experts.mlp.v1.weight_scale": "model-00005-of-00031.safetensors",
675
- "transformer.blocks.5.ffn.experts.mlp.w1.input_scale": "model-00005-of-00031.safetensors",
676
- "transformer.blocks.5.ffn.experts.mlp.w1.weight": "model-00005-of-00031.safetensors",
677
- "transformer.blocks.5.ffn.experts.mlp.w1.weight_scale": "model-00005-of-00031.safetensors",
678
- "transformer.blocks.5.ffn.experts.mlp.w2.input_scale": "model-00005-of-00031.safetensors",
679
- "transformer.blocks.5.ffn.experts.mlp.w2.weight": "model-00005-of-00031.safetensors",
680
- "transformer.blocks.5.ffn.experts.mlp.w2.weight_scale": "model-00005-of-00031.safetensors",
681
  "transformer.blocks.5.ffn.router.layer.weight": "model-00004-of-00031.safetensors",
682
  "transformer.blocks.5.norm_attn_norm.attn.Wqkv.input_scale": "model-00004-of-00031.safetensors",
683
  "transformer.blocks.5.norm_attn_norm.attn.Wqkv.weight": "model-00004-of-00031.safetensors",
@@ -688,15 +688,15 @@
688
  "transformer.blocks.5.norm_attn_norm.attn.out_proj.weight_scale": "model-00004-of-00031.safetensors",
689
  "transformer.blocks.5.norm_attn_norm.norm_1.weight": "model-00004-of-00031.safetensors",
690
  "transformer.blocks.5.norm_attn_norm.norm_2.weight": "model-00004-of-00031.safetensors",
691
- "transformer.blocks.6.ffn.experts.mlp.v1.input_scale": "model-00006-of-00031.safetensors",
692
- "transformer.blocks.6.ffn.experts.mlp.v1.weight": "model-00006-of-00031.safetensors",
693
- "transformer.blocks.6.ffn.experts.mlp.v1.weight_scale": "model-00006-of-00031.safetensors",
694
- "transformer.blocks.6.ffn.experts.mlp.w1.input_scale": "model-00005-of-00031.safetensors",
695
- "transformer.blocks.6.ffn.experts.mlp.w1.weight": "model-00005-of-00031.safetensors",
696
- "transformer.blocks.6.ffn.experts.mlp.w1.weight_scale": "model-00005-of-00031.safetensors",
697
- "transformer.blocks.6.ffn.experts.mlp.w2.input_scale": "model-00006-of-00031.safetensors",
698
- "transformer.blocks.6.ffn.experts.mlp.w2.weight": "model-00006-of-00031.safetensors",
699
- "transformer.blocks.6.ffn.experts.mlp.w2.weight_scale": "model-00006-of-00031.safetensors",
700
  "transformer.blocks.6.ffn.router.layer.weight": "model-00005-of-00031.safetensors",
701
  "transformer.blocks.6.norm_attn_norm.attn.Wqkv.input_scale": "model-00005-of-00031.safetensors",
702
  "transformer.blocks.6.norm_attn_norm.attn.Wqkv.weight": "model-00005-of-00031.safetensors",
@@ -707,15 +707,15 @@
707
  "transformer.blocks.6.norm_attn_norm.attn.out_proj.weight_scale": "model-00005-of-00031.safetensors",
708
  "transformer.blocks.6.norm_attn_norm.norm_1.weight": "model-00005-of-00031.safetensors",
709
  "transformer.blocks.6.norm_attn_norm.norm_2.weight": "model-00005-of-00031.safetensors",
710
- "transformer.blocks.7.ffn.experts.mlp.v1.input_scale": "model-00006-of-00031.safetensors",
711
- "transformer.blocks.7.ffn.experts.mlp.v1.weight": "model-00006-of-00031.safetensors",
712
- "transformer.blocks.7.ffn.experts.mlp.v1.weight_scale": "model-00006-of-00031.safetensors",
713
- "transformer.blocks.7.ffn.experts.mlp.w1.input_scale": "model-00006-of-00031.safetensors",
714
- "transformer.blocks.7.ffn.experts.mlp.w1.weight": "model-00006-of-00031.safetensors",
715
- "transformer.blocks.7.ffn.experts.mlp.w1.weight_scale": "model-00006-of-00031.safetensors",
716
- "transformer.blocks.7.ffn.experts.mlp.w2.input_scale": "model-00007-of-00031.safetensors",
717
- "transformer.blocks.7.ffn.experts.mlp.w2.weight": "model-00007-of-00031.safetensors",
718
- "transformer.blocks.7.ffn.experts.mlp.w2.weight_scale": "model-00007-of-00031.safetensors",
719
  "transformer.blocks.7.ffn.router.layer.weight": "model-00006-of-00031.safetensors",
720
  "transformer.blocks.7.norm_attn_norm.attn.Wqkv.input_scale": "model-00006-of-00031.safetensors",
721
  "transformer.blocks.7.norm_attn_norm.attn.Wqkv.weight": "model-00006-of-00031.safetensors",
@@ -726,15 +726,15 @@
726
  "transformer.blocks.7.norm_attn_norm.attn.out_proj.weight_scale": "model-00006-of-00031.safetensors",
727
  "transformer.blocks.7.norm_attn_norm.norm_1.weight": "model-00006-of-00031.safetensors",
728
  "transformer.blocks.7.norm_attn_norm.norm_2.weight": "model-00006-of-00031.safetensors",
729
- "transformer.blocks.8.ffn.experts.mlp.v1.input_scale": "model-00007-of-00031.safetensors",
730
- "transformer.blocks.8.ffn.experts.mlp.v1.weight": "model-00007-of-00031.safetensors",
731
- "transformer.blocks.8.ffn.experts.mlp.v1.weight_scale": "model-00007-of-00031.safetensors",
732
- "transformer.blocks.8.ffn.experts.mlp.w1.input_scale": "model-00007-of-00031.safetensors",
733
- "transformer.blocks.8.ffn.experts.mlp.w1.weight": "model-00007-of-00031.safetensors",
734
- "transformer.blocks.8.ffn.experts.mlp.w1.weight_scale": "model-00007-of-00031.safetensors",
735
- "transformer.blocks.8.ffn.experts.mlp.w2.input_scale": "model-00007-of-00031.safetensors",
736
- "transformer.blocks.8.ffn.experts.mlp.w2.weight": "model-00007-of-00031.safetensors",
737
- "transformer.blocks.8.ffn.experts.mlp.w2.weight_scale": "model-00007-of-00031.safetensors",
738
  "transformer.blocks.8.ffn.router.layer.weight": "model-00007-of-00031.safetensors",
739
  "transformer.blocks.8.norm_attn_norm.attn.Wqkv.input_scale": "model-00007-of-00031.safetensors",
740
  "transformer.blocks.8.norm_attn_norm.attn.Wqkv.weight": "model-00007-of-00031.safetensors",
@@ -745,15 +745,15 @@
745
  "transformer.blocks.8.norm_attn_norm.attn.out_proj.weight_scale": "model-00007-of-00031.safetensors",
746
  "transformer.blocks.8.norm_attn_norm.norm_1.weight": "model-00007-of-00031.safetensors",
747
  "transformer.blocks.8.norm_attn_norm.norm_2.weight": "model-00007-of-00031.safetensors",
748
- "transformer.blocks.9.ffn.experts.mlp.v1.input_scale": "model-00008-of-00031.safetensors",
749
- "transformer.blocks.9.ffn.experts.mlp.v1.weight": "model-00008-of-00031.safetensors",
750
- "transformer.blocks.9.ffn.experts.mlp.v1.weight_scale": "model-00008-of-00031.safetensors",
751
- "transformer.blocks.9.ffn.experts.mlp.w1.input_scale": "model-00008-of-00031.safetensors",
752
- "transformer.blocks.9.ffn.experts.mlp.w1.weight": "model-00008-of-00031.safetensors",
753
- "transformer.blocks.9.ffn.experts.mlp.w1.weight_scale": "model-00008-of-00031.safetensors",
754
- "transformer.blocks.9.ffn.experts.mlp.w2.input_scale": "model-00008-of-00031.safetensors",
755
- "transformer.blocks.9.ffn.experts.mlp.w2.weight": "model-00008-of-00031.safetensors",
756
- "transformer.blocks.9.ffn.experts.mlp.w2.weight_scale": "model-00008-of-00031.safetensors",
757
  "transformer.blocks.9.ffn.router.layer.weight": "model-00007-of-00031.safetensors",
758
  "transformer.blocks.9.norm_attn_norm.attn.Wqkv.input_scale": "model-00007-of-00031.safetensors",
759
  "transformer.blocks.9.norm_attn_norm.attn.Wqkv.weight": "model-00007-of-00031.safetensors",
 
4
  },
5
  "weight_map": {
6
  "lm_head.weight": "model-00031-of-00031.safetensors",
7
+ "transformer.blocks.0.ffn.experts.mlp.v1_input_scale": "model-00001-of-00031.safetensors",
8
+ "transformer.blocks.0.ffn.experts.mlp.v1_weight": "model-00001-of-00031.safetensors",
9
+ "transformer.blocks.0.ffn.experts.mlp.v1_weight_scale": "model-00001-of-00031.safetensors",
10
+ "transformer.blocks.0.ffn.experts.mlp.w1_input_scale": "model-00001-of-00031.safetensors",
11
+ "transformer.blocks.0.ffn.experts.mlp.w1_weight": "model-00001-of-00031.safetensors",
12
+ "transformer.blocks.0.ffn.experts.mlp.w1_weight_scale": "model-00001-of-00031.safetensors",
13
+ "transformer.blocks.0.ffn.experts.mlp.w2_input_scale": "model-00001-of-00031.safetensors",
14
+ "transformer.blocks.0.ffn.experts.mlp.w2_weight": "model-00001-of-00031.safetensors",
15
+ "transformer.blocks.0.ffn.experts.mlp.w2_weight_scale": "model-00001-of-00031.safetensors",
16
  "transformer.blocks.0.ffn.router.layer.weight": "model-00001-of-00031.safetensors",
17
  "transformer.blocks.0.norm_attn_norm.attn.Wqkv.input_scale": "model-00001-of-00031.safetensors",
18
  "transformer.blocks.0.norm_attn_norm.attn.Wqkv.weight": "model-00001-of-00031.safetensors",
 
23
  "transformer.blocks.0.norm_attn_norm.attn.out_proj.weight_scale": "model-00001-of-00031.safetensors",
24
  "transformer.blocks.0.norm_attn_norm.norm_1.weight": "model-00001-of-00031.safetensors",
25
  "transformer.blocks.0.norm_attn_norm.norm_2.weight": "model-00001-of-00031.safetensors",
26
+ "transformer.blocks.1.ffn.experts.mlp.v1_input_scale": "model-00002-of-00031.safetensors",
27
+ "transformer.blocks.1.ffn.experts.mlp.v1_weight": "model-00002-of-00031.safetensors",
28
+ "transformer.blocks.1.ffn.experts.mlp.v1_weight_scale": "model-00002-of-00031.safetensors",
29
+ "transformer.blocks.1.ffn.experts.mlp.w1_input_scale": "model-00002-of-00031.safetensors",
30
+ "transformer.blocks.1.ffn.experts.mlp.w1_weight": "model-00002-of-00031.safetensors",
31
+ "transformer.blocks.1.ffn.experts.mlp.w1_weight_scale": "model-00002-of-00031.safetensors",
32
+ "transformer.blocks.1.ffn.experts.mlp.w2_input_scale": "model-00002-of-00031.safetensors",
33
+ "transformer.blocks.1.ffn.experts.mlp.w2_weight": "model-00002-of-00031.safetensors",
34
+ "transformer.blocks.1.ffn.experts.mlp.w2_weight_scale": "model-00002-of-00031.safetensors",
35
  "transformer.blocks.1.ffn.router.layer.weight": "model-00001-of-00031.safetensors",
36
  "transformer.blocks.1.norm_attn_norm.attn.Wqkv.input_scale": "model-00001-of-00031.safetensors",
37
  "transformer.blocks.1.norm_attn_norm.attn.Wqkv.weight": "model-00001-of-00031.safetensors",
 
42
  "transformer.blocks.1.norm_attn_norm.attn.out_proj.weight_scale": "model-00001-of-00031.safetensors",
43
  "transformer.blocks.1.norm_attn_norm.norm_1.weight": "model-00001-of-00031.safetensors",
44
  "transformer.blocks.1.norm_attn_norm.norm_2.weight": "model-00001-of-00031.safetensors",
45
+ "transformer.blocks.10.ffn.experts.mlp.v1_input_scale": "model-00009-of-00031.safetensors",
46
+ "transformer.blocks.10.ffn.experts.mlp.v1_weight": "model-00009-of-00031.safetensors",
47
+ "transformer.blocks.10.ffn.experts.mlp.v1_weight_scale": "model-00009-of-00031.safetensors",
48
+ "transformer.blocks.10.ffn.experts.mlp.w1_input_scale": "model-00008-of-00031.safetensors",
49
+ "transformer.blocks.10.ffn.experts.mlp.w1_weight": "model-00008-of-00031.safetensors",
50
+ "transformer.blocks.10.ffn.experts.mlp.w1_weight_scale": "model-00008-of-00031.safetensors",
51
+ "transformer.blocks.10.ffn.experts.mlp.w2_input_scale": "model-00009-of-00031.safetensors",
52
+ "transformer.blocks.10.ffn.experts.mlp.w2_weight": "model-00009-of-00031.safetensors",
53
+ "transformer.blocks.10.ffn.experts.mlp.w2_weight_scale": "model-00009-of-00031.safetensors",
54
  "transformer.blocks.10.ffn.router.layer.weight": "model-00008-of-00031.safetensors",
55
  "transformer.blocks.10.norm_attn_norm.attn.Wqkv.input_scale": "model-00008-of-00031.safetensors",
56
  "transformer.blocks.10.norm_attn_norm.attn.Wqkv.weight": "model-00008-of-00031.safetensors",
 
61
  "transformer.blocks.10.norm_attn_norm.attn.out_proj.weight_scale": "model-00008-of-00031.safetensors",
62
  "transformer.blocks.10.norm_attn_norm.norm_1.weight": "model-00008-of-00031.safetensors",
63
  "transformer.blocks.10.norm_attn_norm.norm_2.weight": "model-00008-of-00031.safetensors",
64
+ "transformer.blocks.11.ffn.experts.mlp.v1_input_scale": "model-00009-of-00031.safetensors",
65
+ "transformer.blocks.11.ffn.experts.mlp.v1_weight": "model-00009-of-00031.safetensors",
66
+ "transformer.blocks.11.ffn.experts.mlp.v1_weight_scale": "model-00009-of-00031.safetensors",
67
+ "transformer.blocks.11.ffn.experts.mlp.w1_input_scale": "model-00009-of-00031.safetensors",
68
+ "transformer.blocks.11.ffn.experts.mlp.w1_weight": "model-00009-of-00031.safetensors",
69
+ "transformer.blocks.11.ffn.experts.mlp.w1_weight_scale": "model-00009-of-00031.safetensors",
70
+ "transformer.blocks.11.ffn.experts.mlp.w2_input_scale": "model-00010-of-00031.safetensors",
71
+ "transformer.blocks.11.ffn.experts.mlp.w2_weight": "model-00010-of-00031.safetensors",
72
+ "transformer.blocks.11.ffn.experts.mlp.w2_weight_scale": "model-00010-of-00031.safetensors",
73
  "transformer.blocks.11.ffn.router.layer.weight": "model-00009-of-00031.safetensors",
74
  "transformer.blocks.11.norm_attn_norm.attn.Wqkv.input_scale": "model-00009-of-00031.safetensors",
75
  "transformer.blocks.11.norm_attn_norm.attn.Wqkv.weight": "model-00009-of-00031.safetensors",
 
80
  "transformer.blocks.11.norm_attn_norm.attn.out_proj.weight_scale": "model-00009-of-00031.safetensors",
81
  "transformer.blocks.11.norm_attn_norm.norm_1.weight": "model-00009-of-00031.safetensors",
82
  "transformer.blocks.11.norm_attn_norm.norm_2.weight": "model-00009-of-00031.safetensors",
83
+ "transformer.blocks.12.ffn.experts.mlp.v1_input_scale": "model-00010-of-00031.safetensors",
84
+ "transformer.blocks.12.ffn.experts.mlp.v1_weight": "model-00010-of-00031.safetensors",
85
+ "transformer.blocks.12.ffn.experts.mlp.v1_weight_scale": "model-00010-of-00031.safetensors",
86
+ "transformer.blocks.12.ffn.experts.mlp.w1_input_scale": "model-00010-of-00031.safetensors",
87
+ "transformer.blocks.12.ffn.experts.mlp.w1_weight": "model-00010-of-00031.safetensors",
88
+ "transformer.blocks.12.ffn.experts.mlp.w1_weight_scale": "model-00010-of-00031.safetensors",
89
+ "transformer.blocks.12.ffn.experts.mlp.w2_input_scale": "model-00010-of-00031.safetensors",
90
+ "transformer.blocks.12.ffn.experts.mlp.w2_weight": "model-00010-of-00031.safetensors",
91
+ "transformer.blocks.12.ffn.experts.mlp.w2_weight_scale": "model-00010-of-00031.safetensors",
92
  "transformer.blocks.12.ffn.router.layer.weight": "model-00010-of-00031.safetensors",
93
  "transformer.blocks.12.norm_attn_norm.attn.Wqkv.input_scale": "model-00010-of-00031.safetensors",
94
  "transformer.blocks.12.norm_attn_norm.attn.Wqkv.weight": "model-00010-of-00031.safetensors",
 
99
  "transformer.blocks.12.norm_attn_norm.attn.out_proj.weight_scale": "model-00010-of-00031.safetensors",
100
  "transformer.blocks.12.norm_attn_norm.norm_1.weight": "model-00010-of-00031.safetensors",
101
  "transformer.blocks.12.norm_attn_norm.norm_2.weight": "model-00010-of-00031.safetensors",
102
+ "transformer.blocks.13.ffn.experts.mlp.v1_input_scale": "model-00011-of-00031.safetensors",
103
+ "transformer.blocks.13.ffn.experts.mlp.v1_weight": "model-00011-of-00031.safetensors",
104
+ "transformer.blocks.13.ffn.experts.mlp.v1_weight_scale": "model-00011-of-00031.safetensors",
105
+ "transformer.blocks.13.ffn.experts.mlp.w1_input_scale": "model-00011-of-00031.safetensors",
106
+ "transformer.blocks.13.ffn.experts.mlp.w1_weight": "model-00011-of-00031.safetensors",
107
+ "transformer.blocks.13.ffn.experts.mlp.w1_weight_scale": "model-00011-of-00031.safetensors",
108
+ "transformer.blocks.13.ffn.experts.mlp.w2_input_scale": "model-00011-of-00031.safetensors",
109
+ "transformer.blocks.13.ffn.experts.mlp.w2_weight": "model-00011-of-00031.safetensors",
110
+ "transformer.blocks.13.ffn.experts.mlp.w2_weight_scale": "model-00011-of-00031.safetensors",
111
  "transformer.blocks.13.ffn.router.layer.weight": "model-00010-of-00031.safetensors",
112
  "transformer.blocks.13.norm_attn_norm.attn.Wqkv.input_scale": "model-00010-of-00031.safetensors",
113
  "transformer.blocks.13.norm_attn_norm.attn.Wqkv.weight": "model-00010-of-00031.safetensors",
 
118
  "transformer.blocks.13.norm_attn_norm.attn.out_proj.weight_scale": "model-00010-of-00031.safetensors",
119
  "transformer.blocks.13.norm_attn_norm.norm_1.weight": "model-00010-of-00031.safetensors",
120
  "transformer.blocks.13.norm_attn_norm.norm_2.weight": "model-00010-of-00031.safetensors",
121
+ "transformer.blocks.14.ffn.experts.mlp.v1_input_scale": "model-00012-of-00031.safetensors",
122
+ "transformer.blocks.14.ffn.experts.mlp.v1_weight": "model-00012-of-00031.safetensors",
123
+ "transformer.blocks.14.ffn.experts.mlp.v1_weight_scale": "model-00012-of-00031.safetensors",
124
+ "transformer.blocks.14.ffn.experts.mlp.w1_input_scale": "model-00011-of-00031.safetensors",
125
+ "transformer.blocks.14.ffn.experts.mlp.w1_weight": "model-00011-of-00031.safetensors",
126
+ "transformer.blocks.14.ffn.experts.mlp.w1_weight_scale": "model-00011-of-00031.safetensors",
127
+ "transformer.blocks.14.ffn.experts.mlp.w2_input_scale": "model-00012-of-00031.safetensors",
128
+ "transformer.blocks.14.ffn.experts.mlp.w2_weight": "model-00012-of-00031.safetensors",
129
+ "transformer.blocks.14.ffn.experts.mlp.w2_weight_scale": "model-00012-of-00031.safetensors",
130
  "transformer.blocks.14.ffn.router.layer.weight": "model-00011-of-00031.safetensors",
131
  "transformer.blocks.14.norm_attn_norm.attn.Wqkv.input_scale": "model-00011-of-00031.safetensors",
132
  "transformer.blocks.14.norm_attn_norm.attn.Wqkv.weight": "model-00011-of-00031.safetensors",
 
137
  "transformer.blocks.14.norm_attn_norm.attn.out_proj.weight_scale": "model-00011-of-00031.safetensors",
138
  "transformer.blocks.14.norm_attn_norm.norm_1.weight": "model-00011-of-00031.safetensors",
139
  "transformer.blocks.14.norm_attn_norm.norm_2.weight": "model-00011-of-00031.safetensors",
140
+ "transformer.blocks.15.ffn.experts.mlp.v1_input_scale": "model-00012-of-00031.safetensors",
141
+ "transformer.blocks.15.ffn.experts.mlp.v1_weight": "model-00012-of-00031.safetensors",
142
+ "transformer.blocks.15.ffn.experts.mlp.v1_weight_scale": "model-00012-of-00031.safetensors",
143
+ "transformer.blocks.15.ffn.experts.mlp.w1_input_scale": "model-00012-of-00031.safetensors",
144
+ "transformer.blocks.15.ffn.experts.mlp.w1_weight": "model-00012-of-00031.safetensors",
145
+ "transformer.blocks.15.ffn.experts.mlp.w1_weight_scale": "model-00012-of-00031.safetensors",
146
+ "transformer.blocks.15.ffn.experts.mlp.w2_input_scale": "model-00013-of-00031.safetensors",
147
+ "transformer.blocks.15.ffn.experts.mlp.w2_weight": "model-00013-of-00031.safetensors",
148
+ "transformer.blocks.15.ffn.experts.mlp.w2_weight_scale": "model-00013-of-00031.safetensors",
149
  "transformer.blocks.15.ffn.router.layer.weight": "model-00012-of-00031.safetensors",
150
  "transformer.blocks.15.norm_attn_norm.attn.Wqkv.input_scale": "model-00012-of-00031.safetensors",
151
  "transformer.blocks.15.norm_attn_norm.attn.Wqkv.weight": "model-00012-of-00031.safetensors",
 
156
  "transformer.blocks.15.norm_attn_norm.attn.out_proj.weight_scale": "model-00012-of-00031.safetensors",
157
  "transformer.blocks.15.norm_attn_norm.norm_1.weight": "model-00012-of-00031.safetensors",
158
  "transformer.blocks.15.norm_attn_norm.norm_2.weight": "model-00012-of-00031.safetensors",
159
+ "transformer.blocks.16.ffn.experts.mlp.v1_input_scale": "model-00013-of-00031.safetensors",
160
+ "transformer.blocks.16.ffn.experts.mlp.v1_weight": "model-00013-of-00031.safetensors",
161
+ "transformer.blocks.16.ffn.experts.mlp.v1_weight_scale": "model-00013-of-00031.safetensors",
162
+ "transformer.blocks.16.ffn.experts.mlp.w1_input_scale": "model-00013-of-00031.safetensors",
163
+ "transformer.blocks.16.ffn.experts.mlp.w1_weight": "model-00013-of-00031.safetensors",
164
+ "transformer.blocks.16.ffn.experts.mlp.w1_weight_scale": "model-00013-of-00031.safetensors",
165
+ "transformer.blocks.16.ffn.experts.mlp.w2_input_scale": "model-00013-of-00031.safetensors",
166
+ "transformer.blocks.16.ffn.experts.mlp.w2_weight": "model-00013-of-00031.safetensors",
167
+ "transformer.blocks.16.ffn.experts.mlp.w2_weight_scale": "model-00013-of-00031.safetensors",
168
  "transformer.blocks.16.ffn.router.layer.weight": "model-00013-of-00031.safetensors",
169
  "transformer.blocks.16.norm_attn_norm.attn.Wqkv.input_scale": "model-00013-of-00031.safetensors",
170
  "transformer.blocks.16.norm_attn_norm.attn.Wqkv.weight": "model-00013-of-00031.safetensors",
 
175
  "transformer.blocks.16.norm_attn_norm.attn.out_proj.weight_scale": "model-00013-of-00031.safetensors",
176
  "transformer.blocks.16.norm_attn_norm.norm_1.weight": "model-00013-of-00031.safetensors",
177
  "transformer.blocks.16.norm_attn_norm.norm_2.weight": "model-00013-of-00031.safetensors",
178
+ "transformer.blocks.17.ffn.experts.mlp.v1_input_scale": "model-00014-of-00031.safetensors",
179
+ "transformer.blocks.17.ffn.experts.mlp.v1_weight": "model-00014-of-00031.safetensors",
180
+ "transformer.blocks.17.ffn.experts.mlp.v1_weight_scale": "model-00014-of-00031.safetensors",
181
+ "transformer.blocks.17.ffn.experts.mlp.w1_input_scale": "model-00014-of-00031.safetensors",
182
+ "transformer.blocks.17.ffn.experts.mlp.w1_weight": "model-00014-of-00031.safetensors",
183
+ "transformer.blocks.17.ffn.experts.mlp.w1_weight_scale": "model-00014-of-00031.safetensors",
184
+ "transformer.blocks.17.ffn.experts.mlp.w2_input_scale": "model-00014-of-00031.safetensors",
185
+ "transformer.blocks.17.ffn.experts.mlp.w2_weight": "model-00014-of-00031.safetensors",
186
+ "transformer.blocks.17.ffn.experts.mlp.w2_weight_scale": "model-00014-of-00031.safetensors",
187
  "transformer.blocks.17.ffn.router.layer.weight": "model-00013-of-00031.safetensors",
188
  "transformer.blocks.17.norm_attn_norm.attn.Wqkv.input_scale": "model-00013-of-00031.safetensors",
189
  "transformer.blocks.17.norm_attn_norm.attn.Wqkv.weight": "model-00013-of-00031.safetensors",
 
194
  "transformer.blocks.17.norm_attn_norm.attn.out_proj.weight_scale": "model-00013-of-00031.safetensors",
195
  "transformer.blocks.17.norm_attn_norm.norm_1.weight": "model-00013-of-00031.safetensors",
196
  "transformer.blocks.17.norm_attn_norm.norm_2.weight": "model-00013-of-00031.safetensors",
197
+ "transformer.blocks.18.ffn.experts.mlp.v1_input_scale": "model-00015-of-00031.safetensors",
198
+ "transformer.blocks.18.ffn.experts.mlp.v1_weight": "model-00015-of-00031.safetensors",
199
+ "transformer.blocks.18.ffn.experts.mlp.v1_weight_scale": "model-00015-of-00031.safetensors",
200
+ "transformer.blocks.18.ffn.experts.mlp.w1_input_scale": "model-00014-of-00031.safetensors",
201
+ "transformer.blocks.18.ffn.experts.mlp.w1_weight": "model-00014-of-00031.safetensors",
202
+ "transformer.blocks.18.ffn.experts.mlp.w1_weight_scale": "model-00014-of-00031.safetensors",
203
+ "transformer.blocks.18.ffn.experts.mlp.w2_input_scale": "model-00015-of-00031.safetensors",
204
+ "transformer.blocks.18.ffn.experts.mlp.w2_weight": "model-00015-of-00031.safetensors",
205
+ "transformer.blocks.18.ffn.experts.mlp.w2_weight_scale": "model-00015-of-00031.safetensors",
206
  "transformer.blocks.18.ffn.router.layer.weight": "model-00014-of-00031.safetensors",
207
  "transformer.blocks.18.norm_attn_norm.attn.Wqkv.input_scale": "model-00014-of-00031.safetensors",
208
  "transformer.blocks.18.norm_attn_norm.attn.Wqkv.weight": "model-00014-of-00031.safetensors",
 
213
  "transformer.blocks.18.norm_attn_norm.attn.out_proj.weight_scale": "model-00014-of-00031.safetensors",
214
  "transformer.blocks.18.norm_attn_norm.norm_1.weight": "model-00014-of-00031.safetensors",
215
  "transformer.blocks.18.norm_attn_norm.norm_2.weight": "model-00014-of-00031.safetensors",
216
+ "transformer.blocks.19.ffn.experts.mlp.v1_input_scale": "model-00015-of-00031.safetensors",
217
+ "transformer.blocks.19.ffn.experts.mlp.v1_weight": "model-00015-of-00031.safetensors",
218
+ "transformer.blocks.19.ffn.experts.mlp.v1_weight_scale": "model-00015-of-00031.safetensors",
219
+ "transformer.blocks.19.ffn.experts.mlp.w1_input_scale": "model-00015-of-00031.safetensors",
220
+ "transformer.blocks.19.ffn.experts.mlp.w1_weight": "model-00015-of-00031.safetensors",
221
+ "transformer.blocks.19.ffn.experts.mlp.w1_weight_scale": "model-00015-of-00031.safetensors",
222
+ "transformer.blocks.19.ffn.experts.mlp.w2_input_scale": "model-00016-of-00031.safetensors",
223
+ "transformer.blocks.19.ffn.experts.mlp.w2_weight": "model-00016-of-00031.safetensors",
224
+ "transformer.blocks.19.ffn.experts.mlp.w2_weight_scale": "model-00016-of-00031.safetensors",
225
  "transformer.blocks.19.ffn.router.layer.weight": "model-00015-of-00031.safetensors",
226
  "transformer.blocks.19.norm_attn_norm.attn.Wqkv.input_scale": "model-00015-of-00031.safetensors",
227
  "transformer.blocks.19.norm_attn_norm.attn.Wqkv.weight": "model-00015-of-00031.safetensors",
 
232
  "transformer.blocks.19.norm_attn_norm.attn.out_proj.weight_scale": "model-00015-of-00031.safetensors",
233
  "transformer.blocks.19.norm_attn_norm.norm_1.weight": "model-00015-of-00031.safetensors",
234
  "transformer.blocks.19.norm_attn_norm.norm_2.weight": "model-00015-of-00031.safetensors",
235
+ "transformer.blocks.2.ffn.experts.mlp.v1_input_scale": "model-00003-of-00031.safetensors",
236
+ "transformer.blocks.2.ffn.experts.mlp.v1_weight": "model-00003-of-00031.safetensors",
237
+ "transformer.blocks.2.ffn.experts.mlp.v1_weight_scale": "model-00003-of-00031.safetensors",
238
+ "transformer.blocks.2.ffn.experts.mlp.w1_input_scale": "model-00002-of-00031.safetensors",
239
+ "transformer.blocks.2.ffn.experts.mlp.w1_weight": "model-00002-of-00031.safetensors",
240
+ "transformer.blocks.2.ffn.experts.mlp.w1_weight_scale": "model-00002-of-00031.safetensors",
241
+ "transformer.blocks.2.ffn.experts.mlp.w2_input_scale": "model-00003-of-00031.safetensors",
242
+ "transformer.blocks.2.ffn.experts.mlp.w2_weight": "model-00003-of-00031.safetensors",
243
+ "transformer.blocks.2.ffn.experts.mlp.w2_weight_scale": "model-00003-of-00031.safetensors",
244
  "transformer.blocks.2.ffn.router.layer.weight": "model-00002-of-00031.safetensors",
245
  "transformer.blocks.2.norm_attn_norm.attn.Wqkv.input_scale": "model-00002-of-00031.safetensors",
246
  "transformer.blocks.2.norm_attn_norm.attn.Wqkv.weight": "model-00002-of-00031.safetensors",
 
251
  "transformer.blocks.2.norm_attn_norm.attn.out_proj.weight_scale": "model-00002-of-00031.safetensors",
252
  "transformer.blocks.2.norm_attn_norm.norm_1.weight": "model-00002-of-00031.safetensors",
253
  "transformer.blocks.2.norm_attn_norm.norm_2.weight": "model-00002-of-00031.safetensors",
254
+ "transformer.blocks.20.ffn.experts.mlp.v1_input_scale": "model-00016-of-00031.safetensors",
255
+ "transformer.blocks.20.ffn.experts.mlp.v1_weight": "model-00016-of-00031.safetensors",
256
+ "transformer.blocks.20.ffn.experts.mlp.v1_weight_scale": "model-00016-of-00031.safetensors",
257
+ "transformer.blocks.20.ffn.experts.mlp.w1_input_scale": "model-00016-of-00031.safetensors",
258
+ "transformer.blocks.20.ffn.experts.mlp.w1_weight": "model-00016-of-00031.safetensors",
259
+ "transformer.blocks.20.ffn.experts.mlp.w1_weight_scale": "model-00016-of-00031.safetensors",
260
+ "transformer.blocks.20.ffn.experts.mlp.w2_input_scale": "model-00016-of-00031.safetensors",
261
+ "transformer.blocks.20.ffn.experts.mlp.w2_weight": "model-00016-of-00031.safetensors",
262
+ "transformer.blocks.20.ffn.experts.mlp.w2_weight_scale": "model-00016-of-00031.safetensors",
263
  "transformer.blocks.20.ffn.router.layer.weight": "model-00016-of-00031.safetensors",
264
  "transformer.blocks.20.norm_attn_norm.attn.Wqkv.input_scale": "model-00016-of-00031.safetensors",
265
  "transformer.blocks.20.norm_attn_norm.attn.Wqkv.weight": "model-00016-of-00031.safetensors",
 
270
  "transformer.blocks.20.norm_attn_norm.attn.out_proj.weight_scale": "model-00016-of-00031.safetensors",
271
  "transformer.blocks.20.norm_attn_norm.norm_1.weight": "model-00016-of-00031.safetensors",
272
  "transformer.blocks.20.norm_attn_norm.norm_2.weight": "model-00016-of-00031.safetensors",
273
+ "transformer.blocks.21.ffn.experts.mlp.v1_input_scale": "model-00017-of-00031.safetensors",
274
+ "transformer.blocks.21.ffn.experts.mlp.v1_weight": "model-00017-of-00031.safetensors",
275
+ "transformer.blocks.21.ffn.experts.mlp.v1_weight_scale": "model-00017-of-00031.safetensors",
276
+ "transformer.blocks.21.ffn.experts.mlp.w1_input_scale": "model-00017-of-00031.safetensors",
277
+ "transformer.blocks.21.ffn.experts.mlp.w1_weight": "model-00017-of-00031.safetensors",
278
+ "transformer.blocks.21.ffn.experts.mlp.w1_weight_scale": "model-00017-of-00031.safetensors",
279
+ "transformer.blocks.21.ffn.experts.mlp.w2_input_scale": "model-00017-of-00031.safetensors",
280
+ "transformer.blocks.21.ffn.experts.mlp.w2_weight": "model-00017-of-00031.safetensors",
281
+ "transformer.blocks.21.ffn.experts.mlp.w2_weight_scale": "model-00017-of-00031.safetensors",
282
  "transformer.blocks.21.ffn.router.layer.weight": "model-00016-of-00031.safetensors",
283
  "transformer.blocks.21.norm_attn_norm.attn.Wqkv.input_scale": "model-00016-of-00031.safetensors",
284
  "transformer.blocks.21.norm_attn_norm.attn.Wqkv.weight": "model-00016-of-00031.safetensors",
 
289
  "transformer.blocks.21.norm_attn_norm.attn.out_proj.weight_scale": "model-00016-of-00031.safetensors",
290
  "transformer.blocks.21.norm_attn_norm.norm_1.weight": "model-00016-of-00031.safetensors",
291
  "transformer.blocks.21.norm_attn_norm.norm_2.weight": "model-00016-of-00031.safetensors",
292
+ "transformer.blocks.22.ffn.experts.mlp.v1_input_scale": "model-00018-of-00031.safetensors",
293
+ "transformer.blocks.22.ffn.experts.mlp.v1_weight": "model-00018-of-00031.safetensors",
294
+ "transformer.blocks.22.ffn.experts.mlp.v1_weight_scale": "model-00018-of-00031.safetensors",
295
+ "transformer.blocks.22.ffn.experts.mlp.w1_input_scale": "model-00017-of-00031.safetensors",
296
+ "transformer.blocks.22.ffn.experts.mlp.w1_weight": "model-00017-of-00031.safetensors",
297
+ "transformer.blocks.22.ffn.experts.mlp.w1_weight_scale": "model-00017-of-00031.safetensors",
298
+ "transformer.blocks.22.ffn.experts.mlp.w2_input_scale": "model-00018-of-00031.safetensors",
299
+ "transformer.blocks.22.ffn.experts.mlp.w2_weight": "model-00018-of-00031.safetensors",
300
+ "transformer.blocks.22.ffn.experts.mlp.w2_weight_scale": "model-00018-of-00031.safetensors",
301
  "transformer.blocks.22.ffn.router.layer.weight": "model-00017-of-00031.safetensors",
302
  "transformer.blocks.22.norm_attn_norm.attn.Wqkv.input_scale": "model-00017-of-00031.safetensors",
303
  "transformer.blocks.22.norm_attn_norm.attn.Wqkv.weight": "model-00017-of-00031.safetensors",
 
308
  "transformer.blocks.22.norm_attn_norm.attn.out_proj.weight_scale": "model-00017-of-00031.safetensors",
309
  "transformer.blocks.22.norm_attn_norm.norm_1.weight": "model-00017-of-00031.safetensors",
310
  "transformer.blocks.22.norm_attn_norm.norm_2.weight": "model-00017-of-00031.safetensors",
311
+ "transformer.blocks.23.ffn.experts.mlp.v1_input_scale": "model-00018-of-00031.safetensors",
312
+ "transformer.blocks.23.ffn.experts.mlp.v1_weight": "model-00018-of-00031.safetensors",
313
+ "transformer.blocks.23.ffn.experts.mlp.v1_weight_scale": "model-00018-of-00031.safetensors",
314
+ "transformer.blocks.23.ffn.experts.mlp.w1_input_scale": "model-00018-of-00031.safetensors",
315
+ "transformer.blocks.23.ffn.experts.mlp.w1_weight": "model-00018-of-00031.safetensors",
316
+ "transformer.blocks.23.ffn.experts.mlp.w1_weight_scale": "model-00018-of-00031.safetensors",
317
+ "transformer.blocks.23.ffn.experts.mlp.w2_input_scale": "model-00019-of-00031.safetensors",
318
+ "transformer.blocks.23.ffn.experts.mlp.w2_weight": "model-00019-of-00031.safetensors",
319
+ "transformer.blocks.23.ffn.experts.mlp.w2_weight_scale": "model-00019-of-00031.safetensors",
320
  "transformer.blocks.23.ffn.router.layer.weight": "model-00018-of-00031.safetensors",
321
  "transformer.blocks.23.norm_attn_norm.attn.Wqkv.input_scale": "model-00018-of-00031.safetensors",
322
  "transformer.blocks.23.norm_attn_norm.attn.Wqkv.weight": "model-00018-of-00031.safetensors",
 
327
  "transformer.blocks.23.norm_attn_norm.attn.out_proj.weight_scale": "model-00018-of-00031.safetensors",
328
  "transformer.blocks.23.norm_attn_norm.norm_1.weight": "model-00018-of-00031.safetensors",
329
  "transformer.blocks.23.norm_attn_norm.norm_2.weight": "model-00018-of-00031.safetensors",
330
+ "transformer.blocks.24.ffn.experts.mlp.v1_input_scale": "model-00019-of-00031.safetensors",
331
+ "transformer.blocks.24.ffn.experts.mlp.v1_weight": "model-00019-of-00031.safetensors",
332
+ "transformer.blocks.24.ffn.experts.mlp.v1_weight_scale": "model-00019-of-00031.safetensors",
333
+ "transformer.blocks.24.ffn.experts.mlp.w1_input_scale": "model-00019-of-00031.safetensors",
334
+ "transformer.blocks.24.ffn.experts.mlp.w1_weight": "model-00019-of-00031.safetensors",
335
+ "transformer.blocks.24.ffn.experts.mlp.w1_weight_scale": "model-00019-of-00031.safetensors",
336
+ "transformer.blocks.24.ffn.experts.mlp.w2_input_scale": "model-00019-of-00031.safetensors",
337
+ "transformer.blocks.24.ffn.experts.mlp.w2_weight": "model-00019-of-00031.safetensors",
338
+ "transformer.blocks.24.ffn.experts.mlp.w2_weight_scale": "model-00019-of-00031.safetensors",
339
  "transformer.blocks.24.ffn.router.layer.weight": "model-00019-of-00031.safetensors",
340
  "transformer.blocks.24.norm_attn_norm.attn.Wqkv.input_scale": "model-00019-of-00031.safetensors",
341
  "transformer.blocks.24.norm_attn_norm.attn.Wqkv.weight": "model-00019-of-00031.safetensors",
 
346
  "transformer.blocks.24.norm_attn_norm.attn.out_proj.weight_scale": "model-00019-of-00031.safetensors",
347
  "transformer.blocks.24.norm_attn_norm.norm_1.weight": "model-00019-of-00031.safetensors",
348
  "transformer.blocks.24.norm_attn_norm.norm_2.weight": "model-00019-of-00031.safetensors",
349
+ "transformer.blocks.25.ffn.experts.mlp.v1_input_scale": "model-00020-of-00031.safetensors",
350
+ "transformer.blocks.25.ffn.experts.mlp.v1_weight": "model-00020-of-00031.safetensors",
351
+ "transformer.blocks.25.ffn.experts.mlp.v1_weight_scale": "model-00020-of-00031.safetensors",
352
+ "transformer.blocks.25.ffn.experts.mlp.w1_input_scale": "model-00020-of-00031.safetensors",
353
+ "transformer.blocks.25.ffn.experts.mlp.w1_weight": "model-00020-of-00031.safetensors",
354
+ "transformer.blocks.25.ffn.experts.mlp.w1_weight_scale": "model-00020-of-00031.safetensors",
355
+ "transformer.blocks.25.ffn.experts.mlp.w2_input_scale": "model-00020-of-00031.safetensors",
356
+ "transformer.blocks.25.ffn.experts.mlp.w2_weight": "model-00020-of-00031.safetensors",
357
+ "transformer.blocks.25.ffn.experts.mlp.w2_weight_scale": "model-00020-of-00031.safetensors",
358
  "transformer.blocks.25.ffn.router.layer.weight": "model-00019-of-00031.safetensors",
359
  "transformer.blocks.25.norm_attn_norm.attn.Wqkv.input_scale": "model-00019-of-00031.safetensors",
360
  "transformer.blocks.25.norm_attn_norm.attn.Wqkv.weight": "model-00019-of-00031.safetensors",
 
365
  "transformer.blocks.25.norm_attn_norm.attn.out_proj.weight_scale": "model-00019-of-00031.safetensors",
366
  "transformer.blocks.25.norm_attn_norm.norm_1.weight": "model-00019-of-00031.safetensors",
367
  "transformer.blocks.25.norm_attn_norm.norm_2.weight": "model-00019-of-00031.safetensors",
368
+ "transformer.blocks.26.ffn.experts.mlp.v1_input_scale": "model-00021-of-00031.safetensors",
369
+ "transformer.blocks.26.ffn.experts.mlp.v1_weight": "model-00021-of-00031.safetensors",
370
+ "transformer.blocks.26.ffn.experts.mlp.v1_weight_scale": "model-00021-of-00031.safetensors",
371
+ "transformer.blocks.26.ffn.experts.mlp.w1_input_scale": "model-00020-of-00031.safetensors",
372
+ "transformer.blocks.26.ffn.experts.mlp.w1_weight": "model-00020-of-00031.safetensors",
373
+ "transformer.blocks.26.ffn.experts.mlp.w1_weight_scale": "model-00020-of-00031.safetensors",
374
+ "transformer.blocks.26.ffn.experts.mlp.w2_input_scale": "model-00021-of-00031.safetensors",
375
+ "transformer.blocks.26.ffn.experts.mlp.w2_weight": "model-00021-of-00031.safetensors",
376
+ "transformer.blocks.26.ffn.experts.mlp.w2_weight_scale": "model-00021-of-00031.safetensors",
377
  "transformer.blocks.26.ffn.router.layer.weight": "model-00020-of-00031.safetensors",
378
  "transformer.blocks.26.norm_attn_norm.attn.Wqkv.input_scale": "model-00020-of-00031.safetensors",
379
  "transformer.blocks.26.norm_attn_norm.attn.Wqkv.weight": "model-00020-of-00031.safetensors",
 
384
  "transformer.blocks.26.norm_attn_norm.attn.out_proj.weight_scale": "model-00020-of-00031.safetensors",
385
  "transformer.blocks.26.norm_attn_norm.norm_1.weight": "model-00020-of-00031.safetensors",
386
  "transformer.blocks.26.norm_attn_norm.norm_2.weight": "model-00020-of-00031.safetensors",
387
+ "transformer.blocks.27.ffn.experts.mlp.v1_input_scale": "model-00021-of-00031.safetensors",
388
+ "transformer.blocks.27.ffn.experts.mlp.v1_weight": "model-00021-of-00031.safetensors",
389
+ "transformer.blocks.27.ffn.experts.mlp.v1_weight_scale": "model-00021-of-00031.safetensors",
390
+ "transformer.blocks.27.ffn.experts.mlp.w1_input_scale": "model-00021-of-00031.safetensors",
391
+ "transformer.blocks.27.ffn.experts.mlp.w1_weight": "model-00021-of-00031.safetensors",
392
+ "transformer.blocks.27.ffn.experts.mlp.w1_weight_scale": "model-00021-of-00031.safetensors",
393
+ "transformer.blocks.27.ffn.experts.mlp.w2_input_scale": "model-00022-of-00031.safetensors",
394
+ "transformer.blocks.27.ffn.experts.mlp.w2_weight": "model-00022-of-00031.safetensors",
395
+ "transformer.blocks.27.ffn.experts.mlp.w2_weight_scale": "model-00022-of-00031.safetensors",
396
  "transformer.blocks.27.ffn.router.layer.weight": "model-00021-of-00031.safetensors",
397
  "transformer.blocks.27.norm_attn_norm.attn.Wqkv.input_scale": "model-00021-of-00031.safetensors",
398
  "transformer.blocks.27.norm_attn_norm.attn.Wqkv.weight": "model-00021-of-00031.safetensors",
 
403
  "transformer.blocks.27.norm_attn_norm.attn.out_proj.weight_scale": "model-00021-of-00031.safetensors",
404
  "transformer.blocks.27.norm_attn_norm.norm_1.weight": "model-00021-of-00031.safetensors",
405
  "transformer.blocks.27.norm_attn_norm.norm_2.weight": "model-00021-of-00031.safetensors",
406
+ "transformer.blocks.28.ffn.experts.mlp.v1_input_scale": "model-00022-of-00031.safetensors",
407
+ "transformer.blocks.28.ffn.experts.mlp.v1_weight": "model-00022-of-00031.safetensors",
408
+ "transformer.blocks.28.ffn.experts.mlp.v1_weight_scale": "model-00022-of-00031.safetensors",
409
+ "transformer.blocks.28.ffn.experts.mlp.w1_input_scale": "model-00022-of-00031.safetensors",
410
+ "transformer.blocks.28.ffn.experts.mlp.w1_weight": "model-00022-of-00031.safetensors",
411
+ "transformer.blocks.28.ffn.experts.mlp.w1_weight_scale": "model-00022-of-00031.safetensors",
412
+ "transformer.blocks.28.ffn.experts.mlp.w2_input_scale": "model-00022-of-00031.safetensors",
413
+ "transformer.blocks.28.ffn.experts.mlp.w2_weight": "model-00022-of-00031.safetensors",
414
+ "transformer.blocks.28.ffn.experts.mlp.w2_weight_scale": "model-00022-of-00031.safetensors",
415
  "transformer.blocks.28.ffn.router.layer.weight": "model-00022-of-00031.safetensors",
416
  "transformer.blocks.28.norm_attn_norm.attn.Wqkv.input_scale": "model-00022-of-00031.safetensors",
417
  "transformer.blocks.28.norm_attn_norm.attn.Wqkv.weight": "model-00022-of-00031.safetensors",
 
422
  "transformer.blocks.28.norm_attn_norm.attn.out_proj.weight_scale": "model-00022-of-00031.safetensors",
423
  "transformer.blocks.28.norm_attn_norm.norm_1.weight": "model-00022-of-00031.safetensors",
424
  "transformer.blocks.28.norm_attn_norm.norm_2.weight": "model-00022-of-00031.safetensors",
425
+ "transformer.blocks.29.ffn.experts.mlp.v1_input_scale": "model-00023-of-00031.safetensors",
426
+ "transformer.blocks.29.ffn.experts.mlp.v1_weight": "model-00023-of-00031.safetensors",
427
+ "transformer.blocks.29.ffn.experts.mlp.v1_weight_scale": "model-00023-of-00031.safetensors",
428
+ "transformer.blocks.29.ffn.experts.mlp.w1_input_scale": "model-00023-of-00031.safetensors",
429
+ "transformer.blocks.29.ffn.experts.mlp.w1_weight": "model-00023-of-00031.safetensors",
430
+ "transformer.blocks.29.ffn.experts.mlp.w1_weight_scale": "model-00023-of-00031.safetensors",
431
+ "transformer.blocks.29.ffn.experts.mlp.w2_input_scale": "model-00023-of-00031.safetensors",
432
+ "transformer.blocks.29.ffn.experts.mlp.w2_weight": "model-00023-of-00031.safetensors",
433
+ "transformer.blocks.29.ffn.experts.mlp.w2_weight_scale": "model-00023-of-00031.safetensors",
434
  "transformer.blocks.29.ffn.router.layer.weight": "model-00022-of-00031.safetensors",
435
  "transformer.blocks.29.norm_attn_norm.attn.Wqkv.input_scale": "model-00022-of-00031.safetensors",
436
  "transformer.blocks.29.norm_attn_norm.attn.Wqkv.weight": "model-00022-of-00031.safetensors",
 
441
  "transformer.blocks.29.norm_attn_norm.attn.out_proj.weight_scale": "model-00022-of-00031.safetensors",
442
  "transformer.blocks.29.norm_attn_norm.norm_1.weight": "model-00022-of-00031.safetensors",
443
  "transformer.blocks.29.norm_attn_norm.norm_2.weight": "model-00022-of-00031.safetensors",
444
+ "transformer.blocks.3.ffn.experts.mlp.v1_input_scale": "model-00003-of-00031.safetensors",
445
+ "transformer.blocks.3.ffn.experts.mlp.v1_weight": "model-00003-of-00031.safetensors",
446
+ "transformer.blocks.3.ffn.experts.mlp.v1_weight_scale": "model-00003-of-00031.safetensors",
447
+ "transformer.blocks.3.ffn.experts.mlp.w1_input_scale": "model-00003-of-00031.safetensors",
448
+ "transformer.blocks.3.ffn.experts.mlp.w1_weight": "model-00003-of-00031.safetensors",
449
+ "transformer.blocks.3.ffn.experts.mlp.w1_weight_scale": "model-00003-of-00031.safetensors",
450
+ "transformer.blocks.3.ffn.experts.mlp.w2_input_scale": "model-00004-of-00031.safetensors",
451
+ "transformer.blocks.3.ffn.experts.mlp.w2_weight": "model-00004-of-00031.safetensors",
452
+ "transformer.blocks.3.ffn.experts.mlp.w2_weight_scale": "model-00004-of-00031.safetensors",
453
  "transformer.blocks.3.ffn.router.layer.weight": "model-00003-of-00031.safetensors",
454
  "transformer.blocks.3.norm_attn_norm.attn.Wqkv.input_scale": "model-00003-of-00031.safetensors",
455
  "transformer.blocks.3.norm_attn_norm.attn.Wqkv.weight": "model-00003-of-00031.safetensors",
 
460
  "transformer.blocks.3.norm_attn_norm.attn.out_proj.weight_scale": "model-00003-of-00031.safetensors",
461
  "transformer.blocks.3.norm_attn_norm.norm_1.weight": "model-00003-of-00031.safetensors",
462
  "transformer.blocks.3.norm_attn_norm.norm_2.weight": "model-00003-of-00031.safetensors",
463
+ "transformer.blocks.30.ffn.experts.mlp.v1_input_scale": "model-00024-of-00031.safetensors",
464
+ "transformer.blocks.30.ffn.experts.mlp.v1_weight": "model-00024-of-00031.safetensors",
465
+ "transformer.blocks.30.ffn.experts.mlp.v1_weight_scale": "model-00024-of-00031.safetensors",
466
+ "transformer.blocks.30.ffn.experts.mlp.w1_input_scale": "model-00023-of-00031.safetensors",
467
+ "transformer.blocks.30.ffn.experts.mlp.w1_weight": "model-00023-of-00031.safetensors",
468
+ "transformer.blocks.30.ffn.experts.mlp.w1_weight_scale": "model-00023-of-00031.safetensors",
469
+ "transformer.blocks.30.ffn.experts.mlp.w2_input_scale": "model-00024-of-00031.safetensors",
470
+ "transformer.blocks.30.ffn.experts.mlp.w2_weight": "model-00024-of-00031.safetensors",
471
+ "transformer.blocks.30.ffn.experts.mlp.w2_weight_scale": "model-00024-of-00031.safetensors",
472
  "transformer.blocks.30.ffn.router.layer.weight": "model-00023-of-00031.safetensors",
473
  "transformer.blocks.30.norm_attn_norm.attn.Wqkv.input_scale": "model-00023-of-00031.safetensors",
474
  "transformer.blocks.30.norm_attn_norm.attn.Wqkv.weight": "model-00023-of-00031.safetensors",
 
479
  "transformer.blocks.30.norm_attn_norm.attn.out_proj.weight_scale": "model-00023-of-00031.safetensors",
480
  "transformer.blocks.30.norm_attn_norm.norm_1.weight": "model-00023-of-00031.safetensors",
481
  "transformer.blocks.30.norm_attn_norm.norm_2.weight": "model-00023-of-00031.safetensors",
482
+ "transformer.blocks.31.ffn.experts.mlp.v1_input_scale": "model-00024-of-00031.safetensors",
483
+ "transformer.blocks.31.ffn.experts.mlp.v1_weight": "model-00024-of-00031.safetensors",
484
+ "transformer.blocks.31.ffn.experts.mlp.v1_weight_scale": "model-00024-of-00031.safetensors",
485
+ "transformer.blocks.31.ffn.experts.mlp.w1_input_scale": "model-00024-of-00031.safetensors",
486
+ "transformer.blocks.31.ffn.experts.mlp.w1_weight": "model-00024-of-00031.safetensors",
487
+ "transformer.blocks.31.ffn.experts.mlp.w1_weight_scale": "model-00024-of-00031.safetensors",
488
+ "transformer.blocks.31.ffn.experts.mlp.w2_input_scale": "model-00025-of-00031.safetensors",
489
+ "transformer.blocks.31.ffn.experts.mlp.w2_weight": "model-00025-of-00031.safetensors",
490
+ "transformer.blocks.31.ffn.experts.mlp.w2_weight_scale": "model-00025-of-00031.safetensors",
491
  "transformer.blocks.31.ffn.router.layer.weight": "model-00024-of-00031.safetensors",
492
  "transformer.blocks.31.norm_attn_norm.attn.Wqkv.input_scale": "model-00024-of-00031.safetensors",
493
  "transformer.blocks.31.norm_attn_norm.attn.Wqkv.weight": "model-00024-of-00031.safetensors",
 
498
  "transformer.blocks.31.norm_attn_norm.attn.out_proj.weight_scale": "model-00024-of-00031.safetensors",
499
  "transformer.blocks.31.norm_attn_norm.norm_1.weight": "model-00024-of-00031.safetensors",
500
  "transformer.blocks.31.norm_attn_norm.norm_2.weight": "model-00024-of-00031.safetensors",
501
+ "transformer.blocks.32.ffn.experts.mlp.v1_input_scale": "model-00025-of-00031.safetensors",
502
+ "transformer.blocks.32.ffn.experts.mlp.v1_weight": "model-00025-of-00031.safetensors",
503
+ "transformer.blocks.32.ffn.experts.mlp.v1_weight_scale": "model-00025-of-00031.safetensors",
504
+ "transformer.blocks.32.ffn.experts.mlp.w1_input_scale": "model-00025-of-00031.safetensors",
505
+ "transformer.blocks.32.ffn.experts.mlp.w1_weight": "model-00025-of-00031.safetensors",
506
+ "transformer.blocks.32.ffn.experts.mlp.w1_weight_scale": "model-00025-of-00031.safetensors",
507
+ "transformer.blocks.32.ffn.experts.mlp.w2_input_scale": "model-00025-of-00031.safetensors",
508
+ "transformer.blocks.32.ffn.experts.mlp.w2_weight": "model-00025-of-00031.safetensors",
509
+ "transformer.blocks.32.ffn.experts.mlp.w2_weight_scale": "model-00025-of-00031.safetensors",
510
  "transformer.blocks.32.ffn.router.layer.weight": "model-00025-of-00031.safetensors",
511
  "transformer.blocks.32.norm_attn_norm.attn.Wqkv.input_scale": "model-00025-of-00031.safetensors",
512
  "transformer.blocks.32.norm_attn_norm.attn.Wqkv.weight": "model-00025-of-00031.safetensors",
 
517
  "transformer.blocks.32.norm_attn_norm.attn.out_proj.weight_scale": "model-00025-of-00031.safetensors",
518
  "transformer.blocks.32.norm_attn_norm.norm_1.weight": "model-00025-of-00031.safetensors",
519
  "transformer.blocks.32.norm_attn_norm.norm_2.weight": "model-00025-of-00031.safetensors",
520
+ "transformer.blocks.33.ffn.experts.mlp.v1_input_scale": "model-00026-of-00031.safetensors",
521
+ "transformer.blocks.33.ffn.experts.mlp.v1_weight": "model-00026-of-00031.safetensors",
522
+ "transformer.blocks.33.ffn.experts.mlp.v1_weight_scale": "model-00026-of-00031.safetensors",
523
+ "transformer.blocks.33.ffn.experts.mlp.w1_input_scale": "model-00026-of-00031.safetensors",
524
+ "transformer.blocks.33.ffn.experts.mlp.w1_weight": "model-00026-of-00031.safetensors",
525
+ "transformer.blocks.33.ffn.experts.mlp.w1_weight_scale": "model-00026-of-00031.safetensors",
526
+ "transformer.blocks.33.ffn.experts.mlp.w2_input_scale": "model-00026-of-00031.safetensors",
527
+ "transformer.blocks.33.ffn.experts.mlp.w2_weight": "model-00026-of-00031.safetensors",
528
+ "transformer.blocks.33.ffn.experts.mlp.w2_weight_scale": "model-00026-of-00031.safetensors",
529
  "transformer.blocks.33.ffn.router.layer.weight": "model-00025-of-00031.safetensors",
530
  "transformer.blocks.33.norm_attn_norm.attn.Wqkv.input_scale": "model-00025-of-00031.safetensors",
531
  "transformer.blocks.33.norm_attn_norm.attn.Wqkv.weight": "model-00025-of-00031.safetensors",
 
536
  "transformer.blocks.33.norm_attn_norm.attn.out_proj.weight_scale": "model-00025-of-00031.safetensors",
537
  "transformer.blocks.33.norm_attn_norm.norm_1.weight": "model-00025-of-00031.safetensors",
538
  "transformer.blocks.33.norm_attn_norm.norm_2.weight": "model-00025-of-00031.safetensors",
539
+ "transformer.blocks.34.ffn.experts.mlp.v1_input_scale": "model-00027-of-00031.safetensors",
540
+ "transformer.blocks.34.ffn.experts.mlp.v1_weight": "model-00027-of-00031.safetensors",
541
+ "transformer.blocks.34.ffn.experts.mlp.v1_weight_scale": "model-00027-of-00031.safetensors",
542
+ "transformer.blocks.34.ffn.experts.mlp.w1_input_scale": "model-00026-of-00031.safetensors",
543
+ "transformer.blocks.34.ffn.experts.mlp.w1_weight": "model-00026-of-00031.safetensors",
544
+ "transformer.blocks.34.ffn.experts.mlp.w1_weight_scale": "model-00026-of-00031.safetensors",
545
+ "transformer.blocks.34.ffn.experts.mlp.w2_input_scale": "model-00027-of-00031.safetensors",
546
+ "transformer.blocks.34.ffn.experts.mlp.w2_weight": "model-00027-of-00031.safetensors",
547
+ "transformer.blocks.34.ffn.experts.mlp.w2_weight_scale": "model-00027-of-00031.safetensors",
548
  "transformer.blocks.34.ffn.router.layer.weight": "model-00026-of-00031.safetensors",
549
  "transformer.blocks.34.norm_attn_norm.attn.Wqkv.input_scale": "model-00026-of-00031.safetensors",
550
  "transformer.blocks.34.norm_attn_norm.attn.Wqkv.weight": "model-00026-of-00031.safetensors",
 
555
  "transformer.blocks.34.norm_attn_norm.attn.out_proj.weight_scale": "model-00026-of-00031.safetensors",
556
  "transformer.blocks.34.norm_attn_norm.norm_1.weight": "model-00026-of-00031.safetensors",
557
  "transformer.blocks.34.norm_attn_norm.norm_2.weight": "model-00026-of-00031.safetensors",
558
+ "transformer.blocks.35.ffn.experts.mlp.v1_input_scale": "model-00027-of-00031.safetensors",
559
+ "transformer.blocks.35.ffn.experts.mlp.v1_weight": "model-00027-of-00031.safetensors",
560
+ "transformer.blocks.35.ffn.experts.mlp.v1_weight_scale": "model-00027-of-00031.safetensors",
561
+ "transformer.blocks.35.ffn.experts.mlp.w1_input_scale": "model-00027-of-00031.safetensors",
562
+ "transformer.blocks.35.ffn.experts.mlp.w1_weight": "model-00027-of-00031.safetensors",
563
+ "transformer.blocks.35.ffn.experts.mlp.w1_weight_scale": "model-00027-of-00031.safetensors",
564
+ "transformer.blocks.35.ffn.experts.mlp.w2_input_scale": "model-00028-of-00031.safetensors",
565
+ "transformer.blocks.35.ffn.experts.mlp.w2_weight": "model-00028-of-00031.safetensors",
566
+ "transformer.blocks.35.ffn.experts.mlp.w2_weight_scale": "model-00028-of-00031.safetensors",
567
  "transformer.blocks.35.ffn.router.layer.weight": "model-00027-of-00031.safetensors",
568
  "transformer.blocks.35.norm_attn_norm.attn.Wqkv.input_scale": "model-00027-of-00031.safetensors",
569
  "transformer.blocks.35.norm_attn_norm.attn.Wqkv.weight": "model-00027-of-00031.safetensors",
 
574
  "transformer.blocks.35.norm_attn_norm.attn.out_proj.weight_scale": "model-00027-of-00031.safetensors",
575
  "transformer.blocks.35.norm_attn_norm.norm_1.weight": "model-00027-of-00031.safetensors",
576
  "transformer.blocks.35.norm_attn_norm.norm_2.weight": "model-00027-of-00031.safetensors",
577
+ "transformer.blocks.36.ffn.experts.mlp.v1_input_scale": "model-00028-of-00031.safetensors",
578
+ "transformer.blocks.36.ffn.experts.mlp.v1_weight": "model-00028-of-00031.safetensors",
579
+ "transformer.blocks.36.ffn.experts.mlp.v1_weight_scale": "model-00028-of-00031.safetensors",
580
+ "transformer.blocks.36.ffn.experts.mlp.w1_input_scale": "model-00028-of-00031.safetensors",
581
+ "transformer.blocks.36.ffn.experts.mlp.w1_weight": "model-00028-of-00031.safetensors",
582
+ "transformer.blocks.36.ffn.experts.mlp.w1_weight_scale": "model-00028-of-00031.safetensors",
583
+ "transformer.blocks.36.ffn.experts.mlp.w2_input_scale": "model-00028-of-00031.safetensors",
584
+ "transformer.blocks.36.ffn.experts.mlp.w2_weight": "model-00028-of-00031.safetensors",
585
+ "transformer.blocks.36.ffn.experts.mlp.w2_weight_scale": "model-00028-of-00031.safetensors",
586
  "transformer.blocks.36.ffn.router.layer.weight": "model-00028-of-00031.safetensors",
587
  "transformer.blocks.36.norm_attn_norm.attn.Wqkv.input_scale": "model-00028-of-00031.safetensors",
588
  "transformer.blocks.36.norm_attn_norm.attn.Wqkv.weight": "model-00028-of-00031.safetensors",
 
593
  "transformer.blocks.36.norm_attn_norm.attn.out_proj.weight_scale": "model-00028-of-00031.safetensors",
594
  "transformer.blocks.36.norm_attn_norm.norm_1.weight": "model-00028-of-00031.safetensors",
595
  "transformer.blocks.36.norm_attn_norm.norm_2.weight": "model-00028-of-00031.safetensors",
596
+ "transformer.blocks.37.ffn.experts.mlp.v1_input_scale": "model-00029-of-00031.safetensors",
597
+ "transformer.blocks.37.ffn.experts.mlp.v1_weight": "model-00029-of-00031.safetensors",
598
+ "transformer.blocks.37.ffn.experts.mlp.v1_weight_scale": "model-00029-of-00031.safetensors",
599
+ "transformer.blocks.37.ffn.experts.mlp.w1_input_scale": "model-00029-of-00031.safetensors",
600
+ "transformer.blocks.37.ffn.experts.mlp.w1_weight": "model-00029-of-00031.safetensors",
601
+ "transformer.blocks.37.ffn.experts.mlp.w1_weight_scale": "model-00029-of-00031.safetensors",
602
+ "transformer.blocks.37.ffn.experts.mlp.w2_input_scale": "model-00029-of-00031.safetensors",
603
+ "transformer.blocks.37.ffn.experts.mlp.w2_weight": "model-00029-of-00031.safetensors",
604
+ "transformer.blocks.37.ffn.experts.mlp.w2_weight_scale": "model-00029-of-00031.safetensors",
605
  "transformer.blocks.37.ffn.router.layer.weight": "model-00028-of-00031.safetensors",
606
  "transformer.blocks.37.norm_attn_norm.attn.Wqkv.input_scale": "model-00028-of-00031.safetensors",
607
  "transformer.blocks.37.norm_attn_norm.attn.Wqkv.weight": "model-00028-of-00031.safetensors",
 
612
  "transformer.blocks.37.norm_attn_norm.attn.out_proj.weight_scale": "model-00028-of-00031.safetensors",
613
  "transformer.blocks.37.norm_attn_norm.norm_1.weight": "model-00028-of-00031.safetensors",
614
  "transformer.blocks.37.norm_attn_norm.norm_2.weight": "model-00028-of-00031.safetensors",
615
+ "transformer.blocks.38.ffn.experts.mlp.v1_input_scale": "model-00030-of-00031.safetensors",
616
+ "transformer.blocks.38.ffn.experts.mlp.v1_weight": "model-00030-of-00031.safetensors",
617
+ "transformer.blocks.38.ffn.experts.mlp.v1_weight_scale": "model-00030-of-00031.safetensors",
618
+ "transformer.blocks.38.ffn.experts.mlp.w1_input_scale": "model-00029-of-00031.safetensors",
619
+ "transformer.blocks.38.ffn.experts.mlp.w1_weight": "model-00029-of-00031.safetensors",
620
+ "transformer.blocks.38.ffn.experts.mlp.w1_weight_scale": "model-00029-of-00031.safetensors",
621
+ "transformer.blocks.38.ffn.experts.mlp.w2_input_scale": "model-00030-of-00031.safetensors",
622
+ "transformer.blocks.38.ffn.experts.mlp.w2_weight": "model-00030-of-00031.safetensors",
623
+ "transformer.blocks.38.ffn.experts.mlp.w2_weight_scale": "model-00030-of-00031.safetensors",
624
  "transformer.blocks.38.ffn.router.layer.weight": "model-00029-of-00031.safetensors",
625
  "transformer.blocks.38.norm_attn_norm.attn.Wqkv.input_scale": "model-00029-of-00031.safetensors",
626
  "transformer.blocks.38.norm_attn_norm.attn.Wqkv.weight": "model-00029-of-00031.safetensors",
 
631
  "transformer.blocks.38.norm_attn_norm.attn.out_proj.weight_scale": "model-00029-of-00031.safetensors",
632
  "transformer.blocks.38.norm_attn_norm.norm_1.weight": "model-00029-of-00031.safetensors",
633
  "transformer.blocks.38.norm_attn_norm.norm_2.weight": "model-00029-of-00031.safetensors",
634
+ "transformer.blocks.39.ffn.experts.mlp.v1_input_scale": "model-00030-of-00031.safetensors",
635
+ "transformer.blocks.39.ffn.experts.mlp.v1_weight": "model-00030-of-00031.safetensors",
636
+ "transformer.blocks.39.ffn.experts.mlp.v1_weight_scale": "model-00030-of-00031.safetensors",
637
+ "transformer.blocks.39.ffn.experts.mlp.w1_input_scale": "model-00030-of-00031.safetensors",
638
+ "transformer.blocks.39.ffn.experts.mlp.w1_weight": "model-00030-of-00031.safetensors",
639
+ "transformer.blocks.39.ffn.experts.mlp.w1_weight_scale": "model-00030-of-00031.safetensors",
640
+ "transformer.blocks.39.ffn.experts.mlp.w2_input_scale": "model-00031-of-00031.safetensors",
641
+ "transformer.blocks.39.ffn.experts.mlp.w2_weight": "model-00031-of-00031.safetensors",
642
+ "transformer.blocks.39.ffn.experts.mlp.w2_weight_scale": "model-00031-of-00031.safetensors",
643
  "transformer.blocks.39.ffn.router.layer.weight": "model-00030-of-00031.safetensors",
644
  "transformer.blocks.39.norm_attn_norm.attn.Wqkv.input_scale": "model-00030-of-00031.safetensors",
645
  "transformer.blocks.39.norm_attn_norm.attn.Wqkv.weight": "model-00030-of-00031.safetensors",
 
650
  "transformer.blocks.39.norm_attn_norm.attn.out_proj.weight_scale": "model-00030-of-00031.safetensors",
651
  "transformer.blocks.39.norm_attn_norm.norm_1.weight": "model-00030-of-00031.safetensors",
652
  "transformer.blocks.39.norm_attn_norm.norm_2.weight": "model-00030-of-00031.safetensors",
653
+ "transformer.blocks.4.ffn.experts.mlp.v1_input_scale": "model-00004-of-00031.safetensors",
654
+ "transformer.blocks.4.ffn.experts.mlp.v1_weight": "model-00004-of-00031.safetensors",
655
+ "transformer.blocks.4.ffn.experts.mlp.v1_weight_scale": "model-00004-of-00031.safetensors",
656
+ "transformer.blocks.4.ffn.experts.mlp.w1_input_scale": "model-00004-of-00031.safetensors",
657
+ "transformer.blocks.4.ffn.experts.mlp.w1_weight": "model-00004-of-00031.safetensors",
658
+ "transformer.blocks.4.ffn.experts.mlp.w1_weight_scale": "model-00004-of-00031.safetensors",
659
+ "transformer.blocks.4.ffn.experts.mlp.w2_input_scale": "model-00004-of-00031.safetensors",
660
+ "transformer.blocks.4.ffn.experts.mlp.w2_weight": "model-00004-of-00031.safetensors",
661
+ "transformer.blocks.4.ffn.experts.mlp.w2_weight_scale": "model-00004-of-00031.safetensors",
662
  "transformer.blocks.4.ffn.router.layer.weight": "model-00004-of-00031.safetensors",
663
  "transformer.blocks.4.norm_attn_norm.attn.Wqkv.input_scale": "model-00004-of-00031.safetensors",
664
  "transformer.blocks.4.norm_attn_norm.attn.Wqkv.weight": "model-00004-of-00031.safetensors",
 
669
  "transformer.blocks.4.norm_attn_norm.attn.out_proj.weight_scale": "model-00004-of-00031.safetensors",
670
  "transformer.blocks.4.norm_attn_norm.norm_1.weight": "model-00004-of-00031.safetensors",
671
  "transformer.blocks.4.norm_attn_norm.norm_2.weight": "model-00004-of-00031.safetensors",
672
+ "transformer.blocks.5.ffn.experts.mlp.v1_input_scale": "model-00005-of-00031.safetensors",
673
+ "transformer.blocks.5.ffn.experts.mlp.v1_weight": "model-00005-of-00031.safetensors",
674
+ "transformer.blocks.5.ffn.experts.mlp.v1_weight_scale": "model-00005-of-00031.safetensors",
675
+ "transformer.blocks.5.ffn.experts.mlp.w1_input_scale": "model-00005-of-00031.safetensors",
676
+ "transformer.blocks.5.ffn.experts.mlp.w1_weight": "model-00005-of-00031.safetensors",
677
+ "transformer.blocks.5.ffn.experts.mlp.w1_weight_scale": "model-00005-of-00031.safetensors",
678
+ "transformer.blocks.5.ffn.experts.mlp.w2_input_scale": "model-00005-of-00031.safetensors",
679
+ "transformer.blocks.5.ffn.experts.mlp.w2_weight": "model-00005-of-00031.safetensors",
680
+ "transformer.blocks.5.ffn.experts.mlp.w2_weight_scale": "model-00005-of-00031.safetensors",
681
  "transformer.blocks.5.ffn.router.layer.weight": "model-00004-of-00031.safetensors",
682
  "transformer.blocks.5.norm_attn_norm.attn.Wqkv.input_scale": "model-00004-of-00031.safetensors",
683
  "transformer.blocks.5.norm_attn_norm.attn.Wqkv.weight": "model-00004-of-00031.safetensors",
 
688
  "transformer.blocks.5.norm_attn_norm.attn.out_proj.weight_scale": "model-00004-of-00031.safetensors",
689
  "transformer.blocks.5.norm_attn_norm.norm_1.weight": "model-00004-of-00031.safetensors",
690
  "transformer.blocks.5.norm_attn_norm.norm_2.weight": "model-00004-of-00031.safetensors",
691
+ "transformer.blocks.6.ffn.experts.mlp.v1_input_scale": "model-00006-of-00031.safetensors",
692
+ "transformer.blocks.6.ffn.experts.mlp.v1_weight": "model-00006-of-00031.safetensors",
693
+ "transformer.blocks.6.ffn.experts.mlp.v1_weight_scale": "model-00006-of-00031.safetensors",
694
+ "transformer.blocks.6.ffn.experts.mlp.w1_input_scale": "model-00005-of-00031.safetensors",
695
+ "transformer.blocks.6.ffn.experts.mlp.w1_weight": "model-00005-of-00031.safetensors",
696
+ "transformer.blocks.6.ffn.experts.mlp.w1_weight_scale": "model-00005-of-00031.safetensors",
697
+ "transformer.blocks.6.ffn.experts.mlp.w2_input_scale": "model-00006-of-00031.safetensors",
698
+ "transformer.blocks.6.ffn.experts.mlp.w2_weight": "model-00006-of-00031.safetensors",
699
+ "transformer.blocks.6.ffn.experts.mlp.w2_weight_scale": "model-00006-of-00031.safetensors",
700
  "transformer.blocks.6.ffn.router.layer.weight": "model-00005-of-00031.safetensors",
701
  "transformer.blocks.6.norm_attn_norm.attn.Wqkv.input_scale": "model-00005-of-00031.safetensors",
702
  "transformer.blocks.6.norm_attn_norm.attn.Wqkv.weight": "model-00005-of-00031.safetensors",
 
707
  "transformer.blocks.6.norm_attn_norm.attn.out_proj.weight_scale": "model-00005-of-00031.safetensors",
708
  "transformer.blocks.6.norm_attn_norm.norm_1.weight": "model-00005-of-00031.safetensors",
709
  "transformer.blocks.6.norm_attn_norm.norm_2.weight": "model-00005-of-00031.safetensors",
710
+ "transformer.blocks.7.ffn.experts.mlp.v1_input_scale": "model-00006-of-00031.safetensors",
711
+ "transformer.blocks.7.ffn.experts.mlp.v1_weight": "model-00006-of-00031.safetensors",
712
+ "transformer.blocks.7.ffn.experts.mlp.v1_weight_scale": "model-00006-of-00031.safetensors",
713
+ "transformer.blocks.7.ffn.experts.mlp.w1_input_scale": "model-00006-of-00031.safetensors",
714
+ "transformer.blocks.7.ffn.experts.mlp.w1_weight": "model-00006-of-00031.safetensors",
715
+ "transformer.blocks.7.ffn.experts.mlp.w1_weight_scale": "model-00006-of-00031.safetensors",
716
+ "transformer.blocks.7.ffn.experts.mlp.w2_input_scale": "model-00007-of-00031.safetensors",
717
+ "transformer.blocks.7.ffn.experts.mlp.w2_weight": "model-00007-of-00031.safetensors",
718
+ "transformer.blocks.7.ffn.experts.mlp.w2_weight_scale": "model-00007-of-00031.safetensors",
719
  "transformer.blocks.7.ffn.router.layer.weight": "model-00006-of-00031.safetensors",
720
  "transformer.blocks.7.norm_attn_norm.attn.Wqkv.input_scale": "model-00006-of-00031.safetensors",
721
  "transformer.blocks.7.norm_attn_norm.attn.Wqkv.weight": "model-00006-of-00031.safetensors",
 
726
  "transformer.blocks.7.norm_attn_norm.attn.out_proj.weight_scale": "model-00006-of-00031.safetensors",
727
  "transformer.blocks.7.norm_attn_norm.norm_1.weight": "model-00006-of-00031.safetensors",
728
  "transformer.blocks.7.norm_attn_norm.norm_2.weight": "model-00006-of-00031.safetensors",
729
+ "transformer.blocks.8.ffn.experts.mlp.v1_input_scale": "model-00007-of-00031.safetensors",
730
+ "transformer.blocks.8.ffn.experts.mlp.v1_weight": "model-00007-of-00031.safetensors",
731
+ "transformer.blocks.8.ffn.experts.mlp.v1_weight_scale": "model-00007-of-00031.safetensors",
732
+ "transformer.blocks.8.ffn.experts.mlp.w1_input_scale": "model-00007-of-00031.safetensors",
733
+ "transformer.blocks.8.ffn.experts.mlp.w1_weight": "model-00007-of-00031.safetensors",
734
+ "transformer.blocks.8.ffn.experts.mlp.w1_weight_scale": "model-00007-of-00031.safetensors",
735
+ "transformer.blocks.8.ffn.experts.mlp.w2_input_scale": "model-00007-of-00031.safetensors",
736
+ "transformer.blocks.8.ffn.experts.mlp.w2_weight": "model-00007-of-00031.safetensors",
737
+ "transformer.blocks.8.ffn.experts.mlp.w2_weight_scale": "model-00007-of-00031.safetensors",
738
  "transformer.blocks.8.ffn.router.layer.weight": "model-00007-of-00031.safetensors",
739
  "transformer.blocks.8.norm_attn_norm.attn.Wqkv.input_scale": "model-00007-of-00031.safetensors",
740
  "transformer.blocks.8.norm_attn_norm.attn.Wqkv.weight": "model-00007-of-00031.safetensors",
 
745
  "transformer.blocks.8.norm_attn_norm.attn.out_proj.weight_scale": "model-00007-of-00031.safetensors",
746
  "transformer.blocks.8.norm_attn_norm.norm_1.weight": "model-00007-of-00031.safetensors",
747
  "transformer.blocks.8.norm_attn_norm.norm_2.weight": "model-00007-of-00031.safetensors",
748
+ "transformer.blocks.9.ffn.experts.mlp.v1_input_scale": "model-00008-of-00031.safetensors",
749
+ "transformer.blocks.9.ffn.experts.mlp.v1_weight": "model-00008-of-00031.safetensors",
750
+ "transformer.blocks.9.ffn.experts.mlp.v1_weight_scale": "model-00008-of-00031.safetensors",
751
+ "transformer.blocks.9.ffn.experts.mlp.w1_input_scale": "model-00008-of-00031.safetensors",
752
+ "transformer.blocks.9.ffn.experts.mlp.w1_weight": "model-00008-of-00031.safetensors",
753
+ "transformer.blocks.9.ffn.experts.mlp.w1_weight_scale": "model-00008-of-00031.safetensors",
754
+ "transformer.blocks.9.ffn.experts.mlp.w2_input_scale": "model-00008-of-00031.safetensors",
755
+ "transformer.blocks.9.ffn.experts.mlp.w2_weight": "model-00008-of-00031.safetensors",
756
+ "transformer.blocks.9.ffn.experts.mlp.w2_weight_scale": "model-00008-of-00031.safetensors",
757
  "transformer.blocks.9.ffn.router.layer.weight": "model-00007-of-00031.safetensors",
758
  "transformer.blocks.9.norm_attn_norm.attn.Wqkv.input_scale": "model-00007-of-00031.safetensors",
759
  "transformer.blocks.9.norm_attn_norm.attn.Wqkv.weight": "model-00007-of-00031.safetensors",