Qwen2.5-14B-Instruct_q4f16_1-MLC / ndarray-cache.json
smalinin's picture
Add model files
e661c60
{
"metadata": {
"ParamSize": 533,
"ParamBytes": 8309352448.0,
"BitsPerParam": 4.50065457508222
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 389283840,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
152064,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 389283840,
"byteOffset": 0
}
],
"md5sum": "3d7e52df74fccf9e437be071e071141e"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 48660480,
"records": [
{
"name": "lm_head.q_scale",
"shape": [
152064,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 48660480,
"byteOffset": 0
}
],
"md5sum": "5c08d18c8fc389d879adf3fda35199f5"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.47.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "702b06193862d16142acccee4d48a77a"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 389283840,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
152064,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 389283840,
"byteOffset": 0
}
],
"md5sum": "18d4f787cff5676a78c1085739339bfa"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 48660480,
"records": [
{
"name": "model.embed_tokens.q_scale",
"shape": [
152064,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 48660480,
"byteOffset": 0
}
],
"md5sum": "ab2e38a70c77cf70496930a9ba57e6ec"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "bfb932e969a6705906d9a38593bb3cdf"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "d602cb3d0124e8480934e4f252ebf774"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.0.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "2f6489725d57a0d6e8d72549de378f5b"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 33140736,
"records": [
{
"name": "model.layers.47.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 0
},
{
"name": "model.norm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 4423680
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 4433920
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 4444160
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 8867840
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 17715200
},
{
"name": "model.layers.0.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 17725440
},
{
"name": "model.layers.0.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 17739776
},
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 20033536
}
],
"md5sum": "8943a542c123006234164cabc69382c8"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "a442e398cc128d80287c254e999d5669"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "b98a743b899c68a91e5e08c9b39075d4"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 33294336,
"records": [
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 0
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1638400
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 1648640
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 6072320
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14919680
},
{
"name": "model.layers.1.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 14929920
},
{
"name": "model.layers.1.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 14944256
}
],
"md5sum": "c20d9e054e27eae5f79d9fdc59019403"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "a649de424d6415a6e5970868512f060a"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "9e9e8eb8be042e273b0501071b7a518a"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.2.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "0468307e918075ae09d5e0935b718478"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 32638976,
"records": [
{
"name": "model.layers.1.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 0
},
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 2293760
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 15400960
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 17039360
},
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 17049600
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 21473280
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 30320640
},
{
"name": "model.layers.2.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 30330880
},
{
"name": "model.layers.2.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 30345216
}
],
"md5sum": "a466432efca283edaedca0064568fc52"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "0db0241ea46ef9ef62d04d4cd0845b2e"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "430fd3721a6b707bcbfa7e16b7770964"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.3.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "1a1a2111958a01e632f2eb6831f49589"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.3.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.3.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "0d84d6b1b17dbad48c640423952a9e3b"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 33130496,
"records": [
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14755840
},
{
"name": "model.layers.4.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 14766080
},
{
"name": "model.layers.4.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 14780416
}
],
"md5sum": "878c512576652d21c5b90a5d8cdf6a89"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "18cc913695c8575a258ec3dd02150afa"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "6bc30106db4d205201dcce79cfdf95fb"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.10.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "ec908d57846092f1f9cd1ddef77aac71"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 32638976,
"records": [
{
"name": "model.layers.4.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 0
},
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 2293760
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 15400960
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 17039360
},
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 17049600
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 21473280
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 30320640
},
{
"name": "model.layers.10.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 30330880
},
{
"name": "model.layers.10.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 30345216
}
],
"md5sum": "918becb476ea82ea27b95debf9a9dcc2"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "68cc6510ec3c10dc0cca470edd028c22"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.11.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "bb25b65159e2c195001d7479aeb49cc2"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 25921536,
"records": [
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14755840
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 23603200
},
{
"name": "model.layers.11.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 23613440
},
{
"name": "model.layers.11.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 23627776
}
],
"md5sum": "17771e856d35f6860ef01c3bf038ef24"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "081ec2ab7c5d52c986a0564169421d77"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "21dd2126f8d3b92f59460246fb59ccc6"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "296c90b864ccc46a6b71c3d013c12b85"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "ae54b0b22826872721c9cc3e3054b42b"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 32450560,
"records": [
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14745600
},
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19169280
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28016640
},
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 28026880
}
],
"md5sum": "72707cc4e92010ccf475d99f14daf2d8"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 29515776,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 0
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 8847360
},
{
"name": "model.layers.5.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 8857600
},
{
"name": "model.layers.5.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 8871936
},
{
"name": "model.layers.5.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 27222016
}
],
"md5sum": "ef53b2ef25140b356d482f833b29668f"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "a4d6f99305507db11fd31e95f66c0502"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "8e300d2f7cda96846fb624ddae407b71"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.6.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "6f5367a1f52aac649d004dbf4fe64d8e"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.6.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.6.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "3ba65dc004ac68b0bbc57653a50305b8"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "a18d8a3a667ba43056ae8d1ac4eb082a"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "e9a84db889ac4bdddc343cfe878fe371"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.7.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "c9988e909547358a556c94c363158498"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.7.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.7.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "8a0fc4b7d74a2d4766d13f6cd9840eb5"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "fbc2eaa6b8c8e7c29758361daae9179b"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "42558b8000aca00ccb3c7426c7a8606c"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.8.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "4d0bbaf5f8a51e6e70aedca6bd581d4a"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.8.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.8.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "5afc45c6f081ca3fcc1d1250ae8b588f"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "c6e901890a205b9a5e1deeecfb2e37c5"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "bbd7b5b8fe2cdefdd1205f9904b950b7"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.9.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "c561b473ae73c1af86cb57c861f1cdcf"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.9.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.9.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "07381e6e3dd2c63452e690f2fdf7338b"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "05e1a13c54af730f2fd0e7131f55cf8c"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "892b7a111f0b59e30878be51e3cfe5a6"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "6a0894ffa5f310d71c47ca699b71fe3c"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.12.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "7ffd723b412d4ea5004b640e818c136f"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 32475136,
"records": [
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14745600
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 19169280
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 19179520
},
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 23603200
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 32450560
},
{
"name": "model.layers.12.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 32460800
}
],
"md5sum": "e0f6317c1e4de21d87a526533d42398c"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "2c695bf6c5093bf851a0b6e1815ccf10"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "848844ec50385f2a4c3c248e1385bd75"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.13.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "270aa31d8105a9fb58b6f472fd683585"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 32638976,
"records": [
{
"name": "model.layers.12.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 0
},
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 2293760
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 15400960
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 17039360
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 17049600
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 21473280
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 30320640
},
{
"name": "model.layers.13.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 30330880
},
{
"name": "model.layers.13.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 30345216
}
],
"md5sum": "30c8341c5fe316211dfcbfe4f8816af9"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "085bc710afae24d6467854d9083af3ae"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "760f71ce2ec41d00bc55a0e6c75757ce"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.14.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "8cdc4922beeb748eb86c8013f6c66ad5"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.14.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.14.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "8f031e5174a365a4b500580e29bc5f4f"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "c278d54471e98b083cee607fe8fb11bd"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "5d583654b6de58277916e826426d69f7"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.15.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "7bd1736d75fe651f21759671f2b3d3ff"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.15.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.15.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "b7bfee3d7114f3e5d5b5e3a1caa371d8"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "f03bb6d8e83b1b14cf840bebe3c948c0"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "4831744ca9e54c24c71998fd572a9460"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.16.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "e37b2d49450a2ab3f3f4e790f2ee0930"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.16.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.16.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "2662508d1d3b9969378523c780602533"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "6ca03dce887e2c8e080c03fec714a1f5"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "e2d88890d3df9d2fdadf55091c95aa10"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.17.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "4eed57e015da4f6c3c0c6a1372914038"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.17.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.17.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "e36fa7da389229d1a254e47591ba5438"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "49ff462e5f36fa406610c000fc785881"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.18.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "699d826fedbe9bbc2fe086fecfbc50f0"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 25921536,
"records": [
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14755840
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 23603200
},
{
"name": "model.layers.18.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 23613440
},
{
"name": "model.layers.18.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 23627776
}
],
"md5sum": "818f276776a307083cea89e151be15ee"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "083c662b52353054d18a5a555b7c0b6b"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "a389d455f2fa7a8f7d92d4f69f367e9e"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "668f1bd0bbca77bc9b63d2c08fb1b5c2"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.19.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "2e9b686d5d5b7ecffbb2337a219c932d"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 32475136,
"records": [
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14745600
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 19169280
},
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 19179520
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 23603200
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 32450560
},
{
"name": "model.layers.19.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 32460800
}
],
"md5sum": "7b08af282605c2245c855cd256969756"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "9cd646514b794aa5d32573e765b8e983"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "b20817e3bc0aa420b1ab4b373a4b13e3"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.20.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "5e146b41f9e86385a7cdf2c9e7af0b8f"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 32638976,
"records": [
{
"name": "model.layers.19.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 0
},
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 2293760
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 15400960
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 17039360
},
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 17049600
},
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 21473280
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 30320640
},
{
"name": "model.layers.20.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 30330880
},
{
"name": "model.layers.20.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 30345216
}
],
"md5sum": "4acafd3d37d2b70dd1f5f6be1dff7ee6"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "1684ceb3e25e0e71973a2832134735b3"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "d37826000ee63da17cec9519d58aa57d"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.21.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "87c0bb1521aed276a2da898292b4086e"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.21.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.21.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "25bc72c997cec0661780cc054c36a550"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "33bca6805c0718e2a588f2f386ee8103"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "4497e04d877b2242ee29262b392ee1b6"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.22.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "8809d618b64bd55a6aae5b9482488e6a"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.22.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.22.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "a1859811c9d99d1411095455a098b11d"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "a2f9b9da43d4c041248ff1415ac7d288"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "96ed3e84ae41ff45720ca3dcd66537f0"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.23.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "acae8a35b45a4bd4bc1c0172278ecf06"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.23.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.23.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "d6bb5f1d76534aeee655d103f01f63f9"
},
{
"dataPath": "params_shard_99.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "9543e63f4a7ac74b79ed747818b383ee"
},
{
"dataPath": "params_shard_100.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "b3868c3689a9afa329fa3558aaa0d099"
},
{
"dataPath": "params_shard_101.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.24.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "53d29f57d2a780871bd5081c4fe222ca"
},
{
"dataPath": "params_shard_102.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.24.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.24.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "ecc8b9a97d7f954a51cb697215477753"
},
{
"dataPath": "params_shard_103.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "0713abfac4aa870c94299fdaf885ab06"
},
{
"dataPath": "params_shard_104.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "40ec1156992dff4b17e395de8d0ed3e9"
},
{
"dataPath": "params_shard_105.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.25.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "728d81545a862a35c2026007b8fe9142"
},
{
"dataPath": "params_shard_106.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.25.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.25.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "60e774f53df89f64e5c994d6e7ea0e12"
},
{
"dataPath": "params_shard_107.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "0b32c3e496086a09f74063ed4818e848"
},
{
"dataPath": "params_shard_108.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "adb931b4ed92274ede26220302fa0fdd"
},
{
"dataPath": "params_shard_109.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.26.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "5369dc3e54587b43a9a364ee7c9d12fb"
},
{
"dataPath": "params_shard_110.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.26.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.26.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "e45ecfb6e06de31cf168d14c4f1daac0"
},
{
"dataPath": "params_shard_111.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "a8ef7013fe3fd7ab106c34338eb65c75"
},
{
"dataPath": "params_shard_112.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "2501f8d862a3a2c6395f27876f181af0"
},
{
"dataPath": "params_shard_113.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.27.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "abd3508d072db25c21928a323cd334a8"
},
{
"dataPath": "params_shard_114.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.27.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.27.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "eeb549f5d0c26d603befee7c06d24d0a"
},
{
"dataPath": "params_shard_115.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.28.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "b8553b1d45e9923e670fa09e8189118c"
},
{
"dataPath": "params_shard_116.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "6d2d2afddda373be31150109f8c1f930"
},
{
"dataPath": "params_shard_117.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.28.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "ff03418f07bfa5ca7fbf013e82389757"
},
{
"dataPath": "params_shard_118.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.28.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.28.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.28.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.28.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "7f7c727e45dbb13eb47b0774fa7e1aea"
},
{
"dataPath": "params_shard_119.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.29.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "aac9bc665ab206ffc20be60859fbd1ce"
},
{
"dataPath": "params_shard_120.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "ef3d3673cbe12910b46198b410e3210d"
},
{
"dataPath": "params_shard_121.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.29.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "60fbc7dfb414c7bf0dd4e6adc651b685"
},
{
"dataPath": "params_shard_122.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.28.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.28.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.29.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.29.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.29.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.29.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "90b4d9cb9cc65c06393817055ef1141f"
},
{
"dataPath": "params_shard_123.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.30.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "3e401bd41b890b6926ca1a2e024ab8b1"
},
{
"dataPath": "params_shard_124.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "bcc4796123a9e24b089596a7c0d68e86"
},
{
"dataPath": "params_shard_125.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.30.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "fe0cd5ce6bde1fa6e529e1690dd20b4d"
},
{
"dataPath": "params_shard_126.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.29.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.29.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.30.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.30.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.30.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.30.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.30.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.30.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "944fb7de49cdad704aefb381f6d8d61e"
},
{
"dataPath": "params_shard_127.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.31.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "b5e9a8aa8fe737c6237508f61084d1be"
},
{
"dataPath": "params_shard_128.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "0a719b751e17534ebe039f0641ef86ee"
},
{
"dataPath": "params_shard_129.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.31.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "3eed2fc9f152426fff51122cbf8bd06d"
},
{
"dataPath": "params_shard_130.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.30.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.30.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.31.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.31.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.31.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.31.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.31.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.31.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "47a07358a0f34727f77eb3efed5f7c73"
},
{
"dataPath": "params_shard_131.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.32.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "16f0ee431df227f1017423038a4b5ead"
},
{
"dataPath": "params_shard_132.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.32.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "689251e4a4315ff380de693dd3a54221"
},
{
"dataPath": "params_shard_133.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.32.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "c31959e850c90e62cbfa9e8ddad8e8fa"
},
{
"dataPath": "params_shard_134.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.31.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.31.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.32.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.32.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.32.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.32.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.32.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.32.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "f3a82a71d42528c3844e5568d754355b"
},
{
"dataPath": "params_shard_135.bin",
"format": "raw-shard",
"nbytes": 33130496,
"records": [
{
"name": "model.layers.32.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.32.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.33.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.33.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14755840
},
{
"name": "model.layers.33.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 14766080
},
{
"name": "model.layers.33.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 14780416
}
],
"md5sum": "afeafe416ae9c87f1bf2e76e9e3440f0"
},
{
"dataPath": "params_shard_136.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.33.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "5492749c71b5c052dd8f8e6e49b708ac"
},
{
"dataPath": "params_shard_137.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.33.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "aec9bd86846fcf2f48a71d8df25b0b07"
},
{
"dataPath": "params_shard_138.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.34.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "b38b645910b5e1ec6788d90b60bc3ccb"
},
{
"dataPath": "params_shard_139.bin",
"format": "raw-shard",
"nbytes": 30320640,
"records": [
{
"name": "model.layers.33.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 0
},
{
"name": "model.layers.33.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 2293760
},
{
"name": "model.layers.33.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 15400960
},
{
"name": "model.layers.33.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 17039360
},
{
"name": "model.layers.33.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 21463040
},
{
"name": "model.layers.34.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 30310400
}
],
"md5sum": "130a6cc8cf8009cb58ddea47cf5ae8bc"
},
{
"dataPath": "params_shard_140.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.34.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "5e32e35f6ef00ac39af59087a9b8491f"
},
{
"dataPath": "params_shard_141.bin",
"format": "raw-shard",
"nbytes": 31645696,
"records": [
{
"name": "model.layers.34.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 0
},
{
"name": "model.layers.34.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 4423680
},
{
"name": "model.layers.34.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13271040
},
{
"name": "model.layers.34.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 13281280
},
{
"name": "model.layers.34.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 13295616
}
],
"md5sum": "1a9eb888b28f64ea6fde5bf22ad8b6d2"
},
{
"dataPath": "params_shard_142.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.35.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "d6518d0fdd83b95568b068bdf2425464"
},
{
"dataPath": "params_shard_143.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.35.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "2c8d459b58e0e7c975c5070ffa336caa"
},
{
"dataPath": "params_shard_144.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.35.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "e428a9712f53210bec689572026dc0d8"
},
{
"dataPath": "params_shard_145.bin",
"format": "raw-shard",
"nbytes": 32638976,
"records": [
{
"name": "model.layers.34.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 0
},
{
"name": "model.layers.34.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 2293760
},
{
"name": "model.layers.34.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 15400960
},
{
"name": "model.layers.35.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 17039360
},
{
"name": "model.layers.35.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 17049600
},
{
"name": "model.layers.35.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 21473280
},
{
"name": "model.layers.35.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 30320640
},
{
"name": "model.layers.35.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 30330880
},
{
"name": "model.layers.35.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 30345216
}
],
"md5sum": "1935457b3bf7f504e914940dc2f2e876"
},
{
"dataPath": "params_shard_146.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.36.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "95fd69cf8c97057b9e753d57fb6af7e4"
},
{
"dataPath": "params_shard_147.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.36.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "408e318f0a7f0ddc1c1e29e32d5eaf15"
},
{
"dataPath": "params_shard_148.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.36.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "71e2f118471f406d2b461f73d7cd9448"
},
{
"dataPath": "params_shard_149.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.35.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.35.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.36.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.36.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.36.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.36.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.36.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.36.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "a0094ba3395f0cd4024498505c7f6ed2"
},
{
"dataPath": "params_shard_150.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.37.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "998f31781efd5cb0ce33e9bdc5578fd5"
},
{
"dataPath": "params_shard_151.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.37.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "8d9cd4f1aa0403b8ef514e1ca553d329"
},
{
"dataPath": "params_shard_152.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.37.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "802211bcd280bd484ecca8ffbd36d452"
},
{
"dataPath": "params_shard_153.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.36.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.36.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.37.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.37.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.37.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.37.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.37.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.37.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "0ec9a9553f6b7e3493ed068f0ddd7853"
},
{
"dataPath": "params_shard_154.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.38.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "8c4ce6e1a6ee0c8132ae0ced35989c9f"
},
{
"dataPath": "params_shard_155.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.38.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "0302412b27290abb375998d1be352069"
},
{
"dataPath": "params_shard_156.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.38.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "7c8cb0a8cdebdbaf04ead04c141e9e41"
},
{
"dataPath": "params_shard_157.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.37.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.37.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.38.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.38.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.38.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.38.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.38.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.38.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "69e461de52a7def231466438045e2bac"
},
{
"dataPath": "params_shard_158.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.39.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "d668437c347d9a8344ad5e584df293e2"
},
{
"dataPath": "params_shard_159.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.39.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "b46f147f03ced584afbb72dbf564b3be"
},
{
"dataPath": "params_shard_160.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.39.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "576d477ccbf25ad4cbe3fff45bc1addc"
},
{
"dataPath": "params_shard_161.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.38.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.38.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.39.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.39.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.39.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.39.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.39.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.39.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "6b2f31fff8c9cfd946d97f76cc2ad588"
},
{
"dataPath": "params_shard_162.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.40.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "2414cf8f4fe67d8312c96e804f9151d9"
},
{
"dataPath": "params_shard_163.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.40.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "ca40143739bc66611ce3a9e46eb22c12"
},
{
"dataPath": "params_shard_164.bin",
"format": "raw-shard",
"nbytes": 25921536,
"records": [
{
"name": "model.layers.39.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.39.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.40.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.40.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14755840
},
{
"name": "model.layers.40.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 23603200
},
{
"name": "model.layers.40.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 23613440
},
{
"name": "model.layers.40.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 23627776
}
],
"md5sum": "85cc545160e6860fb4993c3d337d92d0"
},
{
"dataPath": "params_shard_165.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.40.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "ddf1d235dc91e874b58f25ca03a6ddb1"
},
{
"dataPath": "params_shard_166.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.41.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "4f8092ef1609780c684b855730c70218"
},
{
"dataPath": "params_shard_167.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.41.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "07f6c07ae4fecbc0e288b9bb1857b699"
},
{
"dataPath": "params_shard_168.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.41.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "bb378212d0f42d969854778b96a846ab"
},
{
"dataPath": "params_shard_169.bin",
"format": "raw-shard",
"nbytes": 32475136,
"records": [
{
"name": "model.layers.40.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.40.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.40.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14745600
},
{
"name": "model.layers.41.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 19169280
},
{
"name": "model.layers.41.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 19179520
},
{
"name": "model.layers.41.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 23603200
},
{
"name": "model.layers.41.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 32450560
},
{
"name": "model.layers.41.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 32460800
}
],
"md5sum": "b9f6769cf851043ff218a4f2182e516d"
},
{
"dataPath": "params_shard_170.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.42.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "7db3487cb533b06b1da384c0b6004d43"
},
{
"dataPath": "params_shard_171.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.42.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "b6818a33197bd18f602e8c941acbb383"
},
{
"dataPath": "params_shard_172.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.42.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "7fd9fd8c3ac4bfe45fcf378a85563a9e"
},
{
"dataPath": "params_shard_173.bin",
"format": "raw-shard",
"nbytes": 32638976,
"records": [
{
"name": "model.layers.41.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 0
},
{
"name": "model.layers.41.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 2293760
},
{
"name": "model.layers.41.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 15400960
},
{
"name": "model.layers.42.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 17039360
},
{
"name": "model.layers.42.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 17049600
},
{
"name": "model.layers.42.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 21473280
},
{
"name": "model.layers.42.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 30320640
},
{
"name": "model.layers.42.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 30330880
},
{
"name": "model.layers.42.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 30345216
}
],
"md5sum": "1f3305a831e8d381accb7bbb528a8f8e"
},
{
"dataPath": "params_shard_174.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.43.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "fca52444f6b340968838f5081f3349c8"
},
{
"dataPath": "params_shard_175.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.43.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "cc4b568b3e31128eea7d4a14e00ad87d"
},
{
"dataPath": "params_shard_176.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.43.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "9c97f80a4b6fb564f73c1273e6e3370e"
},
{
"dataPath": "params_shard_177.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.42.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.42.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.43.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.43.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.43.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.43.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.43.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.43.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "6858701539605a67d463ea8c35269b14"
},
{
"dataPath": "params_shard_178.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.44.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "8b0bb112d2af4e3eaaf0cc867115ad38"
},
{
"dataPath": "params_shard_179.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.44.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "979652b96b36f6f7e78024ce4d87df94"
},
{
"dataPath": "params_shard_180.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.44.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "a5312aed3a3eef2192d41a1ec90d5dd1"
},
{
"dataPath": "params_shard_181.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.43.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.43.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.44.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.44.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.44.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.44.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.44.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.44.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "ca25139e6affee8c2f8d61b8fa4b8d8c"
},
{
"dataPath": "params_shard_182.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.45.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "f655b8a96f6e7f7acf00483f6eba54f6"
},
{
"dataPath": "params_shard_183.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.45.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "774982965f583f3475a3270e03a7286a"
},
{
"dataPath": "params_shard_184.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.45.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "e1a23599f22556011ae52724c29d5d64"
},
{
"dataPath": "params_shard_185.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.44.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.44.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.45.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.45.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.45.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.45.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.45.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.45.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "10ceaa1eeb114b395c61581315b09331"
},
{
"dataPath": "params_shard_186.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.46.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "f2e413168cf1a0588d8b349100c05c3d"
},
{
"dataPath": "params_shard_187.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.46.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "95916793e9dad54a361df034475226fb"
},
{
"dataPath": "params_shard_188.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.46.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "18fa47bfa9b554be54d139ec767b5346"
},
{
"dataPath": "params_shard_189.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.45.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.45.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.46.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.46.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.46.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.46.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.46.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.46.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "6c52aa11a82ee998be804a8b7d1a7545"
},
{
"dataPath": "params_shard_190.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.47.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "ca82684e7af64745a9d35b083d6e1701"
},
{
"dataPath": "params_shard_191.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.47.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "f7fcbac3a48e396884e7542a1d2c8643"
},
{
"dataPath": "params_shard_192.bin",
"format": "raw-shard",
"nbytes": 25921536,
"records": [
{
"name": "model.layers.46.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.46.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.47.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.47.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14755840
},
{
"name": "model.layers.47.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 23603200
},
{
"name": "model.layers.47.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 23613440
},
{
"name": "model.layers.47.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 23627776
}
],
"md5sum": "86c4a5c5a6ea9a13f4da10a7c12aef45"
},
{
"dataPath": "params_shard_193.bin",
"format": "raw-shard",
"nbytes": 14745600,
"records": [
{
"name": "model.layers.47.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.47.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
}
],
"md5sum": "c2b503224f25bd6fb813152560bf474c"
}
]
}