CharlieFRuan's picture
Initial commit
2f020a8 verified
{
"metadata": {
"ParamSize": 405,
"ParamBytes": 8136314880.0,
"BitsPerParam": 5.00086029169671
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 81920000,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
32000,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 81920000,
"byteOffset": 0
}
],
"md5sum": "864de78a8348047218285c990bdbcb48"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.30.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "01e1af245f704a5d9e6a99b94c789600"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.31.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "d3bea6e772458b8bd4d241b6c78cd510"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "a9f5e4a1fc88a60150054f24ec02f165"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.31.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "97b08d01db5e926952b0fa8724a570a9"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 32890880,
"records": [
{
"name": "lm_head.q_scale",
"shape": [
32000,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240000,
"byteOffset": 0
},
{
"name": "model.layers.30.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 10240000
},
{
"name": "model.layers.30.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 10250240
},
{
"name": "model.layers.30.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14673920
},
{
"name": "model.layers.31.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14684160
},
{
"name": "model.layers.31.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14694400
},
{
"name": "model.layers.31.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19118080
},
{
"name": "model.layers.31.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 27965440
},
{
"name": "model.layers.31.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 27975680
}
],
"md5sum": "e0d3405cd817035e7c761d8f58713353"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.32.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "b273aee4a2253683e624b0d38e8cfeb8"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.32.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "90e92ed28868d84a078e06413eb8c5ef"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.32.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "9ea191d13e873445289b7151dde81423"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.31.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.31.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.32.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.32.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.32.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.32.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.32.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 28037120
}
],
"md5sum": "e558d0946df1be8256ec6e6ca1b975d0"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.33.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "0e5022a82f5f3048f89249771da01870"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.33.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "57c7d293b65f5c6553f039be3af76963"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.33.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "e05e9c4baab0c43105314c5e71f3e387"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.32.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.32.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.33.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.33.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.33.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.33.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.33.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 28037120
}
],
"md5sum": "55dcdecd9698e7c6c7174165390cd0bd"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.34.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "296c1d885ac8736654cb8a09dadccd9d"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.34.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "a0a9b584eb153a731e733a4cab057d8a"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.34.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "d062e4f9fcab6864e4b6fbe36e8c5b71"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.33.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.33.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.34.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.34.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.34.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.34.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.34.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 28037120
}
],
"md5sum": "a60f564608bf35bf693140eb52295054"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.35.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "9b20e5c19a5a07f7d198e400c64035e6"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.35.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "1f12323488d29474c3988830f4591e43"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.35.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "d434fe2d46072b69d19cb36e0630aa72"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.34.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.34.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.35.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.35.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.35.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.35.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.35.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 28037120
}
],
"md5sum": "50e26a6a5b7b7fd33122f8487b24da00"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.36.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "0ece3448cc6af01b3fcd2725e1e14a3f"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.36.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "cdf4b211773911b035bd50340866b064"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.36.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "b5c92e21b4b2f49ba20029dcebcb3f78"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.35.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.35.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.36.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.36.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.36.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.36.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.36.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 28037120
}
],
"md5sum": "cf0b3d4027136e00750a2647994ba1db"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.37.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "b936d427a828ef337d4616da2fc12b3e"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.37.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "0079645efbe7c6b8184a1df24431df0f"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.37.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "42c6275d92335b39d60470e0f8157623"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.36.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.36.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.37.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.37.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.37.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.37.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.37.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 28037120
}
],
"md5sum": "39e03eb85b95400e0f1136ba801aa128"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.38.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "bd0e5c2d76eb91121347368078a2aa3e"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.38.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "c0d3faf60f47c4ace91756bb89dc511b"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.38.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "2394a3656acd3270ae2c6ae776870d09"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.37.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.37.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.38.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.38.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.38.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.38.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.38.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 28037120
}
],
"md5sum": "7bc9ec9cab711faefd74662d0f42e4c1"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.39.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "76401432d0e32a7a65285d719257ce6b"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.39.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "e65014ec8465b482eb0da29ef8921b24"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.39.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "779e8833ee4c9e3d0ea45bfff0b646ec"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.38.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.38.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.39.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.39.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.39.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.39.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.39.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 28037120
}
],
"md5sum": "aa28c139d09b82a37e43fdb6a48bb5b0"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 81920000,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
32000,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 81920000,
"byteOffset": 0
}
],
"md5sum": "03f14e673a60dfd8de7d80639b2047a0"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "91d9953b831990f026e0a84cadd02bdb"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "7cd60a1d562542b4dd6f76393573054b"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 29429760,
"records": [
{
"name": "model.layers.39.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.39.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.norm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.embed_tokens.q_scale",
"shape": [
32000,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240000,
"byteOffset": 14755840
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 24995840
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 25006080
}
],
"md5sum": "08a259b4b35992db7339b6b69917d5f8"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.0.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "6c7b70c6ad7c2081c67f02c792833dbe"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "a0977dde72fc3394fb70f1284506b1d6"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "ae30f0defaba0472b5ade36eb3b5fb6f"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 0
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 8847360
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 8857600
},
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13772800
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 26880000
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28518400
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 28528640
}
],
"md5sum": "7b99b3817b642b7369c68c957b64245d"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.1.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "3ed51ccd2d7b180a4eaeae8bdee2b37b"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "b50a3723cf3b26f260dbb3cc4c574608"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "81467a29690f015d95641160c056c99a"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 0
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 8847360
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 8857600
},
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13772800
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 26880000
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28518400
},
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 28528640
}
],
"md5sum": "e12837288033c868c86d70d8b85981bc"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.10.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "0124ff0a1f200b45be936dc179df4c23"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "92c1ab1a49076b5af8b3aadfc001bdd9"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "beba158cdcfb06f698c64a7b69709ff6"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 0
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 8847360
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 8857600
},
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13772800
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 26880000
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28518400
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 28528640
}
],
"md5sum": "c0d1b8ba58ce57b55ffc41e14d1561d2"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.11.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "bf0779de9d628e7fa9e91cba26b599c0"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "d648acd04637f82776ac87470793e3df"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "6c1144587888ed4d0629be0791c317b1"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 0
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 8847360
},
{
"name": "model.layers.11.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 8857600
},
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13772800
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 26880000
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28518400
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 28528640
}
],
"md5sum": "cecf234721af54bd97960b87218f71e5"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.12.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "e0fd9423830f5eda37a237a7e7b3448c"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "2b56623ed72b8aef469b4e53ff339511"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "46749cc536875f210a27db89c286ac43"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 0
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 8847360
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 8857600
},
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13772800
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 26880000
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28518400
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 28528640
}
],
"md5sum": "1fec7a9facb13d78990a4b8639900106"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.13.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "aec4e5f01f4cb5a2b7ee24f6425f9c9b"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "402b745321b1b0faac91cebad99b1a11"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "0cef5a19bbfee4fa73a7886511f0b54a"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 0
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 8847360
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 8857600
},
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13772800
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 26880000
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28518400
},
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 28528640
}
],
"md5sum": "8c300e9b1a07e3f319cf9d728f7995bb"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.14.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "2e08cb9d8ebbc5e07cb5bbb95aca25e6"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.15.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "071489bda2def9867369c4d0216666c2"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "853a7b98bfa98c77188f160902d21f43"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 33443840,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 0
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 8847360
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 8857600
},
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13772800
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 26880000
},
{
"name": "model.layers.15.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 28518400
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 33433600
}
],
"md5sum": "0b285f01a5c721279bcaaee1f89ff21b"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "4dd7e495b57d55620e78df9e4bdf031d"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.2.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "53a946229e9c71a65db0381f7e73e01a"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "fa3374f843c92e71285bfe0498b4d562"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 0
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 4423680
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13271040
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 13281280
},
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 18196480
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 31303680
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 32942080
}
],
"md5sum": "204e17c68c1fa9c9be518069b855674a"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "16cb38d4ac7c07e70fc1f59f1e29b89a"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.3.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "0098fb3a0661e255da1f3130b37f727f"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "a80a6289b630c7d36ac836fc6051d1aa"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 0
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 4423680
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13271040
},
{
"name": "model.layers.3.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 13281280
},
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 18196480
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 31303680
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 32942080
}
],
"md5sum": "1449425c1662e8c86f291b3b17c838a6"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "8804c9b1ddef2b74480bbefd0ca526ad"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.4.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "fde3cf0a1e37a93f814b5d3fa0316487"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "9b501907731e23ce2f53d279606267a5"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 0
},
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 4423680
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13271040
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 13281280
},
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 18196480
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 31303680
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 32942080
}
],
"md5sum": "5aa8e8fff2e5c685f5256e6ce38bd917"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "1f571a827b9d08f39bd6ec36a4eeda8c"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.5.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "e2956ffc60adee997003a56e86e54387"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "57d3fc32b6950fd8beeaeabf9ed25086"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 0
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 4423680
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13271040
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 13281280
},
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 18196480
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 31303680
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 32942080
}
],
"md5sum": "6c7decfb07b030de19e1aa0dffab794a"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "f9e3ae975ead02e6d33c23cbcbc82276"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.6.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "a810e7d2ff7ef792f145f610c65aa113"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "b7d436e7c49877eac0ec5090c09da34f"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 0
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 4423680
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13271040
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 13281280
},
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 18196480
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 31303680
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 32942080
}
],
"md5sum": "d25c27ca526ea6612964ea588f384d9d"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "18cc6f6540c99d5611e81d8670b3fa58"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.7.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "93490d9bf78130de2d26c91131fdd4d5"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "d7c79758dc7ed4e097fcd56a430376a5"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 0
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 4423680
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13271040
},
{
"name": "model.layers.7.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 13281280
},
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 18196480
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 31303680
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 32942080
}
],
"md5sum": "06a934bf96618bfa1cddc4a734f166c4"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "ed26b40d86f6c14b4d1e1781620e0a64"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.8.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "c9bcbbdc687083b6d3f0754da66a9e13"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "bc338927785cb470efe4c478f9ae2bfb"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 0
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 4423680
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13271040
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 13281280
},
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 18196480
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 31303680
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 32942080
}
],
"md5sum": "d253981360c70557f2c8c4b04c4e17f6"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "1bd738d8142ae51e48718f9af0f7de64"
},
{
"dataPath": "params_shard_99.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.9.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "bdf8cfc45750283c61427dc55daa5eda"
},
{
"dataPath": "params_shard_100.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "264d1dd895ee0b3dc848e8c67967256a"
},
{
"dataPath": "params_shard_101.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 0
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 4423680
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13271040
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 13281280
},
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 18196480
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 31303680
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 32942080
}
],
"md5sum": "b0faec33ac52c57c9c18e06fd1388110"
},
{
"dataPath": "params_shard_102.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "b5e90f6ccb30b9b5f868d84791b56670"
},
{
"dataPath": "params_shard_103.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "3d81dc383212799e91bdb37c77d8eae8"
},
{
"dataPath": "params_shard_104.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "a8996dab0b33742cda0b9d9ad1476655"
},
{
"dataPath": "params_shard_105.bin",
"format": "raw-shard",
"nbytes": 32460800,
"records": [
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 0
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 4423680
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13271040
},
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13281280
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 26388480
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 28037120
}
],
"md5sum": "61fdc25e126552f209f3971a3f658791"
},
{
"dataPath": "params_shard_106.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.16.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "ba8441e6c47801e445e336127628adc1"
},
{
"dataPath": "params_shard_107.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "209def3eaf81c6dfcee4ad414f480b86"
},
{
"dataPath": "params_shard_108.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "6cb60d3bc83d89ea9de8217d9c8f3444"
},
{
"dataPath": "params_shard_109.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 0
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 8847360
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 8857600
},
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13772800
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 26880000
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28518400
},
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 28528640
}
],
"md5sum": "bdf9334abfd0313dab26150968a162a4"
},
{
"dataPath": "params_shard_110.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.17.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "d772dad7a595599a2e71b643bccd11cd"
},
{
"dataPath": "params_shard_111.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "0f49c2777cb517a629013f11ba890f2b"
},
{
"dataPath": "params_shard_112.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "3137fda2d3217408be76128524152d73"
},
{
"dataPath": "params_shard_113.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 0
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 8847360
},
{
"name": "model.layers.17.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 8857600
},
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13772800
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 26880000
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28518400
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 28528640
}
],
"md5sum": "9419fa1f1d3950ff854b5129490515c8"
},
{
"dataPath": "params_shard_114.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.18.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "38ed07c8bce1704270dc871b7b113ed3"
},
{
"dataPath": "params_shard_115.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "8b8ece753ffab7e15e4533468d55256a"
},
{
"dataPath": "params_shard_116.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "33dd9b7c1aabf973feb2f3fa3f6bd0cf"
},
{
"dataPath": "params_shard_117.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 0
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 8847360
},
{
"name": "model.layers.18.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 8857600
},
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13772800
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 26880000
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28518400
},
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 28528640
}
],
"md5sum": "6c66da8f0b1e4076916b91db6f732f67"
},
{
"dataPath": "params_shard_118.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.19.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "05dca5c15deeb50087c4a7d3ee01a9f0"
},
{
"dataPath": "params_shard_119.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "278b12a6722a888c096149cf8554eaaa"
},
{
"dataPath": "params_shard_120.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "31440dd4e596d91ff7e3a260ca8eb647"
},
{
"dataPath": "params_shard_121.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 0
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 8847360
},
{
"name": "model.layers.19.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 8857600
},
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13772800
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 26880000
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28518400
},
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 28528640
}
],
"md5sum": "f72c33fa513d73b048a6d33e627601ca"
},
{
"dataPath": "params_shard_122.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.20.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "e21da711dc588f258e2247b88be8e7a3"
},
{
"dataPath": "params_shard_123.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "942e5c36a8fd84cd049faf78ba4a4859"
},
{
"dataPath": "params_shard_124.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "d766658b6c398d0d637ed32621658451"
},
{
"dataPath": "params_shard_125.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 0
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 8847360
},
{
"name": "model.layers.20.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 8857600
},
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13772800
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 26880000
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28518400
},
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 28528640
}
],
"md5sum": "be82347b072974fa29455c8166902484"
},
{
"dataPath": "params_shard_126.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.21.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "3fa479f854863abce1f47c8dec5e52c0"
},
{
"dataPath": "params_shard_127.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "6070bf75baec452579deb3f35ae1df5f"
},
{
"dataPath": "params_shard_128.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "f8f31fa0a2c7c96ef4839a77124ab881"
},
{
"dataPath": "params_shard_129.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 0
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 8847360
},
{
"name": "model.layers.21.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 8857600
},
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13772800
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 26880000
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28518400
},
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 28528640
}
],
"md5sum": "637aa2c05d10ebf13aec3912b6d88fb6"
},
{
"dataPath": "params_shard_130.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.22.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "1200ca920b31a2644c1ca15173a9c7dc"
},
{
"dataPath": "params_shard_131.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "a9c7ae47877415e84f491bd3b4cc798e"
},
{
"dataPath": "params_shard_132.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "08d15d2df956f79baa1113c4bfe18822"
},
{
"dataPath": "params_shard_133.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 0
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 8847360
},
{
"name": "model.layers.22.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 8857600
},
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13772800
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 26880000
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28518400
},
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 28528640
}
],
"md5sum": "e0f606134215671adc2070827766b0df"
},
{
"dataPath": "params_shard_134.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.23.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "ee3f29e528e185f5c77e67b72f43de4c"
},
{
"dataPath": "params_shard_135.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "a89f7d60339a27f8d91ddae09dfce251"
},
{
"dataPath": "params_shard_136.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "1a814f4b919e6a034ec95a01b4b52266"
},
{
"dataPath": "params_shard_137.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 0
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 8847360
},
{
"name": "model.layers.23.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 8857600
},
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13772800
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 26880000
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28518400
},
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 28528640
}
],
"md5sum": "cfbce5bf75c3fe38bb18a6c844e7914f"
},
{
"dataPath": "params_shard_138.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.24.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "d8810fe22817b93cf2731858c6dcac76"
},
{
"dataPath": "params_shard_139.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "be317671eb0452669efeb209d08cb228"
},
{
"dataPath": "params_shard_140.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "4437b4d9b153eecfd044ea2e742cf81a"
},
{
"dataPath": "params_shard_141.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 0
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 8847360
},
{
"name": "model.layers.24.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 8857600
},
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13772800
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 26880000
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28518400
},
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 28528640
}
],
"md5sum": "e4f882f623ea782e0da18b15ba8c0c20"
},
{
"dataPath": "params_shard_142.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.25.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "05b0c783d1ee1aada1f2ed016de689d7"
},
{
"dataPath": "params_shard_143.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "5ab60195305eca7940dceddce5a08d9e"
},
{
"dataPath": "params_shard_144.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "8a3d90a4ebb8797b2ef6e845b727cdb5"
},
{
"dataPath": "params_shard_145.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 0
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 8847360
},
{
"name": "model.layers.25.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 8857600
},
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13772800
},
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 26880000
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28518400
},
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 28528640
}
],
"md5sum": "52c9b97d0cc918cdf8942b268913d792"
},
{
"dataPath": "params_shard_146.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.26.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "100a4a31b51943f7d3cab403b83d22af"
},
{
"dataPath": "params_shard_147.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "97e1ce8b53ff5163d11760554d5a2347"
},
{
"dataPath": "params_shard_148.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "36d2cdacf2abb015505ba59a85686910"
},
{
"dataPath": "params_shard_149.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 0
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 8847360
},
{
"name": "model.layers.26.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 8857600
},
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13772800
},
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 26880000
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28518400
},
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 28528640
}
],
"md5sum": "98ee7cb56fae05e990d2a98ae5ad3cfc"
},
{
"dataPath": "params_shard_150.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.27.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "a8b88d52c6e6a081c84ca860408c6617"
},
{
"dataPath": "params_shard_151.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.28.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "1701c6ca61a567d86ed0a61617710b8f"
},
{
"dataPath": "params_shard_152.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "b7060e7017a404a0a9b509f604eba9d9"
},
{
"dataPath": "params_shard_153.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 0
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 8847360
},
{
"name": "model.layers.27.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 8857600
},
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13772800
},
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 26880000
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28518400
},
{
"name": "model.layers.28.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 28528640
}
],
"md5sum": "580af7a89a3fad3a0bd216a4cec1a674"
},
{
"dataPath": "params_shard_154.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.28.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "2bdf834315b55a0986e14c1f5456fe12"
},
{
"dataPath": "params_shard_155.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.29.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "4dc961c79fba4f304d09af165070e9d3"
},
{
"dataPath": "params_shard_156.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "846a65b982271674a36ee4db0de1e009"
},
{
"dataPath": "params_shard_157.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 0
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 8847360
},
{
"name": "model.layers.28.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 8857600
},
{
"name": "model.layers.28.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13772800
},
{
"name": "model.layers.28.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 26880000
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28518400
},
{
"name": "model.layers.29.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 28528640
}
],
"md5sum": "b31221c4e958539803caa2c3fa13b0ef"
},
{
"dataPath": "params_shard_158.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.29.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "301bfec3fd564b58c59b408f2f0563da"
},
{
"dataPath": "params_shard_159.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "4dca7b49e0fba1a9aa1e764f802b0866"
},
{
"dataPath": "params_shard_160.bin",
"format": "raw-shard",
"nbytes": 28518400,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 0
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 8847360
},
{
"name": "model.layers.29.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 8857600
},
{
"name": "model.layers.29.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13772800
},
{
"name": "model.layers.29.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 26880000
}
],
"md5sum": "0ccd63e259838d4514c2b8954135cb21"
},
{
"dataPath": "params_shard_161.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.30.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "e770e24cb342c0369c7fe100d540b3a8"
},
{
"dataPath": "params_shard_162.bin",
"format": "raw-shard",
"nbytes": 28508160,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 0
},
{
"name": "model.layers.30.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 8847360
},
{
"name": "model.layers.30.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13762560
},
{
"name": "model.layers.30.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 26869760
}
],
"md5sum": "f5d2754d1b5f53ae8d7827e9141e08ff"
}
]
}