EVA-Qwen2.5-14B-v0.2-GPTQ-Int4 / ndarray-cache.json
numen-tech's picture
Add weights
7ea4273
{
"metadata": {
"ParamSize": 533,
"ParamBytes": 7617046528.0,
"BitsPerParam": 4.125675919921857
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 389283840,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
640,
152064
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 389283840,
"byteOffset": 0
}
],
"md5sum": "7a9a286748b4df814a1c8d03ce3be916"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.42.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "4aff9dcddcbfbefb807326305b86a706"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.42.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "66ea03c49eaa7b431632eaf2097b6730"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.43.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "d727e20f32ac00a55b86a22a8bfd7924"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.43.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "b9e99b5b1877b499a6624d6e0b1164ec"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.43.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "5ca7e1d8136377977d741d28c7f281b3"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.44.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "a1213a3400e6b2969a33f161b0b23965"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 32956416,
"records": [
{
"name": "lm_head.q_scale",
"shape": [
40,
152064
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12165120,
"byteOffset": 0
},
{
"name": "model.layers.42.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 12165120
},
{
"name": "model.layers.42.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 12175360
},
{
"name": "model.layers.42.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 13281280
},
{
"name": "model.layers.42.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 15493120
},
{
"name": "model.layers.43.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 15503360
},
{
"name": "model.layers.43.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 15513600
},
{
"name": "model.layers.43.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 16619520
},
{
"name": "model.layers.43.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 18831360
},
{
"name": "model.layers.43.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 18841600
},
{
"name": "model.layers.43.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 18855936
},
{
"name": "model.layers.43.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 19429376
},
{
"name": "model.layers.43.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 32536576
},
{
"name": "model.layers.44.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 32946176
}
],
"md5sum": "8457a437f0f3be57e5407cdb2bb460d9"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.44.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "4be8e8774ba8f1dcf055092a384debdc"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 22265856,
"records": [
{
"name": "model.layers.44.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 0
},
{
"name": "model.layers.44.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 1105920
},
{
"name": "model.layers.44.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 3317760
},
{
"name": "model.layers.44.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 3328000
},
{
"name": "model.layers.44.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 3342336
},
{
"name": "model.layers.44.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 21692416
}
],
"md5sum": "1c22821d9b1665bf10808974ff14095d"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.45.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "ef08df1cdb9846780f4adac99303f42c"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.45.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "decb8b8302a7901de59a392538e4500b"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.45.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "108dc6ece0a4469558ca76f78becc144"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.46.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "82cbde6d68a65805d8d266369df81f28"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.46.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "ced93d2cb7d7a9fc5079313c028db945"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 32075776,
"records": [
{
"name": "model.layers.44.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.44.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 13107200
},
{
"name": "model.layers.45.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13516800
},
{
"name": "model.layers.45.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 13527040
},
{
"name": "model.layers.45.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 14632960
},
{
"name": "model.layers.45.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 16844800
},
{
"name": "model.layers.45.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 16855040
},
{
"name": "model.layers.45.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 16869376
},
{
"name": "model.layers.45.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 17442816
},
{
"name": "model.layers.45.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 30550016
},
{
"name": "model.layers.46.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 30959616
},
{
"name": "model.layers.46.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 30969856
}
],
"md5sum": "e45dba2873a7ae194e19560279ef35d5"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 21159936,
"records": [
{
"name": "model.layers.46.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 0
},
{
"name": "model.layers.46.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 2211840
},
{
"name": "model.layers.46.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 2222080
},
{
"name": "model.layers.46.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 2236416
},
{
"name": "model.layers.46.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 20586496
}
],
"md5sum": "ea2874cd47557e0274490f59f93a0592"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.47.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "066110eac7a0efb9635183db84e80ec7"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.47.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "f7515e12d9873622d4aead332779c846"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.47.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "eca31758ff7928f470605f2c82573c28"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 389283840,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
152064,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 389283840,
"byteOffset": 0
}
],
"md5sum": "396df257e3ead589623aa8b9377845a8"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 30969856,
"records": [
{
"name": "model.layers.46.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.46.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 13107200
},
{
"name": "model.layers.47.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13516800
},
{
"name": "model.layers.47.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 13527040
},
{
"name": "model.layers.47.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 14632960
},
{
"name": "model.layers.47.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 16844800
},
{
"name": "model.layers.47.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 16855040
},
{
"name": "model.layers.47.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 16869376
},
{
"name": "model.layers.47.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 17442816
},
{
"name": "model.layers.47.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 30550016
},
{
"name": "model.norm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 30959616
}
],
"md5sum": "b3257b8a35970bc8c776aead2e4859fb"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "ae597428dd135dcadec9d97c26c4b484"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "80fa8f69a2c623e16cd7ff0ead9bca20"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.0.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "7ff56586de8acd67d976b31781735101"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "bb234d5b2f48facc76659e6dc6d5d0f4"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "7687223a138db2142285d4ed35f7272a"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.1.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "ba44c5f7f920380434060786bf313b25"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 33533952,
"records": [
{
"name": "model.embed_tokens.q_scale",
"shape": [
152064,
40
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12165120,
"byteOffset": 0
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 12165120
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 12175360
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 13281280
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 15493120
},
{
"name": "model.layers.0.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 15503360
},
{
"name": "model.layers.0.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 15517696
},
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 16091136
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 29198336
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 29607936
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 29618176
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 30724096
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 32935936
},
{
"name": "model.layers.1.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 32946176
},
{
"name": "model.layers.1.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 32960512
}
],
"md5sum": "1c25c8bb3ffdb4969525cf51bcc7560f"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "8be2ac5669a783e1b0ada72be8ae0dcc"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "cfc5f04604811c122a9896fa569f897f"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.2.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "2f9856f6df264630929c44a9afe6aaae"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "19dfbdd47783461525282498d94c68b6"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "c01cb1edd83848f05b58bdbb628cb9c3"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 32075776,
"records": [
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 13107200
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13516800
},
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 13527040
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 14632960
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 16844800
},
{
"name": "model.layers.2.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 16855040
},
{
"name": "model.layers.2.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 16869376
},
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 17442816
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 30550016
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 30959616
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 30969856
}
],
"md5sum": "dc77a4fae98db2b54939d17a1336d41e"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 21159936,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 0
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 2211840
},
{
"name": "model.layers.3.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 2222080
},
{
"name": "model.layers.3.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 2236416
},
{
"name": "model.layers.3.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 20586496
}
],
"md5sum": "a523f4c099e47049762ba85be09b59cb"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "6d5fb69fdfac386ae4cf817fb7bf535c"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "9da172f9e9b513a5200a70b8fafa78d0"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.4.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "e5b6ae782366f31dd493641a40ec903f"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "8e7ad599035540a0807d3ca1beb06575"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "94636243bb8ba55dfdfc7774cf99c8f8"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 32075776,
"records": [
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 13107200
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13516800
},
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 13527040
},
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 14632960
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 16844800
},
{
"name": "model.layers.4.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 16855040
},
{
"name": "model.layers.4.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 16869376
},
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 17442816
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 30550016
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 30959616
},
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 30969856
}
],
"md5sum": "03918d731e1cd6dee628f9d7e1e672b0"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 21159936,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 0
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 2211840
},
{
"name": "model.layers.5.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 2222080
},
{
"name": "model.layers.5.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 2236416
},
{
"name": "model.layers.5.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 20586496
}
],
"md5sum": "1fac7ba55cb7f70f8f3e3bd69223cd5e"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 32454656,
"records": [
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 13107200
},
{
"name": "model.layers.6.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 13516800
},
{
"name": "model.layers.6.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 13531136
},
{
"name": "model.layers.6.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 31881216
}
],
"md5sum": "6da2533a4ce5f42600d76308618125cf"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "69920a0f3a2b36c916fbd081de956d63"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "be2059d8096205d23cd621920d7adde8"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.10.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "31b653f3dadc11e205ce0708a510e815"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "a740997d6dc04ea7f3a300ac6d28d64b"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "3d5c408f4d9a4fe5810668b8726cc701"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 32075776,
"records": [
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 13107200
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13516800
},
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 13527040
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 14632960
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 16844800
},
{
"name": "model.layers.10.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 16855040
},
{
"name": "model.layers.10.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 16869376
},
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 17442816
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 30550016
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 30959616
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 30969856
}
],
"md5sum": "56d6d4b91ad81d6cad0ba6f89ccab776"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 21159936,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 0
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 2211840
},
{
"name": "model.layers.11.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 2222080
},
{
"name": "model.layers.11.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 2236416
},
{
"name": "model.layers.11.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 20586496
}
],
"md5sum": "ee3eb86b085da59e7e9a519a07edd797"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "c0ecf6d9c24d0dbc177b0cf4ef02b010"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "f2550392a5c0465dc5d3a7701dab9d14"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.12.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "d260bca7690960dceda20b3552768a1c"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "d3071d115308e7356cc51bfddc2877e2"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "cb71ef1bc793b1c851b30cda3e0e2052"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 32075776,
"records": [
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 13107200
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13516800
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 13527040
},
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 14632960
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 16844800
},
{
"name": "model.layers.12.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 16855040
},
{
"name": "model.layers.12.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 16869376
},
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 17442816
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 30550016
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 30959616
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 30969856
}
],
"md5sum": "eb8d672ed70dbb2ea6ee428ec195705b"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 21159936,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 0
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 2211840
},
{
"name": "model.layers.13.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 2222080
},
{
"name": "model.layers.13.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 2236416
},
{
"name": "model.layers.13.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 20586496
}
],
"md5sum": "d35c62820b0128e3bdd38d7e114f7ab3"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "0ba19107004473e6be7fef4964be97b5"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "85d9d266f5c4837db8b88b1bedcf97e4"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.14.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "783f00e38735be30b42317dd7540033e"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.15.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "a646f52d8483f8d1613d95ba27160696"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 31547392,
"records": [
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 13107200
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13516800
},
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 13527040
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 14632960
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 16844800
},
{
"name": "model.layers.14.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 16855040
},
{
"name": "model.layers.14.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 16869376
},
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 17442816
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 30550016
},
{
"name": "model.layers.15.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 30959616
},
{
"name": "model.layers.15.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 30973952
}
],
"md5sum": "d4fe78a5cf2efef147bd619154eaaf3f"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "712a06a67f5db4cb5beefabfeb6d2ad8"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "88c42d478efb609d1a8f8dd9d70ed0aa"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "585e8610b8be5f9e07613166077c6075"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "d48fd4935c8fd2d5a2b3463a42cba1c6"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.7.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "3bbff52ba50b8703e75a57d1a456436f"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 20781056,
"records": [
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 13107200
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13516800
},
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 13527040
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 14632960
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 16844800
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 16855040
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 16865280
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 17971200
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 20183040
},
{
"name": "model.layers.7.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 20193280
},
{
"name": "model.layers.7.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 20207616
}
],
"md5sum": "e3a5d801f52574ca6a1bfe75768f69b7"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "fbfa211f21ae80b4ef5e2a9dd08b1589"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "71a1371e7d9f0f15125ee9261820d31b"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.8.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "0ddd8bf4a110b3ed0c9eafe27321beed"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "7c93b14e647e8b1bdcf86e506b62de22"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "7525fa2ead6fad05bed1ad642e1a20d2"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 32075776,
"records": [
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 13107200
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13516800
},
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 13527040
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 14632960
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 16844800
},
{
"name": "model.layers.8.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 16855040
},
{
"name": "model.layers.8.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 16869376
},
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 17442816
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 30550016
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 30959616
},
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 30969856
}
],
"md5sum": "8d861c5a757d8065f37fc5b011d8de78"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 21159936,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 0
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 2211840
},
{
"name": "model.layers.9.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 2222080
},
{
"name": "model.layers.9.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 2236416
},
{
"name": "model.layers.9.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 20586496
}
],
"md5sum": "0fa126cac6d330cc868d341be4db59d2"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "177bf300d1d4109049b2ebfc1b594eb1"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "8b39778b2ab49c0922c5df6155a5ae05"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "9ea1c2ec121aceb0a35b3ac846b5e68e"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "45e0f6826f960ca3b46cbca94fc9ab75"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.16.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "e85ca07ca2b50311f9b5f2b77830553a"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 20781056,
"records": [
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 13107200
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13516800
},
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 13527040
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 14632960
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 16844800
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 16855040
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 16865280
},
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 17971200
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 20183040
},
{
"name": "model.layers.16.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 20193280
},
{
"name": "model.layers.16.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 20207616
}
],
"md5sum": "ed36d1e03f72da65e1d73df9d3ae26a3"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "0e2b78c07bde8c67076c8bf6d5f1a457"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "32169cf9897ae14e46e6aea6187c09e8"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.17.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "5d3ca2033853e2f8676654e6f47969f5"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "20e40a9394134df45a8d3cd52828b1bd"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "068f0e8c50ceda5ba09522872c589c1d"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 32075776,
"records": [
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 13107200
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13516800
},
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 13527040
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 14632960
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 16844800
},
{
"name": "model.layers.17.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 16855040
},
{
"name": "model.layers.17.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 16869376
},
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 17442816
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 30550016
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 30959616
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 30969856
}
],
"md5sum": "2cb8a3a0a411141d8bcd924053b3cc06"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 21159936,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 0
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 2211840
},
{
"name": "model.layers.18.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 2222080
},
{
"name": "model.layers.18.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 2236416
},
{
"name": "model.layers.18.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 20586496
}
],
"md5sum": "199f04882307de3cebed77072ee6222d"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "e4adac834511e5ce7a52b50f46f674b2"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "6094e39116f14c82d3881228606a60ad"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.19.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "367e6c4ce318254c03fccad8ee6e3260"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "5d1f5dc3c2eb9155cba6c9c01197a0c1"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "7044ba47e9a1930a78e0bba6b6ebce4a"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 32075776,
"records": [
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 13107200
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13516800
},
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 13527040
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 14632960
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 16844800
},
{
"name": "model.layers.19.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 16855040
},
{
"name": "model.layers.19.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 16869376
},
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 17442816
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 30550016
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 30959616
},
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 30969856
}
],
"md5sum": "ff7a2d0d0cea5c0b7ef93ac13e7cae46"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 21159936,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 0
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 2211840
},
{
"name": "model.layers.20.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 2222080
},
{
"name": "model.layers.20.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 2236416
},
{
"name": "model.layers.20.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 20586496
}
],
"md5sum": "87cde4332cf1d5b580c18ffa05da1bc9"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "38fdbd173771f3a624a19076e677725f"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "865f34db46c2a1cf0c3465417d81550f"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.21.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "4079e49123705ce5afcb7c5144c8b888"
},
{
"dataPath": "params_shard_99.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "628414890d2e2706ab9f66bd358df3e9"
},
{
"dataPath": "params_shard_100.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "8054ab6fc3a489f58200cf58af709d14"
},
{
"dataPath": "params_shard_101.bin",
"format": "raw-shard",
"nbytes": 32075776,
"records": [
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 13107200
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13516800
},
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 13527040
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 14632960
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 16844800
},
{
"name": "model.layers.21.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 16855040
},
{
"name": "model.layers.21.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 16869376
},
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 17442816
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 30550016
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 30959616
},
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 30969856
}
],
"md5sum": "ee077f26fa1edda070b45746ba19590b"
},
{
"dataPath": "params_shard_102.bin",
"format": "raw-shard",
"nbytes": 21159936,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 0
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 2211840
},
{
"name": "model.layers.22.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 2222080
},
{
"name": "model.layers.22.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 2236416
},
{
"name": "model.layers.22.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 20586496
}
],
"md5sum": "1ed1834859a57bd4853cded51355a380"
},
{
"dataPath": "params_shard_103.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "9adfd2584e55e090998cd4a7e17f96f0"
},
{
"dataPath": "params_shard_104.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "f315a0d6ac6c8a625abc33771e4977bb"
},
{
"dataPath": "params_shard_105.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.23.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "00feac10895287be321949d68df1e791"
},
{
"dataPath": "params_shard_106.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.24.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "ee5e42d8a82cf84a89001b25034459bc"
},
{
"dataPath": "params_shard_107.bin",
"format": "raw-shard",
"nbytes": 31547392,
"records": [
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 13107200
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13516800
},
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 13527040
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 14632960
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 16844800
},
{
"name": "model.layers.23.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 16855040
},
{
"name": "model.layers.23.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 16869376
},
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 17442816
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 30550016
},
{
"name": "model.layers.24.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 30959616
},
{
"name": "model.layers.24.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 30973952
}
],
"md5sum": "7185155a50645aee422b5c861171cb17"
},
{
"dataPath": "params_shard_108.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "2b25e48ba937422b912fb0380240cbd9"
},
{
"dataPath": "params_shard_109.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "11c831b355a7f30ffbaf6da1b9dcbd2f"
},
{
"dataPath": "params_shard_110.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "65dff7056f41a7f3e6991c1e16ce2f91"
},
{
"dataPath": "params_shard_111.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "43c02f3da3a05b115c9502a1091291db"
},
{
"dataPath": "params_shard_112.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.25.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "27ae81ef2db6d9c88ad930c210b7457b"
},
{
"dataPath": "params_shard_113.bin",
"format": "raw-shard",
"nbytes": 20781056,
"records": [
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 13107200
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13516800
},
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 13527040
},
{
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 14632960
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 16844800
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 16855040
},
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 16865280
},
{
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 17971200
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 20183040
},
{
"name": "model.layers.25.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 20193280
},
{
"name": "model.layers.25.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 20207616
}
],
"md5sum": "c47b1bd10a67b27ff526ec4027c9cf20"
},
{
"dataPath": "params_shard_114.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "7e4a587765b99fe8443c3c900e3257be"
},
{
"dataPath": "params_shard_115.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "729ea078bef9a16bb07ec16b02ff64a5"
},
{
"dataPath": "params_shard_116.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.26.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "e621282172ccffe6887dd7e0370fb8d7"
},
{
"dataPath": "params_shard_117.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "d9d8f0f61fe7197b07132d3ce77ac755"
},
{
"dataPath": "params_shard_118.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "2959dfd006bf645e869017dbe4eddbf5"
},
{
"dataPath": "params_shard_119.bin",
"format": "raw-shard",
"nbytes": 32075776,
"records": [
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 13107200
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13516800
},
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 13527040
},
{
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 14632960
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 16844800
},
{
"name": "model.layers.26.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 16855040
},
{
"name": "model.layers.26.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 16869376
},
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 17442816
},
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 30550016
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 30959616
},
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 30969856
}
],
"md5sum": "3fb7cb246142867e5ddd2de694e10a41"
},
{
"dataPath": "params_shard_120.bin",
"format": "raw-shard",
"nbytes": 21159936,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 0
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 2211840
},
{
"name": "model.layers.27.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 2222080
},
{
"name": "model.layers.27.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 2236416
},
{
"name": "model.layers.27.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 20586496
}
],
"md5sum": "50a139a6ce3f022c76e1849020d66dde"
},
{
"dataPath": "params_shard_121.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.28.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "5b98e6ea6e78ad6c3623046812cf2a65"
},
{
"dataPath": "params_shard_122.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "a86929358038b2cd5a5ffacef07ada89"
},
{
"dataPath": "params_shard_123.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.28.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "abcf8ec7ecd6af0f171fa4bb26fee134"
},
{
"dataPath": "params_shard_124.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.29.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "e3134533eebed384d41ea522dc997a64"
},
{
"dataPath": "params_shard_125.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "e9faa1074ec4cab55ab210958c54e6f7"
},
{
"dataPath": "params_shard_126.bin",
"format": "raw-shard",
"nbytes": 32075776,
"records": [
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 13107200
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13516800
},
{
"name": "model.layers.28.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 13527040
},
{
"name": "model.layers.28.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 14632960
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 16844800
},
{
"name": "model.layers.28.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 16855040
},
{
"name": "model.layers.28.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 16869376
},
{
"name": "model.layers.28.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 17442816
},
{
"name": "model.layers.28.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 30550016
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 30959616
},
{
"name": "model.layers.29.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 30969856
}
],
"md5sum": "feb9881167ac26463c3b08bd77c04f1b"
},
{
"dataPath": "params_shard_127.bin",
"format": "raw-shard",
"nbytes": 21159936,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 0
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 2211840
},
{
"name": "model.layers.29.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 2222080
},
{
"name": "model.layers.29.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 2236416
},
{
"name": "model.layers.29.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 20586496
}
],
"md5sum": "ca7b17509fdd483f5e6ff691bb306bff"
},
{
"dataPath": "params_shard_128.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.30.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "818b7da7203327e5ccd7ae8f2c8423d9"
},
{
"dataPath": "params_shard_129.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "42af6184e51a9e796c6af8e7f25737ec"
},
{
"dataPath": "params_shard_130.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.30.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "b06c476b34b44092c68a870691c2e5ab"
},
{
"dataPath": "params_shard_131.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.31.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "dd29b048dd889b7393faae2076aea695"
},
{
"dataPath": "params_shard_132.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "873aeca4e16b9c667591985494d66ed9"
},
{
"dataPath": "params_shard_133.bin",
"format": "raw-shard",
"nbytes": 32075776,
"records": [
{
"name": "model.layers.29.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.29.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 13107200
},
{
"name": "model.layers.30.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13516800
},
{
"name": "model.layers.30.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 13527040
},
{
"name": "model.layers.30.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 14632960
},
{
"name": "model.layers.30.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 16844800
},
{
"name": "model.layers.30.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 16855040
},
{
"name": "model.layers.30.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 16869376
},
{
"name": "model.layers.30.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 17442816
},
{
"name": "model.layers.30.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 30550016
},
{
"name": "model.layers.31.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 30959616
},
{
"name": "model.layers.31.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 30969856
}
],
"md5sum": "779b3f346965ec482a6bec0993c94fa5"
},
{
"dataPath": "params_shard_134.bin",
"format": "raw-shard",
"nbytes": 21159936,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 0
},
{
"name": "model.layers.31.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 2211840
},
{
"name": "model.layers.31.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 2222080
},
{
"name": "model.layers.31.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 2236416
},
{
"name": "model.layers.31.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 20586496
}
],
"md5sum": "df2398036ab25b0c909958d4724936d4"
},
{
"dataPath": "params_shard_135.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.32.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "bc9942eeed3a0084bbaea8862dea1734"
},
{
"dataPath": "params_shard_136.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.32.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "3496da19fc4e107cea14aba54f224184"
},
{
"dataPath": "params_shard_137.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.32.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "85d045b9def2e38aa96b8d01af2d4f10"
},
{
"dataPath": "params_shard_138.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.33.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "9dd10f0564d27c740c0a1f0d2842aee1"
},
{
"dataPath": "params_shard_139.bin",
"format": "raw-shard",
"nbytes": 31547392,
"records": [
{
"name": "model.layers.31.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.31.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 13107200
},
{
"name": "model.layers.32.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13516800
},
{
"name": "model.layers.32.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 13527040
},
{
"name": "model.layers.32.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 14632960
},
{
"name": "model.layers.32.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 16844800
},
{
"name": "model.layers.32.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 16855040
},
{
"name": "model.layers.32.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 16869376
},
{
"name": "model.layers.32.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 17442816
},
{
"name": "model.layers.32.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 30550016
},
{
"name": "model.layers.33.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 30959616
},
{
"name": "model.layers.33.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 30973952
}
],
"md5sum": "b38820b9bcfa85000903651ebbea79b8"
},
{
"dataPath": "params_shard_140.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.33.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "8407077a31f604fe262d70d10c848a47"
},
{
"dataPath": "params_shard_141.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.33.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "0aacc5809b7ac20b3247f22acd597651"
},
{
"dataPath": "params_shard_142.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.34.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "96b2501d48856d89080979278eff6f94"
},
{
"dataPath": "params_shard_143.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.34.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "dd30c271fb41a57f9a86960e2cc4ef79"
},
{
"dataPath": "params_shard_144.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.34.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "5628e6264007f8a06aef910c683b88e7"
},
{
"dataPath": "params_shard_145.bin",
"format": "raw-shard",
"nbytes": 20781056,
"records": [
{
"name": "model.layers.33.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.33.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 13107200
},
{
"name": "model.layers.33.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13516800
},
{
"name": "model.layers.33.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 13527040
},
{
"name": "model.layers.33.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 14632960
},
{
"name": "model.layers.33.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 16844800
},
{
"name": "model.layers.34.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 16855040
},
{
"name": "model.layers.34.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 16865280
},
{
"name": "model.layers.34.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 17971200
},
{
"name": "model.layers.34.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 20183040
},
{
"name": "model.layers.34.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 20193280
},
{
"name": "model.layers.34.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 20207616
}
],
"md5sum": "1739ae899ba2d136e36e2efce899a9c8"
},
{
"dataPath": "params_shard_146.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.35.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "aab797b70eb81b0eed84f18c20ba73b6"
},
{
"dataPath": "params_shard_147.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.35.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "c464045f236e3c1935336221064eb13d"
},
{
"dataPath": "params_shard_148.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.35.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "0159b399a5754b346b21215c5089cea2"
},
{
"dataPath": "params_shard_149.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.36.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "da9b58348ae20067669d8e994749fd72"
},
{
"dataPath": "params_shard_150.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.36.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "c1008264634bcbdda1df779d3d8e5e3c"
},
{
"dataPath": "params_shard_151.bin",
"format": "raw-shard",
"nbytes": 32075776,
"records": [
{
"name": "model.layers.34.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.34.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 13107200
},
{
"name": "model.layers.35.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13516800
},
{
"name": "model.layers.35.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 13527040
},
{
"name": "model.layers.35.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 14632960
},
{
"name": "model.layers.35.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 16844800
},
{
"name": "model.layers.35.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 16855040
},
{
"name": "model.layers.35.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 16869376
},
{
"name": "model.layers.35.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 17442816
},
{
"name": "model.layers.35.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 30550016
},
{
"name": "model.layers.36.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 30959616
},
{
"name": "model.layers.36.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 30969856
}
],
"md5sum": "cc8510ce07abbcf1e1c08f2398446a0b"
},
{
"dataPath": "params_shard_152.bin",
"format": "raw-shard",
"nbytes": 21159936,
"records": [
{
"name": "model.layers.36.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 0
},
{
"name": "model.layers.36.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 2211840
},
{
"name": "model.layers.36.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 2222080
},
{
"name": "model.layers.36.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 2236416
},
{
"name": "model.layers.36.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 20586496
}
],
"md5sum": "7088cfe905759d49940c3a32f7f2e02a"
},
{
"dataPath": "params_shard_153.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.37.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "863510d6228455c9b59662cf72f23c04"
},
{
"dataPath": "params_shard_154.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.37.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "9dca87a1f9d60abe82823c6f3711aa53"
},
{
"dataPath": "params_shard_155.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.37.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "dc7a18be1c98cbd8e0c3c577bd179364"
},
{
"dataPath": "params_shard_156.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.38.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "726f918b1ce770cd85bfa1a783f3ec70"
},
{
"dataPath": "params_shard_157.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.38.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "1b11d8b2f1be4fd1f57bac9fedcd2025"
},
{
"dataPath": "params_shard_158.bin",
"format": "raw-shard",
"nbytes": 32075776,
"records": [
{
"name": "model.layers.36.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.36.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 13107200
},
{
"name": "model.layers.37.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13516800
},
{
"name": "model.layers.37.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 13527040
},
{
"name": "model.layers.37.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 14632960
},
{
"name": "model.layers.37.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 16844800
},
{
"name": "model.layers.37.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 16855040
},
{
"name": "model.layers.37.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 16869376
},
{
"name": "model.layers.37.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 17442816
},
{
"name": "model.layers.37.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 30550016
},
{
"name": "model.layers.38.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 30959616
},
{
"name": "model.layers.38.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 30969856
}
],
"md5sum": "c2603f77e13f22011aec17399406430f"
},
{
"dataPath": "params_shard_159.bin",
"format": "raw-shard",
"nbytes": 21159936,
"records": [
{
"name": "model.layers.38.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 0
},
{
"name": "model.layers.38.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 2211840
},
{
"name": "model.layers.38.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 2222080
},
{
"name": "model.layers.38.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 2236416
},
{
"name": "model.layers.38.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 20586496
}
],
"md5sum": "7b12e585c83c91f493e96f89654497d4"
},
{
"dataPath": "params_shard_160.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.39.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "391243342e013d0aca12a7ff0a91b19f"
},
{
"dataPath": "params_shard_161.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.39.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "788050f2273aa074513878016ac231dd"
},
{
"dataPath": "params_shard_162.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.39.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "7aeda34dc7807b74765bce8d99e9e9e4"
},
{
"dataPath": "params_shard_163.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.40.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "531af9c25a81ea55662f35bd06aabbb0"
},
{
"dataPath": "params_shard_164.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.40.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "f371ad0de197dfc31226486efbf87165"
},
{
"dataPath": "params_shard_165.bin",
"format": "raw-shard",
"nbytes": 32075776,
"records": [
{
"name": "model.layers.38.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.38.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 13107200
},
{
"name": "model.layers.39.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13516800
},
{
"name": "model.layers.39.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 13527040
},
{
"name": "model.layers.39.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 14632960
},
{
"name": "model.layers.39.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 16844800
},
{
"name": "model.layers.39.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 16855040
},
{
"name": "model.layers.39.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 16869376
},
{
"name": "model.layers.39.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 17442816
},
{
"name": "model.layers.39.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 30550016
},
{
"name": "model.layers.40.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 30959616
},
{
"name": "model.layers.40.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 30969856
}
],
"md5sum": "efe2b22d7d22b2417feb88f3462e231c"
},
{
"dataPath": "params_shard_166.bin",
"format": "raw-shard",
"nbytes": 21159936,
"records": [
{
"name": "model.layers.40.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 0
},
{
"name": "model.layers.40.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 2211840
},
{
"name": "model.layers.40.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 2222080
},
{
"name": "model.layers.40.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 2236416
},
{
"name": "model.layers.40.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 20586496
}
],
"md5sum": "db0d20497d976a417a969d3637de5b6e"
},
{
"dataPath": "params_shard_167.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.41.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "51c7c3dab50e4cdce240f114b0083604"
},
{
"dataPath": "params_shard_168.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.41.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "dc53216cf0095d17f3b14caebdb2e93d"
},
{
"dataPath": "params_shard_169.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.41.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "971566013ab9eaad18dfb9375ff95608"
},
{
"dataPath": "params_shard_170.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.42.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "c7589a155932682b476494a45c443e26"
},
{
"dataPath": "params_shard_171.bin",
"format": "raw-shard",
"nbytes": 31547392,
"records": [
{
"name": "model.layers.40.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.40.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 13107200
},
{
"name": "model.layers.41.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13516800
},
{
"name": "model.layers.41.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 13527040
},
{
"name": "model.layers.41.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 14632960
},
{
"name": "model.layers.41.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 16844800
},
{
"name": "model.layers.41.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 16855040
},
{
"name": "model.layers.41.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 16869376
},
{
"name": "model.layers.41.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 17442816
},
{
"name": "model.layers.41.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 30550016
},
{
"name": "model.layers.42.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 30959616
},
{
"name": "model.layers.42.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 30973952
}
],
"md5sum": "18b0e7df68561928ea5ce1b2933c6f17"
},
{
"dataPath": "params_shard_172.bin",
"format": "raw-shard",
"nbytes": 13516800,
"records": [
{
"name": "model.layers.42.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.42.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 13107200
}
],
"md5sum": "ad2b67b2ad43dea4d2fae00d836e7329"
}
]
}