|
{ |
|
"metadata": { |
|
"ParamSize": 533, |
|
"ParamBytes": 7617046528.0, |
|
"BitsPerParam": 4.125675919921857 |
|
}, |
|
"records": [ |
|
{ |
|
"dataPath": "params_shard_0.bin", |
|
"format": "raw-shard", |
|
"nbytes": 389283840, |
|
"records": [ |
|
{ |
|
"name": "lm_head.q_weight", |
|
"shape": [ |
|
640, |
|
152064 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 389283840, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7a9a286748b4df814a1c8d03ce3be916" |
|
}, |
|
{ |
|
"dataPath": "params_shard_1.bin", |
|
"format": "raw-shard", |
|
"nbytes": 35389440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.42.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 35389440, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4aff9dcddcbfbefb807326305b86a706" |
|
}, |
|
{ |
|
"dataPath": "params_shard_2.bin", |
|
"format": "raw-shard", |
|
"nbytes": 70778880, |
|
"records": [ |
|
{ |
|
"name": "model.layers.42.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
27648 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 70778880, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "66ea03c49eaa7b431632eaf2097b6730" |
|
}, |
|
{ |
|
"dataPath": "params_shard_3.bin", |
|
"format": "raw-shard", |
|
"nbytes": 35389440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.43.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 35389440, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d727e20f32ac00a55b86a22a8bfd7924" |
|
}, |
|
{ |
|
"dataPath": "params_shard_4.bin", |
|
"format": "raw-shard", |
|
"nbytes": 70778880, |
|
"records": [ |
|
{ |
|
"name": "model.layers.43.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
27648 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 70778880, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b9e99b5b1877b499a6624d6e0b1164ec" |
|
}, |
|
{ |
|
"dataPath": "params_shard_5.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18350080, |
|
"records": [ |
|
{ |
|
"name": "model.layers.43.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
640, |
|
7168 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18350080, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5ca7e1d8136377977d741d28c7f281b3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_6.bin", |
|
"format": "raw-shard", |
|
"nbytes": 35389440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.44.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 35389440, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a1213a3400e6b2969a33f161b0b23965" |
|
}, |
|
{ |
|
"dataPath": "params_shard_7.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32956416, |
|
"records": [ |
|
{ |
|
"name": "lm_head.q_scale", |
|
"shape": [ |
|
40, |
|
152064 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12165120, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.42.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 12165120 |
|
}, |
|
{ |
|
"name": "model.layers.42.mlp.down_proj.q_scale", |
|
"shape": [ |
|
108, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1105920, |
|
"byteOffset": 12175360 |
|
}, |
|
{ |
|
"name": "model.layers.42.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
40, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2211840, |
|
"byteOffset": 13281280 |
|
}, |
|
{ |
|
"name": "model.layers.42.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 15493120 |
|
}, |
|
{ |
|
"name": "model.layers.43.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 15503360 |
|
}, |
|
{ |
|
"name": "model.layers.43.mlp.down_proj.q_scale", |
|
"shape": [ |
|
108, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1105920, |
|
"byteOffset": 15513600 |
|
}, |
|
{ |
|
"name": "model.layers.43.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
40, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2211840, |
|
"byteOffset": 16619520 |
|
}, |
|
{ |
|
"name": "model.layers.43.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 18831360 |
|
}, |
|
{ |
|
"name": "model.layers.43.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 18841600 |
|
}, |
|
{ |
|
"name": "model.layers.43.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
40, |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 573440, |
|
"byteOffset": 18855936 |
|
}, |
|
{ |
|
"name": "model.layers.43.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
640, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 19429376 |
|
}, |
|
{ |
|
"name": "model.layers.43.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
40, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 409600, |
|
"byteOffset": 32536576 |
|
}, |
|
{ |
|
"name": "model.layers.44.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 32946176 |
|
} |
|
], |
|
"md5sum": "8457a437f0f3be57e5407cdb2bb460d9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_8.bin", |
|
"format": "raw-shard", |
|
"nbytes": 70778880, |
|
"records": [ |
|
{ |
|
"name": "model.layers.44.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
27648 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 70778880, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4be8e8774ba8f1dcf055092a384debdc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_9.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22265856, |
|
"records": [ |
|
{ |
|
"name": "model.layers.44.mlp.down_proj.q_scale", |
|
"shape": [ |
|
108, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1105920, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.44.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
40, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2211840, |
|
"byteOffset": 1105920 |
|
}, |
|
{ |
|
"name": "model.layers.44.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 3317760 |
|
}, |
|
{ |
|
"name": "model.layers.44.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 3328000 |
|
}, |
|
{ |
|
"name": "model.layers.44.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
640, |
|
7168 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18350080, |
|
"byteOffset": 3342336 |
|
}, |
|
{ |
|
"name": "model.layers.44.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
40, |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 573440, |
|
"byteOffset": 21692416 |
|
} |
|
], |
|
"md5sum": "1c22821d9b1665bf10808974ff14095d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_10.bin", |
|
"format": "raw-shard", |
|
"nbytes": 35389440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.45.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 35389440, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ef08df1cdb9846780f4adac99303f42c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_11.bin", |
|
"format": "raw-shard", |
|
"nbytes": 70778880, |
|
"records": [ |
|
{ |
|
"name": "model.layers.45.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
27648 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 70778880, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "decb8b8302a7901de59a392538e4500b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_12.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18350080, |
|
"records": [ |
|
{ |
|
"name": "model.layers.45.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
640, |
|
7168 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18350080, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "108dc6ece0a4469558ca76f78becc144" |
|
}, |
|
{ |
|
"dataPath": "params_shard_13.bin", |
|
"format": "raw-shard", |
|
"nbytes": 35389440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.46.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 35389440, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "82cbde6d68a65805d8d266369df81f28" |
|
}, |
|
{ |
|
"dataPath": "params_shard_14.bin", |
|
"format": "raw-shard", |
|
"nbytes": 70778880, |
|
"records": [ |
|
{ |
|
"name": "model.layers.46.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
27648 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 70778880, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ced93d2cb7d7a9fc5079313c028db945" |
|
}, |
|
{ |
|
"dataPath": "params_shard_15.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32075776, |
|
"records": [ |
|
{ |
|
"name": "model.layers.44.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
640, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.44.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
40, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 409600, |
|
"byteOffset": 13107200 |
|
}, |
|
{ |
|
"name": "model.layers.45.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 13516800 |
|
}, |
|
{ |
|
"name": "model.layers.45.mlp.down_proj.q_scale", |
|
"shape": [ |
|
108, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1105920, |
|
"byteOffset": 13527040 |
|
}, |
|
{ |
|
"name": "model.layers.45.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
40, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2211840, |
|
"byteOffset": 14632960 |
|
}, |
|
{ |
|
"name": "model.layers.45.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 16844800 |
|
}, |
|
{ |
|
"name": "model.layers.45.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 16855040 |
|
}, |
|
{ |
|
"name": "model.layers.45.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
40, |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 573440, |
|
"byteOffset": 16869376 |
|
}, |
|
{ |
|
"name": "model.layers.45.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
640, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 17442816 |
|
}, |
|
{ |
|
"name": "model.layers.45.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
40, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 409600, |
|
"byteOffset": 30550016 |
|
}, |
|
{ |
|
"name": "model.layers.46.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 30959616 |
|
}, |
|
{ |
|
"name": "model.layers.46.mlp.down_proj.q_scale", |
|
"shape": [ |
|
108, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1105920, |
|
"byteOffset": 30969856 |
|
} |
|
], |
|
"md5sum": "e45dba2873a7ae194e19560279ef35d5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_16.bin", |
|
"format": "raw-shard", |
|
"nbytes": 21159936, |
|
"records": [ |
|
{ |
|
"name": "model.layers.46.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
40, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2211840, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.46.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 2211840 |
|
}, |
|
{ |
|
"name": "model.layers.46.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 2222080 |
|
}, |
|
{ |
|
"name": "model.layers.46.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
640, |
|
7168 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18350080, |
|
"byteOffset": 2236416 |
|
}, |
|
{ |
|
"name": "model.layers.46.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
40, |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 573440, |
|
"byteOffset": 20586496 |
|
} |
|
], |
|
"md5sum": "ea2874cd47557e0274490f59f93a0592" |
|
}, |
|
{ |
|
"dataPath": "params_shard_17.bin", |
|
"format": "raw-shard", |
|
"nbytes": 35389440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.47.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 35389440, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "066110eac7a0efb9635183db84e80ec7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_18.bin", |
|
"format": "raw-shard", |
|
"nbytes": 70778880, |
|
"records": [ |
|
{ |
|
"name": "model.layers.47.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
27648 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 70778880, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f7515e12d9873622d4aead332779c846" |
|
}, |
|
{ |
|
"dataPath": "params_shard_19.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18350080, |
|
"records": [ |
|
{ |
|
"name": "model.layers.47.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
640, |
|
7168 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18350080, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "eca31758ff7928f470605f2c82573c28" |
|
}, |
|
{ |
|
"dataPath": "params_shard_20.bin", |
|
"format": "raw-shard", |
|
"nbytes": 389283840, |
|
"records": [ |
|
{ |
|
"name": "model.embed_tokens.q_weight", |
|
"shape": [ |
|
152064, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 389283840, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "396df257e3ead589623aa8b9377845a8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_21.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30969856, |
|
"records": [ |
|
{ |
|
"name": "model.layers.46.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
640, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.46.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
40, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 409600, |
|
"byteOffset": 13107200 |
|
}, |
|
{ |
|
"name": "model.layers.47.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 13516800 |
|
}, |
|
{ |
|
"name": "model.layers.47.mlp.down_proj.q_scale", |
|
"shape": [ |
|
108, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1105920, |
|
"byteOffset": 13527040 |
|
}, |
|
{ |
|
"name": "model.layers.47.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
40, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2211840, |
|
"byteOffset": 14632960 |
|
}, |
|
{ |
|
"name": "model.layers.47.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 16844800 |
|
}, |
|
{ |
|
"name": "model.layers.47.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 16855040 |
|
}, |
|
{ |
|
"name": "model.layers.47.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
40, |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 573440, |
|
"byteOffset": 16869376 |
|
}, |
|
{ |
|
"name": "model.layers.47.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
640, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 17442816 |
|
}, |
|
{ |
|
"name": "model.layers.47.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
40, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 409600, |
|
"byteOffset": 30550016 |
|
}, |
|
{ |
|
"name": "model.norm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 30959616 |
|
} |
|
], |
|
"md5sum": "b3257b8a35970bc8c776aead2e4859fb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_22.bin", |
|
"format": "raw-shard", |
|
"nbytes": 35389440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 35389440, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ae597428dd135dcadec9d97c26c4b484" |
|
}, |
|
{ |
|
"dataPath": "params_shard_23.bin", |
|
"format": "raw-shard", |
|
"nbytes": 70778880, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
27648 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 70778880, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "80fa8f69a2c623e16cd7ff0ead9bca20" |
|
}, |
|
{ |
|
"dataPath": "params_shard_24.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18350080, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
640, |
|
7168 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18350080, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7ff56586de8acd67d976b31781735101" |
|
}, |
|
{ |
|
"dataPath": "params_shard_25.bin", |
|
"format": "raw-shard", |
|
"nbytes": 35389440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 35389440, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bb234d5b2f48facc76659e6dc6d5d0f4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_26.bin", |
|
"format": "raw-shard", |
|
"nbytes": 70778880, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
27648 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 70778880, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7687223a138db2142285d4ed35f7272a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_27.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18350080, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
640, |
|
7168 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18350080, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ba44c5f7f920380434060786bf313b25" |
|
}, |
|
{ |
|
"dataPath": "params_shard_28.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33533952, |
|
"records": [ |
|
{ |
|
"name": "model.embed_tokens.q_scale", |
|
"shape": [ |
|
152064, |
|
40 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12165120, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.0.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 12165120 |
|
}, |
|
{ |
|
"name": "model.layers.0.mlp.down_proj.q_scale", |
|
"shape": [ |
|
108, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1105920, |
|
"byteOffset": 12175360 |
|
}, |
|
{ |
|
"name": "model.layers.0.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
40, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2211840, |
|
"byteOffset": 13281280 |
|
}, |
|
{ |
|
"name": "model.layers.0.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 15493120 |
|
}, |
|
{ |
|
"name": "model.layers.0.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 15503360 |
|
}, |
|
{ |
|
"name": "model.layers.0.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
40, |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 573440, |
|
"byteOffset": 15517696 |
|
}, |
|
{ |
|
"name": "model.layers.0.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
640, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 16091136 |
|
}, |
|
{ |
|
"name": "model.layers.0.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
40, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 409600, |
|
"byteOffset": 29198336 |
|
}, |
|
{ |
|
"name": "model.layers.1.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 29607936 |
|
}, |
|
{ |
|
"name": "model.layers.1.mlp.down_proj.q_scale", |
|
"shape": [ |
|
108, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1105920, |
|
"byteOffset": 29618176 |
|
}, |
|
{ |
|
"name": "model.layers.1.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
40, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2211840, |
|
"byteOffset": 30724096 |
|
}, |
|
{ |
|
"name": "model.layers.1.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 32935936 |
|
}, |
|
{ |
|
"name": "model.layers.1.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 32946176 |
|
}, |
|
{ |
|
"name": "model.layers.1.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
40, |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 573440, |
|
"byteOffset": 32960512 |
|
} |
|
], |
|
"md5sum": "1c25c8bb3ffdb4969525cf51bcc7560f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_29.bin", |
|
"format": "raw-shard", |
|
"nbytes": 35389440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 35389440, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8be2ac5669a783e1b0ada72be8ae0dcc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_30.bin", |
|
"format": "raw-shard", |
|
"nbytes": 70778880, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
27648 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 70778880, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "cfc5f04604811c122a9896fa569f897f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_31.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18350080, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
640, |
|
7168 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18350080, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2f9856f6df264630929c44a9afe6aaae" |
|
}, |
|
{ |
|
"dataPath": "params_shard_32.bin", |
|
"format": "raw-shard", |
|
"nbytes": 35389440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 35389440, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "19dfbdd47783461525282498d94c68b6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_33.bin", |
|
"format": "raw-shard", |
|
"nbytes": 70778880, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
27648 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 70778880, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c01cb1edd83848f05b58bdbb628cb9c3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_34.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32075776, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
640, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.1.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
40, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 409600, |
|
"byteOffset": 13107200 |
|
}, |
|
{ |
|
"name": "model.layers.2.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 13516800 |
|
}, |
|
{ |
|
"name": "model.layers.2.mlp.down_proj.q_scale", |
|
"shape": [ |
|
108, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1105920, |
|
"byteOffset": 13527040 |
|
}, |
|
{ |
|
"name": "model.layers.2.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
40, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2211840, |
|
"byteOffset": 14632960 |
|
}, |
|
{ |
|
"name": "model.layers.2.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 16844800 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 16855040 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
40, |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 573440, |
|
"byteOffset": 16869376 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
640, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 17442816 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
40, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 409600, |
|
"byteOffset": 30550016 |
|
}, |
|
{ |
|
"name": "model.layers.3.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 30959616 |
|
}, |
|
{ |
|
"name": "model.layers.3.mlp.down_proj.q_scale", |
|
"shape": [ |
|
108, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1105920, |
|
"byteOffset": 30969856 |
|
} |
|
], |
|
"md5sum": "dc77a4fae98db2b54939d17a1336d41e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_35.bin", |
|
"format": "raw-shard", |
|
"nbytes": 21159936, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
40, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2211840, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.3.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 2211840 |
|
}, |
|
{ |
|
"name": "model.layers.3.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 2222080 |
|
}, |
|
{ |
|
"name": "model.layers.3.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
640, |
|
7168 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18350080, |
|
"byteOffset": 2236416 |
|
}, |
|
{ |
|
"name": "model.layers.3.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
40, |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 573440, |
|
"byteOffset": 20586496 |
|
} |
|
], |
|
"md5sum": "a523f4c099e47049762ba85be09b59cb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_36.bin", |
|
"format": "raw-shard", |
|
"nbytes": 35389440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 35389440, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6d5fb69fdfac386ae4cf817fb7bf535c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_37.bin", |
|
"format": "raw-shard", |
|
"nbytes": 70778880, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
27648 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 70778880, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9da172f9e9b513a5200a70b8fafa78d0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_38.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18350080, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
640, |
|
7168 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18350080, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e5b6ae782366f31dd493641a40ec903f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_39.bin", |
|
"format": "raw-shard", |
|
"nbytes": 35389440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 35389440, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8e7ad599035540a0807d3ca1beb06575" |
|
}, |
|
{ |
|
"dataPath": "params_shard_40.bin", |
|
"format": "raw-shard", |
|
"nbytes": 70778880, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
27648 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 70778880, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "94636243bb8ba55dfdfc7774cf99c8f8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_41.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32075776, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
640, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.3.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
40, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 409600, |
|
"byteOffset": 13107200 |
|
}, |
|
{ |
|
"name": "model.layers.4.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 13516800 |
|
}, |
|
{ |
|
"name": "model.layers.4.mlp.down_proj.q_scale", |
|
"shape": [ |
|
108, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1105920, |
|
"byteOffset": 13527040 |
|
}, |
|
{ |
|
"name": "model.layers.4.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
40, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2211840, |
|
"byteOffset": 14632960 |
|
}, |
|
{ |
|
"name": "model.layers.4.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 16844800 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 16855040 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
40, |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 573440, |
|
"byteOffset": 16869376 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
640, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 17442816 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
40, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 409600, |
|
"byteOffset": 30550016 |
|
}, |
|
{ |
|
"name": "model.layers.5.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 30959616 |
|
}, |
|
{ |
|
"name": "model.layers.5.mlp.down_proj.q_scale", |
|
"shape": [ |
|
108, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1105920, |
|
"byteOffset": 30969856 |
|
} |
|
], |
|
"md5sum": "03918d731e1cd6dee628f9d7e1e672b0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_42.bin", |
|
"format": "raw-shard", |
|
"nbytes": 21159936, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
40, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2211840, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.5.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 2211840 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 2222080 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
640, |
|
7168 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18350080, |
|
"byteOffset": 2236416 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
40, |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 573440, |
|
"byteOffset": 20586496 |
|
} |
|
], |
|
"md5sum": "1fac7ba55cb7f70f8f3e3bd69223cd5e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_43.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32454656, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
640, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
40, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 409600, |
|
"byteOffset": 13107200 |
|
}, |
|
{ |
|
"name": "model.layers.6.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 13516800 |
|
}, |
|
{ |
|
"name": "model.layers.6.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
640, |
|
7168 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18350080, |
|
"byteOffset": 13531136 |
|
}, |
|
{ |
|
"name": "model.layers.6.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
40, |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 573440, |
|
"byteOffset": 31881216 |
|
} |
|
], |
|
"md5sum": "6da2533a4ce5f42600d76308618125cf" |
|
}, |
|
{ |
|
"dataPath": "params_shard_44.bin", |
|
"format": "raw-shard", |
|
"nbytes": 35389440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 35389440, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "69920a0f3a2b36c916fbd081de956d63" |
|
}, |
|
{ |
|
"dataPath": "params_shard_45.bin", |
|
"format": "raw-shard", |
|
"nbytes": 70778880, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
27648 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 70778880, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "be2059d8096205d23cd621920d7adde8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_46.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18350080, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
640, |
|
7168 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18350080, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "31b653f3dadc11e205ce0708a510e815" |
|
}, |
|
{ |
|
"dataPath": "params_shard_47.bin", |
|
"format": "raw-shard", |
|
"nbytes": 35389440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 35389440, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a740997d6dc04ea7f3a300ac6d28d64b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_48.bin", |
|
"format": "raw-shard", |
|
"nbytes": 70778880, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
27648 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 70778880, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3d5c408f4d9a4fe5810668b8726cc701" |
|
}, |
|
{ |
|
"dataPath": "params_shard_49.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32075776, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
640, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.6.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
40, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 409600, |
|
"byteOffset": 13107200 |
|
}, |
|
{ |
|
"name": "model.layers.10.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 13516800 |
|
}, |
|
{ |
|
"name": "model.layers.10.mlp.down_proj.q_scale", |
|
"shape": [ |
|
108, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1105920, |
|
"byteOffset": 13527040 |
|
}, |
|
{ |
|
"name": "model.layers.10.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
40, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2211840, |
|
"byteOffset": 14632960 |
|
}, |
|
{ |
|
"name": "model.layers.10.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 16844800 |
|
}, |
|
{ |
|
"name": "model.layers.10.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 16855040 |
|
}, |
|
{ |
|
"name": "model.layers.10.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
40, |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 573440, |
|
"byteOffset": 16869376 |
|
}, |
|
{ |
|
"name": "model.layers.10.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
640, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 17442816 |
|
}, |
|
{ |
|
"name": "model.layers.10.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
40, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 409600, |
|
"byteOffset": 30550016 |
|
}, |
|
{ |
|
"name": "model.layers.11.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 30959616 |
|
}, |
|
{ |
|
"name": "model.layers.11.mlp.down_proj.q_scale", |
|
"shape": [ |
|
108, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1105920, |
|
"byteOffset": 30969856 |
|
} |
|
], |
|
"md5sum": "56d6d4b91ad81d6cad0ba6f89ccab776" |
|
}, |
|
{ |
|
"dataPath": "params_shard_50.bin", |
|
"format": "raw-shard", |
|
"nbytes": 21159936, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
40, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2211840, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.11.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 2211840 |
|
}, |
|
{ |
|
"name": "model.layers.11.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 2222080 |
|
}, |
|
{ |
|
"name": "model.layers.11.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
640, |
|
7168 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18350080, |
|
"byteOffset": 2236416 |
|
}, |
|
{ |
|
"name": "model.layers.11.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
40, |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 573440, |
|
"byteOffset": 20586496 |
|
} |
|
], |
|
"md5sum": "ee3eb86b085da59e7e9a519a07edd797" |
|
}, |
|
{ |
|
"dataPath": "params_shard_51.bin", |
|
"format": "raw-shard", |
|
"nbytes": 35389440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 35389440, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c0ecf6d9c24d0dbc177b0cf4ef02b010" |
|
}, |
|
{ |
|
"dataPath": "params_shard_52.bin", |
|
"format": "raw-shard", |
|
"nbytes": 70778880, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
27648 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 70778880, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f2550392a5c0465dc5d3a7701dab9d14" |
|
}, |
|
{ |
|
"dataPath": "params_shard_53.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18350080, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
640, |
|
7168 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18350080, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d260bca7690960dceda20b3552768a1c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_54.bin", |
|
"format": "raw-shard", |
|
"nbytes": 35389440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 35389440, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d3071d115308e7356cc51bfddc2877e2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_55.bin", |
|
"format": "raw-shard", |
|
"nbytes": 70778880, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
27648 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 70778880, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "cb71ef1bc793b1c851b30cda3e0e2052" |
|
}, |
|
{ |
|
"dataPath": "params_shard_56.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32075776, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
640, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.11.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
40, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 409600, |
|
"byteOffset": 13107200 |
|
}, |
|
{ |
|
"name": "model.layers.12.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 13516800 |
|
}, |
|
{ |
|
"name": "model.layers.12.mlp.down_proj.q_scale", |
|
"shape": [ |
|
108, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1105920, |
|
"byteOffset": 13527040 |
|
}, |
|
{ |
|
"name": "model.layers.12.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
40, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2211840, |
|
"byteOffset": 14632960 |
|
}, |
|
{ |
|
"name": "model.layers.12.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 16844800 |
|
}, |
|
{ |
|
"name": "model.layers.12.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 16855040 |
|
}, |
|
{ |
|
"name": "model.layers.12.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
40, |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 573440, |
|
"byteOffset": 16869376 |
|
}, |
|
{ |
|
"name": "model.layers.12.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
640, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 17442816 |
|
}, |
|
{ |
|
"name": "model.layers.12.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
40, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 409600, |
|
"byteOffset": 30550016 |
|
}, |
|
{ |
|
"name": "model.layers.13.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 30959616 |
|
}, |
|
{ |
|
"name": "model.layers.13.mlp.down_proj.q_scale", |
|
"shape": [ |
|
108, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1105920, |
|
"byteOffset": 30969856 |
|
} |
|
], |
|
"md5sum": "eb8d672ed70dbb2ea6ee428ec195705b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_57.bin", |
|
"format": "raw-shard", |
|
"nbytes": 21159936, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
40, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2211840, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.13.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 2211840 |
|
}, |
|
{ |
|
"name": "model.layers.13.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 2222080 |
|
}, |
|
{ |
|
"name": "model.layers.13.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
640, |
|
7168 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18350080, |
|
"byteOffset": 2236416 |
|
}, |
|
{ |
|
"name": "model.layers.13.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
40, |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 573440, |
|
"byteOffset": 20586496 |
|
} |
|
], |
|
"md5sum": "d35c62820b0128e3bdd38d7e114f7ab3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_58.bin", |
|
"format": "raw-shard", |
|
"nbytes": 35389440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 35389440, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0ba19107004473e6be7fef4964be97b5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_59.bin", |
|
"format": "raw-shard", |
|
"nbytes": 70778880, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
27648 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 70778880, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "85d9d266f5c4837db8b88b1bedcf97e4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_60.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18350080, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
640, |
|
7168 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18350080, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "783f00e38735be30b42317dd7540033e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_61.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18350080, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
640, |
|
7168 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18350080, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a646f52d8483f8d1613d95ba27160696" |
|
}, |
|
{ |
|
"dataPath": "params_shard_62.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31547392, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
640, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.13.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
40, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 409600, |
|
"byteOffset": 13107200 |
|
}, |
|
{ |
|
"name": "model.layers.14.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 13516800 |
|
}, |
|
{ |
|
"name": "model.layers.14.mlp.down_proj.q_scale", |
|
"shape": [ |
|
108, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1105920, |
|
"byteOffset": 13527040 |
|
}, |
|
{ |
|
"name": "model.layers.14.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
40, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2211840, |
|
"byteOffset": 14632960 |
|
}, |
|
{ |
|
"name": "model.layers.14.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 16844800 |
|
}, |
|
{ |
|
"name": "model.layers.14.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 16855040 |
|
}, |
|
{ |
|
"name": "model.layers.14.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
40, |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 573440, |
|
"byteOffset": 16869376 |
|
}, |
|
{ |
|
"name": "model.layers.14.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
640, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 17442816 |
|
}, |
|
{ |
|
"name": "model.layers.14.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
40, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 409600, |
|
"byteOffset": 30550016 |
|
}, |
|
{ |
|
"name": "model.layers.15.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 30959616 |
|
}, |
|
{ |
|
"name": "model.layers.15.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
40, |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 573440, |
|
"byteOffset": 30973952 |
|
} |
|
], |
|
"md5sum": "d4fe78a5cf2efef147bd619154eaaf3f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_63.bin", |
|
"format": "raw-shard", |
|
"nbytes": 35389440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 35389440, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "712a06a67f5db4cb5beefabfeb6d2ad8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_64.bin", |
|
"format": "raw-shard", |
|
"nbytes": 70778880, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
27648 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 70778880, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "88c42d478efb609d1a8f8dd9d70ed0aa" |
|
}, |
|
{ |
|
"dataPath": "params_shard_65.bin", |
|
"format": "raw-shard", |
|
"nbytes": 35389440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 35389440, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "585e8610b8be5f9e07613166077c6075" |
|
}, |
|
{ |
|
"dataPath": "params_shard_66.bin", |
|
"format": "raw-shard", |
|
"nbytes": 70778880, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
27648 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 70778880, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d48fd4935c8fd2d5a2b3463a42cba1c6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_67.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18350080, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
640, |
|
7168 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18350080, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3bbff52ba50b8703e75a57d1a456436f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_68.bin", |
|
"format": "raw-shard", |
|
"nbytes": 20781056, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
640, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.15.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
40, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 409600, |
|
"byteOffset": 13107200 |
|
}, |
|
{ |
|
"name": "model.layers.6.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 13516800 |
|
}, |
|
{ |
|
"name": "model.layers.6.mlp.down_proj.q_scale", |
|
"shape": [ |
|
108, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1105920, |
|
"byteOffset": 13527040 |
|
}, |
|
{ |
|
"name": "model.layers.6.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
40, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2211840, |
|
"byteOffset": 14632960 |
|
}, |
|
{ |
|
"name": "model.layers.6.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 16844800 |
|
}, |
|
{ |
|
"name": "model.layers.7.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 16855040 |
|
}, |
|
{ |
|
"name": "model.layers.7.mlp.down_proj.q_scale", |
|
"shape": [ |
|
108, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1105920, |
|
"byteOffset": 16865280 |
|
}, |
|
{ |
|
"name": "model.layers.7.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
40, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2211840, |
|
"byteOffset": 17971200 |
|
}, |
|
{ |
|
"name": "model.layers.7.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 20183040 |
|
}, |
|
{ |
|
"name": "model.layers.7.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 20193280 |
|
}, |
|
{ |
|
"name": "model.layers.7.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
40, |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 573440, |
|
"byteOffset": 20207616 |
|
} |
|
], |
|
"md5sum": "e3a5d801f52574ca6a1bfe75768f69b7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_69.bin", |
|
"format": "raw-shard", |
|
"nbytes": 35389440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 35389440, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fbfa211f21ae80b4ef5e2a9dd08b1589" |
|
}, |
|
{ |
|
"dataPath": "params_shard_70.bin", |
|
"format": "raw-shard", |
|
"nbytes": 70778880, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
27648 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 70778880, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "71a1371e7d9f0f15125ee9261820d31b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_71.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18350080, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
640, |
|
7168 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18350080, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0ddd8bf4a110b3ed0c9eafe27321beed" |
|
}, |
|
{ |
|
"dataPath": "params_shard_72.bin", |
|
"format": "raw-shard", |
|
"nbytes": 35389440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 35389440, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7c93b14e647e8b1bdcf86e506b62de22" |
|
}, |
|
{ |
|
"dataPath": "params_shard_73.bin", |
|
"format": "raw-shard", |
|
"nbytes": 70778880, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
27648 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 70778880, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7525fa2ead6fad05bed1ad642e1a20d2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_74.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32075776, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
640, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.7.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
40, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 409600, |
|
"byteOffset": 13107200 |
|
}, |
|
{ |
|
"name": "model.layers.8.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 13516800 |
|
}, |
|
{ |
|
"name": "model.layers.8.mlp.down_proj.q_scale", |
|
"shape": [ |
|
108, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1105920, |
|
"byteOffset": 13527040 |
|
}, |
|
{ |
|
"name": "model.layers.8.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
40, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2211840, |
|
"byteOffset": 14632960 |
|
}, |
|
{ |
|
"name": "model.layers.8.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 16844800 |
|
}, |
|
{ |
|
"name": "model.layers.8.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 16855040 |
|
}, |
|
{ |
|
"name": "model.layers.8.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
40, |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 573440, |
|
"byteOffset": 16869376 |
|
}, |
|
{ |
|
"name": "model.layers.8.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
640, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 17442816 |
|
}, |
|
{ |
|
"name": "model.layers.8.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
40, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 409600, |
|
"byteOffset": 30550016 |
|
}, |
|
{ |
|
"name": "model.layers.9.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 30959616 |
|
}, |
|
{ |
|
"name": "model.layers.9.mlp.down_proj.q_scale", |
|
"shape": [ |
|
108, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1105920, |
|
"byteOffset": 30969856 |
|
} |
|
], |
|
"md5sum": "8d861c5a757d8065f37fc5b011d8de78" |
|
}, |
|
{ |
|
"dataPath": "params_shard_75.bin", |
|
"format": "raw-shard", |
|
"nbytes": 21159936, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
40, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2211840, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.9.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 2211840 |
|
}, |
|
{ |
|
"name": "model.layers.9.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 2222080 |
|
}, |
|
{ |
|
"name": "model.layers.9.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
640, |
|
7168 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18350080, |
|
"byteOffset": 2236416 |
|
}, |
|
{ |
|
"name": "model.layers.9.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
40, |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 573440, |
|
"byteOffset": 20586496 |
|
} |
|
], |
|
"md5sum": "0fa126cac6d330cc868d341be4db59d2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_76.bin", |
|
"format": "raw-shard", |
|
"nbytes": 35389440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 35389440, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "177bf300d1d4109049b2ebfc1b594eb1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_77.bin", |
|
"format": "raw-shard", |
|
"nbytes": 70778880, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
27648 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 70778880, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8b39778b2ab49c0922c5df6155a5ae05" |
|
}, |
|
{ |
|
"dataPath": "params_shard_78.bin", |
|
"format": "raw-shard", |
|
"nbytes": 35389440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 35389440, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9ea1c2ec121aceb0a35b3ac846b5e68e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_79.bin", |
|
"format": "raw-shard", |
|
"nbytes": 70778880, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
27648 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 70778880, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "45e0f6826f960ca3b46cbca94fc9ab75" |
|
}, |
|
{ |
|
"dataPath": "params_shard_80.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18350080, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
640, |
|
7168 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18350080, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e85ca07ca2b50311f9b5f2b77830553a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_81.bin", |
|
"format": "raw-shard", |
|
"nbytes": 20781056, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
640, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.9.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
40, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 409600, |
|
"byteOffset": 13107200 |
|
}, |
|
{ |
|
"name": "model.layers.15.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 13516800 |
|
}, |
|
{ |
|
"name": "model.layers.15.mlp.down_proj.q_scale", |
|
"shape": [ |
|
108, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1105920, |
|
"byteOffset": 13527040 |
|
}, |
|
{ |
|
"name": "model.layers.15.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
40, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2211840, |
|
"byteOffset": 14632960 |
|
}, |
|
{ |
|
"name": "model.layers.15.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 16844800 |
|
}, |
|
{ |
|
"name": "model.layers.16.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 16855040 |
|
}, |
|
{ |
|
"name": "model.layers.16.mlp.down_proj.q_scale", |
|
"shape": [ |
|
108, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1105920, |
|
"byteOffset": 16865280 |
|
}, |
|
{ |
|
"name": "model.layers.16.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
40, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2211840, |
|
"byteOffset": 17971200 |
|
}, |
|
{ |
|
"name": "model.layers.16.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 20183040 |
|
}, |
|
{ |
|
"name": "model.layers.16.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 20193280 |
|
}, |
|
{ |
|
"name": "model.layers.16.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
40, |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 573440, |
|
"byteOffset": 20207616 |
|
} |
|
], |
|
"md5sum": "ed36d1e03f72da65e1d73df9d3ae26a3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_82.bin", |
|
"format": "raw-shard", |
|
"nbytes": 35389440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 35389440, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0e2b78c07bde8c67076c8bf6d5f1a457" |
|
}, |
|
{ |
|
"dataPath": "params_shard_83.bin", |
|
"format": "raw-shard", |
|
"nbytes": 70778880, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
27648 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 70778880, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "32169cf9897ae14e46e6aea6187c09e8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_84.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18350080, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
640, |
|
7168 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18350080, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5d3ca2033853e2f8676654e6f47969f5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_85.bin", |
|
"format": "raw-shard", |
|
"nbytes": 35389440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 35389440, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "20e40a9394134df45a8d3cd52828b1bd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_86.bin", |
|
"format": "raw-shard", |
|
"nbytes": 70778880, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
27648 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 70778880, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "068f0e8c50ceda5ba09522872c589c1d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_87.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32075776, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
640, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.16.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
40, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 409600, |
|
"byteOffset": 13107200 |
|
}, |
|
{ |
|
"name": "model.layers.17.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 13516800 |
|
}, |
|
{ |
|
"name": "model.layers.17.mlp.down_proj.q_scale", |
|
"shape": [ |
|
108, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1105920, |
|
"byteOffset": 13527040 |
|
}, |
|
{ |
|
"name": "model.layers.17.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
40, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2211840, |
|
"byteOffset": 14632960 |
|
}, |
|
{ |
|
"name": "model.layers.17.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 16844800 |
|
}, |
|
{ |
|
"name": "model.layers.17.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 16855040 |
|
}, |
|
{ |
|
"name": "model.layers.17.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
40, |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 573440, |
|
"byteOffset": 16869376 |
|
}, |
|
{ |
|
"name": "model.layers.17.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
640, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 17442816 |
|
}, |
|
{ |
|
"name": "model.layers.17.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
40, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 409600, |
|
"byteOffset": 30550016 |
|
}, |
|
{ |
|
"name": "model.layers.18.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 30959616 |
|
}, |
|
{ |
|
"name": "model.layers.18.mlp.down_proj.q_scale", |
|
"shape": [ |
|
108, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1105920, |
|
"byteOffset": 30969856 |
|
} |
|
], |
|
"md5sum": "2cb8a3a0a411141d8bcd924053b3cc06" |
|
}, |
|
{ |
|
"dataPath": "params_shard_88.bin", |
|
"format": "raw-shard", |
|
"nbytes": 21159936, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
40, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2211840, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.18.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 2211840 |
|
}, |
|
{ |
|
"name": "model.layers.18.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 2222080 |
|
}, |
|
{ |
|
"name": "model.layers.18.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
640, |
|
7168 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18350080, |
|
"byteOffset": 2236416 |
|
}, |
|
{ |
|
"name": "model.layers.18.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
40, |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 573440, |
|
"byteOffset": 20586496 |
|
} |
|
], |
|
"md5sum": "199f04882307de3cebed77072ee6222d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_89.bin", |
|
"format": "raw-shard", |
|
"nbytes": 35389440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 35389440, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e4adac834511e5ce7a52b50f46f674b2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_90.bin", |
|
"format": "raw-shard", |
|
"nbytes": 70778880, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
27648 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 70778880, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6094e39116f14c82d3881228606a60ad" |
|
}, |
|
{ |
|
"dataPath": "params_shard_91.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18350080, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
640, |
|
7168 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18350080, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "367e6c4ce318254c03fccad8ee6e3260" |
|
}, |
|
{ |
|
"dataPath": "params_shard_92.bin", |
|
"format": "raw-shard", |
|
"nbytes": 35389440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 35389440, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5d1f5dc3c2eb9155cba6c9c01197a0c1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_93.bin", |
|
"format": "raw-shard", |
|
"nbytes": 70778880, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
27648 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 70778880, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7044ba47e9a1930a78e0bba6b6ebce4a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_94.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32075776, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
640, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.18.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
40, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 409600, |
|
"byteOffset": 13107200 |
|
}, |
|
{ |
|
"name": "model.layers.19.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 13516800 |
|
}, |
|
{ |
|
"name": "model.layers.19.mlp.down_proj.q_scale", |
|
"shape": [ |
|
108, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1105920, |
|
"byteOffset": 13527040 |
|
}, |
|
{ |
|
"name": "model.layers.19.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
40, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2211840, |
|
"byteOffset": 14632960 |
|
}, |
|
{ |
|
"name": "model.layers.19.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 16844800 |
|
}, |
|
{ |
|
"name": "model.layers.19.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 16855040 |
|
}, |
|
{ |
|
"name": "model.layers.19.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
40, |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 573440, |
|
"byteOffset": 16869376 |
|
}, |
|
{ |
|
"name": "model.layers.19.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
640, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 17442816 |
|
}, |
|
{ |
|
"name": "model.layers.19.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
40, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 409600, |
|
"byteOffset": 30550016 |
|
}, |
|
{ |
|
"name": "model.layers.20.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 30959616 |
|
}, |
|
{ |
|
"name": "model.layers.20.mlp.down_proj.q_scale", |
|
"shape": [ |
|
108, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1105920, |
|
"byteOffset": 30969856 |
|
} |
|
], |
|
"md5sum": "ff7a2d0d0cea5c0b7ef93ac13e7cae46" |
|
}, |
|
{ |
|
"dataPath": "params_shard_95.bin", |
|
"format": "raw-shard", |
|
"nbytes": 21159936, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
40, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2211840, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.20.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 2211840 |
|
}, |
|
{ |
|
"name": "model.layers.20.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 2222080 |
|
}, |
|
{ |
|
"name": "model.layers.20.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
640, |
|
7168 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18350080, |
|
"byteOffset": 2236416 |
|
}, |
|
{ |
|
"name": "model.layers.20.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
40, |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 573440, |
|
"byteOffset": 20586496 |
|
} |
|
], |
|
"md5sum": "87cde4332cf1d5b580c18ffa05da1bc9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_96.bin", |
|
"format": "raw-shard", |
|
"nbytes": 35389440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 35389440, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "38fdbd173771f3a624a19076e677725f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_97.bin", |
|
"format": "raw-shard", |
|
"nbytes": 70778880, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
27648 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 70778880, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "865f34db46c2a1cf0c3465417d81550f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_98.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18350080, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
640, |
|
7168 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18350080, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4079e49123705ce5afcb7c5144c8b888" |
|
}, |
|
{ |
|
"dataPath": "params_shard_99.bin", |
|
"format": "raw-shard", |
|
"nbytes": 35389440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 35389440, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "628414890d2e2706ab9f66bd358df3e9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_100.bin", |
|
"format": "raw-shard", |
|
"nbytes": 70778880, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
27648 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 70778880, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8054ab6fc3a489f58200cf58af709d14" |
|
}, |
|
{ |
|
"dataPath": "params_shard_101.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32075776, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
640, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.20.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
40, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 409600, |
|
"byteOffset": 13107200 |
|
}, |
|
{ |
|
"name": "model.layers.21.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 13516800 |
|
}, |
|
{ |
|
"name": "model.layers.21.mlp.down_proj.q_scale", |
|
"shape": [ |
|
108, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1105920, |
|
"byteOffset": 13527040 |
|
}, |
|
{ |
|
"name": "model.layers.21.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
40, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2211840, |
|
"byteOffset": 14632960 |
|
}, |
|
{ |
|
"name": "model.layers.21.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 16844800 |
|
}, |
|
{ |
|
"name": "model.layers.21.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 16855040 |
|
}, |
|
{ |
|
"name": "model.layers.21.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
40, |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 573440, |
|
"byteOffset": 16869376 |
|
}, |
|
{ |
|
"name": "model.layers.21.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
640, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 17442816 |
|
}, |
|
{ |
|
"name": "model.layers.21.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
40, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 409600, |
|
"byteOffset": 30550016 |
|
}, |
|
{ |
|
"name": "model.layers.22.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 30959616 |
|
}, |
|
{ |
|
"name": "model.layers.22.mlp.down_proj.q_scale", |
|
"shape": [ |
|
108, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1105920, |
|
"byteOffset": 30969856 |
|
} |
|
], |
|
"md5sum": "ee077f26fa1edda070b45746ba19590b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_102.bin", |
|
"format": "raw-shard", |
|
"nbytes": 21159936, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
40, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2211840, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.22.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 2211840 |
|
}, |
|
{ |
|
"name": "model.layers.22.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 2222080 |
|
}, |
|
{ |
|
"name": "model.layers.22.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
640, |
|
7168 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18350080, |
|
"byteOffset": 2236416 |
|
}, |
|
{ |
|
"name": "model.layers.22.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
40, |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 573440, |
|
"byteOffset": 20586496 |
|
} |
|
], |
|
"md5sum": "1ed1834859a57bd4853cded51355a380" |
|
}, |
|
{ |
|
"dataPath": "params_shard_103.bin", |
|
"format": "raw-shard", |
|
"nbytes": 35389440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 35389440, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9adfd2584e55e090998cd4a7e17f96f0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_104.bin", |
|
"format": "raw-shard", |
|
"nbytes": 70778880, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
27648 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 70778880, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f315a0d6ac6c8a625abc33771e4977bb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_105.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18350080, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
640, |
|
7168 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18350080, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "00feac10895287be321949d68df1e791" |
|
}, |
|
{ |
|
"dataPath": "params_shard_106.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18350080, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
640, |
|
7168 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18350080, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ee5e42d8a82cf84a89001b25034459bc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_107.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31547392, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
640, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.22.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
40, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 409600, |
|
"byteOffset": 13107200 |
|
}, |
|
{ |
|
"name": "model.layers.23.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 13516800 |
|
}, |
|
{ |
|
"name": "model.layers.23.mlp.down_proj.q_scale", |
|
"shape": [ |
|
108, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1105920, |
|
"byteOffset": 13527040 |
|
}, |
|
{ |
|
"name": "model.layers.23.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
40, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2211840, |
|
"byteOffset": 14632960 |
|
}, |
|
{ |
|
"name": "model.layers.23.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 16844800 |
|
}, |
|
{ |
|
"name": "model.layers.23.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 16855040 |
|
}, |
|
{ |
|
"name": "model.layers.23.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
40, |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 573440, |
|
"byteOffset": 16869376 |
|
}, |
|
{ |
|
"name": "model.layers.23.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
640, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 17442816 |
|
}, |
|
{ |
|
"name": "model.layers.23.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
40, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 409600, |
|
"byteOffset": 30550016 |
|
}, |
|
{ |
|
"name": "model.layers.24.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 30959616 |
|
}, |
|
{ |
|
"name": "model.layers.24.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
40, |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 573440, |
|
"byteOffset": 30973952 |
|
} |
|
], |
|
"md5sum": "7185155a50645aee422b5c861171cb17" |
|
}, |
|
{ |
|
"dataPath": "params_shard_108.bin", |
|
"format": "raw-shard", |
|
"nbytes": 35389440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 35389440, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2b25e48ba937422b912fb0380240cbd9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_109.bin", |
|
"format": "raw-shard", |
|
"nbytes": 70778880, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
27648 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 70778880, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "11c831b355a7f30ffbaf6da1b9dcbd2f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_110.bin", |
|
"format": "raw-shard", |
|
"nbytes": 35389440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 35389440, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "65dff7056f41a7f3e6991c1e16ce2f91" |
|
}, |
|
{ |
|
"dataPath": "params_shard_111.bin", |
|
"format": "raw-shard", |
|
"nbytes": 70778880, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
27648 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 70778880, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "43c02f3da3a05b115c9502a1091291db" |
|
}, |
|
{ |
|
"dataPath": "params_shard_112.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18350080, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
640, |
|
7168 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18350080, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "27ae81ef2db6d9c88ad930c210b7457b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_113.bin", |
|
"format": "raw-shard", |
|
"nbytes": 20781056, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
640, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.24.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
40, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 409600, |
|
"byteOffset": 13107200 |
|
}, |
|
{ |
|
"name": "model.layers.24.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 13516800 |
|
}, |
|
{ |
|
"name": "model.layers.24.mlp.down_proj.q_scale", |
|
"shape": [ |
|
108, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1105920, |
|
"byteOffset": 13527040 |
|
}, |
|
{ |
|
"name": "model.layers.24.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
40, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2211840, |
|
"byteOffset": 14632960 |
|
}, |
|
{ |
|
"name": "model.layers.24.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 16844800 |
|
}, |
|
{ |
|
"name": "model.layers.25.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 16855040 |
|
}, |
|
{ |
|
"name": "model.layers.25.mlp.down_proj.q_scale", |
|
"shape": [ |
|
108, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1105920, |
|
"byteOffset": 16865280 |
|
}, |
|
{ |
|
"name": "model.layers.25.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
40, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2211840, |
|
"byteOffset": 17971200 |
|
}, |
|
{ |
|
"name": "model.layers.25.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 20183040 |
|
}, |
|
{ |
|
"name": "model.layers.25.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 20193280 |
|
}, |
|
{ |
|
"name": "model.layers.25.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
40, |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 573440, |
|
"byteOffset": 20207616 |
|
} |
|
], |
|
"md5sum": "c47b1bd10a67b27ff526ec4027c9cf20" |
|
}, |
|
{ |
|
"dataPath": "params_shard_114.bin", |
|
"format": "raw-shard", |
|
"nbytes": 35389440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 35389440, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7e4a587765b99fe8443c3c900e3257be" |
|
}, |
|
{ |
|
"dataPath": "params_shard_115.bin", |
|
"format": "raw-shard", |
|
"nbytes": 70778880, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
27648 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 70778880, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "729ea078bef9a16bb07ec16b02ff64a5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_116.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18350080, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
640, |
|
7168 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18350080, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e621282172ccffe6887dd7e0370fb8d7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_117.bin", |
|
"format": "raw-shard", |
|
"nbytes": 35389440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 35389440, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d9d8f0f61fe7197b07132d3ce77ac755" |
|
}, |
|
{ |
|
"dataPath": "params_shard_118.bin", |
|
"format": "raw-shard", |
|
"nbytes": 70778880, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
27648 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 70778880, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2959dfd006bf645e869017dbe4eddbf5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_119.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32075776, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
640, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.25.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
40, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 409600, |
|
"byteOffset": 13107200 |
|
}, |
|
{ |
|
"name": "model.layers.26.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 13516800 |
|
}, |
|
{ |
|
"name": "model.layers.26.mlp.down_proj.q_scale", |
|
"shape": [ |
|
108, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1105920, |
|
"byteOffset": 13527040 |
|
}, |
|
{ |
|
"name": "model.layers.26.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
40, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2211840, |
|
"byteOffset": 14632960 |
|
}, |
|
{ |
|
"name": "model.layers.26.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 16844800 |
|
}, |
|
{ |
|
"name": "model.layers.26.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 16855040 |
|
}, |
|
{ |
|
"name": "model.layers.26.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
40, |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 573440, |
|
"byteOffset": 16869376 |
|
}, |
|
{ |
|
"name": "model.layers.26.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
640, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 17442816 |
|
}, |
|
{ |
|
"name": "model.layers.26.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
40, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 409600, |
|
"byteOffset": 30550016 |
|
}, |
|
{ |
|
"name": "model.layers.27.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 30959616 |
|
}, |
|
{ |
|
"name": "model.layers.27.mlp.down_proj.q_scale", |
|
"shape": [ |
|
108, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1105920, |
|
"byteOffset": 30969856 |
|
} |
|
], |
|
"md5sum": "3fb7cb246142867e5ddd2de694e10a41" |
|
}, |
|
{ |
|
"dataPath": "params_shard_120.bin", |
|
"format": "raw-shard", |
|
"nbytes": 21159936, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
40, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2211840, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.27.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 2211840 |
|
}, |
|
{ |
|
"name": "model.layers.27.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 2222080 |
|
}, |
|
{ |
|
"name": "model.layers.27.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
640, |
|
7168 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18350080, |
|
"byteOffset": 2236416 |
|
}, |
|
{ |
|
"name": "model.layers.27.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
40, |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 573440, |
|
"byteOffset": 20586496 |
|
} |
|
], |
|
"md5sum": "50a139a6ce3f022c76e1849020d66dde" |
|
}, |
|
{ |
|
"dataPath": "params_shard_121.bin", |
|
"format": "raw-shard", |
|
"nbytes": 35389440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 35389440, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5b98e6ea6e78ad6c3623046812cf2a65" |
|
}, |
|
{ |
|
"dataPath": "params_shard_122.bin", |
|
"format": "raw-shard", |
|
"nbytes": 70778880, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
27648 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 70778880, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a86929358038b2cd5a5ffacef07ada89" |
|
}, |
|
{ |
|
"dataPath": "params_shard_123.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18350080, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
640, |
|
7168 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18350080, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "abcf8ec7ecd6af0f171fa4bb26fee134" |
|
}, |
|
{ |
|
"dataPath": "params_shard_124.bin", |
|
"format": "raw-shard", |
|
"nbytes": 35389440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 35389440, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e3134533eebed384d41ea522dc997a64" |
|
}, |
|
{ |
|
"dataPath": "params_shard_125.bin", |
|
"format": "raw-shard", |
|
"nbytes": 70778880, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
27648 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 70778880, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e9faa1074ec4cab55ab210958c54e6f7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_126.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32075776, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
640, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.27.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
40, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 409600, |
|
"byteOffset": 13107200 |
|
}, |
|
{ |
|
"name": "model.layers.28.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 13516800 |
|
}, |
|
{ |
|
"name": "model.layers.28.mlp.down_proj.q_scale", |
|
"shape": [ |
|
108, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1105920, |
|
"byteOffset": 13527040 |
|
}, |
|
{ |
|
"name": "model.layers.28.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
40, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2211840, |
|
"byteOffset": 14632960 |
|
}, |
|
{ |
|
"name": "model.layers.28.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 16844800 |
|
}, |
|
{ |
|
"name": "model.layers.28.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 16855040 |
|
}, |
|
{ |
|
"name": "model.layers.28.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
40, |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 573440, |
|
"byteOffset": 16869376 |
|
}, |
|
{ |
|
"name": "model.layers.28.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
640, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 17442816 |
|
}, |
|
{ |
|
"name": "model.layers.28.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
40, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 409600, |
|
"byteOffset": 30550016 |
|
}, |
|
{ |
|
"name": "model.layers.29.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 30959616 |
|
}, |
|
{ |
|
"name": "model.layers.29.mlp.down_proj.q_scale", |
|
"shape": [ |
|
108, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1105920, |
|
"byteOffset": 30969856 |
|
} |
|
], |
|
"md5sum": "feb9881167ac26463c3b08bd77c04f1b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_127.bin", |
|
"format": "raw-shard", |
|
"nbytes": 21159936, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
40, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2211840, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.29.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 2211840 |
|
}, |
|
{ |
|
"name": "model.layers.29.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 2222080 |
|
}, |
|
{ |
|
"name": "model.layers.29.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
640, |
|
7168 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18350080, |
|
"byteOffset": 2236416 |
|
}, |
|
{ |
|
"name": "model.layers.29.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
40, |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 573440, |
|
"byteOffset": 20586496 |
|
} |
|
], |
|
"md5sum": "ca7b17509fdd483f5e6ff691bb306bff" |
|
}, |
|
{ |
|
"dataPath": "params_shard_128.bin", |
|
"format": "raw-shard", |
|
"nbytes": 35389440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 35389440, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "818b7da7203327e5ccd7ae8f2c8423d9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_129.bin", |
|
"format": "raw-shard", |
|
"nbytes": 70778880, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
27648 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 70778880, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "42af6184e51a9e796c6af8e7f25737ec" |
|
}, |
|
{ |
|
"dataPath": "params_shard_130.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18350080, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
640, |
|
7168 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18350080, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b06c476b34b44092c68a870691c2e5ab" |
|
}, |
|
{ |
|
"dataPath": "params_shard_131.bin", |
|
"format": "raw-shard", |
|
"nbytes": 35389440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 35389440, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "dd29b048dd889b7393faae2076aea695" |
|
}, |
|
{ |
|
"dataPath": "params_shard_132.bin", |
|
"format": "raw-shard", |
|
"nbytes": 70778880, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
27648 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 70778880, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "873aeca4e16b9c667591985494d66ed9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_133.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32075776, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
640, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.29.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
40, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 409600, |
|
"byteOffset": 13107200 |
|
}, |
|
{ |
|
"name": "model.layers.30.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 13516800 |
|
}, |
|
{ |
|
"name": "model.layers.30.mlp.down_proj.q_scale", |
|
"shape": [ |
|
108, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1105920, |
|
"byteOffset": 13527040 |
|
}, |
|
{ |
|
"name": "model.layers.30.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
40, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2211840, |
|
"byteOffset": 14632960 |
|
}, |
|
{ |
|
"name": "model.layers.30.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 16844800 |
|
}, |
|
{ |
|
"name": "model.layers.30.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 16855040 |
|
}, |
|
{ |
|
"name": "model.layers.30.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
40, |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 573440, |
|
"byteOffset": 16869376 |
|
}, |
|
{ |
|
"name": "model.layers.30.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
640, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 17442816 |
|
}, |
|
{ |
|
"name": "model.layers.30.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
40, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 409600, |
|
"byteOffset": 30550016 |
|
}, |
|
{ |
|
"name": "model.layers.31.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 30959616 |
|
}, |
|
{ |
|
"name": "model.layers.31.mlp.down_proj.q_scale", |
|
"shape": [ |
|
108, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1105920, |
|
"byteOffset": 30969856 |
|
} |
|
], |
|
"md5sum": "779b3f346965ec482a6bec0993c94fa5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_134.bin", |
|
"format": "raw-shard", |
|
"nbytes": 21159936, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
40, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2211840, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.31.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 2211840 |
|
}, |
|
{ |
|
"name": "model.layers.31.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 2222080 |
|
}, |
|
{ |
|
"name": "model.layers.31.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
640, |
|
7168 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18350080, |
|
"byteOffset": 2236416 |
|
}, |
|
{ |
|
"name": "model.layers.31.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
40, |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 573440, |
|
"byteOffset": 20586496 |
|
} |
|
], |
|
"md5sum": "df2398036ab25b0c909958d4724936d4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_135.bin", |
|
"format": "raw-shard", |
|
"nbytes": 35389440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.32.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 35389440, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bc9942eeed3a0084bbaea8862dea1734" |
|
}, |
|
{ |
|
"dataPath": "params_shard_136.bin", |
|
"format": "raw-shard", |
|
"nbytes": 70778880, |
|
"records": [ |
|
{ |
|
"name": "model.layers.32.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
27648 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 70778880, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3496da19fc4e107cea14aba54f224184" |
|
}, |
|
{ |
|
"dataPath": "params_shard_137.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18350080, |
|
"records": [ |
|
{ |
|
"name": "model.layers.32.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
640, |
|
7168 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18350080, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "85d045b9def2e38aa96b8d01af2d4f10" |
|
}, |
|
{ |
|
"dataPath": "params_shard_138.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18350080, |
|
"records": [ |
|
{ |
|
"name": "model.layers.33.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
640, |
|
7168 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18350080, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9dd10f0564d27c740c0a1f0d2842aee1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_139.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31547392, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
640, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.31.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
40, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 409600, |
|
"byteOffset": 13107200 |
|
}, |
|
{ |
|
"name": "model.layers.32.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 13516800 |
|
}, |
|
{ |
|
"name": "model.layers.32.mlp.down_proj.q_scale", |
|
"shape": [ |
|
108, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1105920, |
|
"byteOffset": 13527040 |
|
}, |
|
{ |
|
"name": "model.layers.32.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
40, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2211840, |
|
"byteOffset": 14632960 |
|
}, |
|
{ |
|
"name": "model.layers.32.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 16844800 |
|
}, |
|
{ |
|
"name": "model.layers.32.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 16855040 |
|
}, |
|
{ |
|
"name": "model.layers.32.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
40, |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 573440, |
|
"byteOffset": 16869376 |
|
}, |
|
{ |
|
"name": "model.layers.32.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
640, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 17442816 |
|
}, |
|
{ |
|
"name": "model.layers.32.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
40, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 409600, |
|
"byteOffset": 30550016 |
|
}, |
|
{ |
|
"name": "model.layers.33.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 30959616 |
|
}, |
|
{ |
|
"name": "model.layers.33.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
40, |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 573440, |
|
"byteOffset": 30973952 |
|
} |
|
], |
|
"md5sum": "b38820b9bcfa85000903651ebbea79b8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_140.bin", |
|
"format": "raw-shard", |
|
"nbytes": 35389440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.33.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 35389440, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8407077a31f604fe262d70d10c848a47" |
|
}, |
|
{ |
|
"dataPath": "params_shard_141.bin", |
|
"format": "raw-shard", |
|
"nbytes": 70778880, |
|
"records": [ |
|
{ |
|
"name": "model.layers.33.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
27648 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 70778880, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0aacc5809b7ac20b3247f22acd597651" |
|
}, |
|
{ |
|
"dataPath": "params_shard_142.bin", |
|
"format": "raw-shard", |
|
"nbytes": 35389440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.34.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 35389440, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "96b2501d48856d89080979278eff6f94" |
|
}, |
|
{ |
|
"dataPath": "params_shard_143.bin", |
|
"format": "raw-shard", |
|
"nbytes": 70778880, |
|
"records": [ |
|
{ |
|
"name": "model.layers.34.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
27648 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 70778880, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "dd30c271fb41a57f9a86960e2cc4ef79" |
|
}, |
|
{ |
|
"dataPath": "params_shard_144.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18350080, |
|
"records": [ |
|
{ |
|
"name": "model.layers.34.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
640, |
|
7168 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18350080, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5628e6264007f8a06aef910c683b88e7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_145.bin", |
|
"format": "raw-shard", |
|
"nbytes": 20781056, |
|
"records": [ |
|
{ |
|
"name": "model.layers.33.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
640, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.33.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
40, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 409600, |
|
"byteOffset": 13107200 |
|
}, |
|
{ |
|
"name": "model.layers.33.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 13516800 |
|
}, |
|
{ |
|
"name": "model.layers.33.mlp.down_proj.q_scale", |
|
"shape": [ |
|
108, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1105920, |
|
"byteOffset": 13527040 |
|
}, |
|
{ |
|
"name": "model.layers.33.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
40, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2211840, |
|
"byteOffset": 14632960 |
|
}, |
|
{ |
|
"name": "model.layers.33.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 16844800 |
|
}, |
|
{ |
|
"name": "model.layers.34.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 16855040 |
|
}, |
|
{ |
|
"name": "model.layers.34.mlp.down_proj.q_scale", |
|
"shape": [ |
|
108, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1105920, |
|
"byteOffset": 16865280 |
|
}, |
|
{ |
|
"name": "model.layers.34.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
40, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2211840, |
|
"byteOffset": 17971200 |
|
}, |
|
{ |
|
"name": "model.layers.34.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 20183040 |
|
}, |
|
{ |
|
"name": "model.layers.34.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 20193280 |
|
}, |
|
{ |
|
"name": "model.layers.34.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
40, |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 573440, |
|
"byteOffset": 20207616 |
|
} |
|
], |
|
"md5sum": "1739ae899ba2d136e36e2efce899a9c8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_146.bin", |
|
"format": "raw-shard", |
|
"nbytes": 35389440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.35.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 35389440, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "aab797b70eb81b0eed84f18c20ba73b6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_147.bin", |
|
"format": "raw-shard", |
|
"nbytes": 70778880, |
|
"records": [ |
|
{ |
|
"name": "model.layers.35.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
27648 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 70778880, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c464045f236e3c1935336221064eb13d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_148.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18350080, |
|
"records": [ |
|
{ |
|
"name": "model.layers.35.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
640, |
|
7168 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18350080, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0159b399a5754b346b21215c5089cea2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_149.bin", |
|
"format": "raw-shard", |
|
"nbytes": 35389440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.36.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 35389440, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "da9b58348ae20067669d8e994749fd72" |
|
}, |
|
{ |
|
"dataPath": "params_shard_150.bin", |
|
"format": "raw-shard", |
|
"nbytes": 70778880, |
|
"records": [ |
|
{ |
|
"name": "model.layers.36.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
27648 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 70778880, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c1008264634bcbdda1df779d3d8e5e3c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_151.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32075776, |
|
"records": [ |
|
{ |
|
"name": "model.layers.34.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
640, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.34.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
40, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 409600, |
|
"byteOffset": 13107200 |
|
}, |
|
{ |
|
"name": "model.layers.35.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 13516800 |
|
}, |
|
{ |
|
"name": "model.layers.35.mlp.down_proj.q_scale", |
|
"shape": [ |
|
108, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1105920, |
|
"byteOffset": 13527040 |
|
}, |
|
{ |
|
"name": "model.layers.35.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
40, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2211840, |
|
"byteOffset": 14632960 |
|
}, |
|
{ |
|
"name": "model.layers.35.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 16844800 |
|
}, |
|
{ |
|
"name": "model.layers.35.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 16855040 |
|
}, |
|
{ |
|
"name": "model.layers.35.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
40, |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 573440, |
|
"byteOffset": 16869376 |
|
}, |
|
{ |
|
"name": "model.layers.35.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
640, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 17442816 |
|
}, |
|
{ |
|
"name": "model.layers.35.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
40, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 409600, |
|
"byteOffset": 30550016 |
|
}, |
|
{ |
|
"name": "model.layers.36.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 30959616 |
|
}, |
|
{ |
|
"name": "model.layers.36.mlp.down_proj.q_scale", |
|
"shape": [ |
|
108, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1105920, |
|
"byteOffset": 30969856 |
|
} |
|
], |
|
"md5sum": "cc8510ce07abbcf1e1c08f2398446a0b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_152.bin", |
|
"format": "raw-shard", |
|
"nbytes": 21159936, |
|
"records": [ |
|
{ |
|
"name": "model.layers.36.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
40, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2211840, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.36.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 2211840 |
|
}, |
|
{ |
|
"name": "model.layers.36.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 2222080 |
|
}, |
|
{ |
|
"name": "model.layers.36.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
640, |
|
7168 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18350080, |
|
"byteOffset": 2236416 |
|
}, |
|
{ |
|
"name": "model.layers.36.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
40, |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 573440, |
|
"byteOffset": 20586496 |
|
} |
|
], |
|
"md5sum": "7088cfe905759d49940c3a32f7f2e02a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_153.bin", |
|
"format": "raw-shard", |
|
"nbytes": 35389440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.37.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 35389440, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "863510d6228455c9b59662cf72f23c04" |
|
}, |
|
{ |
|
"dataPath": "params_shard_154.bin", |
|
"format": "raw-shard", |
|
"nbytes": 70778880, |
|
"records": [ |
|
{ |
|
"name": "model.layers.37.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
27648 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 70778880, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9dca87a1f9d60abe82823c6f3711aa53" |
|
}, |
|
{ |
|
"dataPath": "params_shard_155.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18350080, |
|
"records": [ |
|
{ |
|
"name": "model.layers.37.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
640, |
|
7168 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18350080, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "dc7a18be1c98cbd8e0c3c577bd179364" |
|
}, |
|
{ |
|
"dataPath": "params_shard_156.bin", |
|
"format": "raw-shard", |
|
"nbytes": 35389440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.38.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 35389440, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "726f918b1ce770cd85bfa1a783f3ec70" |
|
}, |
|
{ |
|
"dataPath": "params_shard_157.bin", |
|
"format": "raw-shard", |
|
"nbytes": 70778880, |
|
"records": [ |
|
{ |
|
"name": "model.layers.38.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
27648 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 70778880, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1b11d8b2f1be4fd1f57bac9fedcd2025" |
|
}, |
|
{ |
|
"dataPath": "params_shard_158.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32075776, |
|
"records": [ |
|
{ |
|
"name": "model.layers.36.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
640, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.36.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
40, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 409600, |
|
"byteOffset": 13107200 |
|
}, |
|
{ |
|
"name": "model.layers.37.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 13516800 |
|
}, |
|
{ |
|
"name": "model.layers.37.mlp.down_proj.q_scale", |
|
"shape": [ |
|
108, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1105920, |
|
"byteOffset": 13527040 |
|
}, |
|
{ |
|
"name": "model.layers.37.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
40, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2211840, |
|
"byteOffset": 14632960 |
|
}, |
|
{ |
|
"name": "model.layers.37.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 16844800 |
|
}, |
|
{ |
|
"name": "model.layers.37.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 16855040 |
|
}, |
|
{ |
|
"name": "model.layers.37.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
40, |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 573440, |
|
"byteOffset": 16869376 |
|
}, |
|
{ |
|
"name": "model.layers.37.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
640, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 17442816 |
|
}, |
|
{ |
|
"name": "model.layers.37.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
40, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 409600, |
|
"byteOffset": 30550016 |
|
}, |
|
{ |
|
"name": "model.layers.38.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 30959616 |
|
}, |
|
{ |
|
"name": "model.layers.38.mlp.down_proj.q_scale", |
|
"shape": [ |
|
108, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1105920, |
|
"byteOffset": 30969856 |
|
} |
|
], |
|
"md5sum": "c2603f77e13f22011aec17399406430f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_159.bin", |
|
"format": "raw-shard", |
|
"nbytes": 21159936, |
|
"records": [ |
|
{ |
|
"name": "model.layers.38.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
40, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2211840, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.38.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 2211840 |
|
}, |
|
{ |
|
"name": "model.layers.38.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 2222080 |
|
}, |
|
{ |
|
"name": "model.layers.38.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
640, |
|
7168 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18350080, |
|
"byteOffset": 2236416 |
|
}, |
|
{ |
|
"name": "model.layers.38.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
40, |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 573440, |
|
"byteOffset": 20586496 |
|
} |
|
], |
|
"md5sum": "7b12e585c83c91f493e96f89654497d4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_160.bin", |
|
"format": "raw-shard", |
|
"nbytes": 35389440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.39.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 35389440, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "391243342e013d0aca12a7ff0a91b19f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_161.bin", |
|
"format": "raw-shard", |
|
"nbytes": 70778880, |
|
"records": [ |
|
{ |
|
"name": "model.layers.39.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
27648 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 70778880, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "788050f2273aa074513878016ac231dd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_162.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18350080, |
|
"records": [ |
|
{ |
|
"name": "model.layers.39.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
640, |
|
7168 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18350080, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7aeda34dc7807b74765bce8d99e9e9e4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_163.bin", |
|
"format": "raw-shard", |
|
"nbytes": 35389440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.40.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 35389440, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "531af9c25a81ea55662f35bd06aabbb0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_164.bin", |
|
"format": "raw-shard", |
|
"nbytes": 70778880, |
|
"records": [ |
|
{ |
|
"name": "model.layers.40.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
27648 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 70778880, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f371ad0de197dfc31226486efbf87165" |
|
}, |
|
{ |
|
"dataPath": "params_shard_165.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32075776, |
|
"records": [ |
|
{ |
|
"name": "model.layers.38.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
640, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.38.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
40, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 409600, |
|
"byteOffset": 13107200 |
|
}, |
|
{ |
|
"name": "model.layers.39.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 13516800 |
|
}, |
|
{ |
|
"name": "model.layers.39.mlp.down_proj.q_scale", |
|
"shape": [ |
|
108, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1105920, |
|
"byteOffset": 13527040 |
|
}, |
|
{ |
|
"name": "model.layers.39.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
40, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2211840, |
|
"byteOffset": 14632960 |
|
}, |
|
{ |
|
"name": "model.layers.39.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 16844800 |
|
}, |
|
{ |
|
"name": "model.layers.39.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 16855040 |
|
}, |
|
{ |
|
"name": "model.layers.39.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
40, |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 573440, |
|
"byteOffset": 16869376 |
|
}, |
|
{ |
|
"name": "model.layers.39.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
640, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 17442816 |
|
}, |
|
{ |
|
"name": "model.layers.39.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
40, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 409600, |
|
"byteOffset": 30550016 |
|
}, |
|
{ |
|
"name": "model.layers.40.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 30959616 |
|
}, |
|
{ |
|
"name": "model.layers.40.mlp.down_proj.q_scale", |
|
"shape": [ |
|
108, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1105920, |
|
"byteOffset": 30969856 |
|
} |
|
], |
|
"md5sum": "efe2b22d7d22b2417feb88f3462e231c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_166.bin", |
|
"format": "raw-shard", |
|
"nbytes": 21159936, |
|
"records": [ |
|
{ |
|
"name": "model.layers.40.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
40, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2211840, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.40.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 2211840 |
|
}, |
|
{ |
|
"name": "model.layers.40.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 2222080 |
|
}, |
|
{ |
|
"name": "model.layers.40.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
640, |
|
7168 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18350080, |
|
"byteOffset": 2236416 |
|
}, |
|
{ |
|
"name": "model.layers.40.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
40, |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 573440, |
|
"byteOffset": 20586496 |
|
} |
|
], |
|
"md5sum": "db0d20497d976a417a969d3637de5b6e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_167.bin", |
|
"format": "raw-shard", |
|
"nbytes": 35389440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.41.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1728, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 35389440, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "51c7c3dab50e4cdce240f114b0083604" |
|
}, |
|
{ |
|
"dataPath": "params_shard_168.bin", |
|
"format": "raw-shard", |
|
"nbytes": 70778880, |
|
"records": [ |
|
{ |
|
"name": "model.layers.41.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
27648 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 70778880, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "dc53216cf0095d17f3b14caebdb2e93d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_169.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18350080, |
|
"records": [ |
|
{ |
|
"name": "model.layers.41.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
640, |
|
7168 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18350080, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "971566013ab9eaad18dfb9375ff95608" |
|
}, |
|
{ |
|
"dataPath": "params_shard_170.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18350080, |
|
"records": [ |
|
{ |
|
"name": "model.layers.42.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
640, |
|
7168 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18350080, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c7589a155932682b476494a45c443e26" |
|
}, |
|
{ |
|
"dataPath": "params_shard_171.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31547392, |
|
"records": [ |
|
{ |
|
"name": "model.layers.40.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
640, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.40.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
40, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 409600, |
|
"byteOffset": 13107200 |
|
}, |
|
{ |
|
"name": "model.layers.41.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 13516800 |
|
}, |
|
{ |
|
"name": "model.layers.41.mlp.down_proj.q_scale", |
|
"shape": [ |
|
108, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1105920, |
|
"byteOffset": 13527040 |
|
}, |
|
{ |
|
"name": "model.layers.41.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
40, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2211840, |
|
"byteOffset": 14632960 |
|
}, |
|
{ |
|
"name": "model.layers.41.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 16844800 |
|
}, |
|
{ |
|
"name": "model.layers.41.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 16855040 |
|
}, |
|
{ |
|
"name": "model.layers.41.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
40, |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 573440, |
|
"byteOffset": 16869376 |
|
}, |
|
{ |
|
"name": "model.layers.41.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
640, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 17442816 |
|
}, |
|
{ |
|
"name": "model.layers.41.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
40, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 409600, |
|
"byteOffset": 30550016 |
|
}, |
|
{ |
|
"name": "model.layers.42.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 30959616 |
|
}, |
|
{ |
|
"name": "model.layers.42.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
40, |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 573440, |
|
"byteOffset": 30973952 |
|
} |
|
], |
|
"md5sum": "18b0e7df68561928ea5ce1b2933c6f17" |
|
}, |
|
{ |
|
"dataPath": "params_shard_172.bin", |
|
"format": "raw-shard", |
|
"nbytes": 13516800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.42.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
640, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.42.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
40, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 409600, |
|
"byteOffset": 13107200 |
|
} |
|
], |
|
"md5sum": "ad2b67b2ad43dea4d2fae00d836e7329" |
|
} |
|
] |
|
} |