{ "metadata": { "ParamSize": 533, "ParamBytes": 7617046528.0, "BitsPerParam": 4.125675919921857 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 389283840, "records": [ { "name": "lm_head.q_weight", "shape": [ 640, 152064 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 389283840, "byteOffset": 0 } ], "md5sum": "7a9a286748b4df814a1c8d03ce3be916" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.42.mlp.down_proj.q_weight", "shape": [ 1728, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "4aff9dcddcbfbefb807326305b86a706" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.q_weight", "shape": [ 640, 27648 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "66ea03c49eaa7b431632eaf2097b6730" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.43.mlp.down_proj.q_weight", "shape": [ 1728, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "d727e20f32ac00a55b86a22a8bfd7924" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.43.mlp.gate_up_proj.q_weight", "shape": [ 640, 27648 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "b9e99b5b1877b499a6624d6e0b1164ec" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.43.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "5ca7e1d8136377977d741d28c7f281b3" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.44.mlp.down_proj.q_weight", "shape": [ 1728, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "a1213a3400e6b2969a33f161b0b23965" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 32956416, "records": [ { "name": "lm_head.q_scale", "shape": [ 40, 152064 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12165120, "byteOffset": 0 }, { "name": "model.layers.42.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 12165120 }, { "name": "model.layers.42.mlp.down_proj.q_scale", "shape": [ 108, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 12175360 }, { "name": "model.layers.42.mlp.gate_up_proj.q_scale", "shape": [ 40, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13281280 }, { "name": "model.layers.42.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 15493120 }, { "name": "model.layers.43.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 15503360 }, { "name": "model.layers.43.mlp.down_proj.q_scale", "shape": [ 108, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 15513600 }, { "name": "model.layers.43.mlp.gate_up_proj.q_scale", "shape": [ 40, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 16619520 }, { "name": "model.layers.43.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 18831360 }, { "name": "model.layers.43.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 18841600 }, { "name": "model.layers.43.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 18855936 }, { "name": "model.layers.43.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 19429376 }, { "name": "model.layers.43.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 32536576 }, { "name": "model.layers.44.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 32946176 } ], "md5sum": "8457a437f0f3be57e5407cdb2bb460d9" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.q_weight", "shape": [ 640, 27648 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "4be8e8774ba8f1dcf055092a384debdc" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 22265856, "records": [ { "name": "model.layers.44.mlp.down_proj.q_scale", "shape": [ 108, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 0 }, { "name": "model.layers.44.mlp.gate_up_proj.q_scale", "shape": [ 40, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 1105920 }, { "name": "model.layers.44.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 3317760 }, { "name": "model.layers.44.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 3328000 }, { "name": "model.layers.44.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 3342336 }, { "name": "model.layers.44.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 21692416 } ], "md5sum": "1c22821d9b1665bf10808974ff14095d" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.45.mlp.down_proj.q_weight", "shape": [ 1728, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "ef08df1cdb9846780f4adac99303f42c" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.q_weight", "shape": [ 640, 27648 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "decb8b8302a7901de59a392538e4500b" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.45.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "108dc6ece0a4469558ca76f78becc144" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.46.mlp.down_proj.q_weight", "shape": [ 1728, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "82cbde6d68a65805d8d266369df81f28" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.q_weight", "shape": [ 640, 27648 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "ced93d2cb7d7a9fc5079313c028db945" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 32075776, "records": [ { "name": "model.layers.44.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.44.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.45.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.45.mlp.down_proj.q_scale", "shape": [ 108, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 13527040 }, { "name": "model.layers.45.mlp.gate_up_proj.q_scale", "shape": [ 40, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 14632960 }, { "name": "model.layers.45.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 16844800 }, { "name": "model.layers.45.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 16855040 }, { "name": "model.layers.45.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 16869376 }, { "name": "model.layers.45.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 17442816 }, { "name": "model.layers.45.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 30550016 }, { "name": "model.layers.46.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 30959616 }, { "name": "model.layers.46.mlp.down_proj.q_scale", "shape": [ 108, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 30969856 } ], "md5sum": "e45dba2873a7ae194e19560279ef35d5" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 21159936, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.q_scale", "shape": [ 40, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 0 }, { "name": "model.layers.46.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2211840 }, { "name": "model.layers.46.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 2222080 }, { "name": "model.layers.46.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 2236416 }, { "name": "model.layers.46.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20586496 } ], "md5sum": "ea2874cd47557e0274490f59f93a0592" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.47.mlp.down_proj.q_weight", "shape": [ 1728, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "066110eac7a0efb9635183db84e80ec7" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.47.mlp.gate_up_proj.q_weight", "shape": [ 640, 27648 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "f7515e12d9873622d4aead332779c846" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.47.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "eca31758ff7928f470605f2c82573c28" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 389283840, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 152064, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 389283840, "byteOffset": 0 } ], "md5sum": "396df257e3ead589623aa8b9377845a8" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 30969856, "records": [ { "name": "model.layers.46.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.46.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.47.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.47.mlp.down_proj.q_scale", "shape": [ 108, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 13527040 }, { "name": "model.layers.47.mlp.gate_up_proj.q_scale", "shape": [ 40, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 14632960 }, { "name": "model.layers.47.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 16844800 }, { "name": "model.layers.47.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 16855040 }, { "name": "model.layers.47.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 16869376 }, { "name": "model.layers.47.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 17442816 }, { "name": "model.layers.47.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 30550016 }, { "name": "model.norm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 30959616 } ], "md5sum": "b3257b8a35970bc8c776aead2e4859fb" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 1728, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "ae597428dd135dcadec9d97c26c4b484" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 640, 27648 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "80fa8f69a2c623e16cd7ff0ead9bca20" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.0.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "7ff56586de8acd67d976b31781735101" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 1728, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "bb234d5b2f48facc76659e6dc6d5d0f4" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 640, 27648 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "7687223a138db2142285d4ed35f7272a" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.1.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "ba44c5f7f920380434060786bf313b25" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 33533952, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 152064, 40 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12165120, "byteOffset": 0 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 12165120 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 108, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 12175360 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 40, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13281280 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 15493120 }, { "name": "model.layers.0.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 15503360 }, { "name": "model.layers.0.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 15517696 }, { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 16091136 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 29198336 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29607936 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 108, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 29618176 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 40, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 30724096 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 32935936 }, { "name": "model.layers.1.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 32946176 }, { "name": "model.layers.1.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 32960512 } ], "md5sum": "1c25c8bb3ffdb4969525cf51bcc7560f" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 1728, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "8be2ac5669a783e1b0ada72be8ae0dcc" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 640, 27648 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "cfc5f04604811c122a9896fa569f897f" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.2.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "2f9856f6df264630929c44a9afe6aaae" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 1728, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "19dfbdd47783461525282498d94c68b6" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 640, 27648 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "c01cb1edd83848f05b58bdbb628cb9c3" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 32075776, "records": [ { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 108, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 13527040 }, { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 40, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 14632960 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 16844800 }, { "name": "model.layers.2.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 16855040 }, { "name": "model.layers.2.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 16869376 }, { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 17442816 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 30550016 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 30959616 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 108, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 30969856 } ], "md5sum": "dc77a4fae98db2b54939d17a1336d41e" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 21159936, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 40, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 0 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2211840 }, { "name": "model.layers.3.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 2222080 }, { "name": "model.layers.3.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 2236416 }, { "name": "model.layers.3.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20586496 } ], "md5sum": "a523f4c099e47049762ba85be09b59cb" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 1728, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "6d5fb69fdfac386ae4cf817fb7bf535c" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 640, 27648 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "9da172f9e9b513a5200a70b8fafa78d0" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.4.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "e5b6ae782366f31dd493641a40ec903f" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 1728, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "8e7ad599035540a0807d3ca1beb06575" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 640, 27648 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "94636243bb8ba55dfdfc7774cf99c8f8" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 32075776, "records": [ { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 108, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 13527040 }, { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 40, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 14632960 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 16844800 }, { "name": "model.layers.4.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 16855040 }, { "name": "model.layers.4.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 16869376 }, { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 17442816 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 30550016 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 30959616 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 108, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 30969856 } ], "md5sum": "03918d731e1cd6dee628f9d7e1e672b0" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 21159936, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 40, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 0 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2211840 }, { "name": "model.layers.5.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 2222080 }, { "name": "model.layers.5.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 2236416 }, { "name": "model.layers.5.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20586496 } ], "md5sum": "1fac7ba55cb7f70f8f3e3bd69223cd5e" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 32454656, "records": [ { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.6.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 13516800 }, { "name": "model.layers.6.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 13531136 }, { "name": "model.layers.6.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 31881216 } ], "md5sum": "6da2533a4ce5f42600d76308618125cf" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 1728, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "69920a0f3a2b36c916fbd081de956d63" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 640, 27648 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "be2059d8096205d23cd621920d7adde8" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.10.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "31b653f3dadc11e205ce0708a510e815" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 1728, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "a740997d6dc04ea7f3a300ac6d28d64b" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 640, 27648 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "3d5c408f4d9a4fe5810668b8726cc701" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 32075776, "records": [ { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 108, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 13527040 }, { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 40, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 14632960 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 16844800 }, { "name": "model.layers.10.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 16855040 }, { "name": "model.layers.10.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 16869376 }, { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 17442816 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 30550016 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 30959616 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 108, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 30969856 } ], "md5sum": "56d6d4b91ad81d6cad0ba6f89ccab776" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 21159936, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 40, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 0 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2211840 }, { "name": "model.layers.11.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 2222080 }, { "name": "model.layers.11.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 2236416 }, { "name": "model.layers.11.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20586496 } ], "md5sum": "ee3eb86b085da59e7e9a519a07edd797" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 1728, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "c0ecf6d9c24d0dbc177b0cf4ef02b010" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 640, 27648 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "f2550392a5c0465dc5d3a7701dab9d14" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.12.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "d260bca7690960dceda20b3552768a1c" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 1728, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "d3071d115308e7356cc51bfddc2877e2" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 640, 27648 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "cb71ef1bc793b1c851b30cda3e0e2052" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 32075776, "records": [ { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 108, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 13527040 }, { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 40, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 14632960 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 16844800 }, { "name": "model.layers.12.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 16855040 }, { "name": "model.layers.12.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 16869376 }, { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 17442816 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 30550016 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 30959616 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 108, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 30969856 } ], "md5sum": "eb8d672ed70dbb2ea6ee428ec195705b" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 21159936, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 40, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 0 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2211840 }, { "name": "model.layers.13.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 2222080 }, { "name": "model.layers.13.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 2236416 }, { "name": "model.layers.13.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20586496 } ], "md5sum": "d35c62820b0128e3bdd38d7e114f7ab3" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 1728, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "0ba19107004473e6be7fef4964be97b5" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 640, 27648 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "85d9d266f5c4837db8b88b1bedcf97e4" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.14.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "783f00e38735be30b42317dd7540033e" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.15.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "a646f52d8483f8d1613d95ba27160696" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 31547392, "records": [ { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 108, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 13527040 }, { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 40, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 14632960 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 16844800 }, { "name": "model.layers.14.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 16855040 }, { "name": "model.layers.14.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 16869376 }, { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 17442816 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 30550016 }, { "name": "model.layers.15.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 30959616 }, { "name": "model.layers.15.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 30973952 } ], "md5sum": "d4fe78a5cf2efef147bd619154eaaf3f" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 1728, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "712a06a67f5db4cb5beefabfeb6d2ad8" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 640, 27648 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "88c42d478efb609d1a8f8dd9d70ed0aa" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 1728, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "585e8610b8be5f9e07613166077c6075" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 640, 27648 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "d48fd4935c8fd2d5a2b3463a42cba1c6" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.7.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "3bbff52ba50b8703e75a57d1a456436f" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 20781056, "records": [ { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 108, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 13527040 }, { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 40, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 14632960 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 16844800 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 16855040 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 108, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 16865280 }, { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 40, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 17971200 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20183040 }, { "name": "model.layers.7.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20193280 }, { "name": "model.layers.7.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20207616 } ], "md5sum": "e3a5d801f52574ca6a1bfe75768f69b7" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 1728, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "fbfa211f21ae80b4ef5e2a9dd08b1589" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 640, 27648 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "71a1371e7d9f0f15125ee9261820d31b" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.8.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "0ddd8bf4a110b3ed0c9eafe27321beed" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 1728, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "7c93b14e647e8b1bdcf86e506b62de22" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 640, 27648 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "7525fa2ead6fad05bed1ad642e1a20d2" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 32075776, "records": [ { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 108, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 13527040 }, { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 40, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 14632960 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 16844800 }, { "name": "model.layers.8.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 16855040 }, { "name": "model.layers.8.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 16869376 }, { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 17442816 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 30550016 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 30959616 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 108, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 30969856 } ], "md5sum": "8d861c5a757d8065f37fc5b011d8de78" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 21159936, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 40, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 0 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2211840 }, { "name": "model.layers.9.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 2222080 }, { "name": "model.layers.9.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 2236416 }, { "name": "model.layers.9.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20586496 } ], "md5sum": "0fa126cac6d330cc868d341be4db59d2" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 1728, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "177bf300d1d4109049b2ebfc1b594eb1" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 640, 27648 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "8b39778b2ab49c0922c5df6155a5ae05" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 1728, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "9ea1c2ec121aceb0a35b3ac846b5e68e" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 640, 27648 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "45e0f6826f960ca3b46cbca94fc9ab75" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.16.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "e85ca07ca2b50311f9b5f2b77830553a" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 20781056, "records": [ { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 108, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 13527040 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 40, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 14632960 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 16844800 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 16855040 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 108, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 16865280 }, { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 40, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 17971200 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20183040 }, { "name": "model.layers.16.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20193280 }, { "name": "model.layers.16.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20207616 } ], "md5sum": "ed36d1e03f72da65e1d73df9d3ae26a3" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 1728, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "0e2b78c07bde8c67076c8bf6d5f1a457" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 640, 27648 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "32169cf9897ae14e46e6aea6187c09e8" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.17.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "5d3ca2033853e2f8676654e6f47969f5" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 1728, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "20e40a9394134df45a8d3cd52828b1bd" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 640, 27648 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "068f0e8c50ceda5ba09522872c589c1d" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 32075776, "records": [ { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 108, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 13527040 }, { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 40, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 14632960 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 16844800 }, { "name": "model.layers.17.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 16855040 }, { "name": "model.layers.17.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 16869376 }, { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 17442816 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 30550016 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 30959616 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 108, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 30969856 } ], "md5sum": "2cb8a3a0a411141d8bcd924053b3cc06" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 21159936, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 40, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 0 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2211840 }, { "name": "model.layers.18.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 2222080 }, { "name": "model.layers.18.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 2236416 }, { "name": "model.layers.18.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20586496 } ], "md5sum": "199f04882307de3cebed77072ee6222d" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 1728, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "e4adac834511e5ce7a52b50f46f674b2" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 640, 27648 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "6094e39116f14c82d3881228606a60ad" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.19.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "367e6c4ce318254c03fccad8ee6e3260" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 1728, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "5d1f5dc3c2eb9155cba6c9c01197a0c1" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 640, 27648 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "7044ba47e9a1930a78e0bba6b6ebce4a" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 32075776, "records": [ { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 108, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 13527040 }, { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 40, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 14632960 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 16844800 }, { "name": "model.layers.19.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 16855040 }, { "name": "model.layers.19.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 16869376 }, { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 17442816 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 30550016 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 30959616 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 108, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 30969856 } ], "md5sum": "ff7a2d0d0cea5c0b7ef93ac13e7cae46" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 21159936, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 40, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 0 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2211840 }, { "name": "model.layers.20.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 2222080 }, { "name": "model.layers.20.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 2236416 }, { "name": "model.layers.20.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20586496 } ], "md5sum": "87cde4332cf1d5b580c18ffa05da1bc9" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 1728, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "38fdbd173771f3a624a19076e677725f" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 640, 27648 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "865f34db46c2a1cf0c3465417d81550f" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.21.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "4079e49123705ce5afcb7c5144c8b888" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 1728, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "628414890d2e2706ab9f66bd358df3e9" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 640, 27648 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "8054ab6fc3a489f58200cf58af709d14" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 32075776, "records": [ { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 108, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 13527040 }, { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 40, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 14632960 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 16844800 }, { "name": "model.layers.21.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 16855040 }, { "name": "model.layers.21.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 16869376 }, { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 17442816 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 30550016 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 30959616 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 108, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 30969856 } ], "md5sum": "ee077f26fa1edda070b45746ba19590b" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 21159936, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 40, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 0 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2211840 }, { "name": "model.layers.22.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 2222080 }, { "name": "model.layers.22.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 2236416 }, { "name": "model.layers.22.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20586496 } ], "md5sum": "1ed1834859a57bd4853cded51355a380" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 1728, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "9adfd2584e55e090998cd4a7e17f96f0" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 640, 27648 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "f315a0d6ac6c8a625abc33771e4977bb" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.23.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "00feac10895287be321949d68df1e791" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.24.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "ee5e42d8a82cf84a89001b25034459bc" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 31547392, "records": [ { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 108, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 13527040 }, { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 40, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 14632960 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 16844800 }, { "name": "model.layers.23.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 16855040 }, { "name": "model.layers.23.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 16869376 }, { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 17442816 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 30550016 }, { "name": "model.layers.24.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 30959616 }, { "name": "model.layers.24.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 30973952 } ], "md5sum": "7185155a50645aee422b5c861171cb17" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 1728, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "2b25e48ba937422b912fb0380240cbd9" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 640, 27648 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "11c831b355a7f30ffbaf6da1b9dcbd2f" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 1728, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "65dff7056f41a7f3e6991c1e16ce2f91" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 640, 27648 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "43c02f3da3a05b115c9502a1091291db" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.25.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "27ae81ef2db6d9c88ad930c210b7457b" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 20781056, "records": [ { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 108, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 13527040 }, { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 40, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 14632960 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 16844800 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 16855040 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 108, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 16865280 }, { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 40, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 17971200 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20183040 }, { "name": "model.layers.25.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20193280 }, { "name": "model.layers.25.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20207616 } ], "md5sum": "c47b1bd10a67b27ff526ec4027c9cf20" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 1728, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "7e4a587765b99fe8443c3c900e3257be" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 640, 27648 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "729ea078bef9a16bb07ec16b02ff64a5" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.26.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "e621282172ccffe6887dd7e0370fb8d7" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 1728, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "d9d8f0f61fe7197b07132d3ce77ac755" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 640, 27648 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "2959dfd006bf645e869017dbe4eddbf5" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 32075776, "records": [ { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 108, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 13527040 }, { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 40, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 14632960 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 16844800 }, { "name": "model.layers.26.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 16855040 }, { "name": "model.layers.26.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 16869376 }, { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 17442816 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 30550016 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 30959616 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 108, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 30969856 } ], "md5sum": "3fb7cb246142867e5ddd2de694e10a41" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 21159936, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 40, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 0 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2211840 }, { "name": "model.layers.27.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 2222080 }, { "name": "model.layers.27.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 2236416 }, { "name": "model.layers.27.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20586496 } ], "md5sum": "50a139a6ce3f022c76e1849020d66dde" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 1728, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "5b98e6ea6e78ad6c3623046812cf2a65" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 640, 27648 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "a86929358038b2cd5a5ffacef07ada89" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.28.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "abcf8ec7ecd6af0f171fa4bb26fee134" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 1728, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "e3134533eebed384d41ea522dc997a64" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 640, 27648 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "e9faa1074ec4cab55ab210958c54e6f7" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 32075776, "records": [ { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 108, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 13527040 }, { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 40, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 14632960 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 16844800 }, { "name": "model.layers.28.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 16855040 }, { "name": "model.layers.28.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 16869376 }, { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 17442816 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 30550016 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 30959616 }, { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 108, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 30969856 } ], "md5sum": "feb9881167ac26463c3b08bd77c04f1b" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 21159936, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 40, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 0 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2211840 }, { "name": "model.layers.29.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 2222080 }, { "name": "model.layers.29.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 2236416 }, { "name": "model.layers.29.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20586496 } ], "md5sum": "ca7b17509fdd483f5e6ff691bb306bff" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 1728, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "818b7da7203327e5ccd7ae8f2c8423d9" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 640, 27648 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "42af6184e51a9e796c6af8e7f25737ec" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.30.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "b06c476b34b44092c68a870691c2e5ab" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 1728, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "dd29b048dd889b7393faae2076aea695" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 640, 27648 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "873aeca4e16b9c667591985494d66ed9" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 32075776, "records": [ { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 108, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 13527040 }, { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 40, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 14632960 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 16844800 }, { "name": "model.layers.30.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 16855040 }, { "name": "model.layers.30.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 16869376 }, { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 17442816 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 30550016 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 30959616 }, { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 108, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 30969856 } ], "md5sum": "779b3f346965ec482a6bec0993c94fa5" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 21159936, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 40, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 0 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2211840 }, { "name": "model.layers.31.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 2222080 }, { "name": "model.layers.31.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 2236416 }, { "name": "model.layers.31.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20586496 } ], "md5sum": "df2398036ab25b0c909958d4724936d4" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.32.mlp.down_proj.q_weight", "shape": [ 1728, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "bc9942eeed3a0084bbaea8862dea1734" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_weight", "shape": [ 640, 27648 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "3496da19fc4e107cea14aba54f224184" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.32.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "85d045b9def2e38aa96b8d01af2d4f10" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.33.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "9dd10f0564d27c740c0a1f0d2842aee1" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 31547392, "records": [ { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.32.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.32.mlp.down_proj.q_scale", "shape": [ 108, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 13527040 }, { "name": "model.layers.32.mlp.gate_up_proj.q_scale", "shape": [ 40, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 14632960 }, { "name": "model.layers.32.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 16844800 }, { "name": "model.layers.32.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 16855040 }, { "name": "model.layers.32.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 16869376 }, { "name": "model.layers.32.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 17442816 }, { "name": "model.layers.32.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 30550016 }, { "name": "model.layers.33.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 30959616 }, { "name": "model.layers.33.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 30973952 } ], "md5sum": "b38820b9bcfa85000903651ebbea79b8" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.33.mlp.down_proj.q_weight", "shape": [ 1728, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "8407077a31f604fe262d70d10c848a47" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_weight", "shape": [ 640, 27648 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "0aacc5809b7ac20b3247f22acd597651" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.34.mlp.down_proj.q_weight", "shape": [ 1728, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "96b2501d48856d89080979278eff6f94" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_weight", "shape": [ 640, 27648 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "dd30c271fb41a57f9a86960e2cc4ef79" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.34.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "5628e6264007f8a06aef910c683b88e7" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 20781056, "records": [ { "name": "model.layers.33.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.33.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.33.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.33.mlp.down_proj.q_scale", "shape": [ 108, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 13527040 }, { "name": "model.layers.33.mlp.gate_up_proj.q_scale", "shape": [ 40, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 14632960 }, { "name": "model.layers.33.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 16844800 }, { "name": "model.layers.34.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 16855040 }, { "name": "model.layers.34.mlp.down_proj.q_scale", "shape": [ 108, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 16865280 }, { "name": "model.layers.34.mlp.gate_up_proj.q_scale", "shape": [ 40, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 17971200 }, { "name": "model.layers.34.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20183040 }, { "name": "model.layers.34.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20193280 }, { "name": "model.layers.34.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20207616 } ], "md5sum": "1739ae899ba2d136e36e2efce899a9c8" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.35.mlp.down_proj.q_weight", "shape": [ 1728, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "aab797b70eb81b0eed84f18c20ba73b6" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_weight", "shape": [ 640, 27648 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "c464045f236e3c1935336221064eb13d" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.35.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "0159b399a5754b346b21215c5089cea2" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.36.mlp.down_proj.q_weight", "shape": [ 1728, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "da9b58348ae20067669d8e994749fd72" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_weight", "shape": [ 640, 27648 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "c1008264634bcbdda1df779d3d8e5e3c" }, { "dataPath": "params_shard_151.bin", "format": "raw-shard", "nbytes": 32075776, "records": [ { "name": "model.layers.34.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.34.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.35.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.35.mlp.down_proj.q_scale", "shape": [ 108, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 13527040 }, { "name": "model.layers.35.mlp.gate_up_proj.q_scale", "shape": [ 40, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 14632960 }, { "name": "model.layers.35.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 16844800 }, { "name": "model.layers.35.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 16855040 }, { "name": "model.layers.35.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 16869376 }, { "name": "model.layers.35.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 17442816 }, { "name": "model.layers.35.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 30550016 }, { "name": "model.layers.36.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 30959616 }, { "name": "model.layers.36.mlp.down_proj.q_scale", "shape": [ 108, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 30969856 } ], "md5sum": "cc8510ce07abbcf1e1c08f2398446a0b" }, { "dataPath": "params_shard_152.bin", "format": "raw-shard", "nbytes": 21159936, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_scale", "shape": [ 40, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 0 }, { "name": "model.layers.36.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2211840 }, { "name": "model.layers.36.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 2222080 }, { "name": "model.layers.36.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 2236416 }, { "name": "model.layers.36.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20586496 } ], "md5sum": "7088cfe905759d49940c3a32f7f2e02a" }, { "dataPath": "params_shard_153.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.37.mlp.down_proj.q_weight", "shape": [ 1728, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "863510d6228455c9b59662cf72f23c04" }, { "dataPath": "params_shard_154.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_weight", "shape": [ 640, 27648 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "9dca87a1f9d60abe82823c6f3711aa53" }, { "dataPath": "params_shard_155.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.37.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "dc7a18be1c98cbd8e0c3c577bd179364" }, { "dataPath": "params_shard_156.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.38.mlp.down_proj.q_weight", "shape": [ 1728, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "726f918b1ce770cd85bfa1a783f3ec70" }, { "dataPath": "params_shard_157.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_weight", "shape": [ 640, 27648 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "1b11d8b2f1be4fd1f57bac9fedcd2025" }, { "dataPath": "params_shard_158.bin", "format": "raw-shard", "nbytes": 32075776, "records": [ { "name": "model.layers.36.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.36.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.37.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.37.mlp.down_proj.q_scale", "shape": [ 108, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 13527040 }, { "name": "model.layers.37.mlp.gate_up_proj.q_scale", "shape": [ 40, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 14632960 }, { "name": "model.layers.37.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 16844800 }, { "name": "model.layers.37.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 16855040 }, { "name": "model.layers.37.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 16869376 }, { "name": "model.layers.37.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 17442816 }, { "name": "model.layers.37.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 30550016 }, { "name": "model.layers.38.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 30959616 }, { "name": "model.layers.38.mlp.down_proj.q_scale", "shape": [ 108, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 30969856 } ], "md5sum": "c2603f77e13f22011aec17399406430f" }, { "dataPath": "params_shard_159.bin", "format": "raw-shard", "nbytes": 21159936, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_scale", "shape": [ 40, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 0 }, { "name": "model.layers.38.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2211840 }, { "name": "model.layers.38.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 2222080 }, { "name": "model.layers.38.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 2236416 }, { "name": "model.layers.38.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20586496 } ], "md5sum": "7b12e585c83c91f493e96f89654497d4" }, { "dataPath": "params_shard_160.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.39.mlp.down_proj.q_weight", "shape": [ 1728, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "391243342e013d0aca12a7ff0a91b19f" }, { "dataPath": "params_shard_161.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_weight", "shape": [ 640, 27648 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "788050f2273aa074513878016ac231dd" }, { "dataPath": "params_shard_162.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.39.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "7aeda34dc7807b74765bce8d99e9e9e4" }, { "dataPath": "params_shard_163.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.40.mlp.down_proj.q_weight", "shape": [ 1728, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "531af9c25a81ea55662f35bd06aabbb0" }, { "dataPath": "params_shard_164.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.q_weight", "shape": [ 640, 27648 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "f371ad0de197dfc31226486efbf87165" }, { "dataPath": "params_shard_165.bin", "format": "raw-shard", "nbytes": 32075776, "records": [ { "name": "model.layers.38.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.38.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.39.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.39.mlp.down_proj.q_scale", "shape": [ 108, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 13527040 }, { "name": "model.layers.39.mlp.gate_up_proj.q_scale", "shape": [ 40, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 14632960 }, { "name": "model.layers.39.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 16844800 }, { "name": "model.layers.39.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 16855040 }, { "name": "model.layers.39.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 16869376 }, { "name": "model.layers.39.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 17442816 }, { "name": "model.layers.39.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 30550016 }, { "name": "model.layers.40.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 30959616 }, { "name": "model.layers.40.mlp.down_proj.q_scale", "shape": [ 108, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 30969856 } ], "md5sum": "efe2b22d7d22b2417feb88f3462e231c" }, { "dataPath": "params_shard_166.bin", "format": "raw-shard", "nbytes": 21159936, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.q_scale", "shape": [ 40, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 0 }, { "name": "model.layers.40.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2211840 }, { "name": "model.layers.40.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 2222080 }, { "name": "model.layers.40.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 2236416 }, { "name": "model.layers.40.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20586496 } ], "md5sum": "db0d20497d976a417a969d3637de5b6e" }, { "dataPath": "params_shard_167.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.41.mlp.down_proj.q_weight", "shape": [ 1728, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "51c7c3dab50e4cdce240f114b0083604" }, { "dataPath": "params_shard_168.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.q_weight", "shape": [ 640, 27648 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "dc53216cf0095d17f3b14caebdb2e93d" }, { "dataPath": "params_shard_169.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.41.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "971566013ab9eaad18dfb9375ff95608" }, { "dataPath": "params_shard_170.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.42.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "c7589a155932682b476494a45c443e26" }, { "dataPath": "params_shard_171.bin", "format": "raw-shard", "nbytes": 31547392, "records": [ { "name": "model.layers.40.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.40.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.41.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.41.mlp.down_proj.q_scale", "shape": [ 108, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 13527040 }, { "name": "model.layers.41.mlp.gate_up_proj.q_scale", "shape": [ 40, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 14632960 }, { "name": "model.layers.41.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 16844800 }, { "name": "model.layers.41.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 16855040 }, { "name": "model.layers.41.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 16869376 }, { "name": "model.layers.41.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 17442816 }, { "name": "model.layers.41.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 30550016 }, { "name": "model.layers.42.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 30959616 }, { "name": "model.layers.42.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 30973952 } ], "md5sum": "18b0e7df68561928ea5ce1b2933c6f17" }, { "dataPath": "params_shard_172.bin", "format": "raw-shard", "nbytes": 13516800, "records": [ { "name": "model.layers.42.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.42.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 } ], "md5sum": "ad2b67b2ad43dea4d2fae00d836e7329" } ] }