{ "metadata": { "ParamSize": 709, "ParamBytes": 18431289344.0, "BitsPerParam": 3.2800260261877354 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 389283840, "records": [ { "name": "lm_head.q_weight", "shape": [ 640, 152064 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 389283840, "byteOffset": 0 } ], "md5sum": "29a5f68b210c4b4523b20638a1deadf6" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 48660480, "records": [ { "name": "lm_head.q_scale", "shape": [ 160, 152064 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 48660480, "byteOffset": 0 } ], "md5sum": "0c988642a75b649ea59b70bf1cd281db" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.63.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "e673d25273d43bb84df43358c89d5801" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.63.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "b4862917aa60e1bde718f59fe74f40a9" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 389283840, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 152064, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 389283840, "byteOffset": 0 } ], "md5sum": "98c00e3c4b026241e96cc4a0e656fb2c" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 48660480, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 152064, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 48660480, "byteOffset": 0 } ], "md5sum": "4d4b2e1b6b19c5450970e25df1e70b6f" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "89f3531d0a6e452b21c9542e612c4e43" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 26583040, "records": [ { "name": "model.layers.63.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 0 }, { "name": "model.layers.63.mlp.down_proj.q_scale", "shape": [ 864, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 10240 }, { "name": "model.layers.63.mlp.gate_up_proj.q_scale", "shape": [ 160, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 8857600 }, { "name": "model.layers.63.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 26552320 }, { "name": "model.norm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 26562560 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 26572800 } ], "md5sum": "122e4270a83cc2f3720eba839e67865d" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "56f0f3404268d01592c5514098aede84" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.0.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "f48aec1ba92be5169361d38a6f84308c" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 28860416, "records": [ { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 864, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 160, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 8847360 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 26542080 }, { "name": "model.layers.0.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 26552320 }, { "name": "model.layers.0.self_attn.c_attn.q_scale", "shape": [ 160, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 26566656 } ], "md5sum": "be7465c122becd118e6bcfdfd1593970" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "68dc8962c5ad528e366a2f8df2b35d59" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "b4ff51c369196d4deb365ae96acbd492" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 160, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "020323de6386dc23ac97445e3b198fa3" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.1.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "1f5c71be0f0ebf5a6bed7b8d09c7355c" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 160, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 864, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.1.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.1.self_attn.c_attn.q_scale", "shape": [ 160, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "a3cfbd8a586d493c251b120bf42f7c80" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "7199e7880c92de6727792bc0543becbc" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "1c8ade275e72d9cfcc0d73d20849a45a" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 160, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "93aeac7e34ba94a629a554340046b3c9" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.2.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "33417a96aef349ecaca9493d8df74d00" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 160, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 864, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.2.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.2.self_attn.c_attn.q_scale", "shape": [ 160, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "0834c82d1042685c3e86afe102f7cdd8" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "71ff9525b45b9572b3b5ed8cfe3feeec" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.3.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "846a8ba7445462b2c1596b41ee75fc48" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 32454656, "records": [ { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 160, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 160, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 }, { "name": "model.layers.3.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 32440320 } ], "md5sum": "784f56aafd99f0dbd5a0fab661926c10" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "df0717941db9a58281b68ec5959b0ba2" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "0e772d04717efdc80fe6dca2c344a66c" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 160, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "ffc67032610fe0cd50dd35202c76e221" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.10.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "9c38db78b95ad241e84ad7b77a107f2a" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 28215296, "records": [ { "name": "model.layers.3.self_attn.c_attn.q_scale", "shape": [ 160, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 0 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 2293760 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 160, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 15400960 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 17039360 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 864, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 17049600 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25896960 }, { "name": "model.layers.10.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 25907200 }, { "name": "model.layers.10.self_attn.c_attn.q_scale", "shape": [ 160, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 25921536 } ], "md5sum": "a023f6ed3a445bff91c6df4e098712a1" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "84d05ea805b52792ee1bad0f206b4be0" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "5249ab57f1c0d99fba75d6629313ac01" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 160, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "93f901bc36e9ff530dd8aa90d9c54c3b" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.11.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "ec8f909886663a22d696bf81ac15e305" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 160, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 864, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.11.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.11.self_attn.c_attn.q_scale", "shape": [ 160, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "7911d13a43563be4343b90691bcceef4" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "394a6f3333be240730f93f3fec281900" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "f9c0bba29efc5350f16ea24c27ba301f" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 160, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "9d8029f91b6677993cd0d7e13d31f52f" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.12.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "a0ad9689af86a16eb658ef365138df73" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 160, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 864, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.12.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.12.self_attn.c_attn.q_scale", "shape": [ 160, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "56cf53fc8d900bde16aebe08af494c48" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "1ac454988a29f19af2b71fce9d0ee9f1" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.13.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "e6a1dc4f8d7c9bfca16fb8bafaf18d4f" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 32454656, "records": [ { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 160, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 160, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 }, { "name": "model.layers.13.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 32440320 } ], "md5sum": "7a50e9c5e7416aa0a5ee1965fff1fd80" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "fb787441f33d67f75506fa39af884b40" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "dc2f21b77ddbdffb1f090590dabe132c" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 160, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "3ddac3945e00a245c8abea852bc62974" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "15eae32962541daba70332ac338a7c01" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 25917440, "records": [ { "name": "model.layers.13.self_attn.c_attn.q_scale", "shape": [ 160, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 0 }, { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 2293760 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 160, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 15400960 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 17039360 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 864, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 17049600 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25896960 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25907200 } ], "md5sum": "d8b62f51eafc07418455bb44a5645508" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "1347bab082e48c0d0c7179d8e6677837" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.9.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "c5c0cf6d960cd20180fd87939cde6ce8" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 28860416, "records": [ { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 864, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 160, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 8847360 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 26542080 }, { "name": "model.layers.9.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 26552320 }, { "name": "model.layers.9.self_attn.c_attn.q_scale", "shape": [ 160, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 26566656 } ], "md5sum": "ae0b6ab9b1d958eb914df8363aa2d782" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "6ae0a54c67e9dc31e17cb5d6812ab20b" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "764cfb020d278f5e2c2c762b736c4edf" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "41c530ed61968641509e98a3b963cafc" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 160, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "4b9a5435f0037ba90bf5ec684cbdfb1b" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.14.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "1220635f11c086cb53a16c592866ee4c" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 32495616, "records": [ { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 160, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 864, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23613440 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 864, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 23623680 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 32471040 }, { "name": "model.layers.14.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 32481280 } ], "md5sum": "76e04a17d500cdfc1b82859bd40f097a" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "d24551d0068e85eb9e244a365316d043" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "45c4a1e14618a345470845155beb4d3c" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 160, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "abb99098b0320bbca12d5f22f929bd70" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.15.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "8f7d49002b9ee0f52eb9e3712e7acbd2" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 28215296, "records": [ { "name": "model.layers.14.self_attn.c_attn.q_scale", "shape": [ 160, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 2293760 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 160, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 15400960 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 17039360 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 864, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 17049600 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25896960 }, { "name": "model.layers.15.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 25907200 }, { "name": "model.layers.15.self_attn.c_attn.q_scale", "shape": [ 160, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 25921536 } ], "md5sum": "174d5240a05d094bd2db8614aa1b1f38" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "3de7f604faa197c89f9bc47b3de40c8d" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "187ce65249172291448f5990c744e966" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 160, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "3caae479fe0043e2cee0ff0e587a548d" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.16.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "3f247771a531f4a1eefed5e9e52e69cc" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 160, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 864, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.16.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.16.self_attn.c_attn.q_scale", "shape": [ 160, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "c5ab012127146a66406fbaeb1c290efe" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "a622c7bb9d7625a781c93b0a9824acfc" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "96dd6b876b330c946c598f87adec7413" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 160, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "680e0b19b3ad7452494e67beda08151c" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.17.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "fae67087ec58430c0b8c2c90a469f3b7" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 160, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 864, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.17.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.17.self_attn.c_attn.q_scale", "shape": [ 160, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "f78a8b1211e1b097d575252a370862b4" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "22a85c2d840aaa3c87a2e6865d64d9c0" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.18.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "abc3c7d8b4add9eec393303749e54d2f" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 32454656, "records": [ { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 160, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 160, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 }, { "name": "model.layers.18.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 32440320 } ], "md5sum": "af511636198210dffd03f89afe46ef28" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "cfab3f84310c650fb44632d661b94876" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "8d5ac4c0b6db85bb316bb8b7a6d01927" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 25917440, "records": [ { "name": "model.layers.18.self_attn.c_attn.q_scale", "shape": [ 160, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 0 }, { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 2293760 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 160, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 15400960 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 17039360 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 864, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 17049600 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25896960 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25907200 } ], "md5sum": "0277cfcfd88e4e08284d07627028ce1a" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "a186d22ddea139fb46d1d11136afb2d2" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.19.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "d011bc75698e2352028bd33e218cd1dc" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 28860416, "records": [ { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 864, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 160, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 8847360 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 26542080 }, { "name": "model.layers.19.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 26552320 }, { "name": "model.layers.19.self_attn.c_attn.q_scale", "shape": [ 160, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 26566656 } ], "md5sum": "9dc8ab8cc65050b07058cb937ec90a0b" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "b16794ba24ab50b9b23d193e7b220cdd" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "7cfe169d7b5dfe43ce8b6761963dcd6e" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 160, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "3f2a1f00322e240fb7f91e0dccd97194" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.20.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "36463c34bcea08782b3e1c09cb13d916" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 160, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 864, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.20.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.20.self_attn.c_attn.q_scale", "shape": [ 160, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "f864bb8e7166db3834a9ccba186a064c" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "737662ccd511af252c55819d771926fa" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "487bd494b618f7caeb1aec4b78643ef4" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 160, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "6e30a2a81c513b04301f1732e8130ed4" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.21.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "08b2a8e82ae6cbec11b9a5fa779f3d42" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 160, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 864, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.21.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.21.self_attn.c_attn.q_scale", "shape": [ 160, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "877b6a262f08cd4c9898bf7cd381101e" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "4406cf86e7e6e698c95cd53048633d91" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "d46681eb6362314a236deb691f3d469a" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 160, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "f37f935716d186168adbe5bbfa49310b" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.22.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "f23f5d84e0b4bdeb68f21d968fbaeb07" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 160, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 864, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.22.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.22.self_attn.c_attn.q_scale", "shape": [ 160, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "662d09a7041e058d1f9c522938ed575e" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "7e2370f6dac3f4cd59394fd3687c3e9f" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.23.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "3d26f5b78190fed84c4d9802d99faf5c" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 32454656, "records": [ { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 160, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 160, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 }, { "name": "model.layers.23.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 32440320 } ], "md5sum": "8fcf8287bf52aec875f656ad90e2a393" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "fcbc8b1d1f8290c402dcf208a07f0ae8" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "1bd63afb6800e5a67acc3d6b4b85dd58" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 25917440, "records": [ { "name": "model.layers.23.self_attn.c_attn.q_scale", "shape": [ 160, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 0 }, { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 2293760 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 160, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 15400960 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 17039360 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 864, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 17049600 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25896960 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25907200 } ], "md5sum": "1adc6e11ffbb4a45de5b87e37bf91e0f" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "847b5c25caab72a4e174c0bc45d7a79c" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.24.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "c502a49728881362286d60b5d2a66104" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 28860416, "records": [ { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 864, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 160, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 8847360 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 26542080 }, { "name": "model.layers.24.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 26552320 }, { "name": "model.layers.24.self_attn.c_attn.q_scale", "shape": [ 160, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 26566656 } ], "md5sum": "476b6898b940b0475250d84f71d07948" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "a6a7e23d9561a6ebe2cf55c114784fdb" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "3b8350b47644166e903201c683601839" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 160, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "269d34a8f32051e88f70c0d86745e4e4" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.25.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "c2789e60a82028de5c97bfdf3b627197" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 160, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 864, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.25.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.25.self_attn.c_attn.q_scale", "shape": [ 160, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "354f2979e4c0c00bb00bbd985122a5a5" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "dcc00147d5ed025cd52c5b21bab7724f" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "baf8588c83083e942c47c68861841896" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 160, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "9dc4eb9a1b43213f046349e653952ef6" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.26.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "bf9b1a965d23582dcbf520e4fc37a8ca" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 160, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 864, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.26.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.26.self_attn.c_attn.q_scale", "shape": [ 160, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "1316e569c0c76bb759c5831a8e503edc" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "cdb24916dc743b6852eb55aafb0ba427" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "44b733f0fb214ecd927d2a23b5611636" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 160, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "ad4c3142f61e20235c883ef2b98e147f" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.27.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "2773e67e1d2e3773db109b188bc8f013" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 160, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 864, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.27.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.27.self_attn.c_attn.q_scale", "shape": [ 160, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "ec5388b375c27311ddd377e7be47668d" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "4445f037df9aad090a294f463dc8b55c" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.28.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "e6b33a8a39c7fe0d22cbacde7aa0329b" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 32454656, "records": [ { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 160, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 160, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 }, { "name": "model.layers.28.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 32440320 } ], "md5sum": "751bc665a61c2b0c7c90bb7e16598e3c" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "9b15c3cea57b75c38ea9b74618fd5878" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "2718da26fedadeb64cb2f5474fe488e0" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 25917440, "records": [ { "name": "model.layers.28.self_attn.c_attn.q_scale", "shape": [ 160, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 0 }, { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 2293760 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 160, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 15400960 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 17039360 }, { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 864, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 17049600 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25896960 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25907200 } ], "md5sum": "7957b3f4ffd6c7603f8715e57b528a41" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "d2eb584ab7d4f91d40a5a9ed4577a071" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.29.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "25d1b9df7f2b8f180696726ffca07b88" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 28860416, "records": [ { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 864, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 160, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 8847360 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 26542080 }, { "name": "model.layers.29.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 26552320 }, { "name": "model.layers.29.self_attn.c_attn.q_scale", "shape": [ 160, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 26566656 } ], "md5sum": "a5ea0e660f23dadc5727d284d38bad71" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "13b46fce9ca001dca4b1fb07c24e5eda" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "c680f37e8fcab55f253acc41c0141517" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 160, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "45eba1a81c7bba06bf861d022435dcd3" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.30.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "87ce1b96e316bcd194bb539969e3369d" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 160, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 864, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.30.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.30.self_attn.c_attn.q_scale", "shape": [ 160, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "14e6876288391ab5aea9a6ca89b55e1c" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "1dc143045711d9308c4c8893f1b9d516" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "119dc27b8d636ac2fe2c86bd05438eb3" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 160, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "8c89d322275a2f3deca885e0c059393d" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.31.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "2132397b6ecd2a6a58410976fab53fac" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 160, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 864, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.31.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.31.self_attn.c_attn.q_scale", "shape": [ 160, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "4bf83de1cb9ce08d4bb5ad86e73ae219" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.32.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "38cf9ac226f1e385dcc1a75d1979adcf" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "0f640449b5551024253a25579ee4d295" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_scale", "shape": [ 160, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "53a91d741615efcf2c5cc82bda1f3cb0" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.32.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "3c203e3a4b4e7ee4826f2155fb8c62b8" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 160, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.32.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.32.mlp.down_proj.q_scale", "shape": [ 864, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.32.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.32.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.32.self_attn.c_attn.q_scale", "shape": [ 160, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "ca6c4227453b2067cde00392b39310d0" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "bbb24a42cce76d0505f6c374f8ea03c6" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.33.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "6b0bb129eda0cc88563ceaf58ac1781e" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 32454656, "records": [ { "name": "model.layers.32.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.32.self_attn.o_proj.q_scale", "shape": [ 160, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.33.mlp.gate_up_proj.q_scale", "shape": [ 160, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 }, { "name": "model.layers.33.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 32440320 } ], "md5sum": "b11453043f910e570b98f3da6135063d" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "1205d70a3154034a88ffce6c896cedba" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "1835216c40da49700093f2275efd6233" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 25917440, "records": [ { "name": "model.layers.33.self_attn.c_attn.q_scale", "shape": [ 160, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 0 }, { "name": "model.layers.33.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 2293760 }, { "name": "model.layers.33.self_attn.o_proj.q_scale", "shape": [ 160, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 15400960 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 17039360 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 864, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 17049600 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25896960 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25907200 } ], "md5sum": "023ffa2caf67dc683a3160a109830e8f" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "30e8c5ed6b95f53f6ddad2ddab59e4f9" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.4.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "f666b9a7750b8f1215eb3537b9c451dc" }, { "dataPath": "params_shard_151.bin", "format": "raw-shard", "nbytes": 28860416, "records": [ { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 864, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 160, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 8847360 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 26542080 }, { "name": "model.layers.4.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 26552320 }, { "name": "model.layers.4.self_attn.c_attn.q_scale", "shape": [ 160, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 26566656 } ], "md5sum": "26f36a9734b4fa01e4ff38c694176278" }, { "dataPath": "params_shard_152.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "3ca5e7dc0189af12b3dc443a52b4b556" }, { "dataPath": "params_shard_153.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "9b8098f053fb2b0f9230fe7a81733a2d" }, { "dataPath": "params_shard_154.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 160, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "f300636f166ede6c5e550bd7368c417b" }, { "dataPath": "params_shard_155.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.5.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "999833938405dfa54ae3facb7421ff78" }, { "dataPath": "params_shard_156.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 160, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 864, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.5.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.5.self_attn.c_attn.q_scale", "shape": [ 160, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "83d76f3990525c59f34897f7d581cf71" }, { "dataPath": "params_shard_157.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "45fb1714145baab33dfcf1a03bbdce6b" }, { "dataPath": "params_shard_158.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "cc403cab33e18f282cf77503c4946c73" }, { "dataPath": "params_shard_159.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 160, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "02e1d605959308bd7a2e02a0be536aee" }, { "dataPath": "params_shard_160.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.6.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "44ea2b1bfc071315e8bd28bee64bae06" }, { "dataPath": "params_shard_161.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 160, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 864, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.6.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.6.self_attn.c_attn.q_scale", "shape": [ 160, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "ef5d8eaf79666645ffb75e6a9420feac" }, { "dataPath": "params_shard_162.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "1fa54cc75187a2be8e378920e51f9e51" }, { "dataPath": "params_shard_163.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "26e49c1b96822567f75a03daba85f57b" }, { "dataPath": "params_shard_164.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 160, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "0d698471759dc78bb1190eff6812abd5" }, { "dataPath": "params_shard_165.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.7.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "91484804b9f09409821feaa429ba7315" }, { "dataPath": "params_shard_166.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 160, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 864, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.7.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.7.self_attn.c_attn.q_scale", "shape": [ 160, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "54639121330c92b71190098959593d85" }, { "dataPath": "params_shard_167.bin", "format": "raw-shard", "nbytes": 33110016, "records": [ { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 160, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.8.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 14745600 }, { "name": "model.layers.8.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 14759936 } ], "md5sum": "15b9dc0032642b89d7a2d6cacad7396c" }, { "dataPath": "params_shard_168.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.33.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "3ce36ba31978638eb52326d05f6a8238" }, { "dataPath": "params_shard_169.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.34.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "444bb7211854d8238aea3d69784b9aa0" }, { "dataPath": "params_shard_170.bin", "format": "raw-shard", "nbytes": 25917440, "records": [ { "name": "model.layers.8.self_attn.c_attn.q_scale", "shape": [ 160, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 2293760 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 160, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 15400960 }, { "name": "model.layers.33.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 17039360 }, { "name": "model.layers.33.mlp.down_proj.q_scale", "shape": [ 864, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 17049600 }, { "name": "model.layers.33.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25896960 }, { "name": "model.layers.34.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25907200 } ], "md5sum": "3c002ea28fa8623d1d832eeee9cf2235" }, { "dataPath": "params_shard_171.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "f35e420dceac550a31516fac474012e6" }, { "dataPath": "params_shard_172.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.34.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "4af5a17243127749af65e3621d7ce2b0" }, { "dataPath": "params_shard_173.bin", "format": "raw-shard", "nbytes": 28860416, "records": [ { "name": "model.layers.34.mlp.down_proj.q_scale", "shape": [ 864, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.34.mlp.gate_up_proj.q_scale", "shape": [ 160, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 8847360 }, { "name": "model.layers.34.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 26542080 }, { "name": "model.layers.34.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 26552320 }, { "name": "model.layers.34.self_attn.c_attn.q_scale", "shape": [ 160, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 26566656 } ], "md5sum": "c545caba0a40826508563419f392e15e" }, { "dataPath": "params_shard_174.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.35.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "6c8604bdf47491d50c41b44fd8acd859" }, { "dataPath": "params_shard_175.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "3fdf75b2834d4ddb5ce5b0a61eb7654a" }, { "dataPath": "params_shard_176.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_scale", "shape": [ 160, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "6e6159bcf70a5315305f856d5103503c" }, { "dataPath": "params_shard_177.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.35.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "3602f9268c58f3460d7894fe9fc5790a" }, { "dataPath": "params_shard_178.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.34.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.34.self_attn.o_proj.q_scale", "shape": [ 160, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.35.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.35.mlp.down_proj.q_scale", "shape": [ 864, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.35.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.35.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.35.self_attn.c_attn.q_scale", "shape": [ 160, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "dbdc68734bdb394076789b48ea0a14cb" }, { "dataPath": "params_shard_179.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.36.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "39839644cf2cc39521eac28be6c801cb" }, { "dataPath": "params_shard_180.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "74fc2dce4aa0f47b49cc1cdcb6bdf4db" }, { "dataPath": "params_shard_181.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_scale", "shape": [ 160, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "32d8916f64d9244664bb768b0852dd5c" }, { "dataPath": "params_shard_182.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.36.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "74b3d8bf329fc890ca5215afdc04c613" }, { "dataPath": "params_shard_183.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.35.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.35.self_attn.o_proj.q_scale", "shape": [ 160, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.36.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.36.mlp.down_proj.q_scale", "shape": [ 864, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.36.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.36.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.36.self_attn.c_attn.q_scale", "shape": [ 160, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "c0a857f7f7e900805e0ab7a755bf2d5d" }, { "dataPath": "params_shard_184.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.37.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "93daa1ad48a45822edf4caf920bca81e" }, { "dataPath": "params_shard_185.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "cb0695487e8a6e80b6750252a92523cd" }, { "dataPath": "params_shard_186.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_scale", "shape": [ 160, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "dbe27d14773304382a3967e4f9b1c85b" }, { "dataPath": "params_shard_187.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.37.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "5f936b1a4f0a56f66fc09147e5cf107e" }, { "dataPath": "params_shard_188.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.36.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.36.self_attn.o_proj.q_scale", "shape": [ 160, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.37.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.37.mlp.down_proj.q_scale", "shape": [ 864, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.37.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.37.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.37.self_attn.c_attn.q_scale", "shape": [ 160, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "cde938fd1fa3a57926932fd93ec87082" }, { "dataPath": "params_shard_189.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "b86d9bb1bd53609883b6b873a903cda3" }, { "dataPath": "params_shard_190.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.38.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "c848ebf4e6e94361d649ec0034d11e05" }, { "dataPath": "params_shard_191.bin", "format": "raw-shard", "nbytes": 32454656, "records": [ { "name": "model.layers.37.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.37.self_attn.o_proj.q_scale", "shape": [ 160, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.38.mlp.gate_up_proj.q_scale", "shape": [ 160, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 }, { "name": "model.layers.38.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 32440320 } ], "md5sum": "219ed83106338b467c4223352e9729e6" }, { "dataPath": "params_shard_192.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.38.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "c161a5a4ae41a8d1b8b110a0e6dde74b" }, { "dataPath": "params_shard_193.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.39.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "e851c462142c8047c3ed029e2aeb4505" }, { "dataPath": "params_shard_194.bin", "format": "raw-shard", "nbytes": 25917440, "records": [ { "name": "model.layers.38.self_attn.c_attn.q_scale", "shape": [ 160, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 0 }, { "name": "model.layers.38.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 2293760 }, { "name": "model.layers.38.self_attn.o_proj.q_scale", "shape": [ 160, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 15400960 }, { "name": "model.layers.38.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 17039360 }, { "name": "model.layers.38.mlp.down_proj.q_scale", "shape": [ 864, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 17049600 }, { "name": "model.layers.38.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25896960 }, { "name": "model.layers.39.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25907200 } ], "md5sum": "55bde821fe353bcfb77fba9ffb0de73f" }, { "dataPath": "params_shard_195.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "127ba1a054c6b3295bd8ef879e026b2c" }, { "dataPath": "params_shard_196.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.39.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "e1516faa755dcedef1440ea16cbfedb4" }, { "dataPath": "params_shard_197.bin", "format": "raw-shard", "nbytes": 28860416, "records": [ { "name": "model.layers.39.mlp.down_proj.q_scale", "shape": [ 864, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.39.mlp.gate_up_proj.q_scale", "shape": [ 160, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 8847360 }, { "name": "model.layers.39.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 26542080 }, { "name": "model.layers.39.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 26552320 }, { "name": "model.layers.39.self_attn.c_attn.q_scale", "shape": [ 160, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 26566656 } ], "md5sum": "8aa7e0d2edd649c48b1d22b34980865f" }, { "dataPath": "params_shard_198.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.40.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "8d8b714cab6461ecd38eec0d157fa7d8" }, { "dataPath": "params_shard_199.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "86e49a675e019586a35947c16a456e95" }, { "dataPath": "params_shard_200.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.q_scale", "shape": [ 160, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "d2b28ab12bdffda82a540eb0bfefc15b" }, { "dataPath": "params_shard_201.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.40.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "4a700c32da7e5797268a1c5a7ff7ff9a" }, { "dataPath": "params_shard_202.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.39.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.39.self_attn.o_proj.q_scale", "shape": [ 160, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.40.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.40.mlp.down_proj.q_scale", "shape": [ 864, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.40.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.40.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.40.self_attn.c_attn.q_scale", "shape": [ 160, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "8a3e123a35085cd15857679831a5780f" }, { "dataPath": "params_shard_203.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.41.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "f06cfd93807fb1b93b7fb6a7befb4ecc" }, { "dataPath": "params_shard_204.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "19b088c0c3bec3ecfebfce88354a209b" }, { "dataPath": "params_shard_205.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.q_scale", "shape": [ 160, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "0c74e8a5cb133045c576fadac50650c9" }, { "dataPath": "params_shard_206.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.41.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "93abd5d0e64b5dff36f0322496dcf46a" }, { "dataPath": "params_shard_207.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.40.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.40.self_attn.o_proj.q_scale", "shape": [ 160, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.41.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.41.mlp.down_proj.q_scale", "shape": [ 864, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.41.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.41.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.41.self_attn.c_attn.q_scale", "shape": [ 160, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "f34b85e60d3440ff2cb263fb07fdbb4a" }, { "dataPath": "params_shard_208.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.42.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "a459b6d6f4975cf03830265997e733c7" }, { "dataPath": "params_shard_209.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "257cbe3dbca30e8384b1f34ee6915605" }, { "dataPath": "params_shard_210.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.q_scale", "shape": [ 160, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "535dc13464e092fbf9207c4810826c16" }, { "dataPath": "params_shard_211.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.42.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "e22d9dadbc12f7482218c33d16f225d3" }, { "dataPath": "params_shard_212.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.41.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.41.self_attn.o_proj.q_scale", "shape": [ 160, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.42.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.42.mlp.down_proj.q_scale", "shape": [ 864, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.42.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.42.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.42.self_attn.c_attn.q_scale", "shape": [ 160, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "996a3d799daed0cf09a5fec8bad005df" }, { "dataPath": "params_shard_213.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.43.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "f14220924fe5151a25132f11c5f9250e" }, { "dataPath": "params_shard_214.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.43.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "45a62853edc792ec19dcc2775d5c9cc9" }, { "dataPath": "params_shard_215.bin", "format": "raw-shard", "nbytes": 32454656, "records": [ { "name": "model.layers.42.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.42.self_attn.o_proj.q_scale", "shape": [ 160, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.43.mlp.gate_up_proj.q_scale", "shape": [ 160, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 }, { "name": "model.layers.43.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 32440320 } ], "md5sum": "13d78c4fc3f4b33cfd8b8e416b5178ec" }, { "dataPath": "params_shard_216.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.43.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "57a9f6349f3c8efad4a9ebb6876f6a40" }, { "dataPath": "params_shard_217.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.44.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "4f4b4f502785c5814d0a1b9f80e0051c" }, { "dataPath": "params_shard_218.bin", "format": "raw-shard", "nbytes": 25917440, "records": [ { "name": "model.layers.43.self_attn.c_attn.q_scale", "shape": [ 160, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 0 }, { "name": "model.layers.43.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 2293760 }, { "name": "model.layers.43.self_attn.o_proj.q_scale", "shape": [ 160, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 15400960 }, { "name": "model.layers.43.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 17039360 }, { "name": "model.layers.43.mlp.down_proj.q_scale", "shape": [ 864, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 17049600 }, { "name": "model.layers.43.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25896960 }, { "name": "model.layers.44.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25907200 } ], "md5sum": "eee5fa32b1d61b500629aa2218c8db96" }, { "dataPath": "params_shard_219.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "6db73556a1e804c85d7c64a28d7b7133" }, { "dataPath": "params_shard_220.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.44.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "e463a160d401118d7cb36fb5b0c567da" }, { "dataPath": "params_shard_221.bin", "format": "raw-shard", "nbytes": 28860416, "records": [ { "name": "model.layers.44.mlp.down_proj.q_scale", "shape": [ 864, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.44.mlp.gate_up_proj.q_scale", "shape": [ 160, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 8847360 }, { "name": "model.layers.44.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 26542080 }, { "name": "model.layers.44.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 26552320 }, { "name": "model.layers.44.self_attn.c_attn.q_scale", "shape": [ 160, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 26566656 } ], "md5sum": "9f11c8805e3e22ed33879718764801c4" }, { "dataPath": "params_shard_222.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.45.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "5d790d225ecb253f06e907f7cd994ddb" }, { "dataPath": "params_shard_223.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "d79cb61bf168e6150f8a149c0b511465" }, { "dataPath": "params_shard_224.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.q_scale", "shape": [ 160, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "2a95393047c1b8fcf3cb2f131df6022c" }, { "dataPath": "params_shard_225.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.45.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "99b55accccaf24aeedf434ec439fc2e0" }, { "dataPath": "params_shard_226.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.44.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.44.self_attn.o_proj.q_scale", "shape": [ 160, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.45.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.45.mlp.down_proj.q_scale", "shape": [ 864, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.45.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.45.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.45.self_attn.c_attn.q_scale", "shape": [ 160, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "e139eb30cfa764677ab35baf1963cab5" }, { "dataPath": "params_shard_227.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.46.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "0576b7f563b3ae5a9dd32b83b211018c" }, { "dataPath": "params_shard_228.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "c6b96edacaf815ff286faba14a3154ec" }, { "dataPath": "params_shard_229.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.q_scale", "shape": [ 160, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "dff7091b743bda95929e4c81a0c775f1" }, { "dataPath": "params_shard_230.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.46.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "1738cbe7844a734db849e0de41ce15d7" }, { "dataPath": "params_shard_231.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.45.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.45.self_attn.o_proj.q_scale", "shape": [ 160, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.46.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.46.mlp.down_proj.q_scale", "shape": [ 864, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.46.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.46.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.46.self_attn.c_attn.q_scale", "shape": [ 160, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "c4bab74f4d66ccef7f5c8eac6a08f972" }, { "dataPath": "params_shard_232.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.47.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "ec6ca89c55fcf5521bde8b1c46de7024" }, { "dataPath": "params_shard_233.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.47.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "a7c60383b978cdf007522ec56b255979" }, { "dataPath": "params_shard_234.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.47.mlp.gate_up_proj.q_scale", "shape": [ 160, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "804a6e215a417259e58510b9058f2283" }, { "dataPath": "params_shard_235.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.47.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "9b1ff57212e704d72e650919e92b8f5d" }, { "dataPath": "params_shard_236.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.46.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.46.self_attn.o_proj.q_scale", "shape": [ 160, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.47.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.47.mlp.down_proj.q_scale", "shape": [ 864, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.47.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.47.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.47.self_attn.c_attn.q_scale", "shape": [ 160, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "23e3fa31b222b0373263671fad5e9794" }, { "dataPath": "params_shard_237.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.48.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "0a1d9accfdf7d39bbe65e994d66954e9" }, { "dataPath": "params_shard_238.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.48.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "382b86703cfa939205c40a52d856c5c4" }, { "dataPath": "params_shard_239.bin", "format": "raw-shard", "nbytes": 32454656, "records": [ { "name": "model.layers.47.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.47.self_attn.o_proj.q_scale", "shape": [ 160, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.48.mlp.gate_up_proj.q_scale", "shape": [ 160, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 }, { "name": "model.layers.48.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 32440320 } ], "md5sum": "3eb8b34b79dfcdd459522e46e60f5e3a" }, { "dataPath": "params_shard_240.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.48.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "0d3a4b56bc1486cb8b6d4ed9267f05ab" }, { "dataPath": "params_shard_241.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.49.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "5ecde7c90a0a3be42c0230d441b4dd36" }, { "dataPath": "params_shard_242.bin", "format": "raw-shard", "nbytes": 25917440, "records": [ { "name": "model.layers.48.self_attn.c_attn.q_scale", "shape": [ 160, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 0 }, { "name": "model.layers.48.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 2293760 }, { "name": "model.layers.48.self_attn.o_proj.q_scale", "shape": [ 160, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 15400960 }, { "name": "model.layers.48.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 17039360 }, { "name": "model.layers.48.mlp.down_proj.q_scale", "shape": [ 864, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 17049600 }, { "name": "model.layers.48.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25896960 }, { "name": "model.layers.49.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25907200 } ], "md5sum": "2fe9291c3fde725fd50a23d4a38df598" }, { "dataPath": "params_shard_243.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.49.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "ffe379aac74f21807b244cbc18f6cd0d" }, { "dataPath": "params_shard_244.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.49.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "a46355254aa7bae0ec2724fb80bac82b" }, { "dataPath": "params_shard_245.bin", "format": "raw-shard", "nbytes": 28860416, "records": [ { "name": "model.layers.49.mlp.down_proj.q_scale", "shape": [ 864, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.49.mlp.gate_up_proj.q_scale", "shape": [ 160, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 8847360 }, { "name": "model.layers.49.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 26542080 }, { "name": "model.layers.49.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 26552320 }, { "name": "model.layers.49.self_attn.c_attn.q_scale", "shape": [ 160, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 26566656 } ], "md5sum": "dbed73531b6d19adfed24cc930ede66b" }, { "dataPath": "params_shard_246.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.50.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "15d4666009ec273caadcb3cc0cc09310" }, { "dataPath": "params_shard_247.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.50.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "52b992af91db31850686b36f350b8992" }, { "dataPath": "params_shard_248.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.50.mlp.gate_up_proj.q_scale", "shape": [ 160, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "bdeccd3397b6a0824df9c25b148854a7" }, { "dataPath": "params_shard_249.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.50.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "39131f1f9447e2cbd4f58faf5a5222be" }, { "dataPath": "params_shard_250.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.49.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.49.self_attn.o_proj.q_scale", "shape": [ 160, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.50.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.50.mlp.down_proj.q_scale", "shape": [ 864, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.50.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.50.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.50.self_attn.c_attn.q_scale", "shape": [ 160, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "2a4cde65b08bed14171908c730aff832" }, { "dataPath": "params_shard_251.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.51.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "c17701f107a5f1bee27cb539b722220c" }, { "dataPath": "params_shard_252.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.51.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "9e18dd80b869552ab424aff46121046c" }, { "dataPath": "params_shard_253.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.51.mlp.gate_up_proj.q_scale", "shape": [ 160, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "e8b91767ccd7934c224cfb9a69efe6ab" }, { "dataPath": "params_shard_254.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.51.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "c3c4ab1b3fcdaaa5a1c7c3e76c22388f" }, { "dataPath": "params_shard_255.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.50.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.50.self_attn.o_proj.q_scale", "shape": [ 160, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.51.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.51.mlp.down_proj.q_scale", "shape": [ 864, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.51.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.51.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.51.self_attn.c_attn.q_scale", "shape": [ 160, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "cb2a3052efd71b1c5891476625aafd04" }, { "dataPath": "params_shard_256.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.52.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "615b0dc0977de96e8f569595a7ee105d" }, { "dataPath": "params_shard_257.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.52.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "9fc12f4a47847d4b83e98fdfcc3e29ac" }, { "dataPath": "params_shard_258.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.52.mlp.gate_up_proj.q_scale", "shape": [ 160, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "17f4afa6c01ec9017a6cab40269088c1" }, { "dataPath": "params_shard_259.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.52.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "f708b8104ae1397a9bd313e7b62c5dc8" }, { "dataPath": "params_shard_260.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.51.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.51.self_attn.o_proj.q_scale", "shape": [ 160, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.52.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.52.mlp.down_proj.q_scale", "shape": [ 864, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.52.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.52.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.52.self_attn.c_attn.q_scale", "shape": [ 160, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "2c674220f2fb1749a1974e413489711d" }, { "dataPath": "params_shard_261.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.53.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "6bfee1efa9e1930fb28eef7be55b4649" }, { "dataPath": "params_shard_262.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.53.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "441fc278ce36832877427eefa7ae0778" }, { "dataPath": "params_shard_263.bin", "format": "raw-shard", "nbytes": 32454656, "records": [ { "name": "model.layers.52.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.52.self_attn.o_proj.q_scale", "shape": [ 160, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.53.mlp.gate_up_proj.q_scale", "shape": [ 160, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 }, { "name": "model.layers.53.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 32440320 } ], "md5sum": "e2f79c03c3f066e65cc8c9e2de042b01" }, { "dataPath": "params_shard_264.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.53.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "9c07ddd6e76906f2a21ef4151c5a2392" }, { "dataPath": "params_shard_265.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.54.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "038f063f388d74fb222b923992e2974c" }, { "dataPath": "params_shard_266.bin", "format": "raw-shard", "nbytes": 25917440, "records": [ { "name": "model.layers.53.self_attn.c_attn.q_scale", "shape": [ 160, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 0 }, { "name": "model.layers.53.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 2293760 }, { "name": "model.layers.53.self_attn.o_proj.q_scale", "shape": [ 160, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 15400960 }, { "name": "model.layers.53.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 17039360 }, { "name": "model.layers.53.mlp.down_proj.q_scale", "shape": [ 864, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 17049600 }, { "name": "model.layers.53.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25896960 }, { "name": "model.layers.54.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25907200 } ], "md5sum": "8d4ba788fd3c91dc31cf5ccfd6b6b9ce" }, { "dataPath": "params_shard_267.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.54.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "d2c59d06b2ab8375b4a20c87c43ebfea" }, { "dataPath": "params_shard_268.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.54.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "4e43e29be64b3e596b668ae52ab57fac" }, { "dataPath": "params_shard_269.bin", "format": "raw-shard", "nbytes": 28860416, "records": [ { "name": "model.layers.54.mlp.down_proj.q_scale", "shape": [ 864, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.54.mlp.gate_up_proj.q_scale", "shape": [ 160, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 8847360 }, { "name": "model.layers.54.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 26542080 }, { "name": "model.layers.54.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 26552320 }, { "name": "model.layers.54.self_attn.c_attn.q_scale", "shape": [ 160, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 26566656 } ], "md5sum": "bb9e05fb61540035a11867177c3f114a" }, { "dataPath": "params_shard_270.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.55.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "c3b2eef23712066d8bccdc30849266ae" }, { "dataPath": "params_shard_271.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.55.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "8fbdc46dd45596f82cbbcd7b9bfad08a" }, { "dataPath": "params_shard_272.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.55.mlp.gate_up_proj.q_scale", "shape": [ 160, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "82c8155cdfb604bd3c01ee380bb1d4a9" }, { "dataPath": "params_shard_273.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.55.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "d136e1a2e98c84442319bac9239241c0" }, { "dataPath": "params_shard_274.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.54.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.54.self_attn.o_proj.q_scale", "shape": [ 160, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.55.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.55.mlp.down_proj.q_scale", "shape": [ 864, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.55.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.55.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.55.self_attn.c_attn.q_scale", "shape": [ 160, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "5e2d15b55d5407698c98bf93630db63c" }, { "dataPath": "params_shard_275.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.56.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "ad16fe0e426184d7b504f11fa08fb291" }, { "dataPath": "params_shard_276.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.56.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "dda154feffad72ec5518bdb53f08b09d" }, { "dataPath": "params_shard_277.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.56.mlp.gate_up_proj.q_scale", "shape": [ 160, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "0fe10e782f048dbfeef740e6d25c3b6d" }, { "dataPath": "params_shard_278.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.56.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "4073d0bb58af737f5dbb0906fa17e194" }, { "dataPath": "params_shard_279.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.55.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.55.self_attn.o_proj.q_scale", "shape": [ 160, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.56.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.56.mlp.down_proj.q_scale", "shape": [ 864, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.56.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.56.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.56.self_attn.c_attn.q_scale", "shape": [ 160, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "d8076c82ba1192eb16535a46e4b23580" }, { "dataPath": "params_shard_280.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.57.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "7f6e1ff1c41702e41e9e8e831d261d07" }, { "dataPath": "params_shard_281.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.57.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "67f45b66eb48a44b7bf6cdcb44512828" }, { "dataPath": "params_shard_282.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.57.mlp.gate_up_proj.q_scale", "shape": [ 160, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "d76ba94ca4ecb281295e2db235f000be" }, { "dataPath": "params_shard_283.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.57.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "9163210edeeb83e01da065d5e6d5ec59" }, { "dataPath": "params_shard_284.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.56.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.56.self_attn.o_proj.q_scale", "shape": [ 160, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.57.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.57.mlp.down_proj.q_scale", "shape": [ 864, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.57.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.57.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.57.self_attn.c_attn.q_scale", "shape": [ 160, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "84d74e201aa768c145c80f348d8c9c5e" }, { "dataPath": "params_shard_285.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.58.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "6e6786d080e1203866d9b75fdc120482" }, { "dataPath": "params_shard_286.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.58.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "844a8b0bda88f73b4ea5c2007a14b1e6" }, { "dataPath": "params_shard_287.bin", "format": "raw-shard", "nbytes": 32454656, "records": [ { "name": "model.layers.57.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.57.self_attn.o_proj.q_scale", "shape": [ 160, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.58.mlp.gate_up_proj.q_scale", "shape": [ 160, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 }, { "name": "model.layers.58.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 32440320 } ], "md5sum": "711326d452897aa5654e8345fc79d0ff" }, { "dataPath": "params_shard_288.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.58.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "e02a04f8c952c99bb88de13af45b417b" }, { "dataPath": "params_shard_289.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.59.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "153201109e9ffb702302b22e8d831ec0" }, { "dataPath": "params_shard_290.bin", "format": "raw-shard", "nbytes": 25917440, "records": [ { "name": "model.layers.58.self_attn.c_attn.q_scale", "shape": [ 160, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 0 }, { "name": "model.layers.58.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 2293760 }, { "name": "model.layers.58.self_attn.o_proj.q_scale", "shape": [ 160, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 15400960 }, { "name": "model.layers.58.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 17039360 }, { "name": "model.layers.58.mlp.down_proj.q_scale", "shape": [ 864, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 17049600 }, { "name": "model.layers.58.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25896960 }, { "name": "model.layers.59.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25907200 } ], "md5sum": "c929e4e171d404b2a3c2bb06c5d60bb9" }, { "dataPath": "params_shard_291.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.59.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "00af33186be09a255f1864033f1d8384" }, { "dataPath": "params_shard_292.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.59.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "7a5bc1bc63ccba564e7f5a567fc77053" }, { "dataPath": "params_shard_293.bin", "format": "raw-shard", "nbytes": 28860416, "records": [ { "name": "model.layers.59.mlp.down_proj.q_scale", "shape": [ 864, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.59.mlp.gate_up_proj.q_scale", "shape": [ 160, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 8847360 }, { "name": "model.layers.59.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 26542080 }, { "name": "model.layers.59.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 26552320 }, { "name": "model.layers.59.self_attn.c_attn.q_scale", "shape": [ 160, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 26566656 } ], "md5sum": "a0b4ec368e2372fc730c196854c9b1ea" }, { "dataPath": "params_shard_294.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.60.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "775fff63051032f9e1f85abc2094d304" }, { "dataPath": "params_shard_295.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.60.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "81b4a56f60c7dae21588ac71dd9908f9" }, { "dataPath": "params_shard_296.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.60.mlp.gate_up_proj.q_scale", "shape": [ 160, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "781991a2d4eae849608c196d830d765e" }, { "dataPath": "params_shard_297.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.60.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "6494d6816f8e803adadf21215acea0f5" }, { "dataPath": "params_shard_298.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.59.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.59.self_attn.o_proj.q_scale", "shape": [ 160, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.60.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.60.mlp.down_proj.q_scale", "shape": [ 864, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.60.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.60.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.60.self_attn.c_attn.q_scale", "shape": [ 160, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "a799c77291897448318e76e31fcdb8c1" }, { "dataPath": "params_shard_299.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.61.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "aa9d216333dd1fa49d5eb96a557cfc3b" }, { "dataPath": "params_shard_300.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.61.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "e70de91835b85fc6eaab685cc5bd2289" }, { "dataPath": "params_shard_301.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.61.mlp.gate_up_proj.q_scale", "shape": [ 160, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "69df24077b8dbc27ed796f88a5ad4372" }, { "dataPath": "params_shard_302.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.61.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "4f810fe00b10b936b0d6a9f37c626c0e" }, { "dataPath": "params_shard_303.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.60.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.60.self_attn.o_proj.q_scale", "shape": [ 160, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.61.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.61.mlp.down_proj.q_scale", "shape": [ 864, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.61.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.61.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.61.self_attn.c_attn.q_scale", "shape": [ 160, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "ded292a50f1ca504193e0734b46c8c68" }, { "dataPath": "params_shard_304.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.62.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "4409d0c25f737e4774a74e9b7140e35b" }, { "dataPath": "params_shard_305.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.62.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "c709bbb3fa403fdf0ec6b4f442c6211e" }, { "dataPath": "params_shard_306.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.62.mlp.gate_up_proj.q_scale", "shape": [ 160, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "cb5f0f9ec107d96a0794b497984e1e2b" }, { "dataPath": "params_shard_307.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.62.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "8e56558b89fdd5716d33a8a31c3cfac3" }, { "dataPath": "params_shard_308.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.61.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.61.self_attn.o_proj.q_scale", "shape": [ 160, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.62.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.62.mlp.down_proj.q_scale", "shape": [ 864, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.62.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.62.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.62.self_attn.c_attn.q_scale", "shape": [ 160, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "9c7335ba1e18e25178bf0036fff87cfd" }, { "dataPath": "params_shard_309.bin", "format": "raw-shard", "nbytes": 33110016, "records": [ { "name": "model.layers.62.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.62.self_attn.o_proj.q_scale", "shape": [ 160, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.63.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 14745600 }, { "name": "model.layers.63.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 14759936 } ], "md5sum": "73440f07f9a1a251f6482b6c15979ad9" }, { "dataPath": "params_shard_310.bin", "format": "raw-shard", "nbytes": 17039360, "records": [ { "name": "model.layers.63.self_attn.c_attn.q_scale", "shape": [ 160, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 0 }, { "name": "model.layers.63.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 2293760 }, { "name": "model.layers.63.self_attn.o_proj.q_scale", "shape": [ 160, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 15400960 } ], "md5sum": "e6e9eb97f56192689a6c0fe98c44896b" } ] }