{ "metadata": { "ParamSize": 195, "ParamBytes": 7642159104.0, "BitsPerParam": 16.0 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 197001216, "records": [ { "name": "lm_head.weight", "shape": [ 32064, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 197001216, "byteOffset": 0 } ], "md5sum": "20f767987117b5a1dcebabf6ceb91a59" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.21.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "61f1db8af316e585e12d4e4e3cafc80b" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "b50d9426082783bde745b574cff002a4" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.weight", "shape": [ 9216, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "c733182448093f13578442060423b75d" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.22.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "b0e7e33ce3831ad8c363d1fbc5a22b93" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "9de05f8c7926df22a2425b8f4f7f469e" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.weight", "shape": [ 9216, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "de4cd54fa91e31ff4a0c54bdf0336fb8" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.23.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "0db08c985271c24b7181aa45904b1048" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "d02d32d6b68c620c83485f2b5a71f00e" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.23.self_attn.o_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "0cf699d4c967f1a65322ac6216e53584" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.weight", "shape": [ 9216, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "d3104c9cad8990753682da4bc527b3d6" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.24.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "3931ea704eea12fb030040fcca194edd" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "41fbee0d20ea0a6e26abbc5675e5cb51" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.24.self_attn.o_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "620f3504e3c0521b706089a2af826182" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.weight", "shape": [ 9216, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "846695f1cbdeea90f36693780cefd4e0" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.25.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "80d0f6ffe26b45cd6b208e0c35038f83" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "5a35a17d62fe5090b3edabb1b7183127" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.25.self_attn.o_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "bd3dffebfe2ae78418bf5d2b6dd4f56a" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "model.layers.25.self_attn.qkv_proj.weight", "shape": [ 9216, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "02a9f85bd57b6a44de5fdb1791f17bd9" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.26.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "63c8315f3e50f1c62d47f504fcfa8378" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "576272d67f3cebcd1bfb844e6fa1f458" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.26.self_attn.o_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "538048ac07d0eb93c90d6b7e92ffeded" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.weight", "shape": [ 9216, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "58001b07cdd83876bf7ca647f0cd3dec" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.27.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "151f95512bee7130689b227268b4a1d6" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "e1f625cd7b2e37460c8c490460fecf8e" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.27.self_attn.o_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "e935fcaa47bc2044c74855e0287d6a58" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.weight", "shape": [ 9216, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "abd971f2a5417f837893d81aa1dc1cc4" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.28.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "25c8ed95069263a500b44c49da89bcfd" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "f110460661855ac871075364dfab4429" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.28.self_attn.o_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "842cdc072cee78be502c84318a158d35" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "model.layers.28.self_attn.qkv_proj.weight", "shape": [ 9216, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "e79e3aa3437df414c4af28889a6200f5" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.29.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "370efa1f389053127f8b2af77a858b6a" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "7a3c72bf046d2a21add3cbec89ead424" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.29.self_attn.o_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "15c8c746f7e161af91eb4e63260919bc" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "model.layers.29.self_attn.qkv_proj.weight", "shape": [ 9216, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "afe888a5f5f722523d957cddc3473e39" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.30.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "97d464172344e5777e615d6d33a3955c" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "69cfffdc645acbac107fad5b0412ed92" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.30.self_attn.o_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "5a58ad89dd533102c784f675443873b8" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.weight", "shape": [ 9216, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "ee622d307cf5e57b9ab05541f4bd3ffe" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.31.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "a632be1e7644cad9ac7a340d5101c992" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "ba25b183d0421b40ec08efeb78f71448" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.31.self_attn.o_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "a365f25fa254776b829339c6a8978632" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "model.layers.31.self_attn.qkv_proj.weight", "shape": [ 9216, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "5d9f3d17abd4e3251daf4b4fc40a006c" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 197001216, "records": [ { "name": "model.embed_tokens.weight", "shape": [ 32064, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 197001216, "byteOffset": 0 } ], "md5sum": "44a86159031c9fc01a395e20c3ae4506" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.0.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "6337d0a967d6d9de48a9632bdb3bb34a" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "47da65a6649337c973868098b688c5ec" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.0.self_attn.o_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "6cdf005a73aedcdce54c391cecfe4007" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.weight", "shape": [ 9216, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "4523b6093d311f4c2981978644cc447b" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.1.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "9a8016c0f2c44a9fe895b5aeea776f36" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "91e0bea7c0cc087dab66b87c1e60f1df" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.1.self_attn.o_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "cf316bb79d6513025b15970667e457fa" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.weight", "shape": [ 9216, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "bac4b74453644003a166aa9e322bcf93" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.10.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "dfbfc42776e21df9a58a2adf15c33b61" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "586b7352fab4a30cb63980ffb2244001" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.10.self_attn.o_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "e0ae00acb274a5e7347cd89289331a1e" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.weight", "shape": [ 9216, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "b1c9194613504621776bfd3b3d80bf7f" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.11.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "fc8ec96e63f5f581e88af35450a471b9" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "c77eeb53cf21870890bac52f7f4bfc32" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.11.self_attn.o_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "d2264f7b47e3eb57189a18227c679988" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.weight", "shape": [ 9216, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "c4a5d636ff6b53339dfafa7e70fafc89" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.12.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "cc810331b696f85e1177a22ad5d0d697" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "c91108c26fe3dc89e3e42431f3194d75" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.12.self_attn.o_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "30f5b67a47cbb963021ef210cac88048" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.weight", "shape": [ 9216, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "19542189aa95bb61987ac500652af343" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.13.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "93b58d640fcc541e074e36f2aba1124f" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "dfa86a99e50e8793e5adf55a1529d725" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.13.self_attn.o_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "9da84cb116d5b77b93561bf75d1e6d4e" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.weight", "shape": [ 9216, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "f8cbfa3395d170c959e8fea958776793" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.14.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "cebb647ca6a068832c7ec017c341c978" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "78c330bd2414152be7dc2e911e957eda" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.14.self_attn.o_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "e4e48f04a3b1979e07387912700fe582" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.weight", "shape": [ 9216, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "afa51c66d784081e691069445c104549" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.15.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "3ed0d39c35e4f1aaf76c9fe80ac2e6f3" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "ba05d36f1ad7ebc97458b4b722ea2306" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.15.self_attn.o_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "2217dfbe652654d167e69d3300a48c08" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.weight", "shape": [ 9216, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "dca332b1c61965f2c062f7ed27a3b6a5" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.16.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "b3073f405ca3bfc29f4b6d98f3237865" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "dfcef91847e9edebd7d92e4f40a51766" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.16.self_attn.o_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "c657399d637544a9536be949361bf07d" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.weight", "shape": [ 9216, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "f5ea2ff98ee7ca89869e78f57013c2e5" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.17.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "c6fef22e70c8a9a23d65e8df172f9be7" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "ef9593188a61ebb071d47d4aa8817549" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.17.self_attn.o_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "311f97f74fb839a10ded0a8fa57293e8" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.weight", "shape": [ 9216, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "a18776ba20cc4ba8dbaa04ed93c3c3a8" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.18.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "9b4a42ee4be8368149e7f0833f9a91f2" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "2a03b2facb7603946de71303ce0d2d1c" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.18.self_attn.o_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "4ef17cf3c9ad9a04bc54dac1e1da19d6" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.weight", "shape": [ 9216, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "2fdf5b3a42ca9c376355527f80d8cbd1" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.19.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "5427978f3daf565b02cd6521be626a41" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "ca9f603a7c724662c7d77259f46e999f" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.19.self_attn.o_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "fb0e661582e237b9d724bc508206cca0" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.weight", "shape": [ 9216, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "14ea17ebc1a3b3bbbb60c933bd623f93" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.2.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "81ceefa209f60b613bc97f06fc95f19b" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "5659c29c2380eb15dbb1fda0b80801e9" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.2.self_attn.o_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "eee5db42e23a7549f45cd4961be23479" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.weight", "shape": [ 9216, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "e3eaa729c1d8713926b584842d0737fb" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.20.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "9a5c4c8beb921fb2565ce38ff8a02e1d" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "6369b00d4e67b6b2a0821aeb1119b7ed" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.20.self_attn.o_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "565b807f59830e3a5d74297f54599d46" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.weight", "shape": [ 9216, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "f8357067cd64c23918897ed4a881fc5a" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.21.self_attn.o_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "8d5d78c2615021c387748b9f2f656162" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.3.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "caa81c64634b34fc00ee954eab5e5e5c" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "d81d9883914d4ca194164f07883e8195" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.3.self_attn.o_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "5737de756b6a370ac814dfbebb2d3c97" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.weight", "shape": [ 9216, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "327848c43160bceb9fb0a11a5b9c771d" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.4.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "9374044fa1196bf136569ea5f5208628" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "230dc5eb061bdfbd5bd92c2f45ef4089" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.4.self_attn.o_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "16d2105d6473164ae5d49a6aca32de90" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.weight", "shape": [ 9216, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "370bd2f270f554214976915619b76df9" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.5.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "6ad7d02959b75118951cf941ed05e8aa" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "b9818bce2082f0f49a7b7e7ef9fede56" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.5.self_attn.o_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "84a51380cfbb01b37253796006d537b3" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.weight", "shape": [ 9216, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "52e3aa55438f6ed0e2d4f118cbfd1007" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.6.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "8791c0e8d96e258f56814a5c8535c6c1" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "55c69dd16f67bc22b34572e969e29f29" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.6.self_attn.o_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "33517d2e365640a76bd48a0cad228d6f" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.weight", "shape": [ 9216, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "ac05e6011a5dbd96150cc70398ba4d11" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.7.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "147efe13dcfb6e752d318f82a9e14487" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "6da52157ce76276914d2e16b46bfe378" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.7.self_attn.o_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "dc2db587d5b187240cc086595e79d06a" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.weight", "shape": [ 9216, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "918d53e693e991259a33bf060f3f15b0" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.8.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "5ab2b3b46b94a2bdc489b4f2e6190029" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "6e74f30bb5ff6e2658807bc792bb5d19" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.8.self_attn.o_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "32e1b51953337eacf9c25db0c05ace38" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.weight", "shape": [ 9216, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "af3fc95837e5539f51b645fb07d58675" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.9.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "49a5eeccda1b67320cb5ded507e2bea2" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "86c524ad766428959ea0e1d2552645a7" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.9.self_attn.o_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "e31b7260797f27c9adb2c5aeb0c6de18" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.weight", "shape": [ 9216, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "51aae5483d993159f006f200fcc5009f" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 19273728, "records": [ { "name": "model.layers.21.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 0 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 6144 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 12288 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 18432 }, { "name": "model.layers.22.self_attn.o_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 24576 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 18898944 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 18905088 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 18911232 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 18917376 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 18923520 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 18929664 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 18935808 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 18941952 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 18948096 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 18954240 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 18960384 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 18966528 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 18972672 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 18978816 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 18984960 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 18991104 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 18997248 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19003392 }, { "name": "model.norm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19009536 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19015680 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19021824 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19027968 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19034112 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19040256 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19046400 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19052544 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19058688 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19064832 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19070976 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19077120 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19083264 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19089408 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19095552 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19101696 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19107840 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19113984 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19120128 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19126272 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19132416 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19138560 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19144704 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19150848 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19156992 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19163136 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19169280 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19175424 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19181568 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19187712 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19193856 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19200000 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19206144 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19212288 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19218432 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19224576 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19230720 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19236864 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19243008 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19249152 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19255296 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19261440 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19267584 } ], "md5sum": "516d0f4e466a17bb74caaadc17e4fc0c" } ] }