QwQ-32B-q0f16-MLC / ndarray-cache.json
riczhou's picture
Upload folder using huggingface_hub
8d2383d verified
{
"metadata": {
"ParamSize": 451,
"ParamBytes": 65527752704.0,
"BitsPerParam": 11.661296738129801
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 1557135360,
"records": [
{
"name": "lm_head.weight",
"shape": [
152064,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1557135360,
"byteOffset": 0
}
],
"md5sum": "d791160bf2a34e8309d391f87f387762"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.63.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "4e09ff8c6cb2104355263ca5c6b63ae2"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.63.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "2a7087d0be276da9263ec1573234b896"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 1557135360,
"records": [
{
"name": "model.embed_tokens.weight",
"shape": [
152064,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1557135360,
"byteOffset": 0
}
],
"md5sum": "34c980c6bfe1c298750d71526c4c5bb4"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.0.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "28f1260c625f1f9b65e81b6f320f9271"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "de1145b99addb4d99316d9bef5619154"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.0.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "de60bc6d6f3bd40c62ca6e4b48403116"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.0.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "72aa77160671358cc6e9242b59763fd3"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.1.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "3e1686d914d18250516ca0cfb3395d8b"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "48ae1411464c71bfab4f0810c0e84be1"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.1.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "ecadb507f74a3b743ea7fb5462e0283d"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.1.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "99a75ba7f30ee44625cd2a03bd818f8e"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.2.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "51117130f47bc622fdb5a1f4cd6790d6"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "b82ede23f4d2a6ea97fd6cdc4d1fa408"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.2.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "328fe3afd5e5c62e21880d1b2637d95d"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.2.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "999c50807dc80a4b94bd3b500d921d41"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "76e6d89e505360d0912bcfadc6f89cd3"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.3.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "eec85f5eafe82d19000b7190514abea4"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.3.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "c9838ace4ddb94ad818715cfdc27d33c"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.10.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "d49f5fa8c56ba55ed1b0abe740c8819c"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "4d162f0b51cbbe527145e244f64b10a3"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.10.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "f7a9727934fc21df120586ab73aedf92"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.10.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "e70ed86a6a4499919160d9213af5bc1e"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.11.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "6d6c46011760f35700228e1d688546df"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "10b082bfbbb0c4d18359e4443f26acc5"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.11.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "88c79f45e4d589e7448a628b9ee82fa2"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.11.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "e573b9eefd415ca24dcf4977ccf13f66"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.12.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "3d136105da660077100d02548c9b01f0"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "f228769df0719472c271755a68fe651b"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.12.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "20946ebf0fd15082d264abaf7b1a4d28"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.12.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "2f402daae15edc8bbb6a0941b5f06aaa"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "4011e0393e2734eefd573b540be55b27"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.13.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "1c874ac54eabdb989febe7c67bb6d4ed"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.13.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "fdbbe90c9db9eabfa3a65021784df663"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.8.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "7514de09c098ca86605da8cf917cf608"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "b4b441014eacb8a0d99e2415ee1f943a"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.9.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "141f0c24e3099add2121d2f80375fc4e"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "5298173cb1a1e8aecee6285d98ad1922"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.9.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "e71db6986bfd2176c832a536094edb67"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.9.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "63cfa04838bcbdf473b63486eecf50a8"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.13.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "132a9c3886e545458ac25245ed1f7a20"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.14.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "62b6ca1ae4c8deff150acd32136cbe20"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "404c1bf989ca27db3cc34f1696ecfd9e"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.14.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "9589cc45ff63fa6ab2db8c92bfafbeb0"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.14.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "71102bb3c022f0493a0d6fa507342a2f"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.15.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "e97aea5b4d2d96f357fe4e204dda82bb"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "cde3abdbee692989e0cd279a8a72b629"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.15.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "8f53ec9196ca5cd99e32919666a1fe16"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.15.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "4002b5235877b8e78e36caedac6bbcd7"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.16.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "4bf8cdc0d5cc9d54ccf1a582c5925a30"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "70aa1b56500d433b066ab96c92a16d10"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.16.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "a6e094e11d2b518b9f5d28ae284eacf4"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.16.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "3c53f98cd59516a3698fcb10ff5da242"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.17.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "39437905301396ce2a89def5b8a5835c"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "09134486e614a7658f822e1f8c000993"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.17.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "13358a72d287996e898e97d0a82191b8"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.17.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "fb1de77bcb54c88841387ea4e5519ff5"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "39c2f9f9d4a5f270d72bdeb7d8e39af1"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.18.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "b4fec253eaf7ab5797050d4f0f091a89"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.18.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "e8d823fde32a2cb8c032578213061fa6"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.18.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "74b9b6d08b1cf14bca58da4b00465d0a"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.19.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "6c0d83a13e75bf4091c6ffc1c1cd8b70"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "9fef31ea3bdaf9afdf6d326946523127"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.19.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "2a0a5a73b4a51d6aeebeedf03871265b"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.19.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "6e6df94c5cb1b873b85d044dcbee7863"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.20.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "c6f74e0402234b2d5729effc60e3b404"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "f9e88d39f06e1803e2d570f18fd566a3"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.20.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "4448c612104178bfc4bce175b2f53bbc"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.20.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "dc69b111c3b8783d3b8ee5e8d094cfb8"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.21.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "cd77a6d11e583ad62eb97c41206280f6"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "ae94ee347a7fe42e9f7fab4c66022eab"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.21.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "5d1c8e26b56facb3d140fec503467024"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.21.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "64c10cd7e300201cb474fef604d115de"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.22.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "6f0e22e63017909079d8c487d1e2aa06"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "3a63a6bd9fd3e620a923b00daaacda94"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.22.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "6fb871de54b8cb8854e0454d3763d3a8"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.22.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "81274a3085df7615b203902527eac3fc"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "9e80ad854023128b9362dae6a1305fe6"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.23.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "6adba822397c9110477a2cc4a867b7ac"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.23.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "a5815f0c44f09dfb8417c8617306ec68"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.23.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "801478a2b80c513e334b730ef13b93ea"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.24.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "c3dc26aea4f356e83d90120c5a7480b1"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "65c2e15dc90e687946ff3122e54a386f"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.24.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "e5e785fcd78acf5560222b97040efcfe"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.24.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "87edc205bf98a71362adf96539da5ffc"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.25.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "12e2a05264d93b3e4334ed3547247a4f"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "2a2ca8ed92ccd62c8ba29415b0269358"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.25.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "bbd8707c47b71b022006436971123d15"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.25.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "8563b7a3c93480de611a89da484b196f"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.26.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "1a8e0e1ecacc878636956a668b39ef0a"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "63add0eab7a37360ef2d0e7e36791704"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.26.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "8fc1ceb801f7273978467de4c0befe5d"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.26.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "943966f6c055878f4a64714701edff0d"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.27.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "41781f6affc6c7a2dea8c9ddfe0c9dee"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "d1d60b561c2a3cff1315b1f1a52bd268"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.27.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "4afe3b98f17876e87883d04c43b84fd8"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.27.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "51d5619d22f5739bc3e3047e216ec6f1"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "a2632684273225489d119cfea2617fef"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.28.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "f949a0acb869e445bcf9260784692358"
},
{
"dataPath": "params_shard_99.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.28.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "11e3eefaa95bb3fab9939339fbcdc2a4"
},
{
"dataPath": "params_shard_100.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.28.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "d28b682f817ff8ab2f6bc066b543ae54"
},
{
"dataPath": "params_shard_101.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.29.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "441150bec33097916bd3d93551d9193d"
},
{
"dataPath": "params_shard_102.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "059f81ba6bc1ada5003e0525e51c9f06"
},
{
"dataPath": "params_shard_103.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.29.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "9a01d32c1495e024f47180b105e820bb"
},
{
"dataPath": "params_shard_104.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.29.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "c1b04d3b9270ac3334a6aef23076ed22"
},
{
"dataPath": "params_shard_105.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.30.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "716c81a64e83d4bfc97e8ebaefbcfade"
},
{
"dataPath": "params_shard_106.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "6bd22ea88219cde6752c8d52b85d30af"
},
{
"dataPath": "params_shard_107.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.30.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "ab5428fc8ca60e328d296650e8447040"
},
{
"dataPath": "params_shard_108.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.30.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "cd0179c97418d36b94c3f02498ea2191"
},
{
"dataPath": "params_shard_109.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.31.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "b4737e0fcd2ac9dcc0ba11fb318508f8"
},
{
"dataPath": "params_shard_110.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "9f80c55aec4055dacbf3477e53522b27"
},
{
"dataPath": "params_shard_111.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.31.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "921a7b0ec4ceff95eeea346c8300b88b"
},
{
"dataPath": "params_shard_112.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.31.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "c88a2ea778cc4bc16b4ae8d201aec69c"
},
{
"dataPath": "params_shard_113.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.32.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "cdad35b791d807c0abec2d1292c98566"
},
{
"dataPath": "params_shard_114.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.32.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "57d4fd67e1e16211cd9565b44144ec16"
},
{
"dataPath": "params_shard_115.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.32.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "163bc0816ab8c6e16bcff1374aed44dd"
},
{
"dataPath": "params_shard_116.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.32.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "e5cbae97e09182d76bc3b4960fdb86e5"
},
{
"dataPath": "params_shard_117.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.33.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "825cdda9566c042b382a7037732e8af4"
},
{
"dataPath": "params_shard_118.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.33.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "be0416936595dc9be5f5ff8da3d5ba6e"
},
{
"dataPath": "params_shard_119.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.33.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "f9a88414f54cd0d0a3941900c00ee40f"
},
{
"dataPath": "params_shard_120.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.3.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "6a71cc5d4bd25e97e95312a5fdc6cde5"
},
{
"dataPath": "params_shard_121.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.4.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "2825585bc500b72961141e88d7115c3a"
},
{
"dataPath": "params_shard_122.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "308bfe0c13cc30a95aa884bee1b91bdf"
},
{
"dataPath": "params_shard_123.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.4.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "f4884e100e6fb2318385b28b2da06676"
},
{
"dataPath": "params_shard_124.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.4.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "6ee6fd83f82e5710c1a2d748b6a19019"
},
{
"dataPath": "params_shard_125.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.5.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "c3036403bdd07528b7904b86114553fd"
},
{
"dataPath": "params_shard_126.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "b9125949b981c8006d5785c1d57d62cf"
},
{
"dataPath": "params_shard_127.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.5.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "db998fe1b3cf8c6277674d880f6b1d15"
},
{
"dataPath": "params_shard_128.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.5.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "27ded8887fc4c61686d4e9947ae4b93c"
},
{
"dataPath": "params_shard_129.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.6.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "7b3bd02717e59dc1b128ca33942db611"
},
{
"dataPath": "params_shard_130.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "f4037fd2dbf1d1addcf6625b11be898f"
},
{
"dataPath": "params_shard_131.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.6.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "707ece2b1a03bb672859973d0911d34c"
},
{
"dataPath": "params_shard_132.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.6.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "9d129a08dd61015e1fdb9008330cf491"
},
{
"dataPath": "params_shard_133.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.7.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "f6249c47b4a35454ce37017a325c2056"
},
{
"dataPath": "params_shard_134.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "939ebc8b831f2ebbce74a928154bf2d8"
},
{
"dataPath": "params_shard_135.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.7.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "fb27da9d346255c09fc08a9c2ec37713"
},
{
"dataPath": "params_shard_136.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.7.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "675a5d0e9142704ac32354b45707ab2b"
},
{
"dataPath": "params_shard_137.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.8.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "60fa96ba49c70ad8faa4288fc3756ac5"
},
{
"dataPath": "params_shard_138.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.8.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "4c74709bbe7e51e3fd150f4e4416d492"
},
{
"dataPath": "params_shard_139.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.33.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "52764e4bc832b1c54487ae6d596d53ed"
},
{
"dataPath": "params_shard_140.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.34.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "e380e1d39737ffae41a5d404aba1f1ba"
},
{
"dataPath": "params_shard_141.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.34.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "8ed820d4d5bbd66f121024b74de2cc52"
},
{
"dataPath": "params_shard_142.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.34.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "d5d9f7a02c58f90255bd52c651ed4926"
},
{
"dataPath": "params_shard_143.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.34.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "3121607f77032e910848c0de0a7ab645"
},
{
"dataPath": "params_shard_144.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.35.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "442245eacd8edc0c5e4da3d50e8be824"
},
{
"dataPath": "params_shard_145.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.35.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "ac002b3b82094962fe62a84ef3e5d0f2"
},
{
"dataPath": "params_shard_146.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.35.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "8aa73b2f58776a2acd17b3482be0b95d"
},
{
"dataPath": "params_shard_147.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.35.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "aa0eeb8b428c6c0ea998f95da3b9668e"
},
{
"dataPath": "params_shard_148.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.36.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "5f857709bc20e15784bcaa70bc5916b5"
},
{
"dataPath": "params_shard_149.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.36.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "2e165d971d846e9c3dfd326551c4daa4"
},
{
"dataPath": "params_shard_150.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.36.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "718964ec893ba845af656d4773abd807"
},
{
"dataPath": "params_shard_151.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.36.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "5dd2ed4b68a50d6eb30c90a1fed802c9"
},
{
"dataPath": "params_shard_152.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.37.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "7152f4d5a2bcd89c3e6c9868912b3c46"
},
{
"dataPath": "params_shard_153.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.37.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "e78df3a7587b9391479f635082b0c17f"
},
{
"dataPath": "params_shard_154.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.37.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "2a6f0fe445f2d56ae871c9a851bbcda3"
},
{
"dataPath": "params_shard_155.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.37.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "eaf3fd0e1cea74515c96cd01c55c60e3"
},
{
"dataPath": "params_shard_156.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.38.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "7efafc5f1eedc8ababd0109f71e333e9"
},
{
"dataPath": "params_shard_157.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.38.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "73a799be6147a89809308b50d66d6fb8"
},
{
"dataPath": "params_shard_158.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.38.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "332cb325ef640506b51fb8f245f2511e"
},
{
"dataPath": "params_shard_159.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.38.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "9bed9c69e707ca617b30e5d48156365e"
},
{
"dataPath": "params_shard_160.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.39.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "ae32dbe8be43feaa415642a75f60134f"
},
{
"dataPath": "params_shard_161.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.39.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "10eaa9ae8a4afb8fc9a90f3d3f44d177"
},
{
"dataPath": "params_shard_162.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.39.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "ae687d08dbefb6787a9afdf4aaa67000"
},
{
"dataPath": "params_shard_163.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.39.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "297b415bb7ad16c12e1371deff2b4f6e"
},
{
"dataPath": "params_shard_164.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.40.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "95ecd210b6532527556fa10e7734eb4c"
},
{
"dataPath": "params_shard_165.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.40.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "57ed6ff0c0cbfa6de822df634c4582e3"
},
{
"dataPath": "params_shard_166.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.40.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "fdcc0c47419a28cf43325a14a0004847"
},
{
"dataPath": "params_shard_167.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.40.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "730690cab16ffc277aa6aaeaee2a0744"
},
{
"dataPath": "params_shard_168.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.41.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "dee3358f9c9ed63dd0a537eb42ebc228"
},
{
"dataPath": "params_shard_169.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.41.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "7a2b83346648f90786d3e2ea2fb104f3"
},
{
"dataPath": "params_shard_170.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.41.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "1ebe2620c8a7a67b06b7583e39f4aabf"
},
{
"dataPath": "params_shard_171.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.41.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "83525533fbd46f4f61cc17a1296a04ff"
},
{
"dataPath": "params_shard_172.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.42.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "82aa7da633a1a41359e1152ff4570c21"
},
{
"dataPath": "params_shard_173.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.42.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "d6618d9501e4bec4c5e51a57dbc1f358"
},
{
"dataPath": "params_shard_174.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.42.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "565f74574e3ff134b9ce5521aca49d15"
},
{
"dataPath": "params_shard_175.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.42.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "2c0e0d4b5e33c62a4c053f43e0dea196"
},
{
"dataPath": "params_shard_176.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.43.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "109b23ffd6dd1d977ecf41202bfbf22a"
},
{
"dataPath": "params_shard_177.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.43.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "08eac780f7b7408532914969920292c1"
},
{
"dataPath": "params_shard_178.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.43.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "39b8408022022fe309ae75b1bb02def0"
},
{
"dataPath": "params_shard_179.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.43.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "672f9cf4d6a2f02ba1cd37b65caf90a0"
},
{
"dataPath": "params_shard_180.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.44.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "defee555836005fdd6a990286f63d65e"
},
{
"dataPath": "params_shard_181.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.44.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "c934f8450ea1f5495f1d6345d6526a1e"
},
{
"dataPath": "params_shard_182.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.44.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "e83d93d3e0de74449695f60aa75a4312"
},
{
"dataPath": "params_shard_183.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.44.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "0996e4bda5a5d2e48183da4f04473a62"
},
{
"dataPath": "params_shard_184.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.45.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "a331a0e0f5824e2615e6629738c4efd8"
},
{
"dataPath": "params_shard_185.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.45.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "874f5baf9f26c1f8c2766066a2ccc47a"
},
{
"dataPath": "params_shard_186.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.45.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "b1b2709185519b3296bfe8ba650f80ca"
},
{
"dataPath": "params_shard_187.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.45.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "1d5936b64a7db3825f38f208fdf97320"
},
{
"dataPath": "params_shard_188.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.46.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "6fcf952a560418f3e62c863ae31b3e31"
},
{
"dataPath": "params_shard_189.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.46.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "adb4c5bd811b28a32868c38c1c86a6c2"
},
{
"dataPath": "params_shard_190.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.46.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "230a51076e69b467045292d32d01ee94"
},
{
"dataPath": "params_shard_191.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.46.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "a7f3a5bbeff3d08f21a6bff44017dac7"
},
{
"dataPath": "params_shard_192.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.47.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "705f81b4af9462678ed192342a8f4f70"
},
{
"dataPath": "params_shard_193.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.47.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "c31458808b20c6a4b46ff12257e61d0d"
},
{
"dataPath": "params_shard_194.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.47.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "213d1548d50a1a77f743469d69f4efba"
},
{
"dataPath": "params_shard_195.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.47.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "1d04055a1a81c5983a17db4bec5699e1"
},
{
"dataPath": "params_shard_196.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.48.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "8f0d5b17bab9bd4f469155e5394d0f68"
},
{
"dataPath": "params_shard_197.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.48.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "cc502a0ac49cea989e03316ce9cee147"
},
{
"dataPath": "params_shard_198.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.48.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "1bf554672eceda66be78219d32601dc6"
},
{
"dataPath": "params_shard_199.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.48.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "e2361f1f1472266d86bea97fe0e77bc1"
},
{
"dataPath": "params_shard_200.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.49.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "9834e34e4ab5150d8d738bfd5bfd50e3"
},
{
"dataPath": "params_shard_201.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.49.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "0af0e968b959ad1eda4feb5faf98c0be"
},
{
"dataPath": "params_shard_202.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.49.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "6cc3b625caccaae7fdb563d830322d47"
},
{
"dataPath": "params_shard_203.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.49.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "88cb9585b8e843812687f1afbb70f26b"
},
{
"dataPath": "params_shard_204.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.50.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "5b1d130fec510aa9720270206d2dd0f9"
},
{
"dataPath": "params_shard_205.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.50.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "12b63caa0e2ba2b2d553e5a38a6be5b2"
},
{
"dataPath": "params_shard_206.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.50.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "889bcbc7e466f21f2518a2685d7e6a9c"
},
{
"dataPath": "params_shard_207.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.50.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "22b707482aadcb53d690bc65fbb10eb4"
},
{
"dataPath": "params_shard_208.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.51.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "12843067b616cf4e3970aca0842e4dd6"
},
{
"dataPath": "params_shard_209.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.51.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "d8b1e414834e044cd00084119a426fa3"
},
{
"dataPath": "params_shard_210.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.51.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "f5cc5fd5388d5eddb2a864990297e8d6"
},
{
"dataPath": "params_shard_211.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.51.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "2210d2aa0a52cfce2f3696626818df3b"
},
{
"dataPath": "params_shard_212.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.52.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "fd75f0f1bfc176c66ac3f04fd0034f0a"
},
{
"dataPath": "params_shard_213.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.52.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "e2b4dcca3fa43ba571394fdd87119ce2"
},
{
"dataPath": "params_shard_214.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.52.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "4323409a20965888fab8e3f50efba94f"
},
{
"dataPath": "params_shard_215.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.52.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "5459f23fd4ecfa482bd91acb5f682bc8"
},
{
"dataPath": "params_shard_216.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.53.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "849e4f0021a6530f3c3a4dfb29f7588e"
},
{
"dataPath": "params_shard_217.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.53.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "25192edadc9634437f6640de90b7bace"
},
{
"dataPath": "params_shard_218.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.53.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "97b6bedb764a9f1ff3f4eadda55fdb0b"
},
{
"dataPath": "params_shard_219.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.53.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "6b0d38c78dbb341353589086d5812e07"
},
{
"dataPath": "params_shard_220.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.54.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "38d9d25a1c97ae79850575b77851e887"
},
{
"dataPath": "params_shard_221.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.54.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "f21a3d35dfeb9187216a5c74ccfa802a"
},
{
"dataPath": "params_shard_222.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.54.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "3a8baf73d5cbaa7f618b8abc29126783"
},
{
"dataPath": "params_shard_223.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.54.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "f1f392901b9b63678bc211d7342a1685"
},
{
"dataPath": "params_shard_224.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.55.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "37d567ef245bf3b26bd28518cb364981"
},
{
"dataPath": "params_shard_225.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.55.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "1d3a563140b9f604838f9b0f0f23817f"
},
{
"dataPath": "params_shard_226.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.55.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "278055e86aaedc433292d40e7c892775"
},
{
"dataPath": "params_shard_227.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.55.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "3012d5bfcb7d4a8ef35cdaba8416cecf"
},
{
"dataPath": "params_shard_228.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.56.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "64d3def0fe2c902a3403c915a0aa22b9"
},
{
"dataPath": "params_shard_229.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.56.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "bb4d21b83a198e3dd2cca4a83c770f35"
},
{
"dataPath": "params_shard_230.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.56.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "b40b745dca1dbb715fa4712dee4cae2c"
},
{
"dataPath": "params_shard_231.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.56.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "9f25b87c30a7b6c6da2f1ba701f74158"
},
{
"dataPath": "params_shard_232.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.57.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "f52ec84202a4af1552abbc2a2c4c43e9"
},
{
"dataPath": "params_shard_233.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.57.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "3f08d323ce5eda6a0b6dffa79704fdbf"
},
{
"dataPath": "params_shard_234.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.57.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "99f95c581ce2fb80db1ba320da6626cf"
},
{
"dataPath": "params_shard_235.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.57.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "1c0c51fa0622c9a8b767803e6138670a"
},
{
"dataPath": "params_shard_236.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.58.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "66041ad522f62f0ebc291428bc1e1ed0"
},
{
"dataPath": "params_shard_237.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.58.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "71d85d114a3b69d3396127d1e1a6ecef"
},
{
"dataPath": "params_shard_238.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.58.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "ae0ccbb166525f3e198236f1d20e2d42"
},
{
"dataPath": "params_shard_239.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.58.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "13193bf74eda3809edde4d3ac24a30af"
},
{
"dataPath": "params_shard_240.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.59.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "e0eda2cbc59f637268feec1448f37d6c"
},
{
"dataPath": "params_shard_241.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.59.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "f346dd85f6d4a76b20324be8d105fd24"
},
{
"dataPath": "params_shard_242.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.59.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "3fd86cddc593398669a1e6f6f4251b81"
},
{
"dataPath": "params_shard_243.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.59.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "01c7e4056be831ac724b8f0bba684597"
},
{
"dataPath": "params_shard_244.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.60.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "c7f74e09f8d6d16cdd3951ed5d15ca69"
},
{
"dataPath": "params_shard_245.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.60.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "ffeb337601b74df8fd6aa131b3efefb7"
},
{
"dataPath": "params_shard_246.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.60.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "78a03ac1bd5c1323c3795c469c7e0b37"
},
{
"dataPath": "params_shard_247.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.60.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "e4264ecee8676e6af939d1659f189a86"
},
{
"dataPath": "params_shard_248.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.61.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "b6a66ff49bcb0683b07909b91c313a91"
},
{
"dataPath": "params_shard_249.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.61.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "f7c28d00c344e8ea440c85bb1ee137f0"
},
{
"dataPath": "params_shard_250.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.61.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "6de8423e927678593d36ce757e4dd9f1"
},
{
"dataPath": "params_shard_251.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.61.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "571027f20e9a1ff7eac09911c302a620"
},
{
"dataPath": "params_shard_252.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.62.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "1c164336ebf22a8743a6e1d9e6c3b90d"
},
{
"dataPath": "params_shard_253.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.62.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "5c8d10e216c8156242314ce478f13e74"
},
{
"dataPath": "params_shard_254.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.62.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "181e4221e752d573c6101b07886fe62f"
},
{
"dataPath": "params_shard_255.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.62.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "749708b1dcd410c5ba7079d93b9bab65"
},
{
"dataPath": "params_shard_256.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.63.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "6f65ef8b4cab6e9b251d21f465dad57b"
},
{
"dataPath": "params_shard_257.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.63.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "cd3c39d63c60dd56f87e6f29f30e6b7d"
},
{
"dataPath": "params_shard_258.bin",
"format": "raw-shard",
"nbytes": 2238464,
"records": [
{
"name": "model.layers.63.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 0
},
{
"name": "model.layers.63.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 10240
},
{
"name": "model.norm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 20480
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 30720
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 40960
},
{
"name": "model.layers.0.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 51200
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 65536
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 75776
},
{
"name": "model.layers.1.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 86016
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 100352
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 110592
},
{
"name": "model.layers.2.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 120832
},
{
"name": "model.layers.3.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 135168
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 149504
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 159744
},
{
"name": "model.layers.10.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 169984
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 184320
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 194560
},
{
"name": "model.layers.11.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 204800
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 219136
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 229376
},
{
"name": "model.layers.12.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 239616
},
{
"name": "model.layers.13.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 253952
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 268288
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 278528
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 288768
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 299008
},
{
"name": "model.layers.9.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 309248
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 323584
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 333824
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 344064
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 354304
},
{
"name": "model.layers.14.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 364544
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 378880
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 389120
},
{
"name": "model.layers.15.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 399360
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 413696
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 423936
},
{
"name": "model.layers.16.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 434176
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 448512
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 458752
},
{
"name": "model.layers.17.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 468992
},
{
"name": "model.layers.18.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 483328
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 497664
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 507904
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 518144
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 528384
},
{
"name": "model.layers.19.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 538624
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 552960
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 563200
},
{
"name": "model.layers.20.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 573440
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 587776
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 598016
},
{
"name": "model.layers.21.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 608256
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 622592
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 632832
},
{
"name": "model.layers.22.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 643072
},
{
"name": "model.layers.23.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 657408
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 671744
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 681984
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 692224
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 702464
},
{
"name": "model.layers.24.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 712704
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 727040
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 737280
},
{
"name": "model.layers.25.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 747520
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 761856
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 772096
},
{
"name": "model.layers.26.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 782336
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 796672
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 806912
},
{
"name": "model.layers.27.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 817152
},
{
"name": "model.layers.28.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 831488
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 845824
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 856064
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 866304
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 876544
},
{
"name": "model.layers.29.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 886784
},
{
"name": "model.layers.30.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 901120
},
{
"name": "model.layers.30.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 911360
},
{
"name": "model.layers.30.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 921600
},
{
"name": "model.layers.31.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 935936
},
{
"name": "model.layers.31.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 946176
},
{
"name": "model.layers.31.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 956416
},
{
"name": "model.layers.32.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 970752
},
{
"name": "model.layers.32.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 980992
},
{
"name": "model.layers.32.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 991232
},
{
"name": "model.layers.33.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1005568
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1019904
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1030144
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1040384
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1050624
},
{
"name": "model.layers.4.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1060864
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1075200
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1085440
},
{
"name": "model.layers.5.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1095680
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1110016
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1120256
},
{
"name": "model.layers.6.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1130496
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1144832
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1155072
},
{
"name": "model.layers.7.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1165312
},
{
"name": "model.layers.8.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1179648
},
{
"name": "model.layers.33.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1193984
},
{
"name": "model.layers.33.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1204224
},
{
"name": "model.layers.34.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1214464
},
{
"name": "model.layers.34.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1224704
},
{
"name": "model.layers.34.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1234944
},
{
"name": "model.layers.35.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1249280
},
{
"name": "model.layers.35.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1259520
},
{
"name": "model.layers.35.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1269760
},
{
"name": "model.layers.36.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1284096
},
{
"name": "model.layers.36.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1294336
},
{
"name": "model.layers.36.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1304576
},
{
"name": "model.layers.37.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1318912
},
{
"name": "model.layers.37.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1329152
},
{
"name": "model.layers.37.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1339392
},
{
"name": "model.layers.38.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1353728
},
{
"name": "model.layers.38.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1368064
},
{
"name": "model.layers.38.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1378304
},
{
"name": "model.layers.39.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1388544
},
{
"name": "model.layers.39.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1398784
},
{
"name": "model.layers.39.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1409024
},
{
"name": "model.layers.40.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1423360
},
{
"name": "model.layers.40.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1433600
},
{
"name": "model.layers.40.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1443840
},
{
"name": "model.layers.41.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1458176
},
{
"name": "model.layers.41.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1468416
},
{
"name": "model.layers.41.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1478656
},
{
"name": "model.layers.42.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1492992
},
{
"name": "model.layers.42.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1503232
},
{
"name": "model.layers.42.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1513472
},
{
"name": "model.layers.43.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1527808
},
{
"name": "model.layers.43.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1542144
},
{
"name": "model.layers.43.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1552384
},
{
"name": "model.layers.44.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1562624
},
{
"name": "model.layers.44.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1572864
},
{
"name": "model.layers.44.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1583104
},
{
"name": "model.layers.45.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1597440
},
{
"name": "model.layers.45.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1607680
},
{
"name": "model.layers.45.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1617920
},
{
"name": "model.layers.46.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1632256
},
{
"name": "model.layers.46.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1642496
},
{
"name": "model.layers.46.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1652736
},
{
"name": "model.layers.47.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1667072
},
{
"name": "model.layers.47.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1677312
},
{
"name": "model.layers.47.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1687552
},
{
"name": "model.layers.48.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1701888
},
{
"name": "model.layers.48.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1716224
},
{
"name": "model.layers.48.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1726464
},
{
"name": "model.layers.49.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1736704
},
{
"name": "model.layers.49.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1746944
},
{
"name": "model.layers.49.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1757184
},
{
"name": "model.layers.50.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1771520
},
{
"name": "model.layers.50.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1781760
},
{
"name": "model.layers.50.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1792000
},
{
"name": "model.layers.51.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1806336
},
{
"name": "model.layers.51.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1816576
},
{
"name": "model.layers.51.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1826816
},
{
"name": "model.layers.52.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1841152
},
{
"name": "model.layers.52.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1851392
},
{
"name": "model.layers.52.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1861632
},
{
"name": "model.layers.53.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1875968
},
{
"name": "model.layers.53.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1890304
},
{
"name": "model.layers.53.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1900544
},
{
"name": "model.layers.54.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1910784
},
{
"name": "model.layers.54.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1921024
},
{
"name": "model.layers.54.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1931264
},
{
"name": "model.layers.55.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1945600
},
{
"name": "model.layers.55.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1955840
},
{
"name": "model.layers.55.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1966080
},
{
"name": "model.layers.56.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1980416
},
{
"name": "model.layers.56.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1990656
},
{
"name": "model.layers.56.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 2000896
},
{
"name": "model.layers.57.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 2015232
},
{
"name": "model.layers.57.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 2025472
},
{
"name": "model.layers.57.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 2035712
},
{
"name": "model.layers.58.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 2050048
},
{
"name": "model.layers.58.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 2064384
},
{
"name": "model.layers.58.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 2074624
},
{
"name": "model.layers.59.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 2084864
},
{
"name": "model.layers.59.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 2095104
},
{
"name": "model.layers.59.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 2105344
},
{
"name": "model.layers.60.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 2119680
},
{
"name": "model.layers.60.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 2129920
},
{
"name": "model.layers.60.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 2140160
},
{
"name": "model.layers.61.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 2154496
},
{
"name": "model.layers.61.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 2164736
},
{
"name": "model.layers.61.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 2174976
},
{
"name": "model.layers.62.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 2189312
},
{
"name": "model.layers.62.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 2199552
},
{
"name": "model.layers.62.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 2209792
},
{
"name": "model.layers.63.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 2224128
}
],
"md5sum": "218942cea7187623205074da46cfaefa"
}
]
}