diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,10103 @@ +{ + "metadata": { + "ParamSize": 709, + "ParamBytes": 20481200128.0, + "BitsPerParam": 3.644827456917361 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 389283840, + "records": [ + { + "name": "lm_head.q_weight", + "shape": [ + 152064, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 389283840, + "byteOffset": 0 + } + ], + "md5sum": "43ae1d376a54f35c911edda54cef79a4" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 48660480, + "records": [ + { + "name": "lm_head.q_scale", + "shape": [ + 152064, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 48660480, + "byteOffset": 0 + } + ], + "md5sum": "18399911cea635739970b0dff0dd2fd5" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.63.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "e2926571ac87993849e94910079d0970" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.63.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "77ebc34fb76e2f902b705fe6114a4006" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 389283840, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 152064, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 389283840, + "byteOffset": 0 + } + ], + "md5sum": "180626cb846ddefda15204b7277f1456" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 48660480, + "records": [ + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 152064, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 48660480, + "byteOffset": 0 + } + ], + "md5sum": "7e3237e4dafaf9601c4f89164fedb9f3" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "c12172143c0f2369a901003f73caf9f8" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 26583040, + "records": [ + { + "name": "model.layers.63.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 0 + }, + { + "name": "model.layers.63.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 10240 + }, + { + "name": "model.layers.63.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 8857600 + }, + { + "name": "model.layers.63.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 26552320 + }, + { + "name": "model.norm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 26562560 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 26572800 + } + ], + "md5sum": "f4fb2721c06fa12b173e5d700a28740d" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "022e22163ffcff00c6ef056920ec250c" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.0.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "37ce87b61dac973fd65698f682cc49b8" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 28860416, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 0 + }, + { + "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 8847360 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 26542080 + }, + { + "name": "model.layers.0.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 26552320 + }, + { + "name": "model.layers.0.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 26566656 + } + ], + "md5sum": "09bd6b9fc2d25f9a4ddd2c0f6709e35a" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "c6144ebc9f7b389b025292a7006dadf9" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "b573a929b6e98d9f268744184c2eb3f2" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "81ca0f79dafd93e8cbcc4e98699a2598" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.1.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "b4d746940cd6a5da7ce32d3cc7477013" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.1.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.1.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "90cd7afca8bc4e18c19fc825435a37f5" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "ed9ced5a8e1a2e4b0d7cf46814f8aa5c" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "d81310e333fd81405dfcfd1aaa77d7b5" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "d9c72a861a5087ee39eba2f8e2b125be" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.2.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "1eecddbb25d017df6fa20c0d7470444d" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.2.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.2.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "ad99981c59d80228340d60baf3a28c5e" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "87ddbf2d46518d234ee6c29324a0c7d1" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.3.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "149b824da5ce6026405c84a47e2b4673" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 32454656, + "records": [ + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 14745600 + }, + { + "name": "model.layers.3.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 32440320 + } + ], + "md5sum": "d2fbda82c48c3bb04cc4f482749c7114" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "7ef9c4f15a269ddbe7c9ff93fb89b5b0" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "c94efa44ebf36b9a621121debcbfe757" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "b6b0f64adbd395db592ab9efad379750" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.10.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "5baaabb0a5c84c001b938051bea18016" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 28215296, + "records": [ + { + "name": "model.layers.3.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 0 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 2293760 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 15400960 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 17039360 + }, + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 17049600 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25896960 + }, + { + "name": "model.layers.10.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 25907200 + }, + { + "name": "model.layers.10.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 25921536 + } + ], + "md5sum": "dcddfff0cdf0f46f901db3198b97646c" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "4aecf8ee3fe372eb4e8e0b8c7def7b82" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "769d3fe5161f368742b8b165145e5dd7" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "67e6af049e5ddb1235b2106f862dcdd4" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.11.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "522a224db4dff7f9bcdfe7b6692336e2" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.11.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.11.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "43a1961f6b478f46e7acaaea74bc6c2f" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "9f02f6a2f77758da731e6fd0ef09a79d" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "b48ffeb66c3b989625ada863a081707e" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "d6519a98a4902b5c215dc6b9d80aad5d" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.12.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "07574a1fd7d1c3daab7ae15c03255d25" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.12.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.12.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "45883d0ad30aa9538a5131dbf5a071a7" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "b8fee51c5583f6e7be543e6883f557a3" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.13.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "dec58a6b742b5f911475dc84c82f55ea" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 32454656, + "records": [ + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 14745600 + }, + { + "name": "model.layers.13.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 32440320 + } + ], + "md5sum": "06ad58abb69a4aa75441f62afba6120e" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "74fb597728976050a53ca4cb2d90b6dc" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "78bc24e6ad3824c1496358c303a7d4c6" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "b1f81490885b4f1b59bd909d611c65a1" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "ecc0cf635b514d70ec4d68d57ac9c088" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 25917440, + "records": [ + { + "name": "model.layers.13.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 0 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 2293760 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 15400960 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 17039360 + }, + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 17049600 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25896960 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25907200 + } + ], + "md5sum": "47d6e77363b1195740a0b5f8a93ac3a6" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "c695a40c47054ea29bc0f4f022af63a7" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.9.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "73505175a7d4bd66b57388f9e8e324c5" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 28860416, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 0 + }, + { + "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 8847360 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 26542080 + }, + { + "name": "model.layers.9.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 26552320 + }, + { + "name": "model.layers.9.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 26566656 + } + ], + "md5sum": "325ee21cc87076fbb05d9c315c7926de" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "8c72878606f0fd8da92743a8d762fbc4" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "cf8e47f5b0d62f5e6a017264d9ae6c37" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "d1b6a7206d9305802d6efc2ea86203ed" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "6d806795affc4b1dea18623763c8dd02" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.14.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "0a507342fe0c053f23f2053f20b31366" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 32495616, + "records": [ + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23613440 + }, + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 23623680 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 32471040 + }, + { + "name": "model.layers.14.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 32481280 + } + ], + "md5sum": "ea9bea1fcb6e80123ac61c5eafdfc3a3" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "83e69ab63f11dd2be16a57e5d88aa133" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "b7e664e91748eac7fc723ba75044abdb" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "98c8d871e57bc0f72f02bc092139e540" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.15.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "2eadf82db6dc8d359b12f9eaec894a6d" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 28215296, + "records": [ + { + "name": "model.layers.14.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 0 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 2293760 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 15400960 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 17039360 + }, + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 17049600 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25896960 + }, + { + "name": "model.layers.15.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 25907200 + }, + { + "name": "model.layers.15.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 25921536 + } + ], + "md5sum": "2b6b0ed288c1ecf225a721088c245cbb" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "b37a04a4c242bac78c696e5047d2672e" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "eb7891cfbba31c0c8cb8f86d81d98a7b" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "81ff41c849e70a7475bce139a88a373a" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.16.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "064e0155c93869aa0285329b3abf4b0d" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.16.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.16.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "b4ec06b9046d408a82af52e566803946" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "b0eb1bb9446cd84ae27ab53ab3a4e288" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "7136915b596bc8005064121524e28798" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "4a6aa02ed4d1ffe431d6c7d0406f042c" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.17.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "676e0ac8a1d16c876b367b49d2121c7a" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.17.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.17.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "69ae835c32f414831882fa46aefde58e" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "9e7999cc713442c24510c568f54deaf8" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.18.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "a19f56203ce31ca04a04589ca8f9b1fa" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 32454656, + "records": [ + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 14745600 + }, + { + "name": "model.layers.18.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 32440320 + } + ], + "md5sum": "2f202c43d37db16bd20fb743e2e79119" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "23805ccd32779f8ca062dd71eb9dede4" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "edb3ee35d8acfa4685a8bd33117c3437" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 25917440, + "records": [ + { + "name": "model.layers.18.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 0 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 2293760 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 15400960 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 17039360 + }, + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 17049600 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25896960 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25907200 + } + ], + "md5sum": "70a393c2114087d2029c577acfa439b6" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "6b45e6c37c4f7adbdadeced0e8810870" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.19.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "55f26c1307b9b0678d82008ec9607ec2" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 28860416, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 0 + }, + { + "name": "model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 8847360 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 26542080 + }, + { + "name": "model.layers.19.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 26552320 + }, + { + "name": "model.layers.19.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 26566656 + } + ], + "md5sum": "35cc0298c4090eb5edea4c72ee087bd9" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "af5928bd111d382c9b0cc7ba7b0f6a2a" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "20bcc2f1da58c6430953a566c286bba2" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "8888eb7188abee2060758f9642ed608a" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.20.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "712c3c0524c20d4e875b3854001d15de" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.20.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.20.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "4d0f6bb63ba85d3339f9ac1a27d3e842" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "c6e6a58fd33408da2bbb6a4021931193" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "d0b02323062aa96840a6affe4a1a5f33" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "2b95fca69140ce2bb025291c7f723c70" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.21.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "d271ad7545673e1b329bde7c4b03e0f1" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.21.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.21.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "9f3919fdab0bd8c5c55e50e344c2c31f" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "4a753b502d38742a954c0a83c20c4708" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "2f4bb98a6a2fa42e9e086b2e38792a18" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "903825d5ec841f9fb55a46eb7383869f" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.22.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "7021c853e0213ef1ef1504226fb1481f" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.22.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.22.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "bb1b100752bd5516c5f6872532229554" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "d2395a1a0f04fb1fbdbaabfc2ccf5e09" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.23.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "8047a30469e64c35dc8a8611a80a47c5" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 32454656, + "records": [ + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 14745600 + }, + { + "name": "model.layers.23.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 32440320 + } + ], + "md5sum": "dd6912b33e6e4d2b1679b56d1fd51362" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "e84c07bf24a6a026f4228751b94e21e7" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "ca052847498fd4f2817623c1ba3d7218" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 25917440, + "records": [ + { + "name": "model.layers.23.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 0 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 2293760 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 15400960 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 17039360 + }, + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 17049600 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25896960 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25907200 + } + ], + "md5sum": "9a4e185077e298eb0ceabf4460887c20" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "f3b6b5349854abb58bf2b816c64c9110" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.24.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "f474e2b9e5e845f780e26730ed47c741" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 28860416, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 0 + }, + { + "name": "model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 8847360 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 26542080 + }, + { + "name": "model.layers.24.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 26552320 + }, + { + "name": "model.layers.24.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 26566656 + } + ], + "md5sum": "0dc9402492f8a5e2b3c9c201dea2718a" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "306a5268c165bf3b3d8e47edb963d023" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "b6d0b2c5f8b00f2e4eef678cfcded519" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "cc5a562e7a92acfa5e7919500e17abb5" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.25.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "afadf59f770301ed48e64e799dc272af" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.25.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.25.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "1aef83238f58d644017f4e5a1f2bdb41" + }, + { + "dataPath": "params_shard_109.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "170797a6a48f76c36fec44f126012ea5" + }, + { + "dataPath": "params_shard_110.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "5841ff11f9c695d387476d3a35e47a84" + }, + { + "dataPath": "params_shard_111.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "3b75d692b1246cbd56c757babcc9e513" + }, + { + "dataPath": "params_shard_112.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.26.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "ad0fe4327dea0de11221a6a571ff17c4" + }, + { + "dataPath": "params_shard_113.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.26.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.26.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.26.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "f6581ddc631aa2480ca420b72c7081fb" + }, + { + "dataPath": "params_shard_114.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "c1ddba4f7c00db564c412ef90e3f5010" + }, + { + "dataPath": "params_shard_115.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "e18ee43865a38d82c62455837c944141" + }, + { + "dataPath": "params_shard_116.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "9d8863977982b2e4389dc6b0b21beb8c" + }, + { + "dataPath": "params_shard_117.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.27.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "5b31562432a6e9f8e785b455ee4db866" + }, + { + "dataPath": "params_shard_118.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.26.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.27.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.27.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.27.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "def9350444acc06c5575717bf5f76deb" + }, + { + "dataPath": "params_shard_119.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "39da6aa29f08b0952cb6e4ea43449fa3" + }, + { + "dataPath": "params_shard_120.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.28.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "74c8dc57444dcbba26c1c80dc26d94a1" + }, + { + "dataPath": "params_shard_121.bin", + "format": "raw-shard", + "nbytes": 32454656, + "records": [ + { + "name": "model.layers.27.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.28.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 14745600 + }, + { + "name": "model.layers.28.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 32440320 + } + ], + "md5sum": "2dbad4ce85a0524ec811aa54a8a81d3f" + }, + { + "dataPath": "params_shard_122.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "22011d067a319aa56b24721f0c7f75f4" + }, + { + "dataPath": "params_shard_123.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "a25e583bf9e10c6fe923b04bc49de98d" + }, + { + "dataPath": "params_shard_124.bin", + "format": "raw-shard", + "nbytes": 25917440, + "records": [ + { + "name": "model.layers.28.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 0 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 2293760 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 15400960 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 17039360 + }, + { + "name": "model.layers.28.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 17049600 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25896960 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25907200 + } + ], + "md5sum": "0180e33453342378fdb54201a6bba7f8" + }, + { + "dataPath": "params_shard_125.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "d4cc5fadf2621aec8bc1dcc5855953d4" + }, + { + "dataPath": "params_shard_126.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.29.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "d527dffbd2d9c8d5649c8c940bd52764" + }, + { + "dataPath": "params_shard_127.bin", + "format": "raw-shard", + "nbytes": 28860416, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 0 + }, + { + "name": "model.layers.29.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 8847360 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 26542080 + }, + { + "name": "model.layers.29.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 26552320 + }, + { + "name": "model.layers.29.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 26566656 + } + ], + "md5sum": "c79e508174357bb877bb09162b98e778" + }, + { + "dataPath": "params_shard_128.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.30.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "e38585eae8a3e7eea7dc7fa1feb88c02" + }, + { + "dataPath": "params_shard_129.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "6965b695b74476246319172becdf5b4f" + }, + { + "dataPath": "params_shard_130.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "d3c751feb02f07853dbbe8f0c5a1b194" + }, + { + "dataPath": "params_shard_131.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.30.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "1bd8e2042a681e5843effaded9f95913" + }, + { + "dataPath": "params_shard_132.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.29.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.30.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.30.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.30.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "0d7ba70a751cac12c23364af73fcdf36" + }, + { + "dataPath": "params_shard_133.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "9871af1e076dbd9ea9561ad20a59d936" + }, + { + "dataPath": "params_shard_134.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "7608c39b95dadf2d8e2b186b5fedb3eb" + }, + { + "dataPath": "params_shard_135.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "861ed48d67a9532f4cc9d8c54237a643" + }, + { + "dataPath": "params_shard_136.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.31.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "7b17eb5e490ccf9b9bec71e836828efe" + }, + { + "dataPath": "params_shard_137.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.30.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.31.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.31.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.31.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "71ade53623e16cee28f3377f7a32e952" + }, + { + "dataPath": "params_shard_138.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.32.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "3f535594b399088a8d17ae66b1d57f3e" + }, + { + "dataPath": "params_shard_139.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.32.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "b7a26afe8e300c6fae2258be89887f5a" + }, + { + "dataPath": "params_shard_140.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.32.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "3daea97e4e46d1d2fc7565e5096fb02c" + }, + { + "dataPath": "params_shard_141.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.32.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "50e402b70309161048a1eef32a55ff98" + }, + { + "dataPath": "params_shard_142.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.31.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.32.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.32.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.32.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.32.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.32.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "fe5bd3b20567e7d43a40cd7131aeda3d" + }, + { + "dataPath": "params_shard_143.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.33.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "29f693d1fcd2e9149f44ebbb705dc6ae" + }, + { + "dataPath": "params_shard_144.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.33.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "e2cc366a063a1debca3ad24ef069abf6" + }, + { + "dataPath": "params_shard_145.bin", + "format": "raw-shard", + "nbytes": 32454656, + "records": [ + { + "name": "model.layers.32.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.32.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.33.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 14745600 + }, + { + "name": "model.layers.33.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 32440320 + } + ], + "md5sum": "cfb28f40aeab465bc7e436a5af67b7f9" + }, + { + "dataPath": "params_shard_146.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "34fc4d6e2c5411229ba715f1a7b8a93e" + }, + { + "dataPath": "params_shard_147.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "d2f2fcd544aea0c3fa8e7c1174b4afc1" + }, + { + "dataPath": "params_shard_148.bin", + "format": "raw-shard", + "nbytes": 25917440, + "records": [ + { + "name": "model.layers.33.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 0 + }, + { + "name": "model.layers.33.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 2293760 + }, + { + "name": "model.layers.33.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 15400960 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 17039360 + }, + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 17049600 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25896960 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25907200 + } + ], + "md5sum": "81b9133ea1eb1d1a725af952d8dd2bdb" + }, + { + "dataPath": "params_shard_149.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "f32015546c92e19536de712e960498ac" + }, + { + "dataPath": "params_shard_150.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.4.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "f939f57b179a0705877f5c5b573a1679" + }, + { + "dataPath": "params_shard_151.bin", + "format": "raw-shard", + "nbytes": 28860416, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 0 + }, + { + "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 8847360 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 26542080 + }, + { + "name": "model.layers.4.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 26552320 + }, + { + "name": "model.layers.4.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 26566656 + } + ], + "md5sum": "3c3ad72ee7f13c57c3d645d899f3fa20" + }, + { + "dataPath": "params_shard_152.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "2d5acea3991ada0415ab5f664664813d" + }, + { + "dataPath": "params_shard_153.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "b15da06c5e7606942b3972e47a98daac" + }, + { + "dataPath": "params_shard_154.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "f44ce76cfebdb30795cbbb5c8248a84c" + }, + { + "dataPath": "params_shard_155.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.5.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "41bbf54361f69b1762b42ecaf88167d0" + }, + { + "dataPath": "params_shard_156.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.5.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.5.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "aa6ceae9b58ffcd85ac6573e8f280921" + }, + { + "dataPath": "params_shard_157.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "c78f860df002d60b97c54adc8b6c4312" + }, + { + "dataPath": "params_shard_158.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "3962120234bc9958ea2e0f606f971fa9" + }, + { + "dataPath": "params_shard_159.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "a38115ab0003d89ba798724acfa596c8" + }, + { + "dataPath": "params_shard_160.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.6.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "55347a72129e9b1438d25dc97e22a2bf" + }, + { + "dataPath": "params_shard_161.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.6.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.6.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "36de9ed4cfe9c65cb3352119b5b92b96" + }, + { + "dataPath": "params_shard_162.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "0f35b7a7a5815058f8603314c93481fd" + }, + { + "dataPath": "params_shard_163.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "ae76f78bf9ef17f8b276363fab4c5d8c" + }, + { + "dataPath": "params_shard_164.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "5ed7cd719ba3eb3937da8d4b8d73ff40" + }, + { + "dataPath": "params_shard_165.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.7.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "38a94dc96e08bf41ac1f57a1f3becd03" + }, + { + "dataPath": "params_shard_166.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.7.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.7.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "b49a9dd6048b5a50e0bf92735ffbdc00" + }, + { + "dataPath": "params_shard_167.bin", + "format": "raw-shard", + "nbytes": 33110016, + "records": [ + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.8.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 14745600 + }, + { + "name": "model.layers.8.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 14759936 + } + ], + "md5sum": "f52b1942bba387700156c7420cb0ac42" + }, + { + "dataPath": "params_shard_168.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.33.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "a58239c3eca258915eada74cff970f50" + }, + { + "dataPath": "params_shard_169.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.34.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "5f563da0d4a406159b85905edec6c4c0" + }, + { + "dataPath": "params_shard_170.bin", + "format": "raw-shard", + "nbytes": 25917440, + "records": [ + { + "name": "model.layers.8.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 0 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 2293760 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 15400960 + }, + { + "name": "model.layers.33.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 17039360 + }, + { + "name": "model.layers.33.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 17049600 + }, + { + "name": "model.layers.33.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25896960 + }, + { + "name": "model.layers.34.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25907200 + } + ], + "md5sum": "ba939e03c5e5575269546e9a142299a5" + }, + { + "dataPath": "params_shard_171.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.34.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "06ff96d03140dcb7104b9c1d910e2004" + }, + { + "dataPath": "params_shard_172.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.34.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "bdf78006049e621e63bb44b88123ebf3" + }, + { + "dataPath": "params_shard_173.bin", + "format": "raw-shard", + "nbytes": 28860416, + "records": [ + { + "name": "model.layers.34.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 0 + }, + { + "name": "model.layers.34.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 8847360 + }, + { + "name": "model.layers.34.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 26542080 + }, + { + "name": "model.layers.34.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 26552320 + }, + { + "name": "model.layers.34.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 26566656 + } + ], + "md5sum": "a6706b2122696a17dd202fc1fbf55700" + }, + { + "dataPath": "params_shard_174.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.35.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "dbebf66c89eaf56e63b30c0d812c1e77" + }, + { + "dataPath": "params_shard_175.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.35.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "3130483d5e491545efcfb07ae4a29b7a" + }, + { + "dataPath": "params_shard_176.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.35.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "4bf3782ab3affce86af4ee1f80035ef7" + }, + { + "dataPath": "params_shard_177.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.35.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "63c31fe38c875f684a922bc38f6f4c14" + }, + { + "dataPath": "params_shard_178.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.34.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.34.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.35.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.35.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.35.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.35.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.35.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "baab5984b049a91c282338df246bd140" + }, + { + "dataPath": "params_shard_179.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.36.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "9f44c5b14f6856f856140ee3760d5a9c" + }, + { + "dataPath": "params_shard_180.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.36.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "8afb4e4d2ae7af1e330841e855cfd405" + }, + { + "dataPath": "params_shard_181.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.36.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "86b3bc2e7db3418bdf9590a5a2dba7b7" + }, + { + "dataPath": "params_shard_182.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.36.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "f9f2ec653dac50924bdc292780594a94" + }, + { + "dataPath": "params_shard_183.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.35.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.35.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.36.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.36.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.36.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.36.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.36.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "38709295be0b4a4eace42b986f373253" + }, + { + "dataPath": "params_shard_184.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.37.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "6ede5b23cc95120b39ac707867992451" + }, + { + "dataPath": "params_shard_185.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.37.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "8798bcdad88d0d31890de8ae23edf9b3" + }, + { + "dataPath": "params_shard_186.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.37.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "715033f6afc9d6a8114e67270ef3b5a6" + }, + { + "dataPath": "params_shard_187.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.37.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "b04f3a14969677dc3266d892353c20a4" + }, + { + "dataPath": "params_shard_188.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.36.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.36.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.37.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.37.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.37.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.37.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.37.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "77eda90a58bb974d9e0c85127edab64e" + }, + { + "dataPath": "params_shard_189.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.38.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "522ea270919db900d3c7eca04a56f29d" + }, + { + "dataPath": "params_shard_190.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.38.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "eb08c30b7950cf64079b241cf86216ab" + }, + { + "dataPath": "params_shard_191.bin", + "format": "raw-shard", + "nbytes": 32454656, + "records": [ + { + "name": "model.layers.37.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.37.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.38.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 14745600 + }, + { + "name": "model.layers.38.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 32440320 + } + ], + "md5sum": "7a97341f6873df6406f0f8f937fea513" + }, + { + "dataPath": "params_shard_192.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.38.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "f9b2f7a7a166b26cfc767a7be64ea631" + }, + { + "dataPath": "params_shard_193.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.39.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "e889402e94beb637383a1b1245db1543" + }, + { + "dataPath": "params_shard_194.bin", + "format": "raw-shard", + "nbytes": 25917440, + "records": [ + { + "name": "model.layers.38.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 0 + }, + { + "name": "model.layers.38.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 2293760 + }, + { + "name": "model.layers.38.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 15400960 + }, + { + "name": "model.layers.38.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 17039360 + }, + { + "name": "model.layers.38.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 17049600 + }, + { + "name": "model.layers.38.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25896960 + }, + { + "name": "model.layers.39.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25907200 + } + ], + "md5sum": "931cfbf7ac3a4aecf4f35b5c58ffb9d4" + }, + { + "dataPath": "params_shard_195.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.39.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "103ea0bcd50b6b83c95213ba078308f1" + }, + { + "dataPath": "params_shard_196.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.39.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "3792aa602eb24c00d9b9c5162c8b6605" + }, + { + "dataPath": "params_shard_197.bin", + "format": "raw-shard", + "nbytes": 28860416, + "records": [ + { + "name": "model.layers.39.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 0 + }, + { + "name": "model.layers.39.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 8847360 + }, + { + "name": "model.layers.39.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 26542080 + }, + { + "name": "model.layers.39.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 26552320 + }, + { + "name": "model.layers.39.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 26566656 + } + ], + "md5sum": "894103e53e1df3dfe61a033869da929d" + }, + { + "dataPath": "params_shard_198.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.40.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "570a6b4586d475e9891a35ecaf90cbb2" + }, + { + "dataPath": "params_shard_199.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.40.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "e7ba54674942e1eb2c558b41be07af78" + }, + { + "dataPath": "params_shard_200.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.40.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "9c936476065f8b27834e01eb9b39bd23" + }, + { + "dataPath": "params_shard_201.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.40.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "662f3e4e6440d0a00ac9d10c3c8ed4a2" + }, + { + "dataPath": "params_shard_202.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.39.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.39.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.40.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.40.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.40.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.40.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.40.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "d54ace0c9c472da4afcaf880a168feb4" + }, + { + "dataPath": "params_shard_203.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.41.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "4e450869c0da8abdf5c2014163b4b47a" + }, + { + "dataPath": "params_shard_204.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.41.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "2bf116b2ce61b6e7fa5e1d60ba4fda72" + }, + { + "dataPath": "params_shard_205.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.41.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "945c95c71c0edd3d8393c1a6b966cec9" + }, + { + "dataPath": "params_shard_206.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.41.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "bbf05a3d3160d39b9f796d262c1cd66a" + }, + { + "dataPath": "params_shard_207.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.40.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.40.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.41.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.41.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.41.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.41.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.41.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "148662eeb3c952d04ec1148994f41e1d" + }, + { + "dataPath": "params_shard_208.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.42.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "906a9c42bbc0ed927e1d4bd99fdb49a4" + }, + { + "dataPath": "params_shard_209.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.42.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "c8925f523928468a477ea37bdbedf72b" + }, + { + "dataPath": "params_shard_210.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.42.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "2d94656c3c6397b61d2b7985938fde70" + }, + { + "dataPath": "params_shard_211.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.42.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "abc36f4465f761d352ab5c17d8713515" + }, + { + "dataPath": "params_shard_212.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.41.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.41.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.42.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.42.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.42.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.42.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.42.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "16573b97c56891eb390732e113b9f06c" + }, + { + "dataPath": "params_shard_213.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.43.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "0c11930bb95af76add876ad2c76c253b" + }, + { + "dataPath": "params_shard_214.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.43.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "22f5debae414b2718de7447f94fe7e02" + }, + { + "dataPath": "params_shard_215.bin", + "format": "raw-shard", + "nbytes": 32454656, + "records": [ + { + "name": "model.layers.42.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.42.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.43.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 14745600 + }, + { + "name": "model.layers.43.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 32440320 + } + ], + "md5sum": "1e0dc35ff4cdee0c7dd93b03b0691dd0" + }, + { + "dataPath": "params_shard_216.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.43.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "64a73ca3ee53eaf52225a7974c5c0d5c" + }, + { + "dataPath": "params_shard_217.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.44.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "04d05ed87100d15b56c91a595248a9cf" + }, + { + "dataPath": "params_shard_218.bin", + "format": "raw-shard", + "nbytes": 25917440, + "records": [ + { + "name": "model.layers.43.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 0 + }, + { + "name": "model.layers.43.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 2293760 + }, + { + "name": "model.layers.43.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 15400960 + }, + { + "name": "model.layers.43.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 17039360 + }, + { + "name": "model.layers.43.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 17049600 + }, + { + "name": "model.layers.43.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25896960 + }, + { + "name": "model.layers.44.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25907200 + } + ], + "md5sum": "2529dbf6c0ebcd7b4ee089fe728450b0" + }, + { + "dataPath": "params_shard_219.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.44.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "40a111a7ef520868058080a13743502c" + }, + { + "dataPath": "params_shard_220.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.44.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "d036ba17ae8876d3d8988ba6e6d60b8a" + }, + { + "dataPath": "params_shard_221.bin", + "format": "raw-shard", + "nbytes": 28860416, + "records": [ + { + "name": "model.layers.44.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 0 + }, + { + "name": "model.layers.44.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 8847360 + }, + { + "name": "model.layers.44.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 26542080 + }, + { + "name": "model.layers.44.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 26552320 + }, + { + "name": "model.layers.44.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 26566656 + } + ], + "md5sum": "a0b8fbb586ad2064e49b81b4ab3c7328" + }, + { + "dataPath": "params_shard_222.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.45.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "24ba929df17d07c0c606df82228fdfa9" + }, + { + "dataPath": "params_shard_223.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.45.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "c76856fd7b9852a18e55349b7f9559e9" + }, + { + "dataPath": "params_shard_224.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.45.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "a4958da4509438fbeb42db55022d6847" + }, + { + "dataPath": "params_shard_225.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.45.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "0f66ca364ec81d28dd37d896b77ee8b9" + }, + { + "dataPath": "params_shard_226.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.44.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.44.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.45.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.45.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.45.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.45.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.45.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "3f685a672e43db749411cd5a988446ca" + }, + { + "dataPath": "params_shard_227.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.46.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "19070e282add150dca2a05b96fce513a" + }, + { + "dataPath": "params_shard_228.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.46.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "4b6700916900167be0343440357a9bc3" + }, + { + "dataPath": "params_shard_229.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.46.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "693bcf1d0bd9101ccf179a7522750301" + }, + { + "dataPath": "params_shard_230.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.46.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "cd33e05549c262a1c210e176db648db6" + }, + { + "dataPath": "params_shard_231.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.45.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.45.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.46.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.46.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.46.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.46.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.46.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "2503e6102e84036dd5b5a912f63da02c" + }, + { + "dataPath": "params_shard_232.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.47.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "be55f352d6392f1ba4b94a19551a7495" + }, + { + "dataPath": "params_shard_233.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.47.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "0d83cded26a62fd2b18e0c226d7f2ce4" + }, + { + "dataPath": "params_shard_234.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.47.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "0070112e211fc25f3f3ae70dac903f7b" + }, + { + "dataPath": "params_shard_235.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.47.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "7652f4f764e116e0f4d54116fddb4dd8" + }, + { + "dataPath": "params_shard_236.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.46.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.46.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.47.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.47.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.47.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.47.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.47.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "791a0fbe1329bda66086b8fb3798e47c" + }, + { + "dataPath": "params_shard_237.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.48.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "28491d89eaa0984f45615e026ce9105c" + }, + { + "dataPath": "params_shard_238.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.48.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "8633eb1292c6b7331bb6a60a89dd55f6" + }, + { + "dataPath": "params_shard_239.bin", + "format": "raw-shard", + "nbytes": 32454656, + "records": [ + { + "name": "model.layers.47.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.47.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.48.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 14745600 + }, + { + "name": "model.layers.48.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 32440320 + } + ], + "md5sum": "487120b7c5ca7dd39e29bfa91b6c00da" + }, + { + "dataPath": "params_shard_240.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.48.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "61a1a7dd14c34cacd868e5d7447a98a1" + }, + { + "dataPath": "params_shard_241.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.49.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "a2251c312b43a86bce4ee6cc65ea4780" + }, + { + "dataPath": "params_shard_242.bin", + "format": "raw-shard", + "nbytes": 25917440, + "records": [ + { + "name": "model.layers.48.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 0 + }, + { + "name": "model.layers.48.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 2293760 + }, + { + "name": "model.layers.48.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 15400960 + }, + { + "name": "model.layers.48.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 17039360 + }, + { + "name": "model.layers.48.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 17049600 + }, + { + "name": "model.layers.48.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25896960 + }, + { + "name": "model.layers.49.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25907200 + } + ], + "md5sum": "056d81bf2d00e22b247e56bed15bd284" + }, + { + "dataPath": "params_shard_243.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.49.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "2ca45df776c0f970d47e36118b4106a3" + }, + { + "dataPath": "params_shard_244.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.49.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "0fcdef25fb55d97a23dd36d9bf90ec1c" + }, + { + "dataPath": "params_shard_245.bin", + "format": "raw-shard", + "nbytes": 28860416, + "records": [ + { + "name": "model.layers.49.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 0 + }, + { + "name": "model.layers.49.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 8847360 + }, + { + "name": "model.layers.49.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 26542080 + }, + { + "name": "model.layers.49.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 26552320 + }, + { + "name": "model.layers.49.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 26566656 + } + ], + "md5sum": "e59989a56448c1a612b94a1fa9c61620" + }, + { + "dataPath": "params_shard_246.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.50.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "cfe8b12a9eca57eb5c3f7f73b3061a1e" + }, + { + "dataPath": "params_shard_247.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.50.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "5b08dcaec477b20b3dcde0e1eaf598be" + }, + { + "dataPath": "params_shard_248.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.50.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "e1e0e19eaa5ef6e0fb25f28a3a4ae7bf" + }, + { + "dataPath": "params_shard_249.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.50.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "181cbee4756bca5fb7a839993d9e9765" + }, + { + "dataPath": "params_shard_250.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.49.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.49.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.50.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.50.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.50.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.50.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.50.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "77ffc538891056e93b15392cfb06eb5b" + }, + { + "dataPath": "params_shard_251.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.51.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "5b388f0d0518fbb1ce6a2db733b46a30" + }, + { + "dataPath": "params_shard_252.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.51.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "3d7c6649c60c2920ba5da99de12c9b18" + }, + { + "dataPath": "params_shard_253.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.51.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "d178d6c0ba56cdb21a7c240a8f9dec1d" + }, + { + "dataPath": "params_shard_254.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.51.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "1ccd88d3bfebf08cec377f715466146c" + }, + { + "dataPath": "params_shard_255.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.50.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.50.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.51.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.51.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.51.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.51.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.51.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "58ee8e377c74ab9fc893bad1d37247fb" + }, + { + "dataPath": "params_shard_256.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.52.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "2f920d62ae692f692c38aff06d2ec84f" + }, + { + "dataPath": "params_shard_257.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.52.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "fef9029cbe5c8f9222382eb4f2ad928a" + }, + { + "dataPath": "params_shard_258.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.52.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "4bad949b1c86b742c7442b53e8114c98" + }, + { + "dataPath": "params_shard_259.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.52.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "21b5715cc4b83a002afa6c66ba4ccef4" + }, + { + "dataPath": "params_shard_260.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.51.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.51.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.52.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.52.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.52.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.52.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.52.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "f2edd5da41048ee39e964e5b672bcd15" + }, + { + "dataPath": "params_shard_261.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.53.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "919dc44b0468f3e8a8fc5c31dbeeba96" + }, + { + "dataPath": "params_shard_262.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.53.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "4a5737539c02470307f2d65416d1c6af" + }, + { + "dataPath": "params_shard_263.bin", + "format": "raw-shard", + "nbytes": 32454656, + "records": [ + { + "name": "model.layers.52.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.52.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.53.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 14745600 + }, + { + "name": "model.layers.53.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 32440320 + } + ], + "md5sum": "d5dd88cce6aabfd32697fee4549abcde" + }, + { + "dataPath": "params_shard_264.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.53.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "c9065dcf51925bab5c4e10a80885b72e" + }, + { + "dataPath": "params_shard_265.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.54.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "71be57e44d225748414d778fd0ca0b93" + }, + { + "dataPath": "params_shard_266.bin", + "format": "raw-shard", + "nbytes": 25917440, + "records": [ + { + "name": "model.layers.53.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 0 + }, + { + "name": "model.layers.53.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 2293760 + }, + { + "name": "model.layers.53.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 15400960 + }, + { + "name": "model.layers.53.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 17039360 + }, + { + "name": "model.layers.53.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 17049600 + }, + { + "name": "model.layers.53.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25896960 + }, + { + "name": "model.layers.54.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25907200 + } + ], + "md5sum": "78571b17b89a8de235cf425d58f5ffcd" + }, + { + "dataPath": "params_shard_267.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.54.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "ac9a751952fd399614920c59ce2db709" + }, + { + "dataPath": "params_shard_268.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.54.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "897052d3ef17e9df170f2dd4137c76ef" + }, + { + "dataPath": "params_shard_269.bin", + "format": "raw-shard", + "nbytes": 28860416, + "records": [ + { + "name": "model.layers.54.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 0 + }, + { + "name": "model.layers.54.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 8847360 + }, + { + "name": "model.layers.54.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 26542080 + }, + { + "name": "model.layers.54.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 26552320 + }, + { + "name": "model.layers.54.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 26566656 + } + ], + "md5sum": "350da7b01e3e49f868ddda59eadf6556" + }, + { + "dataPath": "params_shard_270.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.55.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "2863911b168394e6bf4cad210adb7c5b" + }, + { + "dataPath": "params_shard_271.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.55.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "7fc90f1b4c49aae8dbf7e7ee07382a66" + }, + { + "dataPath": "params_shard_272.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.55.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "407a51042fe63a1074f9c3b554c1172f" + }, + { + "dataPath": "params_shard_273.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.55.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "8e2c4bbc698e458b1064a0e1d14aa5d9" + }, + { + "dataPath": "params_shard_274.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.54.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.54.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.55.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.55.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.55.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.55.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.55.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "25406c78149ad8e47061daf2bd7e3585" + }, + { + "dataPath": "params_shard_275.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.56.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "c377c86f8c8018f92e614e57d1bde3b0" + }, + { + "dataPath": "params_shard_276.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.56.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "b57f3b16985666e9b5895d9600888d1f" + }, + { + "dataPath": "params_shard_277.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.56.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "5f9444694d56c634920320b9022c6173" + }, + { + "dataPath": "params_shard_278.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.56.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "a6866cc4006a82f320b5e970f3c14f34" + }, + { + "dataPath": "params_shard_279.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.55.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.55.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.56.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.56.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.56.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.56.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.56.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "7be531d2e4cac2ddf365de89fb42ae44" + }, + { + "dataPath": "params_shard_280.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.57.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "a8de860943098e776a29bcd4aba6f292" + }, + { + "dataPath": "params_shard_281.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.57.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "d01f39a04f63fb0a0653d514517d978a" + }, + { + "dataPath": "params_shard_282.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.57.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "9c08f31bd3d179838dca3e5a6a4d14ff" + }, + { + "dataPath": "params_shard_283.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.57.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "aaf088de1991e5c9deaf1c9652d44218" + }, + { + "dataPath": "params_shard_284.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.56.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.56.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.57.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.57.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.57.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.57.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.57.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "9c121356dad491dd020b60c1ad7746ee" + }, + { + "dataPath": "params_shard_285.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.58.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "d1a28b80753d30196f29c6857c703fde" + }, + { + "dataPath": "params_shard_286.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.58.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "aa79b676eabfb171d4a57842e8f5f7e0" + }, + { + "dataPath": "params_shard_287.bin", + "format": "raw-shard", + "nbytes": 32454656, + "records": [ + { + "name": "model.layers.57.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.57.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.58.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 14745600 + }, + { + "name": "model.layers.58.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 32440320 + } + ], + "md5sum": "62cb2da472cfc49d91563b2d247fa11c" + }, + { + "dataPath": "params_shard_288.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.58.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "8e6f39d3b4dcef13caf4236f9c06a4f1" + }, + { + "dataPath": "params_shard_289.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.59.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "01334fe2ea2e0611b9f312aae9e9e7c4" + }, + { + "dataPath": "params_shard_290.bin", + "format": "raw-shard", + "nbytes": 25917440, + "records": [ + { + "name": "model.layers.58.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 0 + }, + { + "name": "model.layers.58.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 2293760 + }, + { + "name": "model.layers.58.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 15400960 + }, + { + "name": "model.layers.58.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 17039360 + }, + { + "name": "model.layers.58.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 17049600 + }, + { + "name": "model.layers.58.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25896960 + }, + { + "name": "model.layers.59.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25907200 + } + ], + "md5sum": "a5b1f47b95e904855677ecf73d9339cb" + }, + { + "dataPath": "params_shard_291.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.59.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "14138d060801b49a5b85628fe955c886" + }, + { + "dataPath": "params_shard_292.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.59.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "a8c07c5ea029e5b32f51f56bb78263b0" + }, + { + "dataPath": "params_shard_293.bin", + "format": "raw-shard", + "nbytes": 28860416, + "records": [ + { + "name": "model.layers.59.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 0 + }, + { + "name": "model.layers.59.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 8847360 + }, + { + "name": "model.layers.59.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 26542080 + }, + { + "name": "model.layers.59.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 26552320 + }, + { + "name": "model.layers.59.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 26566656 + } + ], + "md5sum": "1a30d49c4f19f1638f3b0420a3797afe" + }, + { + "dataPath": "params_shard_294.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.60.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "c928d14d3f093a4bdd4410871f44cbd1" + }, + { + "dataPath": "params_shard_295.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.60.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "5ba6f78038cd1898403680cbfbe92c53" + }, + { + "dataPath": "params_shard_296.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.60.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "1d7565825125105b5e3728e9b0471466" + }, + { + "dataPath": "params_shard_297.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.60.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "696699219dab38ff4684dd1823907883" + }, + { + "dataPath": "params_shard_298.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.59.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.59.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.60.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.60.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.60.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.60.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.60.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "05f1b945d3872b1a0ea50f5daf7725aa" + }, + { + "dataPath": "params_shard_299.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.61.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "4249b107413bc0f27152831009fb2908" + }, + { + "dataPath": "params_shard_300.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.61.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "8b895c4dc2d3e42d040bc67519aa2948" + }, + { + "dataPath": "params_shard_301.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.61.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "c1285bed042e22f6df7a1996ca0061cc" + }, + { + "dataPath": "params_shard_302.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.61.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "ef185accd9b54b8d0baca774342d9d12" + }, + { + "dataPath": "params_shard_303.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.60.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.60.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.61.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.61.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.61.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.61.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.61.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "cf0408f88e0873f9cdff4f02914622e0" + }, + { + "dataPath": "params_shard_304.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.62.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "74c544bf68520c6459eb18153e40d6bf" + }, + { + "dataPath": "params_shard_305.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.62.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "1c102ea3aae1aaa1f2ba534b8e07f159" + }, + { + "dataPath": "params_shard_306.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.62.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "947d246d78269d029a5d375e599465fc" + }, + { + "dataPath": "params_shard_307.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.62.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "230a7ab8345a3fdb556d6293f5cdf2fb" + }, + { + "dataPath": "params_shard_308.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.61.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.61.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.62.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.62.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.62.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.62.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.62.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "042dc0834b1aaba2d14f5f576c5c2a1e" + }, + { + "dataPath": "params_shard_309.bin", + "format": "raw-shard", + "nbytes": 33110016, + "records": [ + { + "name": "model.layers.62.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.62.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.63.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 14745600 + }, + { + "name": "model.layers.63.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 14759936 + } + ], + "md5sum": "fffe71801143b2e4f43ca94bdc55b661" + }, + { + "dataPath": "params_shard_310.bin", + "format": "raw-shard", + "nbytes": 17039360, + "records": [ + { + "name": "model.layers.63.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 0 + }, + { + "name": "model.layers.63.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 2293760 + }, + { + "name": "model.layers.63.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 15400960 + } + ], + "md5sum": "087108e8e0a80d18540a92c17cce85ac" + } + ] +} \ No newline at end of file