diff --git a/ndarray-cache-b16.json b/ndarray-cache-b16.json new file mode 100644 index 0000000000000000000000000000000000000000..8f1453ad16eb3c4ad73013c22df16998012e4380 --- /dev/null +++ b/ndarray-cache-b16.json @@ -0,0 +1,4175 @@ +{ + "metadata": { + "ParamSize": 305, + "ParamBytes": 4319821824.0, + "BitsPerParam": 5.000976230824355 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 209715200, + "records": [ + { + "name": "lm_head.q_weight", + "shape": [ + 102400, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 209715200, + "byteOffset": 0 + } + ], + "md5sum": "606880b7a8dc848647d0f4f96a4b89ea" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "b6da9737170d1c0b645180a4de7eddcd" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "8492c07d2f5760fbc9219bdd3b678b50" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 29040640, + "records": [ + { + "name": "lm_head.q_scale", + "shape": [ + 102400, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 26214400, + "byteOffset": 0 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 26214400 + }, + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 26222592 + } + ], + "md5sum": "a1e27e3b38f9d5dbefa1a60a33844a7b" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "882786af5e221402ac6a93cfbe558d18" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 31014912, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 0 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 5636096 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 5644288 + }, + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 5652480 + }, + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 28196864 + } + ], + "md5sum": "4b2506de5078dd4f8676e493bd57dde5" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 30810112, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 0 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 5636096 + }, + { + "name": "model.layers.23.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 5644288 + } + ], + "md5sum": "2016cd57aa151424f83b8a11866639b5" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "1676066fb9d039225b9db1a7025a435b" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "ed1f5c885371df0971b2d2eb1290b5a8" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.24.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "45266bce7406124aa81a10a348fde366" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 32587776, + "records": [ + { + "name": "model.layers.23.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 0 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3145728 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 11534336 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 12582912 + }, + { + "name": "model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 12591104 + }, + { + "name": "model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 15409152 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.24.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 21053440 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 24199168 + } + ], + "md5sum": "a088038b26d0b22649806cc1d028fdf9" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "d67505ab892df80fa16122767fd50761" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.25.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "da71da4dbe03b54fe30fd3cd21d3e8f9" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 0 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 1048576 + }, + { + "name": "model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 1056768 + }, + { + "name": "model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 23601152 + }, + { + "name": "model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 26419200 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 32055296 + } + ], + "md5sum": "f49e54b4bb128fc91b502800f3fe2e5c" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "52bb43a590afc5a108144c97e1545541" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "895d44bf500994768e69cc4acb0fe917" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.26.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "5e2ccbcac2ce04eac264d6a85fa38ba6" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 32587776, + "records": [ + { + "name": "model.layers.25.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 0 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3145728 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 11534336 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 12582912 + }, + { + "name": "model.layers.26.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 12591104 + }, + { + "name": "model.layers.26.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 15409152 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.26.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 21053440 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 24199168 + } + ], + "md5sum": "bad478a89ded29503b7ad5247596bab1" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "6c78425185d2ed4231feb8fa7d0ac144" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.27.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "ccbde98336037080f6518ea492561a6f" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.layers.26.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 0 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 1048576 + }, + { + "name": "model.layers.27.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 1056768 + }, + { + "name": "model.layers.27.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 23601152 + }, + { + "name": "model.layers.27.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 26419200 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 32055296 + } + ], + "md5sum": "845793104fed7cbf95ee1534a2d21659" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "20d73c91a3d72233d81982ce621a72c3" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "97661861b9330740351d75485aabdce9" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.28.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "b4f341001044c445a5366ba03e97dbd1" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 32587776, + "records": [ + { + "name": "model.layers.27.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 0 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3145728 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 11534336 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 12582912 + }, + { + "name": "model.layers.28.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 12591104 + }, + { + "name": "model.layers.28.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 15409152 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.28.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 21053440 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 24199168 + } + ], + "md5sum": "1982a151e5efe66f797dffdcb665e8bd" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "fa27b88003a19faf081a3c9565bd1b77" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.29.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "52865cb1714cf5630b995a5583ea2df0" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.layers.28.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 0 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 1048576 + }, + { + "name": "model.layers.29.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 1056768 + }, + { + "name": "model.layers.29.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 23601152 + }, + { + "name": "model.layers.29.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 26419200 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 32055296 + } + ], + "md5sum": "20b2f49b7fa06db00725aa97c436b952" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 209715200, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 102400, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 209715200, + "byteOffset": 0 + } + ], + "md5sum": "cb7f2f3c841f0ee8657397755d0dbdd3" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 102400, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "247d530128a3acc07316362bf878ef5b" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "538bfee51b0cd51ad8c57cef968c1b81" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "8de562a563fd2c319e18751f54a9a258" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.0.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "1fa5bff53cc592bb798d1075ead4c32b" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 32595968, + "records": [ + { + "name": "model.layers.29.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 0 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3145728 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 11534336 + }, + { + "name": "model.norm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 12582912 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 12591104 + }, + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 12599296 + }, + { + "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 15417344 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 21053440 + }, + { + "name": "model.layers.0.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 21061632 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 24207360 + } + ], + "md5sum": "e5647ccc8edffb18918b73b175d14772" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "61802bfcda32eb42d51fae14f9d8d6fb" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.1.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "f2fc8896ab3e23fc577de0d2a5daf98f" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 0 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 1048576 + }, + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 1056768 + }, + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 23601152 + }, + { + "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 26419200 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 32055296 + } + ], + "md5sum": "4c543de50f9e1ad35153c1ba5a66e058" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "b9818960ffc4bd9a71205e649bf97e89" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "9b8747e93fbccd762a88ad4e768a8f40" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.10.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "df63e726070f154a4fd009e4a252587b" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 32587776, + "records": [ + { + "name": "model.layers.1.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 0 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3145728 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 11534336 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 12582912 + }, + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 12591104 + }, + { + "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 15409152 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.10.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 21053440 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 24199168 + } + ], + "md5sum": "18052ca573905c87811a83b199e457e2" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "f005300edf1ab989f6e9e36de6bdda70" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.11.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "840c3be3ac1b54b4af55ca53b2a8a002" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 0 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 1048576 + }, + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 1056768 + }, + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 23601152 + }, + { + "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 26419200 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 32055296 + } + ], + "md5sum": "0388837eef43c12f07f59b200704206d" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "6ab677658eb4afa9234655d6261cfdba" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "3033c3ef797fc34862a4f5d1113748ab" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.12.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "a75291043a554d9f9c32475111cfbe14" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 32587776, + "records": [ + { + "name": "model.layers.11.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 0 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3145728 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 11534336 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 12582912 + }, + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 12591104 + }, + { + "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 15409152 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 21053440 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 24199168 + } + ], + "md5sum": "57fda20ee30ec1e11afa4858a8f9bc0d" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "5754cc13d26ce5bd7cd6bbc429dcc37d" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.13.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "ee3e18ecc01bbd548beb64d76bc2fd1d" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 0 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 1048576 + }, + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 1056768 + }, + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 23601152 + }, + { + "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 26419200 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 32055296 + } + ], + "md5sum": "8ef26c4416be17c6da3b37f59eb43a4e" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "325ab20541b280b55ff2037492d905ed" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "2074661de6b0194cd2f24cef5a3edc7f" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.14.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "1873dd4562e9ed7c493e77effac3ad80" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 32587776, + "records": [ + { + "name": "model.layers.13.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 0 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3145728 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 11534336 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 12582912 + }, + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 12591104 + }, + { + "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 15409152 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.14.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 21053440 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 24199168 + } + ], + "md5sum": "a522d3059f4f9305f7f3b82a0f8474c2" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "bfc566d2e359a21811584069fdb6dfd6" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.15.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "4b56610368dd9a1efa9179a2e6b8fea1" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 0 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 1048576 + }, + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 1056768 + }, + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 23601152 + }, + { + "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 26419200 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 32055296 + } + ], + "md5sum": "46d6389203559e24c81a891ce53f13d2" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "4e30fe4386b207489f8af6b9e44c1381" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "df0953c545bebe4c332c05b9e87918ee" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.16.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "a49caeb6e52d4f009a05a386d275b189" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 32587776, + "records": [ + { + "name": "model.layers.15.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 0 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3145728 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 11534336 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 12582912 + }, + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 12591104 + }, + { + "name": "model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 15409152 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.16.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 21053440 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 24199168 + } + ], + "md5sum": "7c22a6609bd775f754302364776bac3b" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "96ed8151fb856a871150dfb828ce66ca" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.17.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "8f3cbfc6d5255e7ae1c468b71149e870" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 0 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 1048576 + }, + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 1056768 + }, + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 23601152 + }, + { + "name": "model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 26419200 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 32055296 + } + ], + "md5sum": "c7bc07ccbcef367393c389e32b0f237d" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "0806c61004ab81f16f2a0bba1264d6ba" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "bcf10dd970bf297fc8442125fdc6c12b" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.18.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "81e693dffdc97f72728d8cc0c0bfb90b" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 32587776, + "records": [ + { + "name": "model.layers.17.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 0 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3145728 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 11534336 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 12582912 + }, + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 12591104 + }, + { + "name": "model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 15409152 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.18.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 21053440 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 24199168 + } + ], + "md5sum": "cc6681044aeb3c1339b59890e1b39206" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "ef9dce04f7a42cdb15b7f3332f3cca46" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.19.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "e2deca87c61d92102516851c4fe9bf68" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 0 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 1048576 + }, + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 1056768 + }, + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 23601152 + }, + { + "name": "model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 26419200 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 32055296 + } + ], + "md5sum": "a1a4c25aaca07739b86d44a661b0f199" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "87b45b61bc6024979fced00e910e4ace" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "632bf70a29517427aae921f5f6f46fbc" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.2.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "02d5b26cb847690b7d0b3d775d276279" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 32587776, + "records": [ + { + "name": "model.layers.19.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 0 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3145728 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 11534336 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 12582912 + }, + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 12591104 + }, + { + "name": "model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 15409152 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.2.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 21053440 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 24199168 + } + ], + "md5sum": "bc8ddd14b1ff9303d6393d7c979d2e53" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "4d8c8a3584c50927613034b8f8cea42d" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.20.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "6bc8e84046d80beab429a1cf7c6edfc4" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 0 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 1048576 + }, + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 1056768 + }, + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 23601152 + }, + { + "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 26419200 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 32055296 + } + ], + "md5sum": "4b23d8cfa463acb0553955d5347eb0ea" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "e25872cd1502cc268068612bf3e810a6" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "cb772f1c323ca7f4ada0327f7b8f0151" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.21.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "51ee6a441d70c5438780aa43057a3423" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 32587776, + "records": [ + { + "name": "model.layers.20.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 0 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3145728 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 11534336 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 12582912 + }, + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 12591104 + }, + { + "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 15409152 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.21.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 21053440 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 24199168 + } + ], + "md5sum": "cc223878af98d797a73cb50991a85d65" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 0 + }, + { + "name": "model.layers.22.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 1048576 + }, + { + "name": "model.layers.22.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 26214400 + } + ], + "md5sum": "027462be1b2294837b184dd763415a34" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 31989760, + "records": [ + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 9445376 + } + ], + "md5sum": "5a8dad096acda11755fd1043218f08b0" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "206813d374e06f6bb22562b704ee8827" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.3.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "e5865516f88c52b7e7e5c3f90cb1eef6" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "24e60c84726af450b6c80164d70858da" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "48a911cd98a3967ffba6773f7f2f2ed8" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.4.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "d7cfb15ba54a4117678cca83caedd157" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 32661504, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 2818048 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 8454144 + }, + { + "name": "model.layers.3.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 8462336 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11608064 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 19996672 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 21053440 + }, + { + "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 23871488 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 29507584 + }, + { + "name": "model.layers.4.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 29515776 + } + ], + "md5sum": "e68f7cb78dec00e6ca96693eed60d900" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 31989760, + "records": [ + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 9445376 + } + ], + "md5sum": "133da966c46812b369e7c3881543c5fc" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "73a90e1bc10515347f4cbdd8085ed61e" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.5.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "fc4a8187566cafcda9190d00f1bf5946" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "790fe54ef8c522f5eaaba8b6ba8073eb" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "a57f0e2b9da6daebd9899f604beaae85" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.6.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "b6279ae29c6ade77a8621f8caffa756c" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 32661504, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 2818048 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 8454144 + }, + { + "name": "model.layers.5.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 8462336 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11608064 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 19996672 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 21053440 + }, + { + "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 23871488 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 29507584 + }, + { + "name": "model.layers.6.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 29515776 + } + ], + "md5sum": "1ff9166596f291d0a0d455f320801ccb" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 31989760, + "records": [ + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 9445376 + } + ], + "md5sum": "9e572a685c864ab28a96e6b093f3b280" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "5798be8e470dae8d8b6d2913fd518fa0" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.7.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "33149eac79b8edc244cf49321a5ad60d" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "5e3ed348453f0e7739f1a0ee0a723ab1" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "1e5657f2e0aa96e009efef532227ae0e" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.8.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "f617da8d732289a198f2b8d2e46f2d88" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 32661504, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 2818048 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 8454144 + }, + { + "name": "model.layers.7.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 8462336 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11608064 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 19996672 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 21053440 + }, + { + "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 23871488 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 29507584 + }, + { + "name": "model.layers.8.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 29515776 + } + ], + "md5sum": "0e813d19b44087e77c8f7994edf91632" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 31989760, + "records": [ + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 9445376 + } + ], + "md5sum": "03da8c7e5307360c72ec479b33fd170c" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "2baa643cd4b5af6ce825f412ceb5e171" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.9.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "d279f693f9548f33dfdb6896de219857" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 21045248, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 2818048 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 8454144 + }, + { + "name": "model.layers.9.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 8462336 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11608064 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 19996672 + } + ], + "md5sum": "15ce253834790194fc711813c6711d45" + } + ] +} \ No newline at end of file diff --git a/ndarray-cache.json b/ndarray-cache.json new file mode 100644 index 0000000000000000000000000000000000000000..859453aa8227f4658f2b5ef673edf759479984a9 --- /dev/null +++ b/ndarray-cache.json @@ -0,0 +1,4175 @@ +{ + "metadata": { + "ParamSize": 305, + "ParamBytes": 4319821824.0, + "BitsPerParam": 5.000976230824355 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 209715200, + "records": [ + { + "name": "lm_head.q_weight", + "shape": [ + 102400, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 209715200, + "byteOffset": 0 + } + ], + "md5sum": "606880b7a8dc848647d0f4f96a4b89ea" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "b6da9737170d1c0b645180a4de7eddcd" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "8492c07d2f5760fbc9219bdd3b678b50" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 29040640, + "records": [ + { + "name": "lm_head.q_scale", + "shape": [ + 102400, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26214400 + }, + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 26222592 + } + ], + "md5sum": "a1e27e3b38f9d5dbefa1a60a33844a7b" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "882786af5e221402ac6a93cfbe558d18" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 31014912, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 0 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5636096 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5644288 + }, + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 5652480 + }, + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 28196864 + } + ], + "md5sum": "4b2506de5078dd4f8676e493bd57dde5" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 30810112, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 0 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5636096 + }, + { + "name": "model.layers.23.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 5644288 + } + ], + "md5sum": "2016cd57aa151424f83b8a11866639b5" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "1676066fb9d039225b9db1a7025a435b" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "ed1f5c885371df0971b2d2eb1290b5a8" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.24.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "45266bce7406124aa81a10a348fde366" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 32587776, + "records": [ + { + "name": "model.layers.23.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 0 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3145728 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 11534336 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 12582912 + }, + { + "name": "model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 12591104 + }, + { + "name": "model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 15409152 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.24.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 21053440 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 24199168 + } + ], + "md5sum": "a088038b26d0b22649806cc1d028fdf9" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "d67505ab892df80fa16122767fd50761" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.25.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "da71da4dbe03b54fe30fd3cd21d3e8f9" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 0 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 1048576 + }, + { + "name": "model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 1056768 + }, + { + "name": "model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 23601152 + }, + { + "name": "model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 26419200 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32055296 + } + ], + "md5sum": "f49e54b4bb128fc91b502800f3fe2e5c" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "52bb43a590afc5a108144c97e1545541" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "895d44bf500994768e69cc4acb0fe917" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.26.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "5e2ccbcac2ce04eac264d6a85fa38ba6" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 32587776, + "records": [ + { + "name": "model.layers.25.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 0 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3145728 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 11534336 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 12582912 + }, + { + "name": "model.layers.26.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 12591104 + }, + { + "name": "model.layers.26.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 15409152 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.26.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 21053440 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 24199168 + } + ], + "md5sum": "bad478a89ded29503b7ad5247596bab1" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "6c78425185d2ed4231feb8fa7d0ac144" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.27.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "ccbde98336037080f6518ea492561a6f" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.layers.26.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 0 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 1048576 + }, + { + "name": "model.layers.27.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 1056768 + }, + { + "name": "model.layers.27.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 23601152 + }, + { + "name": "model.layers.27.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 26419200 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32055296 + } + ], + "md5sum": "845793104fed7cbf95ee1534a2d21659" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "20d73c91a3d72233d81982ce621a72c3" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "97661861b9330740351d75485aabdce9" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.28.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "b4f341001044c445a5366ba03e97dbd1" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 32587776, + "records": [ + { + "name": "model.layers.27.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 0 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3145728 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 11534336 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 12582912 + }, + { + "name": "model.layers.28.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 12591104 + }, + { + "name": "model.layers.28.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 15409152 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.28.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 21053440 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 24199168 + } + ], + "md5sum": "1982a151e5efe66f797dffdcb665e8bd" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "fa27b88003a19faf081a3c9565bd1b77" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.29.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "52865cb1714cf5630b995a5583ea2df0" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.layers.28.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 0 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 1048576 + }, + { + "name": "model.layers.29.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 1056768 + }, + { + "name": "model.layers.29.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 23601152 + }, + { + "name": "model.layers.29.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 26419200 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32055296 + } + ], + "md5sum": "20b2f49b7fa06db00725aa97c436b952" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 209715200, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 102400, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 209715200, + "byteOffset": 0 + } + ], + "md5sum": "cb7f2f3c841f0ee8657397755d0dbdd3" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 102400, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "247d530128a3acc07316362bf878ef5b" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "538bfee51b0cd51ad8c57cef968c1b81" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "8de562a563fd2c319e18751f54a9a258" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.0.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "1fa5bff53cc592bb798d1075ead4c32b" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 32595968, + "records": [ + { + "name": "model.layers.29.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 0 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3145728 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 11534336 + }, + { + "name": "model.norm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 12582912 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 12591104 + }, + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 12599296 + }, + { + "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 15417344 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21053440 + }, + { + "name": "model.layers.0.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 21061632 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 24207360 + } + ], + "md5sum": "e5647ccc8edffb18918b73b175d14772" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "61802bfcda32eb42d51fae14f9d8d6fb" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.1.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "f2fc8896ab3e23fc577de0d2a5daf98f" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 0 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 1048576 + }, + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 1056768 + }, + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 23601152 + }, + { + "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 26419200 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32055296 + } + ], + "md5sum": "4c543de50f9e1ad35153c1ba5a66e058" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "b9818960ffc4bd9a71205e649bf97e89" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "9b8747e93fbccd762a88ad4e768a8f40" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.10.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "df63e726070f154a4fd009e4a252587b" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 32587776, + "records": [ + { + "name": "model.layers.1.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 0 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3145728 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 11534336 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 12582912 + }, + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 12591104 + }, + { + "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 15409152 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.10.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 21053440 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 24199168 + } + ], + "md5sum": "18052ca573905c87811a83b199e457e2" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "f005300edf1ab989f6e9e36de6bdda70" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.11.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "840c3be3ac1b54b4af55ca53b2a8a002" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 0 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 1048576 + }, + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 1056768 + }, + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 23601152 + }, + { + "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 26419200 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32055296 + } + ], + "md5sum": "0388837eef43c12f07f59b200704206d" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "6ab677658eb4afa9234655d6261cfdba" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "3033c3ef797fc34862a4f5d1113748ab" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.12.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "a75291043a554d9f9c32475111cfbe14" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 32587776, + "records": [ + { + "name": "model.layers.11.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 0 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3145728 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 11534336 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 12582912 + }, + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 12591104 + }, + { + "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 15409152 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 21053440 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 24199168 + } + ], + "md5sum": "57fda20ee30ec1e11afa4858a8f9bc0d" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "5754cc13d26ce5bd7cd6bbc429dcc37d" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.13.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "ee3e18ecc01bbd548beb64d76bc2fd1d" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 0 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 1048576 + }, + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 1056768 + }, + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 23601152 + }, + { + "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 26419200 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32055296 + } + ], + "md5sum": "8ef26c4416be17c6da3b37f59eb43a4e" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "325ab20541b280b55ff2037492d905ed" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "2074661de6b0194cd2f24cef5a3edc7f" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.14.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "1873dd4562e9ed7c493e77effac3ad80" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 32587776, + "records": [ + { + "name": "model.layers.13.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 0 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3145728 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 11534336 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 12582912 + }, + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 12591104 + }, + { + "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 15409152 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.14.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 21053440 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 24199168 + } + ], + "md5sum": "a522d3059f4f9305f7f3b82a0f8474c2" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "bfc566d2e359a21811584069fdb6dfd6" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.15.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "4b56610368dd9a1efa9179a2e6b8fea1" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 0 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 1048576 + }, + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 1056768 + }, + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 23601152 + }, + { + "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 26419200 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32055296 + } + ], + "md5sum": "46d6389203559e24c81a891ce53f13d2" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "4e30fe4386b207489f8af6b9e44c1381" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "df0953c545bebe4c332c05b9e87918ee" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.16.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "a49caeb6e52d4f009a05a386d275b189" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 32587776, + "records": [ + { + "name": "model.layers.15.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 0 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3145728 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 11534336 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 12582912 + }, + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 12591104 + }, + { + "name": "model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 15409152 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.16.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 21053440 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 24199168 + } + ], + "md5sum": "7c22a6609bd775f754302364776bac3b" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "96ed8151fb856a871150dfb828ce66ca" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.17.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "8f3cbfc6d5255e7ae1c468b71149e870" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 0 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 1048576 + }, + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 1056768 + }, + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 23601152 + }, + { + "name": "model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 26419200 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32055296 + } + ], + "md5sum": "c7bc07ccbcef367393c389e32b0f237d" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "0806c61004ab81f16f2a0bba1264d6ba" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "bcf10dd970bf297fc8442125fdc6c12b" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.18.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "81e693dffdc97f72728d8cc0c0bfb90b" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 32587776, + "records": [ + { + "name": "model.layers.17.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 0 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3145728 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 11534336 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 12582912 + }, + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 12591104 + }, + { + "name": "model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 15409152 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.18.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 21053440 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 24199168 + } + ], + "md5sum": "cc6681044aeb3c1339b59890e1b39206" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "ef9dce04f7a42cdb15b7f3332f3cca46" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.19.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "e2deca87c61d92102516851c4fe9bf68" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 0 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 1048576 + }, + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 1056768 + }, + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 23601152 + }, + { + "name": "model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 26419200 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32055296 + } + ], + "md5sum": "a1a4c25aaca07739b86d44a661b0f199" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "87b45b61bc6024979fced00e910e4ace" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "632bf70a29517427aae921f5f6f46fbc" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.2.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "02d5b26cb847690b7d0b3d775d276279" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 32587776, + "records": [ + { + "name": "model.layers.19.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 0 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3145728 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 11534336 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 12582912 + }, + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 12591104 + }, + { + "name": "model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 15409152 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.2.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 21053440 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 24199168 + } + ], + "md5sum": "bc8ddd14b1ff9303d6393d7c979d2e53" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "4d8c8a3584c50927613034b8f8cea42d" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.20.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "6bc8e84046d80beab429a1cf7c6edfc4" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 0 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 1048576 + }, + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 1056768 + }, + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 23601152 + }, + { + "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 26419200 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32055296 + } + ], + "md5sum": "4b23d8cfa463acb0553955d5347eb0ea" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "e25872cd1502cc268068612bf3e810a6" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "cb772f1c323ca7f4ada0327f7b8f0151" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.21.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "51ee6a441d70c5438780aa43057a3423" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 32587776, + "records": [ + { + "name": "model.layers.20.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 0 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3145728 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 11534336 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 12582912 + }, + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 12591104 + }, + { + "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 15409152 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.21.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 21053440 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 24199168 + } + ], + "md5sum": "cc223878af98d797a73cb50991a85d65" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 0 + }, + { + "name": "model.layers.22.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 1048576 + }, + { + "name": "model.layers.22.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 26214400 + } + ], + "md5sum": "027462be1b2294837b184dd763415a34" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 31989760, + "records": [ + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 9445376 + } + ], + "md5sum": "5a8dad096acda11755fd1043218f08b0" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "206813d374e06f6bb22562b704ee8827" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.3.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "e5865516f88c52b7e7e5c3f90cb1eef6" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "24e60c84726af450b6c80164d70858da" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "48a911cd98a3967ffba6773f7f2f2ed8" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.4.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "d7cfb15ba54a4117678cca83caedd157" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 32661504, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 2818048 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8454144 + }, + { + "name": "model.layers.3.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 8462336 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11608064 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 19996672 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 21053440 + }, + { + "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 23871488 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29507584 + }, + { + "name": "model.layers.4.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 29515776 + } + ], + "md5sum": "e68f7cb78dec00e6ca96693eed60d900" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 31989760, + "records": [ + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 9445376 + } + ], + "md5sum": "133da966c46812b369e7c3881543c5fc" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "73a90e1bc10515347f4cbdd8085ed61e" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.5.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "fc4a8187566cafcda9190d00f1bf5946" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "790fe54ef8c522f5eaaba8b6ba8073eb" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "a57f0e2b9da6daebd9899f604beaae85" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.6.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "b6279ae29c6ade77a8621f8caffa756c" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 32661504, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 2818048 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8454144 + }, + { + "name": "model.layers.5.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 8462336 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11608064 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 19996672 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 21053440 + }, + { + "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 23871488 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29507584 + }, + { + "name": "model.layers.6.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 29515776 + } + ], + "md5sum": "1ff9166596f291d0a0d455f320801ccb" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 31989760, + "records": [ + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 9445376 + } + ], + "md5sum": "9e572a685c864ab28a96e6b093f3b280" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "5798be8e470dae8d8b6d2913fd518fa0" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.7.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "33149eac79b8edc244cf49321a5ad60d" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "5e3ed348453f0e7739f1a0ee0a723ab1" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "1e5657f2e0aa96e009efef532227ae0e" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.8.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "f617da8d732289a198f2b8d2e46f2d88" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 32661504, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 2818048 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8454144 + }, + { + "name": "model.layers.7.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 8462336 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11608064 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 19996672 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 21053440 + }, + { + "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 23871488 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29507584 + }, + { + "name": "model.layers.8.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 29515776 + } + ], + "md5sum": "0e813d19b44087e77c8f7994edf91632" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 31989760, + "records": [ + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 9445376 + } + ], + "md5sum": "03da8c7e5307360c72ec479b33fd170c" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "2baa643cd4b5af6ce825f412ceb5e171" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.9.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "d279f693f9548f33dfdb6896de219857" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 21045248, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 2818048 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8454144 + }, + { + "name": "model.layers.9.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 8462336 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11608064 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 19996672 + } + ], + "md5sum": "15ce253834790194fc711813c6711d45" + } + ] +} \ No newline at end of file diff --git a/params_shard_0.bin b/params_shard_0.bin new file mode 100644 index 0000000000000000000000000000000000000000..3d6bcd59d0fbb2e70256243ef58de1016efaab48 --- /dev/null +++ b/params_shard_0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3db3fc0d2c34bee44205f1d20acbda17e516a6672ad4d4fd26e3c4cb382789eb +size 209715200 diff --git a/params_shard_1.bin b/params_shard_1.bin new file mode 100644 index 0000000000000000000000000000000000000000..26d85501ceaf2a738e3d6d1bdb669d96144aa154 --- /dev/null +++ b/params_shard_1.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ade22e161c15591438b58cdf311b86ad554aa275dc9742bf41d2aec2bae976ce +size 22544384 diff --git a/params_shard_10.bin b/params_shard_10.bin new file mode 100644 index 0000000000000000000000000000000000000000..6b3441ef3acd4e019b99df0eaf649a106f8c406e --- /dev/null +++ b/params_shard_10.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8186c267c698c23252ff84a3a5208797737bd39ff72d4d09aa4a18d7dd65cbb3 +size 32587776 diff --git a/params_shard_100.bin b/params_shard_100.bin new file mode 100644 index 0000000000000000000000000000000000000000..eae43b2caa49acc03f1376f58b122ec81a4e9837 --- /dev/null +++ b/params_shard_100.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e40a6e69df1b5c6248cffe2b094d91a14621d0f34ce0db596fd045d27bb673d0 +size 25165824 diff --git a/params_shard_101.bin b/params_shard_101.bin new file mode 100644 index 0000000000000000000000000000000000000000..49f9b54e4e114d48825305e787154bc460850eb0 --- /dev/null +++ b/params_shard_101.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6220e641dba61e8c28f2113b17bb559706b8d73670c34e70f92f48f06b4e8e1 +size 22544384 diff --git a/params_shard_102.bin b/params_shard_102.bin new file mode 100644 index 0000000000000000000000000000000000000000..5fb0aafcc58b1fdb13aa451586ad19969c3626ac --- /dev/null +++ b/params_shard_102.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a95f8ff530814d6fddbc3eeacd424ae5e43e183aab22a3dc9ff4aa74f5e91d5b +size 45088768 diff --git a/params_shard_103.bin b/params_shard_103.bin new file mode 100644 index 0000000000000000000000000000000000000000..1c05b6855e40bc1d3ec89d4d519d67781b6a15e5 --- /dev/null +++ b/params_shard_103.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:060c13d63db0d95565df363abb5400930b6de60def963705255a0123b320a0c5 +size 25165824 diff --git a/params_shard_104.bin b/params_shard_104.bin new file mode 100644 index 0000000000000000000000000000000000000000..683aa667b8d7eb22f4b85191914c8162033771ea --- /dev/null +++ b/params_shard_104.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f52717a9442942f8d37575b72973076d9a62cda467e2e0d61a19773bb8e60d84 +size 32661504 diff --git a/params_shard_105.bin b/params_shard_105.bin new file mode 100644 index 0000000000000000000000000000000000000000..41e955926c4de069c9a32833321724c537535e51 --- /dev/null +++ b/params_shard_105.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:603acd4d4947f22b483df685d3db4956b9ac717835a1d3a8c1d9c513d764a50c +size 31989760 diff --git a/params_shard_106.bin b/params_shard_106.bin new file mode 100644 index 0000000000000000000000000000000000000000..faa7722ec0dd7e69556026d4f9c2fbea299ff69d --- /dev/null +++ b/params_shard_106.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d624d9938ef548732ff08fef3cbb745bfbda44dc293c72325a4ae53412bc196c +size 45088768 diff --git a/params_shard_107.bin b/params_shard_107.bin new file mode 100644 index 0000000000000000000000000000000000000000..897bbc8251279a36e2d4d3a3a95481f32fcd7362 --- /dev/null +++ b/params_shard_107.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5699c5ddb6ba5e3efc8dcdc129ad8146040f4f81ebb171a32216cff10a82df69 +size 25165824 diff --git a/params_shard_108.bin b/params_shard_108.bin new file mode 100644 index 0000000000000000000000000000000000000000..d0f71640af86a3810298533332468879579fc4a0 --- /dev/null +++ b/params_shard_108.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a60005125aafe769be68d226e3989c49981517c2c1ebdb4c9acdd28bb5a4971b +size 21045248 diff --git a/params_shard_11.bin b/params_shard_11.bin new file mode 100644 index 0000000000000000000000000000000000000000..49f8e846488f8ffd214977127a71dffe01c15e50 --- /dev/null +++ b/params_shard_11.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae7b117de03cf2d188c2aa58ccf5d0c90cb32a8039c8c2a4aae1889eaeb9b349 +size 45088768 diff --git a/params_shard_12.bin b/params_shard_12.bin new file mode 100644 index 0000000000000000000000000000000000000000..0d449b07ba75845bc84ecd095f84d7ed6be759ac --- /dev/null +++ b/params_shard_12.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0aaf98c24b1e97eb843717dca938cb63b98c6e688fc17af468bbac234fc457a +size 25165824 diff --git a/params_shard_13.bin b/params_shard_13.bin new file mode 100644 index 0000000000000000000000000000000000000000..a837fd72763b67954db8ecb8d2474b3ee87855d9 --- /dev/null +++ b/params_shard_13.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e502eeae92a8b6fe8166844af0d1f650bcd09468c0baf25dfe888e3b3641787 +size 32063488 diff --git a/params_shard_14.bin b/params_shard_14.bin new file mode 100644 index 0000000000000000000000000000000000000000..07a80aca9002b4aad153965a6d0ac5f15b9989a6 --- /dev/null +++ b/params_shard_14.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ec3118e6d05f568b626a54697ad43e27fb90832025186f025e65af691abf830 +size 22544384 diff --git a/params_shard_15.bin b/params_shard_15.bin new file mode 100644 index 0000000000000000000000000000000000000000..a43566df5876b0d0e1d7e4ff1aeb555896e79f25 --- /dev/null +++ b/params_shard_15.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fde9cff13e673001feb812e2d26a3a2fd187003060e4613b4c8836ad4e8d172f +size 45088768 diff --git a/params_shard_16.bin b/params_shard_16.bin new file mode 100644 index 0000000000000000000000000000000000000000..54eecb5c8cdd88dc071d647f3b8f46dac0b5f45b --- /dev/null +++ b/params_shard_16.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fcd9f95c9978fb35b573181a77861bbdcfa657588a5249db6917e73d0939d30 +size 25165824 diff --git a/params_shard_17.bin b/params_shard_17.bin new file mode 100644 index 0000000000000000000000000000000000000000..1b6076c41b759c300752cb829039f2f316e5707e --- /dev/null +++ b/params_shard_17.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc80a083d576670ab194c0bd3a7240584debcb2eee0fa9ce79b57f80845477cd +size 32587776 diff --git a/params_shard_18.bin b/params_shard_18.bin new file mode 100644 index 0000000000000000000000000000000000000000..47ec101e9f0c573d06f7e83c4a0fe6fb16c4c714 --- /dev/null +++ b/params_shard_18.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:059bc51ee26f36426ea9da96030eac98eb663a10837910527fafc15cd6f2bc13 +size 45088768 diff --git a/params_shard_19.bin b/params_shard_19.bin new file mode 100644 index 0000000000000000000000000000000000000000..29e7689bc9671086706d145904b1411efb35833c --- /dev/null +++ b/params_shard_19.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c310cb578d4c73a84850631244d62ae40a321aac245a4fd356391a4ee2864e8 +size 25165824 diff --git a/params_shard_2.bin b/params_shard_2.bin new file mode 100644 index 0000000000000000000000000000000000000000..c31dcc5edce6b0893985823f44528c1ae8f16806 --- /dev/null +++ b/params_shard_2.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a07402fff3554997bc20acc79622709cbca8aa5bf9134a525464fd80594effed +size 45088768 diff --git a/params_shard_20.bin b/params_shard_20.bin new file mode 100644 index 0000000000000000000000000000000000000000..542cb44b0d05eb4a6b6dbd10125cf62a6cc80991 --- /dev/null +++ b/params_shard_20.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c43263ccf1e9d1d7c2c4bd3d2ed6b9c6c8f465089f555319d86af74006eb8bc9 +size 32063488 diff --git a/params_shard_21.bin b/params_shard_21.bin new file mode 100644 index 0000000000000000000000000000000000000000..8433eb9e7993ce0e4fd820740d817746c7ed937b --- /dev/null +++ b/params_shard_21.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd16a0ff7181a1696ac8224f0aac6726788e383b63d9c92f078fe99c169dd63a +size 22544384 diff --git a/params_shard_22.bin b/params_shard_22.bin new file mode 100644 index 0000000000000000000000000000000000000000..8a4b8bff89bc5732f628682d5a70c8eddb841e02 --- /dev/null +++ b/params_shard_22.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f1e87a1458ef0b335db5d55bd8bb98a1de478f3552d6ff07973eae6b8016c79 +size 45088768 diff --git a/params_shard_23.bin b/params_shard_23.bin new file mode 100644 index 0000000000000000000000000000000000000000..d9ff5aeb68b18b473929865bc227e1d43b1c5cb1 --- /dev/null +++ b/params_shard_23.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92c947a79e0947b1f72eb257df4527789077e25b197f3f4c9f0bbff9dc4a4a7d +size 25165824 diff --git a/params_shard_24.bin b/params_shard_24.bin new file mode 100644 index 0000000000000000000000000000000000000000..46d4590d8f7f50dd677088ad526077e3c3871a11 --- /dev/null +++ b/params_shard_24.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0375622663c5f7ef5276f2c2709e89cea4933e1103b3eb502c7308189f29d16 +size 32587776 diff --git a/params_shard_25.bin b/params_shard_25.bin new file mode 100644 index 0000000000000000000000000000000000000000..6ad8d73dd060d474afa1e3889b9c403fbede84b5 --- /dev/null +++ b/params_shard_25.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1cb09563f7fcc529729708605b7fa96dcd8e391d22c37f765362546f561bac0 +size 45088768 diff --git a/params_shard_26.bin b/params_shard_26.bin new file mode 100644 index 0000000000000000000000000000000000000000..e8e735e65e8ab08d84a8fb45c255478ebe277ba6 --- /dev/null +++ b/params_shard_26.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:300ff7c2640d97113008f79fe533f616fe5929dd518efbb87649f227f2ca009a +size 25165824 diff --git a/params_shard_27.bin b/params_shard_27.bin new file mode 100644 index 0000000000000000000000000000000000000000..32e947c2e2fcdae7bb7eccd01e04f8b7bc038c24 --- /dev/null +++ b/params_shard_27.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec5f5cd20986f6f6e1d4a7bd473aa2f29524e289cba42578701a5426357472ab +size 32063488 diff --git a/params_shard_28.bin b/params_shard_28.bin new file mode 100644 index 0000000000000000000000000000000000000000..f0ada3b4585998a051adae090526ae69b4f327e4 --- /dev/null +++ b/params_shard_28.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e55bed5d851085b90f14fc467b628d961f2c4b4d005068e495513a59187c7924 +size 209715200 diff --git a/params_shard_29.bin b/params_shard_29.bin new file mode 100644 index 0000000000000000000000000000000000000000..9ce50c4bde7780c1933d7c80b55c8bff237d6d04 --- /dev/null +++ b/params_shard_29.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6be26b61186f3167ee2d81f15ce5d2595922e85e6e7d18f53653abfc35753af0 +size 26214400 diff --git a/params_shard_3.bin b/params_shard_3.bin new file mode 100644 index 0000000000000000000000000000000000000000..b4f73022bd9e84c9fa3554a567e8ef3fef3fd054 --- /dev/null +++ b/params_shard_3.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0717c0cc528268e136c2f88f2a67790b3359a63bfbb5bf82658021c3cfdf709e +size 29040640 diff --git a/params_shard_30.bin b/params_shard_30.bin new file mode 100644 index 0000000000000000000000000000000000000000..14977003c9b74aa1d4ae693a28ecc527dfc29340 --- /dev/null +++ b/params_shard_30.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59b82ba8d59f018e634a7101d9d86a6f334fe8e2993f425fe462e8f5ac523245 +size 22544384 diff --git a/params_shard_31.bin b/params_shard_31.bin new file mode 100644 index 0000000000000000000000000000000000000000..8c9f6ec90c814f8d4a9e7f432b8e999dd7fb6dbf --- /dev/null +++ b/params_shard_31.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4cce45326bd6b909ce6160a9d2227c8cb681fa156e9de57c2c2f73641b2e588c +size 45088768 diff --git a/params_shard_32.bin b/params_shard_32.bin new file mode 100644 index 0000000000000000000000000000000000000000..afc3a7e0074b6336b6d9f9b9a88fcc253cdf6d6f --- /dev/null +++ b/params_shard_32.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db753553e967cf87affb994ee7bada3cf304b40e40bf4c1080402fabf3c72e90 +size 25165824 diff --git a/params_shard_33.bin b/params_shard_33.bin new file mode 100644 index 0000000000000000000000000000000000000000..43feb779cebf6073c7dfae4f579142a2881bf4bf --- /dev/null +++ b/params_shard_33.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e84c9db20e7d3fa449e0d5137900871d5265d3ee7d4c44733531c67ed3f61b7 +size 32595968 diff --git a/params_shard_34.bin b/params_shard_34.bin new file mode 100644 index 0000000000000000000000000000000000000000..017dc5b6d04d808cd4d127a57191badfc09d718f --- /dev/null +++ b/params_shard_34.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea5e6b173256edb51d318fd708ff01e617fcdc934f2130a8abf7122b948d6803 +size 45088768 diff --git a/params_shard_35.bin b/params_shard_35.bin new file mode 100644 index 0000000000000000000000000000000000000000..986a3c8d9564cc40579a5b29a71662d399fc299d --- /dev/null +++ b/params_shard_35.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:115accfa9b10528afacca1d2cd342153e2455b14d8408f03bab64f388bfd89e7 +size 25165824 diff --git a/params_shard_36.bin b/params_shard_36.bin new file mode 100644 index 0000000000000000000000000000000000000000..7bd4711875a24f9441f092a4ac993bec7a3e9c43 --- /dev/null +++ b/params_shard_36.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4c672e58fe28a244cb1e70fde666fd6d86cc9071a4f65fc21ecfe50251c2f28 +size 32063488 diff --git a/params_shard_37.bin b/params_shard_37.bin new file mode 100644 index 0000000000000000000000000000000000000000..de64c1cfefbdc2d21f9edc10c674de89b5f93c98 --- /dev/null +++ b/params_shard_37.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51b05b8c5c60f09d39e70460ff4cf7ef644ef37d3ba79fa23e3aeeff4348f425 +size 22544384 diff --git a/params_shard_38.bin b/params_shard_38.bin new file mode 100644 index 0000000000000000000000000000000000000000..53d81394cd5272fb5f5e4c4abd473540c2e1c929 --- /dev/null +++ b/params_shard_38.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b9620818c683f080b47cf2df7ee328512a3353caaaf21e22167c6f9358b1586 +size 45088768 diff --git a/params_shard_39.bin b/params_shard_39.bin new file mode 100644 index 0000000000000000000000000000000000000000..2e615a523a755cb4c2c9628f69b7b272dbf88b16 --- /dev/null +++ b/params_shard_39.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:826b14415be5c76b36aedf28c0e44fecb7b7ec5f6021334bd052a9ddcabb2cfe +size 25165824 diff --git a/params_shard_4.bin b/params_shard_4.bin new file mode 100644 index 0000000000000000000000000000000000000000..4013158a74f8b9a1eb89181274939de88e15fc0c --- /dev/null +++ b/params_shard_4.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:623b1b33c18a796063032baa279ac743a4d57561431f90ce3b1f54cc174b7189 +size 45088768 diff --git a/params_shard_40.bin b/params_shard_40.bin new file mode 100644 index 0000000000000000000000000000000000000000..a286be6b50944315a4ce8b002c791eb9dc5ea129 --- /dev/null +++ b/params_shard_40.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d44a9dc6fc76d0e7e16b5fc1dbe9e4b581bbcbe8c5d23d985f332cc8e06485c5 +size 32587776 diff --git a/params_shard_41.bin b/params_shard_41.bin new file mode 100644 index 0000000000000000000000000000000000000000..0af26a45e1d587e8ef16abd2c60a173fc04d1735 --- /dev/null +++ b/params_shard_41.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fd45cbffa696d8dfc47dddbfeced2bb71f068bde05048baf5b1bd5566d0c80c +size 45088768 diff --git a/params_shard_42.bin b/params_shard_42.bin new file mode 100644 index 0000000000000000000000000000000000000000..d6e560f70fa587c42dd85e3ce39365d8cad8b475 --- /dev/null +++ b/params_shard_42.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16419984f0115df8b03bf6d725046c8ec37473cdc28bc4711812c71bb063f8ff +size 25165824 diff --git a/params_shard_43.bin b/params_shard_43.bin new file mode 100644 index 0000000000000000000000000000000000000000..f223d047e868d989fa41f75021b28450355c92d9 --- /dev/null +++ b/params_shard_43.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1a3800e5be98a73c7e6106d75b6bc3ff8a417a8656c43ed9281d80f4a681ce9 +size 32063488 diff --git a/params_shard_44.bin b/params_shard_44.bin new file mode 100644 index 0000000000000000000000000000000000000000..d825430bfd9ec1d3b9eba496da84765901fe7739 --- /dev/null +++ b/params_shard_44.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f830e0d7a4d1395a6f1015730559b55fdef8ce5708b40fa3bc647f272403a70f +size 22544384 diff --git a/params_shard_45.bin b/params_shard_45.bin new file mode 100644 index 0000000000000000000000000000000000000000..51b20e2dc25fab2e5aa02da503115f3fa1cda619 --- /dev/null +++ b/params_shard_45.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04998ea0ec0899f2dab7deedd4073899d02b2e9b7aeb4653f8a331e191b0bad1 +size 45088768 diff --git a/params_shard_46.bin b/params_shard_46.bin new file mode 100644 index 0000000000000000000000000000000000000000..86fb2a08e7ff81f604fce341700fbdd3fd747124 --- /dev/null +++ b/params_shard_46.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5b8d17982cd906c7db7c8dfabe4f3bf2a4631ab15c44e23f6bc764e70be5652 +size 25165824 diff --git a/params_shard_47.bin b/params_shard_47.bin new file mode 100644 index 0000000000000000000000000000000000000000..569c98caf17dcd5c1b19a2ce3a429e78330f97d2 --- /dev/null +++ b/params_shard_47.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a063c7f8fded5453442cb4dc23703b3aad1aecf9fe4ce3488c248ee5a1d25980 +size 32587776 diff --git a/params_shard_48.bin b/params_shard_48.bin new file mode 100644 index 0000000000000000000000000000000000000000..ea9eb28b2291aef1f5ed53349efb60d56d00f6d4 --- /dev/null +++ b/params_shard_48.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba6bfc2c285d188c571acee76415ddbd2ba06ec79f41f6df5751d4e290a927fa +size 45088768 diff --git a/params_shard_49.bin b/params_shard_49.bin new file mode 100644 index 0000000000000000000000000000000000000000..5c377cd17bb0b4cb19d231f510470721bbdd2710 --- /dev/null +++ b/params_shard_49.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b4c4ec0c0d3958d4a126613799b02960e0d909119777f77cadd505da12275dc +size 25165824 diff --git a/params_shard_5.bin b/params_shard_5.bin new file mode 100644 index 0000000000000000000000000000000000000000..873c6113ebf06eef89df2a4056b112b26f40b5fe --- /dev/null +++ b/params_shard_5.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef2417da80008b17373a0279caf081f2545ef5d700cd7c9451f21cdc5cd948fc +size 31014912 diff --git a/params_shard_50.bin b/params_shard_50.bin new file mode 100644 index 0000000000000000000000000000000000000000..92e010244f1a8b43e2aa47db3af9511768812a21 --- /dev/null +++ b/params_shard_50.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3aeae83321beaa9b84daacf213fe1c38da8b58106bc06ca3e6d090aec9a7d862 +size 32063488 diff --git a/params_shard_51.bin b/params_shard_51.bin new file mode 100644 index 0000000000000000000000000000000000000000..ceadae29d654f00955caeb671ccdf6d84190f089 --- /dev/null +++ b/params_shard_51.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25db2126ccc202e916f04da9dd5f7ea8ad13058cef1a897f5806d4dda6493684 +size 22544384 diff --git a/params_shard_52.bin b/params_shard_52.bin new file mode 100644 index 0000000000000000000000000000000000000000..c7e1c24bf0eaccb767ae176f1444ee6def51077e --- /dev/null +++ b/params_shard_52.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee671e8fdbab374a962bb72499a6f0362f985efe4a6d849591cdd5adcb8442a9 +size 45088768 diff --git a/params_shard_53.bin b/params_shard_53.bin new file mode 100644 index 0000000000000000000000000000000000000000..38e8c73d4a1c0a117ae89509d0a697fd5ececfd3 --- /dev/null +++ b/params_shard_53.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e871ccfe75925495e18f6a6e34c5bf609de92d0999066c20bd28a644f468321d +size 25165824 diff --git a/params_shard_54.bin b/params_shard_54.bin new file mode 100644 index 0000000000000000000000000000000000000000..3761d0d61a89e07b6d75bb742aa38f782c4eb66c --- /dev/null +++ b/params_shard_54.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:584bfd9f045bf6908b5688fc47406e70be200b87a0dd68b6c0a068ae892a67aa +size 32587776 diff --git a/params_shard_55.bin b/params_shard_55.bin new file mode 100644 index 0000000000000000000000000000000000000000..af465dadcb853f171ef6af4bafc18e2eb2f2a99c --- /dev/null +++ b/params_shard_55.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94f566f02df1c0649830d314a3bf8c4b6f24bf8bb1906d8d004f21af7c865270 +size 45088768 diff --git a/params_shard_56.bin b/params_shard_56.bin new file mode 100644 index 0000000000000000000000000000000000000000..c88bf99fa7d7f9678daa27683f4f850e83d887fa --- /dev/null +++ b/params_shard_56.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de3416ca3a028a6847311966aa0cab70287bda1957ebd7282f368c91b4363322 +size 25165824 diff --git a/params_shard_57.bin b/params_shard_57.bin new file mode 100644 index 0000000000000000000000000000000000000000..536a5613b8206963a4195eb3056e3bec0ce95953 --- /dev/null +++ b/params_shard_57.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8dbbb27961d4662f1ed834f6bf437b85a6aa548ea3935a4346d7358c946f0eb7 +size 32063488 diff --git a/params_shard_58.bin b/params_shard_58.bin new file mode 100644 index 0000000000000000000000000000000000000000..107d78af8ecdc427ec530ee87e67130d5ba9ce95 --- /dev/null +++ b/params_shard_58.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f94cee815ffe1a454bdd41b02a1ae4935c70f32f756a2442bbfd083875d0f8fc +size 22544384 diff --git a/params_shard_59.bin b/params_shard_59.bin new file mode 100644 index 0000000000000000000000000000000000000000..0e06662dd7191e1581f8e8f7d19afa3834569cd3 --- /dev/null +++ b/params_shard_59.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ed0b3938a83dfe3bccc2abe3976cb2ab5bd4237169b11b3e238b473a4c01ec6 +size 45088768 diff --git a/params_shard_6.bin b/params_shard_6.bin new file mode 100644 index 0000000000000000000000000000000000000000..8a8600129d068c4c36c93e47b89c0009d588acde --- /dev/null +++ b/params_shard_6.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f28dcb713df7201f2c6cb9e3684edcc69940b4cc6787928b97454c86fbfb7bc8 +size 30810112 diff --git a/params_shard_60.bin b/params_shard_60.bin new file mode 100644 index 0000000000000000000000000000000000000000..fe7310d100ce6a3e90e5c1181f81b9ddd5d1a4f5 --- /dev/null +++ b/params_shard_60.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fe3ddd2f2eaa538c1f66096f04fe87f529146597a041bfc6843d0f78eb8dc11 +size 25165824 diff --git a/params_shard_61.bin b/params_shard_61.bin new file mode 100644 index 0000000000000000000000000000000000000000..0363f1482f9daf97c1c069e0bcf051755f8ee8c9 --- /dev/null +++ b/params_shard_61.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f30932af51ae32ef299877da05e83d593b3cf7ef7d2d6a11177d2af27b9d6b85 +size 32587776 diff --git a/params_shard_62.bin b/params_shard_62.bin new file mode 100644 index 0000000000000000000000000000000000000000..03e0852d4e8769625430c8e89de08f37a7d2dc39 --- /dev/null +++ b/params_shard_62.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f4aa6d27337a9e8ec896f75728db3bc33bf0f7753b9dd74f2ce779609433eb9 +size 45088768 diff --git a/params_shard_63.bin b/params_shard_63.bin new file mode 100644 index 0000000000000000000000000000000000000000..4571f271b5eca8d2dcb405a066410621591fb3a4 --- /dev/null +++ b/params_shard_63.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84bc954a4407ffa20e97c94570e122db05c7e8bf1da69d2d4ea9c3767aea423b +size 25165824 diff --git a/params_shard_64.bin b/params_shard_64.bin new file mode 100644 index 0000000000000000000000000000000000000000..a0862b25b8db22a29bdb42f789449bc5dde36b9b --- /dev/null +++ b/params_shard_64.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b0c5a8189d29e9e57fff1c58eb9086349d24ed82a488f9aa8a9e4f9beca9c8a +size 32063488 diff --git a/params_shard_65.bin b/params_shard_65.bin new file mode 100644 index 0000000000000000000000000000000000000000..366ba199c8a6ed345220bb7ef775c1ccf65fbe12 --- /dev/null +++ b/params_shard_65.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5fd780e4e4224a8982faf8077fc4fc805d8916c70cd9592a3518ff8275c23d5 +size 22544384 diff --git a/params_shard_66.bin b/params_shard_66.bin new file mode 100644 index 0000000000000000000000000000000000000000..264bd5806f620b2861ed1c49193d79b83eec75c7 --- /dev/null +++ b/params_shard_66.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c815a6d43389be21ad472959568fbb1b4652c0384a5e56de867c9d9f5f32b76f +size 45088768 diff --git a/params_shard_67.bin b/params_shard_67.bin new file mode 100644 index 0000000000000000000000000000000000000000..a75b02ca474d13e1b167670d8eefbf18f151d579 --- /dev/null +++ b/params_shard_67.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:188408a0f43d4bca6f400f435c2e09e62308e68fcae4ea579c5b0d51bb9782f4 +size 25165824 diff --git a/params_shard_68.bin b/params_shard_68.bin new file mode 100644 index 0000000000000000000000000000000000000000..bf1c470492bf425c43325bd73fb04e1007e38cb7 --- /dev/null +++ b/params_shard_68.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d129f095a8d366fbd6d7c98adb8ea8a68163727efc010515770816ba3468f3b0 +size 32587776 diff --git a/params_shard_69.bin b/params_shard_69.bin new file mode 100644 index 0000000000000000000000000000000000000000..92643c3199132cc131b257b68721681c5a91f721 --- /dev/null +++ b/params_shard_69.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:115e30290a76e30839ae92b77efc19956b86843e048f585afda760fa330331c9 +size 45088768 diff --git a/params_shard_7.bin b/params_shard_7.bin new file mode 100644 index 0000000000000000000000000000000000000000..f1225310c5d5a6b1d556774975f1527a1a5ef69f --- /dev/null +++ b/params_shard_7.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75d585eca0eb37a8d85781b196a08fbda13fb2836ad87ad126595385b42e81eb +size 22544384 diff --git a/params_shard_70.bin b/params_shard_70.bin new file mode 100644 index 0000000000000000000000000000000000000000..64240088a015c307c1452f9fe86a1d7ba8a43b63 --- /dev/null +++ b/params_shard_70.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6932d752ddbabd23c430db9c904b22de00f982c7036c4b2d982734fdd07f694 +size 25165824 diff --git a/params_shard_71.bin b/params_shard_71.bin new file mode 100644 index 0000000000000000000000000000000000000000..da2d1ed5223959a99eca2f63ba876d309513fa4b --- /dev/null +++ b/params_shard_71.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39e36ebbc8cf720e30057836ed6900e06565055aa4aa27ada990dc5c8b43dd4e +size 32063488 diff --git a/params_shard_72.bin b/params_shard_72.bin new file mode 100644 index 0000000000000000000000000000000000000000..73513aa062e889ed3bf0ae54e008a91e435d1073 --- /dev/null +++ b/params_shard_72.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e89a70a0b98e35c2e40d7411c18b6b6db0c07dca5e04e91c9ff6c73f5a55445 +size 22544384 diff --git a/params_shard_73.bin b/params_shard_73.bin new file mode 100644 index 0000000000000000000000000000000000000000..1db8ea652d72c94a24dce764f970be6dd88c212e --- /dev/null +++ b/params_shard_73.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f41d81b711dc192b23ccc80bfe1056c69fd17f274ab6f9eac49f9aee19368e9d +size 45088768 diff --git a/params_shard_74.bin b/params_shard_74.bin new file mode 100644 index 0000000000000000000000000000000000000000..20a14a9ccca371111dd16d9383eea756d72534e2 --- /dev/null +++ b/params_shard_74.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d93182659f4292c7010ef4288e37adb93dc9044d80d3ef99272575d412e51ff1 +size 25165824 diff --git a/params_shard_75.bin b/params_shard_75.bin new file mode 100644 index 0000000000000000000000000000000000000000..e13b132d96e3ec65a051d80ed394854eb69d302e --- /dev/null +++ b/params_shard_75.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bafb7efd75d86e534117849d29f9bd61e1a732151a984b8e6cd1fc1525c8f040 +size 32587776 diff --git a/params_shard_76.bin b/params_shard_76.bin new file mode 100644 index 0000000000000000000000000000000000000000..50fd9bbca569448634e6ac7badcb0552149405ef --- /dev/null +++ b/params_shard_76.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30ff6d186d77c74805f20d3504e3f168fe1d17ec7ccb5f5b1f9b8d8d8876d411 +size 45088768 diff --git a/params_shard_77.bin b/params_shard_77.bin new file mode 100644 index 0000000000000000000000000000000000000000..536696b0a170dcba9806b6ad73b2ea1f1d7d9ffb --- /dev/null +++ b/params_shard_77.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff211c7d56388a0784fa0a807f821abcf685a18e33f0c6c9db1cd4e99ca4fa51 +size 25165824 diff --git a/params_shard_78.bin b/params_shard_78.bin new file mode 100644 index 0000000000000000000000000000000000000000..b92b2629905eacedcafc177c650c6f9c4edd7a35 --- /dev/null +++ b/params_shard_78.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70477e231cf0a69eba9e2e4b4981edfe5a924fe2e82cb77bcc6d364c0b67980f +size 32063488 diff --git a/params_shard_79.bin b/params_shard_79.bin new file mode 100644 index 0000000000000000000000000000000000000000..acd67ae7555cb941ed4b4fd89bda314d0fad2997 --- /dev/null +++ b/params_shard_79.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b05d66d99568f282007dc6d292bea14aad97b3866de47b1861cb5bfb47b95e72 +size 22544384 diff --git a/params_shard_8.bin b/params_shard_8.bin new file mode 100644 index 0000000000000000000000000000000000000000..6700a169cb464029d9b62e097b6dfe5a783e3b53 --- /dev/null +++ b/params_shard_8.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc83336ea34276d05372f21ce8d8cca9942e849c58d8af85906e48601bac200d +size 45088768 diff --git a/params_shard_80.bin b/params_shard_80.bin new file mode 100644 index 0000000000000000000000000000000000000000..704b02c4d75bd918b18495f10fc343fe47a54b1b --- /dev/null +++ b/params_shard_80.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6680a6e0fff8cb46f2db26bb6f95304a183da5644179f6acd9c631b02c464ed2 +size 45088768 diff --git a/params_shard_81.bin b/params_shard_81.bin new file mode 100644 index 0000000000000000000000000000000000000000..04d97a4d9c257a59b1fd1d11c1641a6587930965 --- /dev/null +++ b/params_shard_81.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71ba55e1cd9579d7e0e68b85f9d26c2dd46d3fdcc00a7dee58ff38e390e91232 +size 25165824 diff --git a/params_shard_82.bin b/params_shard_82.bin new file mode 100644 index 0000000000000000000000000000000000000000..1ceabc3947f5de95d739e09e76a6de29202395ea --- /dev/null +++ b/params_shard_82.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3e4fa13896c5d4e0ac3f678b9e8d8e5207f5ab98eb675ff5738b75e98557183 +size 32587776 diff --git a/params_shard_83.bin b/params_shard_83.bin new file mode 100644 index 0000000000000000000000000000000000000000..0b33e5688de1ae6b54873e82ac571dd6427d11e0 --- /dev/null +++ b/params_shard_83.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fa2e225c6d904453d33b13f3d3c91ed5fa0d03843df8158890e466d5bf05d20 +size 29360128 diff --git a/params_shard_84.bin b/params_shard_84.bin new file mode 100644 index 0000000000000000000000000000000000000000..3a572b3414ec2f379f89afeb531034fed4e21924 --- /dev/null +++ b/params_shard_84.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3dd73f6296a81a26bc4426995baa7ce467b11b60e1d1b09f2648b21f79cf358e +size 31989760 diff --git a/params_shard_85.bin b/params_shard_85.bin new file mode 100644 index 0000000000000000000000000000000000000000..fd9bcb61cecbf07582b910e0ee98b0eaa88b634d --- /dev/null +++ b/params_shard_85.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55f3c86f0879e6b6a0df3ea479a213c8d3d559ad5bd500e14455f824525723f9 +size 45088768 diff --git a/params_shard_86.bin b/params_shard_86.bin new file mode 100644 index 0000000000000000000000000000000000000000..7fc92f07e5e8e7bd329e0d2dfa342d1e348a0f0c --- /dev/null +++ b/params_shard_86.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5abbc4ac85c7d7a0c2aa53b968592edc89a53e0ec6be429dfc6c7971d05decad +size 25165824 diff --git a/params_shard_87.bin b/params_shard_87.bin new file mode 100644 index 0000000000000000000000000000000000000000..a660779042e57f537be066f85ff3fc1695f7cb92 --- /dev/null +++ b/params_shard_87.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e24ce61512b9b2e1f367d41b0cb61d7fa1e8dc215f39d1f84e772906843d703e +size 22544384 diff --git a/params_shard_88.bin b/params_shard_88.bin new file mode 100644 index 0000000000000000000000000000000000000000..8077f6bc45fd8db65700144fc075dcd435a76666 --- /dev/null +++ b/params_shard_88.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d58fd4e54357264c17569649c44e1642c4e90ba05cab2c700ac11689865c0c4e +size 45088768 diff --git a/params_shard_89.bin b/params_shard_89.bin new file mode 100644 index 0000000000000000000000000000000000000000..f461c4e09136778f1d4c75626104343b36d332ea --- /dev/null +++ b/params_shard_89.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d72ca1cfa9628308c69540649f33271bdc219626e01d9159c4ae0a2f40cfae5a +size 25165824 diff --git a/params_shard_9.bin b/params_shard_9.bin new file mode 100644 index 0000000000000000000000000000000000000000..7b48af618f6c982b0600fe0df04843e4defe2b9f --- /dev/null +++ b/params_shard_9.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92afe1b5849b82b67665a09c2976eb40517c80d106c78c9e88b19ac7cd3d87cb +size 25165824 diff --git a/params_shard_90.bin b/params_shard_90.bin new file mode 100644 index 0000000000000000000000000000000000000000..a8f156fa875fcc1d8f12b96942d38774d9511f3f --- /dev/null +++ b/params_shard_90.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8e7e3ec07346e7fa386889cda57276be2764b0ab2ecfd221ddfca62a23c0844 +size 32661504 diff --git a/params_shard_91.bin b/params_shard_91.bin new file mode 100644 index 0000000000000000000000000000000000000000..2c50a819ff2acc3035ddb004fe90aff39e82bcb1 --- /dev/null +++ b/params_shard_91.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56643c38d0193cd9c520648090a575ec68ca65732b6d2223e21519d2d4d5c0ae +size 31989760 diff --git a/params_shard_92.bin b/params_shard_92.bin new file mode 100644 index 0000000000000000000000000000000000000000..0bed434a40fcf754d7988ab3cf9466b063208941 --- /dev/null +++ b/params_shard_92.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23022a466b3c5af0cac82f2dc9645a6802beb327234904c5d0a83c84a22e651a +size 45088768 diff --git a/params_shard_93.bin b/params_shard_93.bin new file mode 100644 index 0000000000000000000000000000000000000000..fbd75c2e60cef1841ed646e628867527db1c8af6 --- /dev/null +++ b/params_shard_93.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38cf642cc426a64f660f0c8007b086c7ddf21ef39bdf8ba141db2ed1602037d4 +size 25165824 diff --git a/params_shard_94.bin b/params_shard_94.bin new file mode 100644 index 0000000000000000000000000000000000000000..e29d4f1b8fb3cd76ce7a91dc545c81c26506e769 --- /dev/null +++ b/params_shard_94.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f09c62f6a6251a169efff06a9aa98e867ebfd7681204dc9e89c63c15a71e74c0 +size 22544384 diff --git a/params_shard_95.bin b/params_shard_95.bin new file mode 100644 index 0000000000000000000000000000000000000000..ccf6a0b7be9ea671216767a0e457e61689f7faaf --- /dev/null +++ b/params_shard_95.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30efe396c500030eab36f7281b34fcf6a8b58f2667aec1ffdd2ae2815d824480 +size 45088768 diff --git a/params_shard_96.bin b/params_shard_96.bin new file mode 100644 index 0000000000000000000000000000000000000000..708c00cc9e6df9dd8cc25691bc6227aae0f2c58e --- /dev/null +++ b/params_shard_96.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a62d23e6e1f4eec8764fe09e8c183bbcbe96ffea2c9db1c1e18e03211b8920d +size 25165824 diff --git a/params_shard_97.bin b/params_shard_97.bin new file mode 100644 index 0000000000000000000000000000000000000000..c482b413a595de88c1b83cb9c6c719271eaa2e11 --- /dev/null +++ b/params_shard_97.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6a80653d6459fc6e6d72533708ffc0362059585870c670602f755b286f3917c +size 32661504 diff --git a/params_shard_98.bin b/params_shard_98.bin new file mode 100644 index 0000000000000000000000000000000000000000..836023a4461a77ee4f12c804c273c5740712ccc6 --- /dev/null +++ b/params_shard_98.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3dc7622a4816cfe351ffba3e2db96b4c20b1acf48fe9b2da7694e37f381705e +size 31989760 diff --git a/params_shard_99.bin b/params_shard_99.bin new file mode 100644 index 0000000000000000000000000000000000000000..808a9b288224d2cdba85721d53fddcf0bd50711b --- /dev/null +++ b/params_shard_99.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0815e138cdd4cf9ff043ef1f751ea4b6426f7d57c59c3c3a408eba79ef02cec3 +size 45088768