diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..52373fe24473b1aa44333d318f578ae6bf04b49b 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..bae9d7a55df368cb23123dd19a00e51360f8907f --- /dev/null +++ b/config.json @@ -0,0 +1,78 @@ +{ + "_name_or_path": "allenai/Llama-3.1-Tulu-3-405B", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 16384, + "initializer_range": 0.02, + "intermediate_size": 53248, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 128, + "num_hidden_layers": 126, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "quantization_config": { + "config_groups": { + "group_0": { + "input_activations": { + "actorder": null, + "block_structure": null, + "dynamic": true, + "group_size": null, + "num_bits": 8, + "observer": null, + "observer_kwargs": {}, + "strategy": "token", + "symmetric": true, + "type": "float" + }, + "output_activations": null, + "targets": [ + "Linear" + ], + "weights": { + "actorder": null, + "block_structure": null, + "dynamic": false, + "group_size": null, + "num_bits": 8, + "observer": "minmax", + "observer_kwargs": {}, + "strategy": "channel", + "symmetric": true, + "type": "float" + } + } + }, + "format": "float-quantized", + "global_compression_ratio": 1.5358243277028012, + "ignore": [ + "lm_head" + ], + "kv_cache_scheme": null, + "quant_method": "compressed-tensors", + "quantization_status": "compressed" + }, + 
"rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.48.1", + "use_cache": false, + "vocab_size": 128264 +} \ No newline at end of file diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..007106e0406b742111f308137a375db48601f26b --- /dev/null +++ b/generation_config.json @@ -0,0 +1,9 @@ +{ + "_from_model_config": true, + "bos_token_id": 128000, + "do_sample": true, + "eos_token_id": 128001, + "temperature": 0.6, + "top_p": 0.9, + "transformers_version": "4.48.1" +} diff --git a/model-00001-of-00086.safetensors b/model-00001-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..df7ee59c848b019184ba5de47776c86c26dd6934 --- /dev/null +++ b/model-00001-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c46b63226d0478c42f8d27ee8d6a1c099248cc17750b863f7e3009c4cd6d19e +size 4773450840 diff --git a/model-00002-of-00086.safetensors b/model-00002-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..21f332985a2a27d7d9f00dd36b0c45708fe1fcdb --- /dev/null +++ b/model-00002-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0439a93087c808004471c940d870f21e6a9af0207cf99b4e5c1d3d1792465103 +size 4933097744 diff --git a/model-00003-of-00086.safetensors b/model-00003-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fe1b1ae3b4b4f083e4b942b31490bd186b3b1f2e --- /dev/null +++ b/model-00003-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e8be4c79853223e84eacc8593148197c607bda41974c22bc33247fc6d13f468 +size 4631063728 diff 
--git a/model-00004-of-00086.safetensors b/model-00004-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..84a58c514562f902737f543064db1db4d12efac8 --- /dev/null +++ b/model-00004-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff1ca49cb51e545f8de6ded1179f49bde69885b3b7eb07a7a659cad0c2c6f981 +size 4933097744 diff --git a/model-00005-of-00086.safetensors b/model-00005-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..57beb243b2d3a28cd6f1d5b8aa006a6ca9573671 --- /dev/null +++ b/model-00005-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f47c039a30f31438c2bd66093314676aae0dfc71d479b2cc954072c344349e81 +size 4631063728 diff --git a/model-00006-of-00086.safetensors b/model-00006-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d4cee1936d8d2af04a113b85836785b29ad05806 --- /dev/null +++ b/model-00006-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcd43f67e1abdc431ff9c73ebbc1280ad61c8e653e2c1e3e576833915300e77c +size 4933097744 diff --git a/model-00007-of-00086.safetensors b/model-00007-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1e095f39536b862e41d1ad3abe95b8143cbcc180 --- /dev/null +++ b/model-00007-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f512d46d3378f42aded3c02f8b49874c0267e5192153f83ddf9b5b4ef6ab971f +size 4631063728 diff --git a/model-00008-of-00086.safetensors b/model-00008-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..01d108425a92511ffbc64d16d60d45273c6bbef7 --- /dev/null +++ b/model-00008-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d8e20449b3463d10e6fbf1803eac8b770a95649455bdd96bd7b2de735b876e2 +size 4933097760 diff --git 
a/model-00009-of-00086.safetensors b/model-00009-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fd789dafb3cdb2f7745fc253569a21f390a0cfc1 --- /dev/null +++ b/model-00009-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7dfbed7d4452f6c7ab41825d8c8935af1d8d0e242c468fc0284d1c211aa8d9b3 +size 4631063752 diff --git a/model-00010-of-00086.safetensors b/model-00010-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8615e3508aeb33541239646538f8b2f8140f5854 --- /dev/null +++ b/model-00010-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d11be0fc4c2acac94593b2b7759023f03f75312481c26b8f640b1eb0f497df8a +size 4933097760 diff --git a/model-00011-of-00086.safetensors b/model-00011-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..91e41ec897a7edc959c1f6cf5cb62bfbea827393 --- /dev/null +++ b/model-00011-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5703c9ed58a935e98e08200cb4a046ab934ce1e61ec94cd7f87007b0d459ab1b +size 4631063752 diff --git a/model-00012-of-00086.safetensors b/model-00012-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b2636f5f925ed561129cb93efba5028ec9a37825 --- /dev/null +++ b/model-00012-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4ee5925adf43c1b48df67952a168ab7f9686e8058f77232c28d3749b28affd8 +size 4933097760 diff --git a/model-00013-of-00086.safetensors b/model-00013-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cda76d0bcde2ad1f26fdad23a53228de06b7f2ef --- /dev/null +++ b/model-00013-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27f62da2b39ddf3e03dad1d69ba9d07ee1628d5d8e919c32b3da319025e5a208 +size 4631063752 diff --git 
a/model-00014-of-00086.safetensors b/model-00014-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..113af0d76a70c6eb77b706eba7d45ba103044f79 --- /dev/null +++ b/model-00014-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83522d2b72c0206f46165259e2a4ce38c4ea4195e58f7a94f58a291784127c8e +size 4933097760 diff --git a/model-00015-of-00086.safetensors b/model-00015-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e92f1476d9218b383d6d49cd51872c08eee7399c --- /dev/null +++ b/model-00015-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7cf7d08606906d3a4f279337fd77dc977b4fb772768df591a16e565e81d94f15 +size 4631063752 diff --git a/model-00016-of-00086.safetensors b/model-00016-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d96a3a50200148fca42b93d226339c091b68a25f --- /dev/null +++ b/model-00016-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db909fc808835052ada8ce62c4883d6e91d97c76808261f25314cd3d4f6c72d9 +size 4933097760 diff --git a/model-00017-of-00086.safetensors b/model-00017-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d3deff7f878c18716a45c31c12867bcc48c8cb85 --- /dev/null +++ b/model-00017-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07b63ca3326dbc7b6e0c72401985568f73f70117b592117c99d60e571873ee89 +size 4631063752 diff --git a/model-00018-of-00086.safetensors b/model-00018-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..263756f35e3f1649a769796b2a72baed2bd65969 --- /dev/null +++ b/model-00018-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fc91c89700b618da56b35d9c577da406a9b1f8c5e2eeb4ac626baae6456fb5b +size 4933097760 diff --git 
a/model-00019-of-00086.safetensors b/model-00019-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..248c2b01c3a49ce993f3a1bdd327be0633e4fbec --- /dev/null +++ b/model-00019-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ad952490a6c1cbe5355bc81bfdc1724df1f917e14af5a0f0351a56769e28d9a +size 4631063752 diff --git a/model-00020-of-00086.safetensors b/model-00020-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fedc2da61d2647baafda1467b2bc5c92e7f0e815 --- /dev/null +++ b/model-00020-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f550ab7daf8e3309a0107228dd44603a60f4a5ccdf7a05136e562acb41c44769 +size 4933097760 diff --git a/model-00021-of-00086.safetensors b/model-00021-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9b3dab629834381346a3929f7c42e79e810e6287 --- /dev/null +++ b/model-00021-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0c1427ea40221f24f7053444f698fbf592e23443ed20361070389526ff807fa +size 4631063752 diff --git a/model-00022-of-00086.safetensors b/model-00022-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b98e2dde056ed520d9bc371c51e5393e6e9efd73 --- /dev/null +++ b/model-00022-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab5367ea72915f6ad1fb766c8c64508b0e4aa508d1c8bf9c757ebee5d2b7616d +size 4933097760 diff --git a/model-00023-of-00086.safetensors b/model-00023-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1a753830a423f36286c1d9f5d0ea7e528da89c87 --- /dev/null +++ b/model-00023-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89943e2b47b0fc6d5751e2751f9911c434e7dc1b29f5c22b8733c35663f98155 +size 4631063752 diff --git 
a/model-00024-of-00086.safetensors b/model-00024-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f0cf4a6adc57660357ce7fbe50ec9d7602353d1f --- /dev/null +++ b/model-00024-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30e2ac30f1c4ec50d5ec0c7f553a3d439037fca9b6bf6c7a3f5d45603a80535c +size 4933097760 diff --git a/model-00025-of-00086.safetensors b/model-00025-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fe38f635398633fd056cc89f75e565048aa91c9e --- /dev/null +++ b/model-00025-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7520becc7adf57f413c0aae817dd376ef9d44677593dbae6a7fbeb5785b9158 +size 4631063752 diff --git a/model-00026-of-00086.safetensors b/model-00026-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8bc28bae4cf507126fb92356b7dfde483c15c96a --- /dev/null +++ b/model-00026-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82733a936d7b6a4855b9b482d8bb8d97a64dead51a75b8590325f354a0627148 +size 4933097760 diff --git a/model-00027-of-00086.safetensors b/model-00027-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..28c0841318f26d70b62d42b355ac1bb9b9f3f2a8 --- /dev/null +++ b/model-00027-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:552cb54964e90c3c64e83fb854d1052bfe4102ae0d592a86b4b94836e88f20af +size 4631063752 diff --git a/model-00028-of-00086.safetensors b/model-00028-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9dcfa2bbd3c26016410b69b87cb89cc7b5d3fa71 --- /dev/null +++ b/model-00028-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:469355551869c706d62b2e21f88fe91dc23772210fab3c8966f717309e99912f +size 4933097760 diff --git 
a/model-00029-of-00086.safetensors b/model-00029-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..add28346da3aa0ba8910f132f221293243658d76 --- /dev/null +++ b/model-00029-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8003feae6115d63f8733a149b2d1c6cec485ed0c7d6efa8d42445a766ff518d9 +size 4631063752 diff --git a/model-00030-of-00086.safetensors b/model-00030-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..eafc88305450f3918fb8dee17586d393448ecee1 --- /dev/null +++ b/model-00030-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a27d725b44fd676a1abbf2b4eb6dacfbd5e569a9c5aaf514da494b93dbd51cae +size 4933097760 diff --git a/model-00031-of-00086.safetensors b/model-00031-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b1bad414869ba9ecdf35018bbe1d5cfafed18eee --- /dev/null +++ b/model-00031-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e85074cf2ddf3b389ab5e1209e30f78c923e4b12067c079ebfda002e059000e0 +size 4631063752 diff --git a/model-00032-of-00086.safetensors b/model-00032-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..eea7e603d9250311f2f88bddf8b2212da78e6074 --- /dev/null +++ b/model-00032-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2e2b804cd839226cde121188db91b8df9e91480f714f50ef799b3f32a6cc52b +size 4933097760 diff --git a/model-00033-of-00086.safetensors b/model-00033-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..49af5e3c73a4648f04f3703338096e4f67469121 --- /dev/null +++ b/model-00033-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3771556183bff4b88e31c330961ec60134e10da85399f3f06bd09a7c4c74a2e1 +size 4631063752 diff --git 
a/model-00034-of-00086.safetensors b/model-00034-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0640e59b933404384cef3bff9e763cbae1c1319a --- /dev/null +++ b/model-00034-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c4c20d53b2df17e8345bc3d0790db582b3bd973776c2f48c9af0f0dd77b5355 +size 4933097760 diff --git a/model-00035-of-00086.safetensors b/model-00035-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..494e50f8b184b13ba7bf02cfaad474856976c7d8 --- /dev/null +++ b/model-00035-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab3703f68582b5d1a1cc2a07daac406fae2de8461b82107649d21c72b5a1676e +size 4631063752 diff --git a/model-00036-of-00086.safetensors b/model-00036-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..24ecabf3a65099dfd587896d4bc581dc4c21aa15 --- /dev/null +++ b/model-00036-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37da0e9dffab3afaa333dd8a37bf5b6d58d9a2d1246a71922eb9ba6d9c599d41 +size 4933097760 diff --git a/model-00037-of-00086.safetensors b/model-00037-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5f56371bc34849ca522542b1d9661cbaa0894be2 --- /dev/null +++ b/model-00037-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d25c55ae81d5b957e4a8a9de9141f3a8d8f70fd5e750afee3be9e6776c8c952 +size 4631063752 diff --git a/model-00038-of-00086.safetensors b/model-00038-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2e3a36fe8157cbc2603175b0996882a939adf753 --- /dev/null +++ b/model-00038-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce2f96ff6d58f958c9ea000e8a2ace9ef6b77f18fcc35b6fccaf6ad15496f8b0 +size 4933097760 diff --git 
a/model-00039-of-00086.safetensors b/model-00039-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bf5baf37668c608aee41f749fef02817f84dcd0b --- /dev/null +++ b/model-00039-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ace23934e6bdd9eef92a2a2cf4b167daaebeabebab4cf0beac9700439e61b28 +size 4631063752 diff --git a/model-00040-of-00086.safetensors b/model-00040-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fcf7eb28da8598db31e79049d070f0d050f384b0 --- /dev/null +++ b/model-00040-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d8d4c7858612fcb756d6cc3840556707f54cab2247d862b1585cf94e8b97a88 +size 4933097760 diff --git a/model-00041-of-00086.safetensors b/model-00041-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0ee924c04929593a327571a9f5cd5af44fe0da66 --- /dev/null +++ b/model-00041-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b63b3e48dfd0957089d52239cb8fc10cacad15d2576a42e2e1c86dccdadf937e +size 4631063752 diff --git a/model-00042-of-00086.safetensors b/model-00042-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bdb19042158ceea0484b5248d56c293a4077ec65 --- /dev/null +++ b/model-00042-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ea4e39718586a49acced4bd73e48625fb04b4f36f129efc72091d574b645bbd +size 4933097760 diff --git a/model-00043-of-00086.safetensors b/model-00043-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..05a6efcd490bad49e05c1a45c874dffe883b5a5b --- /dev/null +++ b/model-00043-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b854f3abadcad4e1e923d4a36abd1a1648234f14709f2866024f606c4faf0861 +size 4631063752 diff --git 
a/model-00044-of-00086.safetensors b/model-00044-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ff1f89b15677c7527600c7197284f584098b2222 --- /dev/null +++ b/model-00044-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7773a9e84447e9dcab1317a6440b1700b0390885fb6fceec661d4579904c63ea +size 4933097760 diff --git a/model-00045-of-00086.safetensors b/model-00045-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9b2200c7990eca85006c22f0e596e578ced6b2de --- /dev/null +++ b/model-00045-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f33ccfc982a6917a8467245724f7b8c3214b7135da1c502e37c77ed6e235aff0 +size 4631063752 diff --git a/model-00046-of-00086.safetensors b/model-00046-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a234a4121ba62914c8f459c5ebad27d7b234acb4 --- /dev/null +++ b/model-00046-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3f68c8376c585271e0a60da98e69d5501f33a946004fff0c305617e1e4135de +size 4933097760 diff --git a/model-00047-of-00086.safetensors b/model-00047-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..592cb8dbc934277b132080726e4b8dcbc5cbff89 --- /dev/null +++ b/model-00047-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56183217745985c16f12c7e2307334713b485973656d8590b27182e83b6b0da0 +size 4631063752 diff --git a/model-00048-of-00086.safetensors b/model-00048-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..67b7b65ba74cc68c63801f4a38e874e771e6bc67 --- /dev/null +++ b/model-00048-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e9e24f66e77f359125eed91156c8e15a6e40b12ce77c4eb9d44f4bb379036c5 +size 4933097760 diff --git 
a/model-00049-of-00086.safetensors b/model-00049-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a6fd2bd4d8a2e61bc7e00af7117cc0964066419f --- /dev/null +++ b/model-00049-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7b56c07031b82eb4626b3e17e85189df26c0327a81df0e68a99de95324f0e69 +size 4631063752 diff --git a/model-00050-of-00086.safetensors b/model-00050-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..48cbe04560303ac0441d83c9739a56dcf022adde --- /dev/null +++ b/model-00050-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d3c4ecf31c6bc120053ce7129f96859e54e64559d9aa0f1437c5d15c4ff0366 +size 4933097760 diff --git a/model-00051-of-00086.safetensors b/model-00051-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1524a53d829c86cbd16babedb9805c4aeb8621b3 --- /dev/null +++ b/model-00051-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d66a5c69c3b17c1a8ad1c938e1b837b2e212aabf5595e28fa5a9ec5dcd0af5f +size 4631063752 diff --git a/model-00052-of-00086.safetensors b/model-00052-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ff73c83debf96f163c6890c66ee6280963e198d6 --- /dev/null +++ b/model-00052-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fdf6d9622719950505f635d06d5808615cbb05fac4333640510adca73bf12c3 +size 4933097760 diff --git a/model-00053-of-00086.safetensors b/model-00053-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..faddce5b0521b16eaa766c92e343baa69e44e68d --- /dev/null +++ b/model-00053-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c34b34af87b17dba4bcd18655f75f9b68d8361884c9827f23c4c6e36adf85d6 +size 4631063752 diff --git 
a/model-00054-of-00086.safetensors b/model-00054-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a83fe8d85b602a6cf51f788b23e2bae8153561f4 --- /dev/null +++ b/model-00054-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e62b661e398359ad9c2e5ff30581c6f2a0d4c496fd340cb8ff6fb8d9aaeee72b +size 4933097760 diff --git a/model-00055-of-00086.safetensors b/model-00055-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f9d291943c8f5796c856c6d370eb06d98dc476f8 --- /dev/null +++ b/model-00055-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d35a7c9618d4af821df48a17437684d76bca0013310ee427cdc74584e6df3dd +size 4631063752 diff --git a/model-00056-of-00086.safetensors b/model-00056-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..745401b7c71421fe4cf0e7168139e13d10722c54 --- /dev/null +++ b/model-00056-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b519af8f4343afe466219d422e28c214aeee56df5d64723a3b0cf0ddc71cc5d4 +size 4933097760 diff --git a/model-00057-of-00086.safetensors b/model-00057-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..20c3944555c000bca5f8e949d951b043eb427194 --- /dev/null +++ b/model-00057-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f84f6584484089b81a3cea7cc3660006f3728e67f6b82f16e3f7d354a50e9ded +size 4631063752 diff --git a/model-00058-of-00086.safetensors b/model-00058-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..23938f8ed221e097628e990346cc93341d85c8d6 --- /dev/null +++ b/model-00058-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6caff5ac94b014f8d836cce48ccfd1988d44d63a91fc6fa01f21b0e3d050f340 +size 4933097760 diff --git 
a/model-00059-of-00086.safetensors b/model-00059-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..36ee5bfbe9551909f9f7218faeba93888e3dffb5 --- /dev/null +++ b/model-00059-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea2f42639f6405430e44eff7941b49ee6e34869da1525e1201467a99a6b73a60 +size 4631063752 diff --git a/model-00060-of-00086.safetensors b/model-00060-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7968648555d7dab711168e2df6347c4345df1858 --- /dev/null +++ b/model-00060-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2607d33012b88083e830b1cd38b83aa672de0b2882ac96f309a4f3aa4059ebb1 +size 4933097760 diff --git a/model-00061-of-00086.safetensors b/model-00061-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bdd53d4d38a24cca602fdaf50b44d9ecadfb8bbb --- /dev/null +++ b/model-00061-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3936f8bd2bee4afff69b681e4ff4680a0fc5d1e1f5a96626d95c767cc26b1dfa +size 4631063752 diff --git a/model-00062-of-00086.safetensors b/model-00062-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3a25471bc4fdf497254b6c93f478c12a98ab11df --- /dev/null +++ b/model-00062-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c4b5c36d0942d63dcc908ab8fb943ff3243756e859450cb6d1dc3fe02de4305 +size 4933097760 diff --git a/model-00063-of-00086.safetensors b/model-00063-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4ccdbff763b13cf5537ba6732795acc78fa7980b --- /dev/null +++ b/model-00063-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80a21abe7120971dd50930fa5a93030a0c16a911113d130ae05132a351882a9b +size 4631063752 diff --git 
a/model-00064-of-00086.safetensors b/model-00064-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..db6c2f34d09ee4759a62141c8cd15036f29764a3 --- /dev/null +++ b/model-00064-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b06e720e6dca0ee5a021c8263dc636d665d5bd998273a09fcef3000e29b1b3c0 +size 4933097760 diff --git a/model-00065-of-00086.safetensors b/model-00065-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f6065f51dd0e8864f6629ceeddb10027b34e0181 --- /dev/null +++ b/model-00065-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5318cec62c9bf43b14aa856a56ecaedff1ed97394bfc6490927e4c6eb903114b +size 4631063752 diff --git a/model-00066-of-00086.safetensors b/model-00066-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f88066885eb38a7821b06ffdc297ffee6b59e50a --- /dev/null +++ b/model-00066-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27642b292a4c5c4dad7f4543b79819644f3b599ebfccdff403efac8c9b1a209d +size 4933097760 diff --git a/model-00067-of-00086.safetensors b/model-00067-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..898f4c5ab97dca4b9bd6fa0d4c039d9232399132 --- /dev/null +++ b/model-00067-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b624e660b7a11a9c377beaeaad37ce344dc2483fdf22d9c5899e654f253cc673 +size 4631063752 diff --git a/model-00068-of-00086.safetensors b/model-00068-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0c41d1a8a5a5ac03ca8d9484a6443f6ba8cd8c81 --- /dev/null +++ b/model-00068-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fc62a71c95e0179c5fff58183be590717f9eb02560016f4557ad748abdc828e +size 4933097776 diff --git 
a/model-00069-of-00086.safetensors b/model-00069-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..355c395457125a13a08d31f78f81b8f37fa886f1 --- /dev/null +++ b/model-00069-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9ef36f1dc36b3bb27fbf118ba51648a4321e6c0aaa9b7549eec37b66e0db120 +size 4631063784 diff --git a/model-00070-of-00086.safetensors b/model-00070-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2cc9516290381627728cf9a0f9ef74a0e52e62bd --- /dev/null +++ b/model-00070-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f376c049bec0d8e8e700bae712a9411d4979ebb37d7c315c03c1ab793a05839 +size 4933097784 diff --git a/model-00071-of-00086.safetensors b/model-00071-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..12476bd3aeb06ec982190437f7441b42f6049d99 --- /dev/null +++ b/model-00071-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:768e514404b2c1350e8f4b53d772761e51937bc2278eb6147e7ccc70da748ecb +size 4631063784 diff --git a/model-00072-of-00086.safetensors b/model-00072-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0f12a864ed578c46066b2a0064f112a4a49b09c8 --- /dev/null +++ b/model-00072-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9f0927aa31641eb3a22a42029d8af6db7cd2e33fb286b8a5dad19366f5f2c03 +size 4933097784 diff --git a/model-00073-of-00086.safetensors b/model-00073-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e14343eacd9a5af5f8f2fef7e4c1f863abf465f1 --- /dev/null +++ b/model-00073-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75d8cfc94f2a8efa24b77e2fcfb02e80c4c2945df0e2fa6c948f9282dd67be0f +size 4631063784 diff --git 
a/model-00074-of-00086.safetensors b/model-00074-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..525c9cbe836fb1f222e366d8530f0d2f1d3f65b3 --- /dev/null +++ b/model-00074-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b2fb97bd50fdb59f2d8d771fbf4e220eb7dd7dbb8ffdb5880802bea62916b76 +size 4933097784 diff --git a/model-00075-of-00086.safetensors b/model-00075-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..644607e4b3755c42aa8e60c52a3d76dc0bda53d0 --- /dev/null +++ b/model-00075-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91815939d8e1f68cbbd5bae38fdbbe7a8b699614249d877c8f5b120c7786ea7e +size 4631063784 diff --git a/model-00076-of-00086.safetensors b/model-00076-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d0bec8f411147d22518ceb71660e75080ef75211 --- /dev/null +++ b/model-00076-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a9b1bf23296548605d26bd7e4ef020da329f485b2b28d80c9a35f865347f7ed +size 4933097784 diff --git a/model-00077-of-00086.safetensors b/model-00077-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2930e7763ee47e73aec53414b4fbabe896b9eb1c --- /dev/null +++ b/model-00077-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f57136b7716dcbf57cf81c97a6ad7bf4b5b771b80b444150ff435937b65d215c +size 4631063784 diff --git a/model-00078-of-00086.safetensors b/model-00078-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..935c68dfec36e3965fdb508c324c5199a6ae491e --- /dev/null +++ b/model-00078-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44014d8baea460ea3bf524a2287e847d1c4be8d32fc672842c0440890266f994 +size 4933097784 diff --git 
a/model-00079-of-00086.safetensors b/model-00079-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3a9e405897eda9f848de47d9612edf3a8a736408 --- /dev/null +++ b/model-00079-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d990653d6fe2e3536c5af61feecd029e89a019bc2e7e8fae1995c585ff2a2bd +size 4631063784 diff --git a/model-00080-of-00086.safetensors b/model-00080-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0f81b3c6246b8f1a0c1a76058e87eed8029bc0c4 --- /dev/null +++ b/model-00080-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77b4cb7b2852a8b32ed333945ccc7ff68fb3069aade6d5b4094e18b65e66d0c9 +size 4933097784 diff --git a/model-00081-of-00086.safetensors b/model-00081-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dda0fc40f05b59dda9cb72f42700105adb919cad --- /dev/null +++ b/model-00081-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:106343a9669c2e4e16e5723d1fad7b26090dda63131c1021f2b298e3ef36d6e0 +size 4631063784 diff --git a/model-00082-of-00086.safetensors b/model-00082-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5c3b9f08ebd384984f9bfc43f5e408ce7044ea46 --- /dev/null +++ b/model-00082-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1e09c39733ac1ec2c8cec9287cc0560eab8f8b81cb7e47513d4b5d474b143f1 +size 4933097784 diff --git a/model-00083-of-00086.safetensors b/model-00083-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cd148ee0f8ff77f5ad99dfb5b9018029be716483 --- /dev/null +++ b/model-00083-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:776b71e4ff42d84657cf4e0cac7ca20134be4f4790ec8132859fa810e8b59322 +size 4631063784 diff --git 
a/model-00084-of-00086.safetensors b/model-00084-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e57492f5aaf87966c153d8abb987cd9b37dc8a46 --- /dev/null +++ b/model-00084-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:289fdb74a9f1341f7b75765b02cd2bd8857070468ebfb8c8a49cd81b8a2d3add +size 4933097784 diff --git a/model-00085-of-00086.safetensors b/model-00085-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8bdacf61149116ab98f1835beae4cec58d7df3bb --- /dev/null +++ b/model-00085-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d87aa1e02b15cedeccd2a7d80dedec4f0f91e9ed97c849ac9adcefaebfba3f3 +size 4060600704 diff --git a/model-00086-of-00086.safetensors b/model-00086-of-00086.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..82cf784192e3662f9b355f67e009a7d7ea3a0f4b --- /dev/null +++ b/model-00086-of-00086.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c2e4187e1b1bf25f455a530fd8f2e90f78e79391ce55400a1a6d9629b30522e +size 4202954880 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..b4c6ffcf2f3ada848da073ec381f4f41d7991e6c --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,2026 @@ +{ + "metadata": { + "total_size": 410100490240 + }, + "weight_map": { + "lm_head.weight": "model-00086-of-00086.safetensors", + "model.embed_tokens.weight": "model-00001-of-00086.safetensors", + "model.layers.0.input_layernorm.weight": "model-00002-of-00086.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00002-of-00086.safetensors", + "model.layers.0.mlp.down_proj.weight_scale": "model-00002-of-00086.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00002-of-00086.safetensors", + "model.layers.0.mlp.gate_proj.weight_scale": 
"model-00002-of-00086.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00002-of-00086.safetensors", + "model.layers.0.mlp.up_proj.weight_scale": "model-00002-of-00086.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00002-of-00086.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00086.safetensors", + "model.layers.0.self_attn.k_proj.weight_scale": "model-00001-of-00086.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00086.safetensors", + "model.layers.0.self_attn.o_proj.weight_scale": "model-00001-of-00086.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00086.safetensors", + "model.layers.0.self_attn.q_proj.weight_scale": "model-00001-of-00086.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00086.safetensors", + "model.layers.0.self_attn.v_proj.weight_scale": "model-00001-of-00086.safetensors", + "model.layers.1.input_layernorm.weight": "model-00003-of-00086.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00003-of-00086.safetensors", + "model.layers.1.mlp.down_proj.weight_scale": "model-00003-of-00086.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00002-of-00086.safetensors", + "model.layers.1.mlp.gate_proj.weight_scale": "model-00002-of-00086.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00002-of-00086.safetensors", + "model.layers.1.mlp.up_proj.weight_scale": "model-00002-of-00086.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00003-of-00086.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00002-of-00086.safetensors", + "model.layers.1.self_attn.k_proj.weight_scale": "model-00002-of-00086.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00002-of-00086.safetensors", + "model.layers.1.self_attn.o_proj.weight_scale": "model-00002-of-00086.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00002-of-00086.safetensors", 
+ "model.layers.1.self_attn.q_proj.weight_scale": "model-00002-of-00086.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00002-of-00086.safetensors", + "model.layers.1.self_attn.v_proj.weight_scale": "model-00002-of-00086.safetensors", + "model.layers.10.input_layernorm.weight": "model-00009-of-00086.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00009-of-00086.safetensors", + "model.layers.10.mlp.down_proj.weight_scale": "model-00009-of-00086.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00008-of-00086.safetensors", + "model.layers.10.mlp.gate_proj.weight_scale": "model-00008-of-00086.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00008-of-00086.safetensors", + "model.layers.10.mlp.up_proj.weight_scale": "model-00008-of-00086.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00009-of-00086.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00008-of-00086.safetensors", + "model.layers.10.self_attn.k_proj.weight_scale": "model-00008-of-00086.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00008-of-00086.safetensors", + "model.layers.10.self_attn.o_proj.weight_scale": "model-00008-of-00086.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00008-of-00086.safetensors", + "model.layers.10.self_attn.q_proj.weight_scale": "model-00008-of-00086.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00008-of-00086.safetensors", + "model.layers.10.self_attn.v_proj.weight_scale": "model-00008-of-00086.safetensors", + "model.layers.100.input_layernorm.weight": "model-00069-of-00086.safetensors", + "model.layers.100.mlp.down_proj.weight": "model-00069-of-00086.safetensors", + "model.layers.100.mlp.down_proj.weight_scale": "model-00069-of-00086.safetensors", + "model.layers.100.mlp.gate_proj.weight": "model-00068-of-00086.safetensors", + "model.layers.100.mlp.gate_proj.weight_scale": "model-00068-of-00086.safetensors", + 
"model.layers.100.mlp.up_proj.weight": "model-00068-of-00086.safetensors", + "model.layers.100.mlp.up_proj.weight_scale": "model-00068-of-00086.safetensors", + "model.layers.100.post_attention_layernorm.weight": "model-00069-of-00086.safetensors", + "model.layers.100.self_attn.k_proj.weight": "model-00068-of-00086.safetensors", + "model.layers.100.self_attn.k_proj.weight_scale": "model-00068-of-00086.safetensors", + "model.layers.100.self_attn.o_proj.weight": "model-00068-of-00086.safetensors", + "model.layers.100.self_attn.o_proj.weight_scale": "model-00068-of-00086.safetensors", + "model.layers.100.self_attn.q_proj.weight": "model-00068-of-00086.safetensors", + "model.layers.100.self_attn.q_proj.weight_scale": "model-00068-of-00086.safetensors", + "model.layers.100.self_attn.v_proj.weight": "model-00068-of-00086.safetensors", + "model.layers.100.self_attn.v_proj.weight_scale": "model-00068-of-00086.safetensors", + "model.layers.101.input_layernorm.weight": "model-00069-of-00086.safetensors", + "model.layers.101.mlp.down_proj.weight": "model-00069-of-00086.safetensors", + "model.layers.101.mlp.down_proj.weight_scale": "model-00069-of-00086.safetensors", + "model.layers.101.mlp.gate_proj.weight": "model-00069-of-00086.safetensors", + "model.layers.101.mlp.gate_proj.weight_scale": "model-00069-of-00086.safetensors", + "model.layers.101.mlp.up_proj.weight": "model-00069-of-00086.safetensors", + "model.layers.101.mlp.up_proj.weight_scale": "model-00069-of-00086.safetensors", + "model.layers.101.post_attention_layernorm.weight": "model-00069-of-00086.safetensors", + "model.layers.101.self_attn.k_proj.weight": "model-00069-of-00086.safetensors", + "model.layers.101.self_attn.k_proj.weight_scale": "model-00069-of-00086.safetensors", + "model.layers.101.self_attn.o_proj.weight": "model-00069-of-00086.safetensors", + "model.layers.101.self_attn.o_proj.weight_scale": "model-00069-of-00086.safetensors", + "model.layers.101.self_attn.q_proj.weight": 
"model-00069-of-00086.safetensors", + "model.layers.101.self_attn.q_proj.weight_scale": "model-00069-of-00086.safetensors", + "model.layers.101.self_attn.v_proj.weight": "model-00069-of-00086.safetensors", + "model.layers.101.self_attn.v_proj.weight_scale": "model-00069-of-00086.safetensors", + "model.layers.102.input_layernorm.weight": "model-00070-of-00086.safetensors", + "model.layers.102.mlp.down_proj.weight": "model-00070-of-00086.safetensors", + "model.layers.102.mlp.down_proj.weight_scale": "model-00070-of-00086.safetensors", + "model.layers.102.mlp.gate_proj.weight": "model-00070-of-00086.safetensors", + "model.layers.102.mlp.gate_proj.weight_scale": "model-00070-of-00086.safetensors", + "model.layers.102.mlp.up_proj.weight": "model-00070-of-00086.safetensors", + "model.layers.102.mlp.up_proj.weight_scale": "model-00070-of-00086.safetensors", + "model.layers.102.post_attention_layernorm.weight": "model-00070-of-00086.safetensors", + "model.layers.102.self_attn.k_proj.weight": "model-00069-of-00086.safetensors", + "model.layers.102.self_attn.k_proj.weight_scale": "model-00069-of-00086.safetensors", + "model.layers.102.self_attn.o_proj.weight": "model-00069-of-00086.safetensors", + "model.layers.102.self_attn.o_proj.weight_scale": "model-00069-of-00086.safetensors", + "model.layers.102.self_attn.q_proj.weight": "model-00069-of-00086.safetensors", + "model.layers.102.self_attn.q_proj.weight_scale": "model-00069-of-00086.safetensors", + "model.layers.102.self_attn.v_proj.weight": "model-00069-of-00086.safetensors", + "model.layers.102.self_attn.v_proj.weight_scale": "model-00069-of-00086.safetensors", + "model.layers.103.input_layernorm.weight": "model-00071-of-00086.safetensors", + "model.layers.103.mlp.down_proj.weight": "model-00071-of-00086.safetensors", + "model.layers.103.mlp.down_proj.weight_scale": "model-00071-of-00086.safetensors", + "model.layers.103.mlp.gate_proj.weight": "model-00070-of-00086.safetensors", + 
"model.layers.103.mlp.gate_proj.weight_scale": "model-00070-of-00086.safetensors", + "model.layers.103.mlp.up_proj.weight": "model-00070-of-00086.safetensors", + "model.layers.103.mlp.up_proj.weight_scale": "model-00070-of-00086.safetensors", + "model.layers.103.post_attention_layernorm.weight": "model-00071-of-00086.safetensors", + "model.layers.103.self_attn.k_proj.weight": "model-00070-of-00086.safetensors", + "model.layers.103.self_attn.k_proj.weight_scale": "model-00070-of-00086.safetensors", + "model.layers.103.self_attn.o_proj.weight": "model-00070-of-00086.safetensors", + "model.layers.103.self_attn.o_proj.weight_scale": "model-00070-of-00086.safetensors", + "model.layers.103.self_attn.q_proj.weight": "model-00070-of-00086.safetensors", + "model.layers.103.self_attn.q_proj.weight_scale": "model-00070-of-00086.safetensors", + "model.layers.103.self_attn.v_proj.weight": "model-00070-of-00086.safetensors", + "model.layers.103.self_attn.v_proj.weight_scale": "model-00070-of-00086.safetensors", + "model.layers.104.input_layernorm.weight": "model-00071-of-00086.safetensors", + "model.layers.104.mlp.down_proj.weight": "model-00071-of-00086.safetensors", + "model.layers.104.mlp.down_proj.weight_scale": "model-00071-of-00086.safetensors", + "model.layers.104.mlp.gate_proj.weight": "model-00071-of-00086.safetensors", + "model.layers.104.mlp.gate_proj.weight_scale": "model-00071-of-00086.safetensors", + "model.layers.104.mlp.up_proj.weight": "model-00071-of-00086.safetensors", + "model.layers.104.mlp.up_proj.weight_scale": "model-00071-of-00086.safetensors", + "model.layers.104.post_attention_layernorm.weight": "model-00071-of-00086.safetensors", + "model.layers.104.self_attn.k_proj.weight": "model-00071-of-00086.safetensors", + "model.layers.104.self_attn.k_proj.weight_scale": "model-00071-of-00086.safetensors", + "model.layers.104.self_attn.o_proj.weight": "model-00071-of-00086.safetensors", + "model.layers.104.self_attn.o_proj.weight_scale": 
"model-00071-of-00086.safetensors", + "model.layers.104.self_attn.q_proj.weight": "model-00071-of-00086.safetensors", + "model.layers.104.self_attn.q_proj.weight_scale": "model-00071-of-00086.safetensors", + "model.layers.104.self_attn.v_proj.weight": "model-00071-of-00086.safetensors", + "model.layers.104.self_attn.v_proj.weight_scale": "model-00071-of-00086.safetensors", + "model.layers.105.input_layernorm.weight": "model-00072-of-00086.safetensors", + "model.layers.105.mlp.down_proj.weight": "model-00072-of-00086.safetensors", + "model.layers.105.mlp.down_proj.weight_scale": "model-00072-of-00086.safetensors", + "model.layers.105.mlp.gate_proj.weight": "model-00072-of-00086.safetensors", + "model.layers.105.mlp.gate_proj.weight_scale": "model-00072-of-00086.safetensors", + "model.layers.105.mlp.up_proj.weight": "model-00072-of-00086.safetensors", + "model.layers.105.mlp.up_proj.weight_scale": "model-00072-of-00086.safetensors", + "model.layers.105.post_attention_layernorm.weight": "model-00072-of-00086.safetensors", + "model.layers.105.self_attn.k_proj.weight": "model-00071-of-00086.safetensors", + "model.layers.105.self_attn.k_proj.weight_scale": "model-00071-of-00086.safetensors", + "model.layers.105.self_attn.o_proj.weight": "model-00071-of-00086.safetensors", + "model.layers.105.self_attn.o_proj.weight_scale": "model-00071-of-00086.safetensors", + "model.layers.105.self_attn.q_proj.weight": "model-00071-of-00086.safetensors", + "model.layers.105.self_attn.q_proj.weight_scale": "model-00071-of-00086.safetensors", + "model.layers.105.self_attn.v_proj.weight": "model-00071-of-00086.safetensors", + "model.layers.105.self_attn.v_proj.weight_scale": "model-00071-of-00086.safetensors", + "model.layers.106.input_layernorm.weight": "model-00073-of-00086.safetensors", + "model.layers.106.mlp.down_proj.weight": "model-00073-of-00086.safetensors", + "model.layers.106.mlp.down_proj.weight_scale": "model-00073-of-00086.safetensors", + 
"model.layers.106.mlp.gate_proj.weight": "model-00072-of-00086.safetensors", + "model.layers.106.mlp.gate_proj.weight_scale": "model-00072-of-00086.safetensors", + "model.layers.106.mlp.up_proj.weight": "model-00072-of-00086.safetensors", + "model.layers.106.mlp.up_proj.weight_scale": "model-00072-of-00086.safetensors", + "model.layers.106.post_attention_layernorm.weight": "model-00073-of-00086.safetensors", + "model.layers.106.self_attn.k_proj.weight": "model-00072-of-00086.safetensors", + "model.layers.106.self_attn.k_proj.weight_scale": "model-00072-of-00086.safetensors", + "model.layers.106.self_attn.o_proj.weight": "model-00072-of-00086.safetensors", + "model.layers.106.self_attn.o_proj.weight_scale": "model-00072-of-00086.safetensors", + "model.layers.106.self_attn.q_proj.weight": "model-00072-of-00086.safetensors", + "model.layers.106.self_attn.q_proj.weight_scale": "model-00072-of-00086.safetensors", + "model.layers.106.self_attn.v_proj.weight": "model-00072-of-00086.safetensors", + "model.layers.106.self_attn.v_proj.weight_scale": "model-00072-of-00086.safetensors", + "model.layers.107.input_layernorm.weight": "model-00073-of-00086.safetensors", + "model.layers.107.mlp.down_proj.weight": "model-00073-of-00086.safetensors", + "model.layers.107.mlp.down_proj.weight_scale": "model-00073-of-00086.safetensors", + "model.layers.107.mlp.gate_proj.weight": "model-00073-of-00086.safetensors", + "model.layers.107.mlp.gate_proj.weight_scale": "model-00073-of-00086.safetensors", + "model.layers.107.mlp.up_proj.weight": "model-00073-of-00086.safetensors", + "model.layers.107.mlp.up_proj.weight_scale": "model-00073-of-00086.safetensors", + "model.layers.107.post_attention_layernorm.weight": "model-00073-of-00086.safetensors", + "model.layers.107.self_attn.k_proj.weight": "model-00073-of-00086.safetensors", + "model.layers.107.self_attn.k_proj.weight_scale": "model-00073-of-00086.safetensors", + "model.layers.107.self_attn.o_proj.weight": 
"model-00073-of-00086.safetensors", + "model.layers.107.self_attn.o_proj.weight_scale": "model-00073-of-00086.safetensors", + "model.layers.107.self_attn.q_proj.weight": "model-00073-of-00086.safetensors", + "model.layers.107.self_attn.q_proj.weight_scale": "model-00073-of-00086.safetensors", + "model.layers.107.self_attn.v_proj.weight": "model-00073-of-00086.safetensors", + "model.layers.107.self_attn.v_proj.weight_scale": "model-00073-of-00086.safetensors", + "model.layers.108.input_layernorm.weight": "model-00074-of-00086.safetensors", + "model.layers.108.mlp.down_proj.weight": "model-00074-of-00086.safetensors", + "model.layers.108.mlp.down_proj.weight_scale": "model-00074-of-00086.safetensors", + "model.layers.108.mlp.gate_proj.weight": "model-00074-of-00086.safetensors", + "model.layers.108.mlp.gate_proj.weight_scale": "model-00074-of-00086.safetensors", + "model.layers.108.mlp.up_proj.weight": "model-00074-of-00086.safetensors", + "model.layers.108.mlp.up_proj.weight_scale": "model-00074-of-00086.safetensors", + "model.layers.108.post_attention_layernorm.weight": "model-00074-of-00086.safetensors", + "model.layers.108.self_attn.k_proj.weight": "model-00073-of-00086.safetensors", + "model.layers.108.self_attn.k_proj.weight_scale": "model-00073-of-00086.safetensors", + "model.layers.108.self_attn.o_proj.weight": "model-00073-of-00086.safetensors", + "model.layers.108.self_attn.o_proj.weight_scale": "model-00073-of-00086.safetensors", + "model.layers.108.self_attn.q_proj.weight": "model-00073-of-00086.safetensors", + "model.layers.108.self_attn.q_proj.weight_scale": "model-00073-of-00086.safetensors", + "model.layers.108.self_attn.v_proj.weight": "model-00073-of-00086.safetensors", + "model.layers.108.self_attn.v_proj.weight_scale": "model-00073-of-00086.safetensors", + "model.layers.109.input_layernorm.weight": "model-00075-of-00086.safetensors", + "model.layers.109.mlp.down_proj.weight": "model-00075-of-00086.safetensors", + 
"model.layers.109.mlp.down_proj.weight_scale": "model-00075-of-00086.safetensors", + "model.layers.109.mlp.gate_proj.weight": "model-00074-of-00086.safetensors", + "model.layers.109.mlp.gate_proj.weight_scale": "model-00074-of-00086.safetensors", + "model.layers.109.mlp.up_proj.weight": "model-00074-of-00086.safetensors", + "model.layers.109.mlp.up_proj.weight_scale": "model-00074-of-00086.safetensors", + "model.layers.109.post_attention_layernorm.weight": "model-00075-of-00086.safetensors", + "model.layers.109.self_attn.k_proj.weight": "model-00074-of-00086.safetensors", + "model.layers.109.self_attn.k_proj.weight_scale": "model-00074-of-00086.safetensors", + "model.layers.109.self_attn.o_proj.weight": "model-00074-of-00086.safetensors", + "model.layers.109.self_attn.o_proj.weight_scale": "model-00074-of-00086.safetensors", + "model.layers.109.self_attn.q_proj.weight": "model-00074-of-00086.safetensors", + "model.layers.109.self_attn.q_proj.weight_scale": "model-00074-of-00086.safetensors", + "model.layers.109.self_attn.v_proj.weight": "model-00074-of-00086.safetensors", + "model.layers.109.self_attn.v_proj.weight_scale": "model-00074-of-00086.safetensors", + "model.layers.11.input_layernorm.weight": "model-00009-of-00086.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00009-of-00086.safetensors", + "model.layers.11.mlp.down_proj.weight_scale": "model-00009-of-00086.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00009-of-00086.safetensors", + "model.layers.11.mlp.gate_proj.weight_scale": "model-00009-of-00086.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00009-of-00086.safetensors", + "model.layers.11.mlp.up_proj.weight_scale": "model-00009-of-00086.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00009-of-00086.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00009-of-00086.safetensors", + "model.layers.11.self_attn.k_proj.weight_scale": "model-00009-of-00086.safetensors", + 
"model.layers.11.self_attn.o_proj.weight": "model-00009-of-00086.safetensors", + "model.layers.11.self_attn.o_proj.weight_scale": "model-00009-of-00086.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00009-of-00086.safetensors", + "model.layers.11.self_attn.q_proj.weight_scale": "model-00009-of-00086.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00009-of-00086.safetensors", + "model.layers.11.self_attn.v_proj.weight_scale": "model-00009-of-00086.safetensors", + "model.layers.110.input_layernorm.weight": "model-00075-of-00086.safetensors", + "model.layers.110.mlp.down_proj.weight": "model-00075-of-00086.safetensors", + "model.layers.110.mlp.down_proj.weight_scale": "model-00075-of-00086.safetensors", + "model.layers.110.mlp.gate_proj.weight": "model-00075-of-00086.safetensors", + "model.layers.110.mlp.gate_proj.weight_scale": "model-00075-of-00086.safetensors", + "model.layers.110.mlp.up_proj.weight": "model-00075-of-00086.safetensors", + "model.layers.110.mlp.up_proj.weight_scale": "model-00075-of-00086.safetensors", + "model.layers.110.post_attention_layernorm.weight": "model-00075-of-00086.safetensors", + "model.layers.110.self_attn.k_proj.weight": "model-00075-of-00086.safetensors", + "model.layers.110.self_attn.k_proj.weight_scale": "model-00075-of-00086.safetensors", + "model.layers.110.self_attn.o_proj.weight": "model-00075-of-00086.safetensors", + "model.layers.110.self_attn.o_proj.weight_scale": "model-00075-of-00086.safetensors", + "model.layers.110.self_attn.q_proj.weight": "model-00075-of-00086.safetensors", + "model.layers.110.self_attn.q_proj.weight_scale": "model-00075-of-00086.safetensors", + "model.layers.110.self_attn.v_proj.weight": "model-00075-of-00086.safetensors", + "model.layers.110.self_attn.v_proj.weight_scale": "model-00075-of-00086.safetensors", + "model.layers.111.input_layernorm.weight": "model-00076-of-00086.safetensors", + "model.layers.111.mlp.down_proj.weight": 
"model-00076-of-00086.safetensors", + "model.layers.111.mlp.down_proj.weight_scale": "model-00076-of-00086.safetensors", + "model.layers.111.mlp.gate_proj.weight": "model-00076-of-00086.safetensors", + "model.layers.111.mlp.gate_proj.weight_scale": "model-00076-of-00086.safetensors", + "model.layers.111.mlp.up_proj.weight": "model-00076-of-00086.safetensors", + "model.layers.111.mlp.up_proj.weight_scale": "model-00076-of-00086.safetensors", + "model.layers.111.post_attention_layernorm.weight": "model-00076-of-00086.safetensors", + "model.layers.111.self_attn.k_proj.weight": "model-00075-of-00086.safetensors", + "model.layers.111.self_attn.k_proj.weight_scale": "model-00075-of-00086.safetensors", + "model.layers.111.self_attn.o_proj.weight": "model-00075-of-00086.safetensors", + "model.layers.111.self_attn.o_proj.weight_scale": "model-00075-of-00086.safetensors", + "model.layers.111.self_attn.q_proj.weight": "model-00075-of-00086.safetensors", + "model.layers.111.self_attn.q_proj.weight_scale": "model-00075-of-00086.safetensors", + "model.layers.111.self_attn.v_proj.weight": "model-00075-of-00086.safetensors", + "model.layers.111.self_attn.v_proj.weight_scale": "model-00075-of-00086.safetensors", + "model.layers.112.input_layernorm.weight": "model-00077-of-00086.safetensors", + "model.layers.112.mlp.down_proj.weight": "model-00077-of-00086.safetensors", + "model.layers.112.mlp.down_proj.weight_scale": "model-00077-of-00086.safetensors", + "model.layers.112.mlp.gate_proj.weight": "model-00076-of-00086.safetensors", + "model.layers.112.mlp.gate_proj.weight_scale": "model-00076-of-00086.safetensors", + "model.layers.112.mlp.up_proj.weight": "model-00076-of-00086.safetensors", + "model.layers.112.mlp.up_proj.weight_scale": "model-00076-of-00086.safetensors", + "model.layers.112.post_attention_layernorm.weight": "model-00077-of-00086.safetensors", + "model.layers.112.self_attn.k_proj.weight": "model-00076-of-00086.safetensors", + 
"model.layers.112.self_attn.k_proj.weight_scale": "model-00076-of-00086.safetensors", + "model.layers.112.self_attn.o_proj.weight": "model-00076-of-00086.safetensors", + "model.layers.112.self_attn.o_proj.weight_scale": "model-00076-of-00086.safetensors", + "model.layers.112.self_attn.q_proj.weight": "model-00076-of-00086.safetensors", + "model.layers.112.self_attn.q_proj.weight_scale": "model-00076-of-00086.safetensors", + "model.layers.112.self_attn.v_proj.weight": "model-00076-of-00086.safetensors", + "model.layers.112.self_attn.v_proj.weight_scale": "model-00076-of-00086.safetensors", + "model.layers.113.input_layernorm.weight": "model-00077-of-00086.safetensors", + "model.layers.113.mlp.down_proj.weight": "model-00077-of-00086.safetensors", + "model.layers.113.mlp.down_proj.weight_scale": "model-00077-of-00086.safetensors", + "model.layers.113.mlp.gate_proj.weight": "model-00077-of-00086.safetensors", + "model.layers.113.mlp.gate_proj.weight_scale": "model-00077-of-00086.safetensors", + "model.layers.113.mlp.up_proj.weight": "model-00077-of-00086.safetensors", + "model.layers.113.mlp.up_proj.weight_scale": "model-00077-of-00086.safetensors", + "model.layers.113.post_attention_layernorm.weight": "model-00077-of-00086.safetensors", + "model.layers.113.self_attn.k_proj.weight": "model-00077-of-00086.safetensors", + "model.layers.113.self_attn.k_proj.weight_scale": "model-00077-of-00086.safetensors", + "model.layers.113.self_attn.o_proj.weight": "model-00077-of-00086.safetensors", + "model.layers.113.self_attn.o_proj.weight_scale": "model-00077-of-00086.safetensors", + "model.layers.113.self_attn.q_proj.weight": "model-00077-of-00086.safetensors", + "model.layers.113.self_attn.q_proj.weight_scale": "model-00077-of-00086.safetensors", + "model.layers.113.self_attn.v_proj.weight": "model-00077-of-00086.safetensors", + "model.layers.113.self_attn.v_proj.weight_scale": "model-00077-of-00086.safetensors", + "model.layers.114.input_layernorm.weight": 
"model-00078-of-00086.safetensors", + "model.layers.114.mlp.down_proj.weight": "model-00078-of-00086.safetensors", + "model.layers.114.mlp.down_proj.weight_scale": "model-00078-of-00086.safetensors", + "model.layers.114.mlp.gate_proj.weight": "model-00078-of-00086.safetensors", + "model.layers.114.mlp.gate_proj.weight_scale": "model-00078-of-00086.safetensors", + "model.layers.114.mlp.up_proj.weight": "model-00078-of-00086.safetensors", + "model.layers.114.mlp.up_proj.weight_scale": "model-00078-of-00086.safetensors", + "model.layers.114.post_attention_layernorm.weight": "model-00078-of-00086.safetensors", + "model.layers.114.self_attn.k_proj.weight": "model-00077-of-00086.safetensors", + "model.layers.114.self_attn.k_proj.weight_scale": "model-00077-of-00086.safetensors", + "model.layers.114.self_attn.o_proj.weight": "model-00077-of-00086.safetensors", + "model.layers.114.self_attn.o_proj.weight_scale": "model-00077-of-00086.safetensors", + "model.layers.114.self_attn.q_proj.weight": "model-00077-of-00086.safetensors", + "model.layers.114.self_attn.q_proj.weight_scale": "model-00077-of-00086.safetensors", + "model.layers.114.self_attn.v_proj.weight": "model-00077-of-00086.safetensors", + "model.layers.114.self_attn.v_proj.weight_scale": "model-00077-of-00086.safetensors", + "model.layers.115.input_layernorm.weight": "model-00079-of-00086.safetensors", + "model.layers.115.mlp.down_proj.weight": "model-00079-of-00086.safetensors", + "model.layers.115.mlp.down_proj.weight_scale": "model-00079-of-00086.safetensors", + "model.layers.115.mlp.gate_proj.weight": "model-00078-of-00086.safetensors", + "model.layers.115.mlp.gate_proj.weight_scale": "model-00078-of-00086.safetensors", + "model.layers.115.mlp.up_proj.weight": "model-00078-of-00086.safetensors", + "model.layers.115.mlp.up_proj.weight_scale": "model-00078-of-00086.safetensors", + "model.layers.115.post_attention_layernorm.weight": "model-00079-of-00086.safetensors", + "model.layers.115.self_attn.k_proj.weight": 
"model-00078-of-00086.safetensors", + "model.layers.115.self_attn.k_proj.weight_scale": "model-00078-of-00086.safetensors", + "model.layers.115.self_attn.o_proj.weight": "model-00078-of-00086.safetensors", + "model.layers.115.self_attn.o_proj.weight_scale": "model-00078-of-00086.safetensors", + "model.layers.115.self_attn.q_proj.weight": "model-00078-of-00086.safetensors", + "model.layers.115.self_attn.q_proj.weight_scale": "model-00078-of-00086.safetensors", + "model.layers.115.self_attn.v_proj.weight": "model-00078-of-00086.safetensors", + "model.layers.115.self_attn.v_proj.weight_scale": "model-00078-of-00086.safetensors", + "model.layers.116.input_layernorm.weight": "model-00079-of-00086.safetensors", + "model.layers.116.mlp.down_proj.weight": "model-00079-of-00086.safetensors", + "model.layers.116.mlp.down_proj.weight_scale": "model-00079-of-00086.safetensors", + "model.layers.116.mlp.gate_proj.weight": "model-00079-of-00086.safetensors", + "model.layers.116.mlp.gate_proj.weight_scale": "model-00079-of-00086.safetensors", + "model.layers.116.mlp.up_proj.weight": "model-00079-of-00086.safetensors", + "model.layers.116.mlp.up_proj.weight_scale": "model-00079-of-00086.safetensors", + "model.layers.116.post_attention_layernorm.weight": "model-00079-of-00086.safetensors", + "model.layers.116.self_attn.k_proj.weight": "model-00079-of-00086.safetensors", + "model.layers.116.self_attn.k_proj.weight_scale": "model-00079-of-00086.safetensors", + "model.layers.116.self_attn.o_proj.weight": "model-00079-of-00086.safetensors", + "model.layers.116.self_attn.o_proj.weight_scale": "model-00079-of-00086.safetensors", + "model.layers.116.self_attn.q_proj.weight": "model-00079-of-00086.safetensors", + "model.layers.116.self_attn.q_proj.weight_scale": "model-00079-of-00086.safetensors", + "model.layers.116.self_attn.v_proj.weight": "model-00079-of-00086.safetensors", + "model.layers.116.self_attn.v_proj.weight_scale": "model-00079-of-00086.safetensors", + 
"model.layers.117.input_layernorm.weight": "model-00080-of-00086.safetensors", + "model.layers.117.mlp.down_proj.weight": "model-00080-of-00086.safetensors", + "model.layers.117.mlp.down_proj.weight_scale": "model-00080-of-00086.safetensors", + "model.layers.117.mlp.gate_proj.weight": "model-00080-of-00086.safetensors", + "model.layers.117.mlp.gate_proj.weight_scale": "model-00080-of-00086.safetensors", + "model.layers.117.mlp.up_proj.weight": "model-00080-of-00086.safetensors", + "model.layers.117.mlp.up_proj.weight_scale": "model-00080-of-00086.safetensors", + "model.layers.117.post_attention_layernorm.weight": "model-00080-of-00086.safetensors", + "model.layers.117.self_attn.k_proj.weight": "model-00079-of-00086.safetensors", + "model.layers.117.self_attn.k_proj.weight_scale": "model-00079-of-00086.safetensors", + "model.layers.117.self_attn.o_proj.weight": "model-00079-of-00086.safetensors", + "model.layers.117.self_attn.o_proj.weight_scale": "model-00079-of-00086.safetensors", + "model.layers.117.self_attn.q_proj.weight": "model-00079-of-00086.safetensors", + "model.layers.117.self_attn.q_proj.weight_scale": "model-00079-of-00086.safetensors", + "model.layers.117.self_attn.v_proj.weight": "model-00079-of-00086.safetensors", + "model.layers.117.self_attn.v_proj.weight_scale": "model-00079-of-00086.safetensors", + "model.layers.118.input_layernorm.weight": "model-00081-of-00086.safetensors", + "model.layers.118.mlp.down_proj.weight": "model-00081-of-00086.safetensors", + "model.layers.118.mlp.down_proj.weight_scale": "model-00081-of-00086.safetensors", + "model.layers.118.mlp.gate_proj.weight": "model-00080-of-00086.safetensors", + "model.layers.118.mlp.gate_proj.weight_scale": "model-00080-of-00086.safetensors", + "model.layers.118.mlp.up_proj.weight": "model-00080-of-00086.safetensors", + "model.layers.118.mlp.up_proj.weight_scale": "model-00080-of-00086.safetensors", + "model.layers.118.post_attention_layernorm.weight": "model-00081-of-00086.safetensors", + 
"model.layers.118.self_attn.k_proj.weight": "model-00080-of-00086.safetensors", + "model.layers.118.self_attn.k_proj.weight_scale": "model-00080-of-00086.safetensors", + "model.layers.118.self_attn.o_proj.weight": "model-00080-of-00086.safetensors", + "model.layers.118.self_attn.o_proj.weight_scale": "model-00080-of-00086.safetensors", + "model.layers.118.self_attn.q_proj.weight": "model-00080-of-00086.safetensors", + "model.layers.118.self_attn.q_proj.weight_scale": "model-00080-of-00086.safetensors", + "model.layers.118.self_attn.v_proj.weight": "model-00080-of-00086.safetensors", + "model.layers.118.self_attn.v_proj.weight_scale": "model-00080-of-00086.safetensors", + "model.layers.119.input_layernorm.weight": "model-00081-of-00086.safetensors", + "model.layers.119.mlp.down_proj.weight": "model-00081-of-00086.safetensors", + "model.layers.119.mlp.down_proj.weight_scale": "model-00081-of-00086.safetensors", + "model.layers.119.mlp.gate_proj.weight": "model-00081-of-00086.safetensors", + "model.layers.119.mlp.gate_proj.weight_scale": "model-00081-of-00086.safetensors", + "model.layers.119.mlp.up_proj.weight": "model-00081-of-00086.safetensors", + "model.layers.119.mlp.up_proj.weight_scale": "model-00081-of-00086.safetensors", + "model.layers.119.post_attention_layernorm.weight": "model-00081-of-00086.safetensors", + "model.layers.119.self_attn.k_proj.weight": "model-00081-of-00086.safetensors", + "model.layers.119.self_attn.k_proj.weight_scale": "model-00081-of-00086.safetensors", + "model.layers.119.self_attn.o_proj.weight": "model-00081-of-00086.safetensors", + "model.layers.119.self_attn.o_proj.weight_scale": "model-00081-of-00086.safetensors", + "model.layers.119.self_attn.q_proj.weight": "model-00081-of-00086.safetensors", + "model.layers.119.self_attn.q_proj.weight_scale": "model-00081-of-00086.safetensors", + "model.layers.119.self_attn.v_proj.weight": "model-00081-of-00086.safetensors", + "model.layers.119.self_attn.v_proj.weight_scale": 
"model-00081-of-00086.safetensors", + "model.layers.12.input_layernorm.weight": "model-00010-of-00086.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00010-of-00086.safetensors", + "model.layers.12.mlp.down_proj.weight_scale": "model-00010-of-00086.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00010-of-00086.safetensors", + "model.layers.12.mlp.gate_proj.weight_scale": "model-00010-of-00086.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00010-of-00086.safetensors", + "model.layers.12.mlp.up_proj.weight_scale": "model-00010-of-00086.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00010-of-00086.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00009-of-00086.safetensors", + "model.layers.12.self_attn.k_proj.weight_scale": "model-00009-of-00086.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00009-of-00086.safetensors", + "model.layers.12.self_attn.o_proj.weight_scale": "model-00009-of-00086.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00009-of-00086.safetensors", + "model.layers.12.self_attn.q_proj.weight_scale": "model-00009-of-00086.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00009-of-00086.safetensors", + "model.layers.12.self_attn.v_proj.weight_scale": "model-00009-of-00086.safetensors", + "model.layers.120.input_layernorm.weight": "model-00082-of-00086.safetensors", + "model.layers.120.mlp.down_proj.weight": "model-00082-of-00086.safetensors", + "model.layers.120.mlp.down_proj.weight_scale": "model-00082-of-00086.safetensors", + "model.layers.120.mlp.gate_proj.weight": "model-00082-of-00086.safetensors", + "model.layers.120.mlp.gate_proj.weight_scale": "model-00082-of-00086.safetensors", + "model.layers.120.mlp.up_proj.weight": "model-00082-of-00086.safetensors", + "model.layers.120.mlp.up_proj.weight_scale": "model-00082-of-00086.safetensors", + "model.layers.120.post_attention_layernorm.weight": 
"model-00082-of-00086.safetensors", + "model.layers.120.self_attn.k_proj.weight": "model-00081-of-00086.safetensors", + "model.layers.120.self_attn.k_proj.weight_scale": "model-00081-of-00086.safetensors", + "model.layers.120.self_attn.o_proj.weight": "model-00081-of-00086.safetensors", + "model.layers.120.self_attn.o_proj.weight_scale": "model-00081-of-00086.safetensors", + "model.layers.120.self_attn.q_proj.weight": "model-00081-of-00086.safetensors", + "model.layers.120.self_attn.q_proj.weight_scale": "model-00081-of-00086.safetensors", + "model.layers.120.self_attn.v_proj.weight": "model-00081-of-00086.safetensors", + "model.layers.120.self_attn.v_proj.weight_scale": "model-00081-of-00086.safetensors", + "model.layers.121.input_layernorm.weight": "model-00083-of-00086.safetensors", + "model.layers.121.mlp.down_proj.weight": "model-00083-of-00086.safetensors", + "model.layers.121.mlp.down_proj.weight_scale": "model-00083-of-00086.safetensors", + "model.layers.121.mlp.gate_proj.weight": "model-00082-of-00086.safetensors", + "model.layers.121.mlp.gate_proj.weight_scale": "model-00082-of-00086.safetensors", + "model.layers.121.mlp.up_proj.weight": "model-00082-of-00086.safetensors", + "model.layers.121.mlp.up_proj.weight_scale": "model-00082-of-00086.safetensors", + "model.layers.121.post_attention_layernorm.weight": "model-00083-of-00086.safetensors", + "model.layers.121.self_attn.k_proj.weight": "model-00082-of-00086.safetensors", + "model.layers.121.self_attn.k_proj.weight_scale": "model-00082-of-00086.safetensors", + "model.layers.121.self_attn.o_proj.weight": "model-00082-of-00086.safetensors", + "model.layers.121.self_attn.o_proj.weight_scale": "model-00082-of-00086.safetensors", + "model.layers.121.self_attn.q_proj.weight": "model-00082-of-00086.safetensors", + "model.layers.121.self_attn.q_proj.weight_scale": "model-00082-of-00086.safetensors", + "model.layers.121.self_attn.v_proj.weight": "model-00082-of-00086.safetensors", + 
"model.layers.121.self_attn.v_proj.weight_scale": "model-00082-of-00086.safetensors", + "model.layers.122.input_layernorm.weight": "model-00083-of-00086.safetensors", + "model.layers.122.mlp.down_proj.weight": "model-00083-of-00086.safetensors", + "model.layers.122.mlp.down_proj.weight_scale": "model-00083-of-00086.safetensors", + "model.layers.122.mlp.gate_proj.weight": "model-00083-of-00086.safetensors", + "model.layers.122.mlp.gate_proj.weight_scale": "model-00083-of-00086.safetensors", + "model.layers.122.mlp.up_proj.weight": "model-00083-of-00086.safetensors", + "model.layers.122.mlp.up_proj.weight_scale": "model-00083-of-00086.safetensors", + "model.layers.122.post_attention_layernorm.weight": "model-00083-of-00086.safetensors", + "model.layers.122.self_attn.k_proj.weight": "model-00083-of-00086.safetensors", + "model.layers.122.self_attn.k_proj.weight_scale": "model-00083-of-00086.safetensors", + "model.layers.122.self_attn.o_proj.weight": "model-00083-of-00086.safetensors", + "model.layers.122.self_attn.o_proj.weight_scale": "model-00083-of-00086.safetensors", + "model.layers.122.self_attn.q_proj.weight": "model-00083-of-00086.safetensors", + "model.layers.122.self_attn.q_proj.weight_scale": "model-00083-of-00086.safetensors", + "model.layers.122.self_attn.v_proj.weight": "model-00083-of-00086.safetensors", + "model.layers.122.self_attn.v_proj.weight_scale": "model-00083-of-00086.safetensors", + "model.layers.123.input_layernorm.weight": "model-00084-of-00086.safetensors", + "model.layers.123.mlp.down_proj.weight": "model-00084-of-00086.safetensors", + "model.layers.123.mlp.down_proj.weight_scale": "model-00084-of-00086.safetensors", + "model.layers.123.mlp.gate_proj.weight": "model-00084-of-00086.safetensors", + "model.layers.123.mlp.gate_proj.weight_scale": "model-00084-of-00086.safetensors", + "model.layers.123.mlp.up_proj.weight": "model-00084-of-00086.safetensors", + "model.layers.123.mlp.up_proj.weight_scale": "model-00084-of-00086.safetensors", + 
"model.layers.123.post_attention_layernorm.weight": "model-00084-of-00086.safetensors", + "model.layers.123.self_attn.k_proj.weight": "model-00083-of-00086.safetensors", + "model.layers.123.self_attn.k_proj.weight_scale": "model-00083-of-00086.safetensors", + "model.layers.123.self_attn.o_proj.weight": "model-00083-of-00086.safetensors", + "model.layers.123.self_attn.o_proj.weight_scale": "model-00083-of-00086.safetensors", + "model.layers.123.self_attn.q_proj.weight": "model-00083-of-00086.safetensors", + "model.layers.123.self_attn.q_proj.weight_scale": "model-00083-of-00086.safetensors", + "model.layers.123.self_attn.v_proj.weight": "model-00083-of-00086.safetensors", + "model.layers.123.self_attn.v_proj.weight_scale": "model-00083-of-00086.safetensors", + "model.layers.124.input_layernorm.weight": "model-00085-of-00086.safetensors", + "model.layers.124.mlp.down_proj.weight": "model-00085-of-00086.safetensors", + "model.layers.124.mlp.down_proj.weight_scale": "model-00085-of-00086.safetensors", + "model.layers.124.mlp.gate_proj.weight": "model-00084-of-00086.safetensors", + "model.layers.124.mlp.gate_proj.weight_scale": "model-00084-of-00086.safetensors", + "model.layers.124.mlp.up_proj.weight": "model-00084-of-00086.safetensors", + "model.layers.124.mlp.up_proj.weight_scale": "model-00084-of-00086.safetensors", + "model.layers.124.post_attention_layernorm.weight": "model-00085-of-00086.safetensors", + "model.layers.124.self_attn.k_proj.weight": "model-00084-of-00086.safetensors", + "model.layers.124.self_attn.k_proj.weight_scale": "model-00084-of-00086.safetensors", + "model.layers.124.self_attn.o_proj.weight": "model-00084-of-00086.safetensors", + "model.layers.124.self_attn.o_proj.weight_scale": "model-00084-of-00086.safetensors", + "model.layers.124.self_attn.q_proj.weight": "model-00084-of-00086.safetensors", + "model.layers.124.self_attn.q_proj.weight_scale": "model-00084-of-00086.safetensors", + "model.layers.124.self_attn.v_proj.weight": 
"model-00084-of-00086.safetensors", + "model.layers.124.self_attn.v_proj.weight_scale": "model-00084-of-00086.safetensors", + "model.layers.125.input_layernorm.weight": "model-00085-of-00086.safetensors", + "model.layers.125.mlp.down_proj.weight": "model-00085-of-00086.safetensors", + "model.layers.125.mlp.down_proj.weight_scale": "model-00085-of-00086.safetensors", + "model.layers.125.mlp.gate_proj.weight": "model-00085-of-00086.safetensors", + "model.layers.125.mlp.gate_proj.weight_scale": "model-00085-of-00086.safetensors", + "model.layers.125.mlp.up_proj.weight": "model-00085-of-00086.safetensors", + "model.layers.125.mlp.up_proj.weight_scale": "model-00085-of-00086.safetensors", + "model.layers.125.post_attention_layernorm.weight": "model-00085-of-00086.safetensors", + "model.layers.125.self_attn.k_proj.weight": "model-00085-of-00086.safetensors", + "model.layers.125.self_attn.k_proj.weight_scale": "model-00085-of-00086.safetensors", + "model.layers.125.self_attn.o_proj.weight": "model-00085-of-00086.safetensors", + "model.layers.125.self_attn.o_proj.weight_scale": "model-00085-of-00086.safetensors", + "model.layers.125.self_attn.q_proj.weight": "model-00085-of-00086.safetensors", + "model.layers.125.self_attn.q_proj.weight_scale": "model-00085-of-00086.safetensors", + "model.layers.125.self_attn.v_proj.weight": "model-00085-of-00086.safetensors", + "model.layers.125.self_attn.v_proj.weight_scale": "model-00085-of-00086.safetensors", + "model.layers.13.input_layernorm.weight": "model-00011-of-00086.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00011-of-00086.safetensors", + "model.layers.13.mlp.down_proj.weight_scale": "model-00011-of-00086.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00010-of-00086.safetensors", + "model.layers.13.mlp.gate_proj.weight_scale": "model-00010-of-00086.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00010-of-00086.safetensors", + "model.layers.13.mlp.up_proj.weight_scale": 
"model-00010-of-00086.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00011-of-00086.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00010-of-00086.safetensors", + "model.layers.13.self_attn.k_proj.weight_scale": "model-00010-of-00086.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00010-of-00086.safetensors", + "model.layers.13.self_attn.o_proj.weight_scale": "model-00010-of-00086.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00010-of-00086.safetensors", + "model.layers.13.self_attn.q_proj.weight_scale": "model-00010-of-00086.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00010-of-00086.safetensors", + "model.layers.13.self_attn.v_proj.weight_scale": "model-00010-of-00086.safetensors", + "model.layers.14.input_layernorm.weight": "model-00011-of-00086.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00011-of-00086.safetensors", + "model.layers.14.mlp.down_proj.weight_scale": "model-00011-of-00086.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00011-of-00086.safetensors", + "model.layers.14.mlp.gate_proj.weight_scale": "model-00011-of-00086.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00011-of-00086.safetensors", + "model.layers.14.mlp.up_proj.weight_scale": "model-00011-of-00086.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00011-of-00086.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00011-of-00086.safetensors", + "model.layers.14.self_attn.k_proj.weight_scale": "model-00011-of-00086.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00011-of-00086.safetensors", + "model.layers.14.self_attn.o_proj.weight_scale": "model-00011-of-00086.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00011-of-00086.safetensors", + "model.layers.14.self_attn.q_proj.weight_scale": "model-00011-of-00086.safetensors", + "model.layers.14.self_attn.v_proj.weight": 
"model-00011-of-00086.safetensors", + "model.layers.14.self_attn.v_proj.weight_scale": "model-00011-of-00086.safetensors", + "model.layers.15.input_layernorm.weight": "model-00012-of-00086.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00012-of-00086.safetensors", + "model.layers.15.mlp.down_proj.weight_scale": "model-00012-of-00086.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00012-of-00086.safetensors", + "model.layers.15.mlp.gate_proj.weight_scale": "model-00012-of-00086.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00012-of-00086.safetensors", + "model.layers.15.mlp.up_proj.weight_scale": "model-00012-of-00086.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00012-of-00086.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00011-of-00086.safetensors", + "model.layers.15.self_attn.k_proj.weight_scale": "model-00011-of-00086.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00011-of-00086.safetensors", + "model.layers.15.self_attn.o_proj.weight_scale": "model-00011-of-00086.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00011-of-00086.safetensors", + "model.layers.15.self_attn.q_proj.weight_scale": "model-00011-of-00086.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00011-of-00086.safetensors", + "model.layers.15.self_attn.v_proj.weight_scale": "model-00011-of-00086.safetensors", + "model.layers.16.input_layernorm.weight": "model-00013-of-00086.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00013-of-00086.safetensors", + "model.layers.16.mlp.down_proj.weight_scale": "model-00013-of-00086.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00012-of-00086.safetensors", + "model.layers.16.mlp.gate_proj.weight_scale": "model-00012-of-00086.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00012-of-00086.safetensors", + "model.layers.16.mlp.up_proj.weight_scale": 
"model-00012-of-00086.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00013-of-00086.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00012-of-00086.safetensors", + "model.layers.16.self_attn.k_proj.weight_scale": "model-00012-of-00086.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00012-of-00086.safetensors", + "model.layers.16.self_attn.o_proj.weight_scale": "model-00012-of-00086.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00012-of-00086.safetensors", + "model.layers.16.self_attn.q_proj.weight_scale": "model-00012-of-00086.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00012-of-00086.safetensors", + "model.layers.16.self_attn.v_proj.weight_scale": "model-00012-of-00086.safetensors", + "model.layers.17.input_layernorm.weight": "model-00013-of-00086.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00013-of-00086.safetensors", + "model.layers.17.mlp.down_proj.weight_scale": "model-00013-of-00086.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00013-of-00086.safetensors", + "model.layers.17.mlp.gate_proj.weight_scale": "model-00013-of-00086.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00013-of-00086.safetensors", + "model.layers.17.mlp.up_proj.weight_scale": "model-00013-of-00086.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00013-of-00086.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00013-of-00086.safetensors", + "model.layers.17.self_attn.k_proj.weight_scale": "model-00013-of-00086.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00013-of-00086.safetensors", + "model.layers.17.self_attn.o_proj.weight_scale": "model-00013-of-00086.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00013-of-00086.safetensors", + "model.layers.17.self_attn.q_proj.weight_scale": "model-00013-of-00086.safetensors", + "model.layers.17.self_attn.v_proj.weight": 
"model-00013-of-00086.safetensors", + "model.layers.17.self_attn.v_proj.weight_scale": "model-00013-of-00086.safetensors", + "model.layers.18.input_layernorm.weight": "model-00014-of-00086.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00014-of-00086.safetensors", + "model.layers.18.mlp.down_proj.weight_scale": "model-00014-of-00086.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00014-of-00086.safetensors", + "model.layers.18.mlp.gate_proj.weight_scale": "model-00014-of-00086.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00014-of-00086.safetensors", + "model.layers.18.mlp.up_proj.weight_scale": "model-00014-of-00086.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00014-of-00086.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00013-of-00086.safetensors", + "model.layers.18.self_attn.k_proj.weight_scale": "model-00013-of-00086.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00013-of-00086.safetensors", + "model.layers.18.self_attn.o_proj.weight_scale": "model-00013-of-00086.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00013-of-00086.safetensors", + "model.layers.18.self_attn.q_proj.weight_scale": "model-00013-of-00086.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00013-of-00086.safetensors", + "model.layers.18.self_attn.v_proj.weight_scale": "model-00013-of-00086.safetensors", + "model.layers.19.input_layernorm.weight": "model-00015-of-00086.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00015-of-00086.safetensors", + "model.layers.19.mlp.down_proj.weight_scale": "model-00015-of-00086.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00014-of-00086.safetensors", + "model.layers.19.mlp.gate_proj.weight_scale": "model-00014-of-00086.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00014-of-00086.safetensors", + "model.layers.19.mlp.up_proj.weight_scale": 
"model-00014-of-00086.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00015-of-00086.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00014-of-00086.safetensors", + "model.layers.19.self_attn.k_proj.weight_scale": "model-00014-of-00086.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00014-of-00086.safetensors", + "model.layers.19.self_attn.o_proj.weight_scale": "model-00014-of-00086.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00014-of-00086.safetensors", + "model.layers.19.self_attn.q_proj.weight_scale": "model-00014-of-00086.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00014-of-00086.safetensors", + "model.layers.19.self_attn.v_proj.weight_scale": "model-00014-of-00086.safetensors", + "model.layers.2.input_layernorm.weight": "model-00003-of-00086.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00003-of-00086.safetensors", + "model.layers.2.mlp.down_proj.weight_scale": "model-00003-of-00086.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00003-of-00086.safetensors", + "model.layers.2.mlp.gate_proj.weight_scale": "model-00003-of-00086.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00003-of-00086.safetensors", + "model.layers.2.mlp.up_proj.weight_scale": "model-00003-of-00086.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00003-of-00086.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00003-of-00086.safetensors", + "model.layers.2.self_attn.k_proj.weight_scale": "model-00003-of-00086.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00003-of-00086.safetensors", + "model.layers.2.self_attn.o_proj.weight_scale": "model-00003-of-00086.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00003-of-00086.safetensors", + "model.layers.2.self_attn.q_proj.weight_scale": "model-00003-of-00086.safetensors", + "model.layers.2.self_attn.v_proj.weight": 
"model-00003-of-00086.safetensors", + "model.layers.2.self_attn.v_proj.weight_scale": "model-00003-of-00086.safetensors", + "model.layers.20.input_layernorm.weight": "model-00015-of-00086.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00015-of-00086.safetensors", + "model.layers.20.mlp.down_proj.weight_scale": "model-00015-of-00086.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00015-of-00086.safetensors", + "model.layers.20.mlp.gate_proj.weight_scale": "model-00015-of-00086.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00015-of-00086.safetensors", + "model.layers.20.mlp.up_proj.weight_scale": "model-00015-of-00086.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00015-of-00086.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00015-of-00086.safetensors", + "model.layers.20.self_attn.k_proj.weight_scale": "model-00015-of-00086.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00015-of-00086.safetensors", + "model.layers.20.self_attn.o_proj.weight_scale": "model-00015-of-00086.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00015-of-00086.safetensors", + "model.layers.20.self_attn.q_proj.weight_scale": "model-00015-of-00086.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00015-of-00086.safetensors", + "model.layers.20.self_attn.v_proj.weight_scale": "model-00015-of-00086.safetensors", + "model.layers.21.input_layernorm.weight": "model-00016-of-00086.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00016-of-00086.safetensors", + "model.layers.21.mlp.down_proj.weight_scale": "model-00016-of-00086.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00016-of-00086.safetensors", + "model.layers.21.mlp.gate_proj.weight_scale": "model-00016-of-00086.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00016-of-00086.safetensors", + "model.layers.21.mlp.up_proj.weight_scale": 
"model-00016-of-00086.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00016-of-00086.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00015-of-00086.safetensors", + "model.layers.21.self_attn.k_proj.weight_scale": "model-00015-of-00086.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00015-of-00086.safetensors", + "model.layers.21.self_attn.o_proj.weight_scale": "model-00015-of-00086.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00015-of-00086.safetensors", + "model.layers.21.self_attn.q_proj.weight_scale": "model-00015-of-00086.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00015-of-00086.safetensors", + "model.layers.21.self_attn.v_proj.weight_scale": "model-00015-of-00086.safetensors", + "model.layers.22.input_layernorm.weight": "model-00017-of-00086.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00017-of-00086.safetensors", + "model.layers.22.mlp.down_proj.weight_scale": "model-00017-of-00086.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00016-of-00086.safetensors", + "model.layers.22.mlp.gate_proj.weight_scale": "model-00016-of-00086.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00016-of-00086.safetensors", + "model.layers.22.mlp.up_proj.weight_scale": "model-00016-of-00086.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00017-of-00086.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00016-of-00086.safetensors", + "model.layers.22.self_attn.k_proj.weight_scale": "model-00016-of-00086.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00016-of-00086.safetensors", + "model.layers.22.self_attn.o_proj.weight_scale": "model-00016-of-00086.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00016-of-00086.safetensors", + "model.layers.22.self_attn.q_proj.weight_scale": "model-00016-of-00086.safetensors", + "model.layers.22.self_attn.v_proj.weight": 
"model-00016-of-00086.safetensors", + "model.layers.22.self_attn.v_proj.weight_scale": "model-00016-of-00086.safetensors", + "model.layers.23.input_layernorm.weight": "model-00017-of-00086.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00017-of-00086.safetensors", + "model.layers.23.mlp.down_proj.weight_scale": "model-00017-of-00086.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00017-of-00086.safetensors", + "model.layers.23.mlp.gate_proj.weight_scale": "model-00017-of-00086.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00017-of-00086.safetensors", + "model.layers.23.mlp.up_proj.weight_scale": "model-00017-of-00086.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00017-of-00086.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00017-of-00086.safetensors", + "model.layers.23.self_attn.k_proj.weight_scale": "model-00017-of-00086.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00017-of-00086.safetensors", + "model.layers.23.self_attn.o_proj.weight_scale": "model-00017-of-00086.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00017-of-00086.safetensors", + "model.layers.23.self_attn.q_proj.weight_scale": "model-00017-of-00086.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00017-of-00086.safetensors", + "model.layers.23.self_attn.v_proj.weight_scale": "model-00017-of-00086.safetensors", + "model.layers.24.input_layernorm.weight": "model-00018-of-00086.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00018-of-00086.safetensors", + "model.layers.24.mlp.down_proj.weight_scale": "model-00018-of-00086.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00018-of-00086.safetensors", + "model.layers.24.mlp.gate_proj.weight_scale": "model-00018-of-00086.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00018-of-00086.safetensors", + "model.layers.24.mlp.up_proj.weight_scale": 
"model-00018-of-00086.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00018-of-00086.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00017-of-00086.safetensors", + "model.layers.24.self_attn.k_proj.weight_scale": "model-00017-of-00086.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00017-of-00086.safetensors", + "model.layers.24.self_attn.o_proj.weight_scale": "model-00017-of-00086.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00017-of-00086.safetensors", + "model.layers.24.self_attn.q_proj.weight_scale": "model-00017-of-00086.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00017-of-00086.safetensors", + "model.layers.24.self_attn.v_proj.weight_scale": "model-00017-of-00086.safetensors", + "model.layers.25.input_layernorm.weight": "model-00019-of-00086.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00019-of-00086.safetensors", + "model.layers.25.mlp.down_proj.weight_scale": "model-00019-of-00086.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00018-of-00086.safetensors", + "model.layers.25.mlp.gate_proj.weight_scale": "model-00018-of-00086.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00018-of-00086.safetensors", + "model.layers.25.mlp.up_proj.weight_scale": "model-00018-of-00086.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00019-of-00086.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00018-of-00086.safetensors", + "model.layers.25.self_attn.k_proj.weight_scale": "model-00018-of-00086.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00018-of-00086.safetensors", + "model.layers.25.self_attn.o_proj.weight_scale": "model-00018-of-00086.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00018-of-00086.safetensors", + "model.layers.25.self_attn.q_proj.weight_scale": "model-00018-of-00086.safetensors", + "model.layers.25.self_attn.v_proj.weight": 
"model-00018-of-00086.safetensors", + "model.layers.25.self_attn.v_proj.weight_scale": "model-00018-of-00086.safetensors", + "model.layers.26.input_layernorm.weight": "model-00019-of-00086.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00019-of-00086.safetensors", + "model.layers.26.mlp.down_proj.weight_scale": "model-00019-of-00086.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00019-of-00086.safetensors", + "model.layers.26.mlp.gate_proj.weight_scale": "model-00019-of-00086.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00019-of-00086.safetensors", + "model.layers.26.mlp.up_proj.weight_scale": "model-00019-of-00086.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00019-of-00086.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00019-of-00086.safetensors", + "model.layers.26.self_attn.k_proj.weight_scale": "model-00019-of-00086.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00019-of-00086.safetensors", + "model.layers.26.self_attn.o_proj.weight_scale": "model-00019-of-00086.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00019-of-00086.safetensors", + "model.layers.26.self_attn.q_proj.weight_scale": "model-00019-of-00086.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00019-of-00086.safetensors", + "model.layers.26.self_attn.v_proj.weight_scale": "model-00019-of-00086.safetensors", + "model.layers.27.input_layernorm.weight": "model-00020-of-00086.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00020-of-00086.safetensors", + "model.layers.27.mlp.down_proj.weight_scale": "model-00020-of-00086.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00020-of-00086.safetensors", + "model.layers.27.mlp.gate_proj.weight_scale": "model-00020-of-00086.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00020-of-00086.safetensors", + "model.layers.27.mlp.up_proj.weight_scale": 
"model-00020-of-00086.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00020-of-00086.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00019-of-00086.safetensors", + "model.layers.27.self_attn.k_proj.weight_scale": "model-00019-of-00086.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00019-of-00086.safetensors", + "model.layers.27.self_attn.o_proj.weight_scale": "model-00019-of-00086.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00019-of-00086.safetensors", + "model.layers.27.self_attn.q_proj.weight_scale": "model-00019-of-00086.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00019-of-00086.safetensors", + "model.layers.27.self_attn.v_proj.weight_scale": "model-00019-of-00086.safetensors", + "model.layers.28.input_layernorm.weight": "model-00021-of-00086.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00021-of-00086.safetensors", + "model.layers.28.mlp.down_proj.weight_scale": "model-00021-of-00086.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00020-of-00086.safetensors", + "model.layers.28.mlp.gate_proj.weight_scale": "model-00020-of-00086.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00020-of-00086.safetensors", + "model.layers.28.mlp.up_proj.weight_scale": "model-00020-of-00086.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00021-of-00086.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00020-of-00086.safetensors", + "model.layers.28.self_attn.k_proj.weight_scale": "model-00020-of-00086.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00020-of-00086.safetensors", + "model.layers.28.self_attn.o_proj.weight_scale": "model-00020-of-00086.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00020-of-00086.safetensors", + "model.layers.28.self_attn.q_proj.weight_scale": "model-00020-of-00086.safetensors", + "model.layers.28.self_attn.v_proj.weight": 
"model-00020-of-00086.safetensors", + "model.layers.28.self_attn.v_proj.weight_scale": "model-00020-of-00086.safetensors", + "model.layers.29.input_layernorm.weight": "model-00021-of-00086.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00021-of-00086.safetensors", + "model.layers.29.mlp.down_proj.weight_scale": "model-00021-of-00086.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00021-of-00086.safetensors", + "model.layers.29.mlp.gate_proj.weight_scale": "model-00021-of-00086.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00021-of-00086.safetensors", + "model.layers.29.mlp.up_proj.weight_scale": "model-00021-of-00086.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00021-of-00086.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00021-of-00086.safetensors", + "model.layers.29.self_attn.k_proj.weight_scale": "model-00021-of-00086.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00021-of-00086.safetensors", + "model.layers.29.self_attn.o_proj.weight_scale": "model-00021-of-00086.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00021-of-00086.safetensors", + "model.layers.29.self_attn.q_proj.weight_scale": "model-00021-of-00086.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00021-of-00086.safetensors", + "model.layers.29.self_attn.v_proj.weight_scale": "model-00021-of-00086.safetensors", + "model.layers.3.input_layernorm.weight": "model-00004-of-00086.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00004-of-00086.safetensors", + "model.layers.3.mlp.down_proj.weight_scale": "model-00004-of-00086.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00004-of-00086.safetensors", + "model.layers.3.mlp.gate_proj.weight_scale": "model-00004-of-00086.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00004-of-00086.safetensors", + "model.layers.3.mlp.up_proj.weight_scale": 
"model-00004-of-00086.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00004-of-00086.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00003-of-00086.safetensors", + "model.layers.3.self_attn.k_proj.weight_scale": "model-00003-of-00086.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00003-of-00086.safetensors", + "model.layers.3.self_attn.o_proj.weight_scale": "model-00003-of-00086.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00003-of-00086.safetensors", + "model.layers.3.self_attn.q_proj.weight_scale": "model-00003-of-00086.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00003-of-00086.safetensors", + "model.layers.3.self_attn.v_proj.weight_scale": "model-00003-of-00086.safetensors", + "model.layers.30.input_layernorm.weight": "model-00022-of-00086.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00022-of-00086.safetensors", + "model.layers.30.mlp.down_proj.weight_scale": "model-00022-of-00086.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00022-of-00086.safetensors", + "model.layers.30.mlp.gate_proj.weight_scale": "model-00022-of-00086.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00022-of-00086.safetensors", + "model.layers.30.mlp.up_proj.weight_scale": "model-00022-of-00086.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00022-of-00086.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00021-of-00086.safetensors", + "model.layers.30.self_attn.k_proj.weight_scale": "model-00021-of-00086.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00021-of-00086.safetensors", + "model.layers.30.self_attn.o_proj.weight_scale": "model-00021-of-00086.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00021-of-00086.safetensors", + "model.layers.30.self_attn.q_proj.weight_scale": "model-00021-of-00086.safetensors", + "model.layers.30.self_attn.v_proj.weight": 
"model-00021-of-00086.safetensors", + "model.layers.30.self_attn.v_proj.weight_scale": "model-00021-of-00086.safetensors", + "model.layers.31.input_layernorm.weight": "model-00023-of-00086.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00023-of-00086.safetensors", + "model.layers.31.mlp.down_proj.weight_scale": "model-00023-of-00086.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00022-of-00086.safetensors", + "model.layers.31.mlp.gate_proj.weight_scale": "model-00022-of-00086.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00022-of-00086.safetensors", + "model.layers.31.mlp.up_proj.weight_scale": "model-00022-of-00086.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00023-of-00086.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00022-of-00086.safetensors", + "model.layers.31.self_attn.k_proj.weight_scale": "model-00022-of-00086.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00022-of-00086.safetensors", + "model.layers.31.self_attn.o_proj.weight_scale": "model-00022-of-00086.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00022-of-00086.safetensors", + "model.layers.31.self_attn.q_proj.weight_scale": "model-00022-of-00086.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00022-of-00086.safetensors", + "model.layers.31.self_attn.v_proj.weight_scale": "model-00022-of-00086.safetensors", + "model.layers.32.input_layernorm.weight": "model-00023-of-00086.safetensors", + "model.layers.32.mlp.down_proj.weight": "model-00023-of-00086.safetensors", + "model.layers.32.mlp.down_proj.weight_scale": "model-00023-of-00086.safetensors", + "model.layers.32.mlp.gate_proj.weight": "model-00023-of-00086.safetensors", + "model.layers.32.mlp.gate_proj.weight_scale": "model-00023-of-00086.safetensors", + "model.layers.32.mlp.up_proj.weight": "model-00023-of-00086.safetensors", + "model.layers.32.mlp.up_proj.weight_scale": 
"model-00023-of-00086.safetensors", + "model.layers.32.post_attention_layernorm.weight": "model-00023-of-00086.safetensors", + "model.layers.32.self_attn.k_proj.weight": "model-00023-of-00086.safetensors", + "model.layers.32.self_attn.k_proj.weight_scale": "model-00023-of-00086.safetensors", + "model.layers.32.self_attn.o_proj.weight": "model-00023-of-00086.safetensors", + "model.layers.32.self_attn.o_proj.weight_scale": "model-00023-of-00086.safetensors", + "model.layers.32.self_attn.q_proj.weight": "model-00023-of-00086.safetensors", + "model.layers.32.self_attn.q_proj.weight_scale": "model-00023-of-00086.safetensors", + "model.layers.32.self_attn.v_proj.weight": "model-00023-of-00086.safetensors", + "model.layers.32.self_attn.v_proj.weight_scale": "model-00023-of-00086.safetensors", + "model.layers.33.input_layernorm.weight": "model-00024-of-00086.safetensors", + "model.layers.33.mlp.down_proj.weight": "model-00024-of-00086.safetensors", + "model.layers.33.mlp.down_proj.weight_scale": "model-00024-of-00086.safetensors", + "model.layers.33.mlp.gate_proj.weight": "model-00024-of-00086.safetensors", + "model.layers.33.mlp.gate_proj.weight_scale": "model-00024-of-00086.safetensors", + "model.layers.33.mlp.up_proj.weight": "model-00024-of-00086.safetensors", + "model.layers.33.mlp.up_proj.weight_scale": "model-00024-of-00086.safetensors", + "model.layers.33.post_attention_layernorm.weight": "model-00024-of-00086.safetensors", + "model.layers.33.self_attn.k_proj.weight": "model-00023-of-00086.safetensors", + "model.layers.33.self_attn.k_proj.weight_scale": "model-00023-of-00086.safetensors", + "model.layers.33.self_attn.o_proj.weight": "model-00023-of-00086.safetensors", + "model.layers.33.self_attn.o_proj.weight_scale": "model-00023-of-00086.safetensors", + "model.layers.33.self_attn.q_proj.weight": "model-00023-of-00086.safetensors", + "model.layers.33.self_attn.q_proj.weight_scale": "model-00023-of-00086.safetensors", + "model.layers.33.self_attn.v_proj.weight": 
"model-00023-of-00086.safetensors", + "model.layers.33.self_attn.v_proj.weight_scale": "model-00023-of-00086.safetensors", + "model.layers.34.input_layernorm.weight": "model-00025-of-00086.safetensors", + "model.layers.34.mlp.down_proj.weight": "model-00025-of-00086.safetensors", + "model.layers.34.mlp.down_proj.weight_scale": "model-00025-of-00086.safetensors", + "model.layers.34.mlp.gate_proj.weight": "model-00024-of-00086.safetensors", + "model.layers.34.mlp.gate_proj.weight_scale": "model-00024-of-00086.safetensors", + "model.layers.34.mlp.up_proj.weight": "model-00024-of-00086.safetensors", + "model.layers.34.mlp.up_proj.weight_scale": "model-00024-of-00086.safetensors", + "model.layers.34.post_attention_layernorm.weight": "model-00025-of-00086.safetensors", + "model.layers.34.self_attn.k_proj.weight": "model-00024-of-00086.safetensors", + "model.layers.34.self_attn.k_proj.weight_scale": "model-00024-of-00086.safetensors", + "model.layers.34.self_attn.o_proj.weight": "model-00024-of-00086.safetensors", + "model.layers.34.self_attn.o_proj.weight_scale": "model-00024-of-00086.safetensors", + "model.layers.34.self_attn.q_proj.weight": "model-00024-of-00086.safetensors", + "model.layers.34.self_attn.q_proj.weight_scale": "model-00024-of-00086.safetensors", + "model.layers.34.self_attn.v_proj.weight": "model-00024-of-00086.safetensors", + "model.layers.34.self_attn.v_proj.weight_scale": "model-00024-of-00086.safetensors", + "model.layers.35.input_layernorm.weight": "model-00025-of-00086.safetensors", + "model.layers.35.mlp.down_proj.weight": "model-00025-of-00086.safetensors", + "model.layers.35.mlp.down_proj.weight_scale": "model-00025-of-00086.safetensors", + "model.layers.35.mlp.gate_proj.weight": "model-00025-of-00086.safetensors", + "model.layers.35.mlp.gate_proj.weight_scale": "model-00025-of-00086.safetensors", + "model.layers.35.mlp.up_proj.weight": "model-00025-of-00086.safetensors", + "model.layers.35.mlp.up_proj.weight_scale": 
"model-00025-of-00086.safetensors", + "model.layers.35.post_attention_layernorm.weight": "model-00025-of-00086.safetensors", + "model.layers.35.self_attn.k_proj.weight": "model-00025-of-00086.safetensors", + "model.layers.35.self_attn.k_proj.weight_scale": "model-00025-of-00086.safetensors", + "model.layers.35.self_attn.o_proj.weight": "model-00025-of-00086.safetensors", + "model.layers.35.self_attn.o_proj.weight_scale": "model-00025-of-00086.safetensors", + "model.layers.35.self_attn.q_proj.weight": "model-00025-of-00086.safetensors", + "model.layers.35.self_attn.q_proj.weight_scale": "model-00025-of-00086.safetensors", + "model.layers.35.self_attn.v_proj.weight": "model-00025-of-00086.safetensors", + "model.layers.35.self_attn.v_proj.weight_scale": "model-00025-of-00086.safetensors", + "model.layers.36.input_layernorm.weight": "model-00026-of-00086.safetensors", + "model.layers.36.mlp.down_proj.weight": "model-00026-of-00086.safetensors", + "model.layers.36.mlp.down_proj.weight_scale": "model-00026-of-00086.safetensors", + "model.layers.36.mlp.gate_proj.weight": "model-00026-of-00086.safetensors", + "model.layers.36.mlp.gate_proj.weight_scale": "model-00026-of-00086.safetensors", + "model.layers.36.mlp.up_proj.weight": "model-00026-of-00086.safetensors", + "model.layers.36.mlp.up_proj.weight_scale": "model-00026-of-00086.safetensors", + "model.layers.36.post_attention_layernorm.weight": "model-00026-of-00086.safetensors", + "model.layers.36.self_attn.k_proj.weight": "model-00025-of-00086.safetensors", + "model.layers.36.self_attn.k_proj.weight_scale": "model-00025-of-00086.safetensors", + "model.layers.36.self_attn.o_proj.weight": "model-00025-of-00086.safetensors", + "model.layers.36.self_attn.o_proj.weight_scale": "model-00025-of-00086.safetensors", + "model.layers.36.self_attn.q_proj.weight": "model-00025-of-00086.safetensors", + "model.layers.36.self_attn.q_proj.weight_scale": "model-00025-of-00086.safetensors", + "model.layers.36.self_attn.v_proj.weight": 
"model-00025-of-00086.safetensors", + "model.layers.36.self_attn.v_proj.weight_scale": "model-00025-of-00086.safetensors", + "model.layers.37.input_layernorm.weight": "model-00027-of-00086.safetensors", + "model.layers.37.mlp.down_proj.weight": "model-00027-of-00086.safetensors", + "model.layers.37.mlp.down_proj.weight_scale": "model-00027-of-00086.safetensors", + "model.layers.37.mlp.gate_proj.weight": "model-00026-of-00086.safetensors", + "model.layers.37.mlp.gate_proj.weight_scale": "model-00026-of-00086.safetensors", + "model.layers.37.mlp.up_proj.weight": "model-00026-of-00086.safetensors", + "model.layers.37.mlp.up_proj.weight_scale": "model-00026-of-00086.safetensors", + "model.layers.37.post_attention_layernorm.weight": "model-00027-of-00086.safetensors", + "model.layers.37.self_attn.k_proj.weight": "model-00026-of-00086.safetensors", + "model.layers.37.self_attn.k_proj.weight_scale": "model-00026-of-00086.safetensors", + "model.layers.37.self_attn.o_proj.weight": "model-00026-of-00086.safetensors", + "model.layers.37.self_attn.o_proj.weight_scale": "model-00026-of-00086.safetensors", + "model.layers.37.self_attn.q_proj.weight": "model-00026-of-00086.safetensors", + "model.layers.37.self_attn.q_proj.weight_scale": "model-00026-of-00086.safetensors", + "model.layers.37.self_attn.v_proj.weight": "model-00026-of-00086.safetensors", + "model.layers.37.self_attn.v_proj.weight_scale": "model-00026-of-00086.safetensors", + "model.layers.38.input_layernorm.weight": "model-00027-of-00086.safetensors", + "model.layers.38.mlp.down_proj.weight": "model-00027-of-00086.safetensors", + "model.layers.38.mlp.down_proj.weight_scale": "model-00027-of-00086.safetensors", + "model.layers.38.mlp.gate_proj.weight": "model-00027-of-00086.safetensors", + "model.layers.38.mlp.gate_proj.weight_scale": "model-00027-of-00086.safetensors", + "model.layers.38.mlp.up_proj.weight": "model-00027-of-00086.safetensors", + "model.layers.38.mlp.up_proj.weight_scale": 
"model-00027-of-00086.safetensors", + "model.layers.38.post_attention_layernorm.weight": "model-00027-of-00086.safetensors", + "model.layers.38.self_attn.k_proj.weight": "model-00027-of-00086.safetensors", + "model.layers.38.self_attn.k_proj.weight_scale": "model-00027-of-00086.safetensors", + "model.layers.38.self_attn.o_proj.weight": "model-00027-of-00086.safetensors", + "model.layers.38.self_attn.o_proj.weight_scale": "model-00027-of-00086.safetensors", + "model.layers.38.self_attn.q_proj.weight": "model-00027-of-00086.safetensors", + "model.layers.38.self_attn.q_proj.weight_scale": "model-00027-of-00086.safetensors", + "model.layers.38.self_attn.v_proj.weight": "model-00027-of-00086.safetensors", + "model.layers.38.self_attn.v_proj.weight_scale": "model-00027-of-00086.safetensors", + "model.layers.39.input_layernorm.weight": "model-00028-of-00086.safetensors", + "model.layers.39.mlp.down_proj.weight": "model-00028-of-00086.safetensors", + "model.layers.39.mlp.down_proj.weight_scale": "model-00028-of-00086.safetensors", + "model.layers.39.mlp.gate_proj.weight": "model-00028-of-00086.safetensors", + "model.layers.39.mlp.gate_proj.weight_scale": "model-00028-of-00086.safetensors", + "model.layers.39.mlp.up_proj.weight": "model-00028-of-00086.safetensors", + "model.layers.39.mlp.up_proj.weight_scale": "model-00028-of-00086.safetensors", + "model.layers.39.post_attention_layernorm.weight": "model-00028-of-00086.safetensors", + "model.layers.39.self_attn.k_proj.weight": "model-00027-of-00086.safetensors", + "model.layers.39.self_attn.k_proj.weight_scale": "model-00027-of-00086.safetensors", + "model.layers.39.self_attn.o_proj.weight": "model-00027-of-00086.safetensors", + "model.layers.39.self_attn.o_proj.weight_scale": "model-00027-of-00086.safetensors", + "model.layers.39.self_attn.q_proj.weight": "model-00027-of-00086.safetensors", + "model.layers.39.self_attn.q_proj.weight_scale": "model-00027-of-00086.safetensors", + "model.layers.39.self_attn.v_proj.weight": 
"model-00027-of-00086.safetensors", + "model.layers.39.self_attn.v_proj.weight_scale": "model-00027-of-00086.safetensors", + "model.layers.4.input_layernorm.weight": "model-00005-of-00086.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00005-of-00086.safetensors", + "model.layers.4.mlp.down_proj.weight_scale": "model-00005-of-00086.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00004-of-00086.safetensors", + "model.layers.4.mlp.gate_proj.weight_scale": "model-00004-of-00086.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00004-of-00086.safetensors", + "model.layers.4.mlp.up_proj.weight_scale": "model-00004-of-00086.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00005-of-00086.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00004-of-00086.safetensors", + "model.layers.4.self_attn.k_proj.weight_scale": "model-00004-of-00086.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00004-of-00086.safetensors", + "model.layers.4.self_attn.o_proj.weight_scale": "model-00004-of-00086.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00004-of-00086.safetensors", + "model.layers.4.self_attn.q_proj.weight_scale": "model-00004-of-00086.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00004-of-00086.safetensors", + "model.layers.4.self_attn.v_proj.weight_scale": "model-00004-of-00086.safetensors", + "model.layers.40.input_layernorm.weight": "model-00029-of-00086.safetensors", + "model.layers.40.mlp.down_proj.weight": "model-00029-of-00086.safetensors", + "model.layers.40.mlp.down_proj.weight_scale": "model-00029-of-00086.safetensors", + "model.layers.40.mlp.gate_proj.weight": "model-00028-of-00086.safetensors", + "model.layers.40.mlp.gate_proj.weight_scale": "model-00028-of-00086.safetensors", + "model.layers.40.mlp.up_proj.weight": "model-00028-of-00086.safetensors", + "model.layers.40.mlp.up_proj.weight_scale": "model-00028-of-00086.safetensors", + 
"model.layers.40.post_attention_layernorm.weight": "model-00029-of-00086.safetensors", + "model.layers.40.self_attn.k_proj.weight": "model-00028-of-00086.safetensors", + "model.layers.40.self_attn.k_proj.weight_scale": "model-00028-of-00086.safetensors", + "model.layers.40.self_attn.o_proj.weight": "model-00028-of-00086.safetensors", + "model.layers.40.self_attn.o_proj.weight_scale": "model-00028-of-00086.safetensors", + "model.layers.40.self_attn.q_proj.weight": "model-00028-of-00086.safetensors", + "model.layers.40.self_attn.q_proj.weight_scale": "model-00028-of-00086.safetensors", + "model.layers.40.self_attn.v_proj.weight": "model-00028-of-00086.safetensors", + "model.layers.40.self_attn.v_proj.weight_scale": "model-00028-of-00086.safetensors", + "model.layers.41.input_layernorm.weight": "model-00029-of-00086.safetensors", + "model.layers.41.mlp.down_proj.weight": "model-00029-of-00086.safetensors", + "model.layers.41.mlp.down_proj.weight_scale": "model-00029-of-00086.safetensors", + "model.layers.41.mlp.gate_proj.weight": "model-00029-of-00086.safetensors", + "model.layers.41.mlp.gate_proj.weight_scale": "model-00029-of-00086.safetensors", + "model.layers.41.mlp.up_proj.weight": "model-00029-of-00086.safetensors", + "model.layers.41.mlp.up_proj.weight_scale": "model-00029-of-00086.safetensors", + "model.layers.41.post_attention_layernorm.weight": "model-00029-of-00086.safetensors", + "model.layers.41.self_attn.k_proj.weight": "model-00029-of-00086.safetensors", + "model.layers.41.self_attn.k_proj.weight_scale": "model-00029-of-00086.safetensors", + "model.layers.41.self_attn.o_proj.weight": "model-00029-of-00086.safetensors", + "model.layers.41.self_attn.o_proj.weight_scale": "model-00029-of-00086.safetensors", + "model.layers.41.self_attn.q_proj.weight": "model-00029-of-00086.safetensors", + "model.layers.41.self_attn.q_proj.weight_scale": "model-00029-of-00086.safetensors", + "model.layers.41.self_attn.v_proj.weight": "model-00029-of-00086.safetensors", + 
"model.layers.41.self_attn.v_proj.weight_scale": "model-00029-of-00086.safetensors", + "model.layers.42.input_layernorm.weight": "model-00030-of-00086.safetensors", + "model.layers.42.mlp.down_proj.weight": "model-00030-of-00086.safetensors", + "model.layers.42.mlp.down_proj.weight_scale": "model-00030-of-00086.safetensors", + "model.layers.42.mlp.gate_proj.weight": "model-00030-of-00086.safetensors", + "model.layers.42.mlp.gate_proj.weight_scale": "model-00030-of-00086.safetensors", + "model.layers.42.mlp.up_proj.weight": "model-00030-of-00086.safetensors", + "model.layers.42.mlp.up_proj.weight_scale": "model-00030-of-00086.safetensors", + "model.layers.42.post_attention_layernorm.weight": "model-00030-of-00086.safetensors", + "model.layers.42.self_attn.k_proj.weight": "model-00029-of-00086.safetensors", + "model.layers.42.self_attn.k_proj.weight_scale": "model-00029-of-00086.safetensors", + "model.layers.42.self_attn.o_proj.weight": "model-00029-of-00086.safetensors", + "model.layers.42.self_attn.o_proj.weight_scale": "model-00029-of-00086.safetensors", + "model.layers.42.self_attn.q_proj.weight": "model-00029-of-00086.safetensors", + "model.layers.42.self_attn.q_proj.weight_scale": "model-00029-of-00086.safetensors", + "model.layers.42.self_attn.v_proj.weight": "model-00029-of-00086.safetensors", + "model.layers.42.self_attn.v_proj.weight_scale": "model-00029-of-00086.safetensors", + "model.layers.43.input_layernorm.weight": "model-00031-of-00086.safetensors", + "model.layers.43.mlp.down_proj.weight": "model-00031-of-00086.safetensors", + "model.layers.43.mlp.down_proj.weight_scale": "model-00031-of-00086.safetensors", + "model.layers.43.mlp.gate_proj.weight": "model-00030-of-00086.safetensors", + "model.layers.43.mlp.gate_proj.weight_scale": "model-00030-of-00086.safetensors", + "model.layers.43.mlp.up_proj.weight": "model-00030-of-00086.safetensors", + "model.layers.43.mlp.up_proj.weight_scale": "model-00030-of-00086.safetensors", + 
"model.layers.43.post_attention_layernorm.weight": "model-00031-of-00086.safetensors", + "model.layers.43.self_attn.k_proj.weight": "model-00030-of-00086.safetensors", + "model.layers.43.self_attn.k_proj.weight_scale": "model-00030-of-00086.safetensors", + "model.layers.43.self_attn.o_proj.weight": "model-00030-of-00086.safetensors", + "model.layers.43.self_attn.o_proj.weight_scale": "model-00030-of-00086.safetensors", + "model.layers.43.self_attn.q_proj.weight": "model-00030-of-00086.safetensors", + "model.layers.43.self_attn.q_proj.weight_scale": "model-00030-of-00086.safetensors", + "model.layers.43.self_attn.v_proj.weight": "model-00030-of-00086.safetensors", + "model.layers.43.self_attn.v_proj.weight_scale": "model-00030-of-00086.safetensors", + "model.layers.44.input_layernorm.weight": "model-00031-of-00086.safetensors", + "model.layers.44.mlp.down_proj.weight": "model-00031-of-00086.safetensors", + "model.layers.44.mlp.down_proj.weight_scale": "model-00031-of-00086.safetensors", + "model.layers.44.mlp.gate_proj.weight": "model-00031-of-00086.safetensors", + "model.layers.44.mlp.gate_proj.weight_scale": "model-00031-of-00086.safetensors", + "model.layers.44.mlp.up_proj.weight": "model-00031-of-00086.safetensors", + "model.layers.44.mlp.up_proj.weight_scale": "model-00031-of-00086.safetensors", + "model.layers.44.post_attention_layernorm.weight": "model-00031-of-00086.safetensors", + "model.layers.44.self_attn.k_proj.weight": "model-00031-of-00086.safetensors", + "model.layers.44.self_attn.k_proj.weight_scale": "model-00031-of-00086.safetensors", + "model.layers.44.self_attn.o_proj.weight": "model-00031-of-00086.safetensors", + "model.layers.44.self_attn.o_proj.weight_scale": "model-00031-of-00086.safetensors", + "model.layers.44.self_attn.q_proj.weight": "model-00031-of-00086.safetensors", + "model.layers.44.self_attn.q_proj.weight_scale": "model-00031-of-00086.safetensors", + "model.layers.44.self_attn.v_proj.weight": "model-00031-of-00086.safetensors", + 
"model.layers.44.self_attn.v_proj.weight_scale": "model-00031-of-00086.safetensors", + "model.layers.45.input_layernorm.weight": "model-00032-of-00086.safetensors", + "model.layers.45.mlp.down_proj.weight": "model-00032-of-00086.safetensors", + "model.layers.45.mlp.down_proj.weight_scale": "model-00032-of-00086.safetensors", + "model.layers.45.mlp.gate_proj.weight": "model-00032-of-00086.safetensors", + "model.layers.45.mlp.gate_proj.weight_scale": "model-00032-of-00086.safetensors", + "model.layers.45.mlp.up_proj.weight": "model-00032-of-00086.safetensors", + "model.layers.45.mlp.up_proj.weight_scale": "model-00032-of-00086.safetensors", + "model.layers.45.post_attention_layernorm.weight": "model-00032-of-00086.safetensors", + "model.layers.45.self_attn.k_proj.weight": "model-00031-of-00086.safetensors", + "model.layers.45.self_attn.k_proj.weight_scale": "model-00031-of-00086.safetensors", + "model.layers.45.self_attn.o_proj.weight": "model-00031-of-00086.safetensors", + "model.layers.45.self_attn.o_proj.weight_scale": "model-00031-of-00086.safetensors", + "model.layers.45.self_attn.q_proj.weight": "model-00031-of-00086.safetensors", + "model.layers.45.self_attn.q_proj.weight_scale": "model-00031-of-00086.safetensors", + "model.layers.45.self_attn.v_proj.weight": "model-00031-of-00086.safetensors", + "model.layers.45.self_attn.v_proj.weight_scale": "model-00031-of-00086.safetensors", + "model.layers.46.input_layernorm.weight": "model-00033-of-00086.safetensors", + "model.layers.46.mlp.down_proj.weight": "model-00033-of-00086.safetensors", + "model.layers.46.mlp.down_proj.weight_scale": "model-00033-of-00086.safetensors", + "model.layers.46.mlp.gate_proj.weight": "model-00032-of-00086.safetensors", + "model.layers.46.mlp.gate_proj.weight_scale": "model-00032-of-00086.safetensors", + "model.layers.46.mlp.up_proj.weight": "model-00032-of-00086.safetensors", + "model.layers.46.mlp.up_proj.weight_scale": "model-00032-of-00086.safetensors", + 
"model.layers.46.post_attention_layernorm.weight": "model-00033-of-00086.safetensors", + "model.layers.46.self_attn.k_proj.weight": "model-00032-of-00086.safetensors", + "model.layers.46.self_attn.k_proj.weight_scale": "model-00032-of-00086.safetensors", + "model.layers.46.self_attn.o_proj.weight": "model-00032-of-00086.safetensors", + "model.layers.46.self_attn.o_proj.weight_scale": "model-00032-of-00086.safetensors", + "model.layers.46.self_attn.q_proj.weight": "model-00032-of-00086.safetensors", + "model.layers.46.self_attn.q_proj.weight_scale": "model-00032-of-00086.safetensors", + "model.layers.46.self_attn.v_proj.weight": "model-00032-of-00086.safetensors", + "model.layers.46.self_attn.v_proj.weight_scale": "model-00032-of-00086.safetensors", + "model.layers.47.input_layernorm.weight": "model-00033-of-00086.safetensors", + "model.layers.47.mlp.down_proj.weight": "model-00033-of-00086.safetensors", + "model.layers.47.mlp.down_proj.weight_scale": "model-00033-of-00086.safetensors", + "model.layers.47.mlp.gate_proj.weight": "model-00033-of-00086.safetensors", + "model.layers.47.mlp.gate_proj.weight_scale": "model-00033-of-00086.safetensors", + "model.layers.47.mlp.up_proj.weight": "model-00033-of-00086.safetensors", + "model.layers.47.mlp.up_proj.weight_scale": "model-00033-of-00086.safetensors", + "model.layers.47.post_attention_layernorm.weight": "model-00033-of-00086.safetensors", + "model.layers.47.self_attn.k_proj.weight": "model-00033-of-00086.safetensors", + "model.layers.47.self_attn.k_proj.weight_scale": "model-00033-of-00086.safetensors", + "model.layers.47.self_attn.o_proj.weight": "model-00033-of-00086.safetensors", + "model.layers.47.self_attn.o_proj.weight_scale": "model-00033-of-00086.safetensors", + "model.layers.47.self_attn.q_proj.weight": "model-00033-of-00086.safetensors", + "model.layers.47.self_attn.q_proj.weight_scale": "model-00033-of-00086.safetensors", + "model.layers.47.self_attn.v_proj.weight": "model-00033-of-00086.safetensors", + 
"model.layers.47.self_attn.v_proj.weight_scale": "model-00033-of-00086.safetensors", + "model.layers.48.input_layernorm.weight": "model-00034-of-00086.safetensors", + "model.layers.48.mlp.down_proj.weight": "model-00034-of-00086.safetensors", + "model.layers.48.mlp.down_proj.weight_scale": "model-00034-of-00086.safetensors", + "model.layers.48.mlp.gate_proj.weight": "model-00034-of-00086.safetensors", + "model.layers.48.mlp.gate_proj.weight_scale": "model-00034-of-00086.safetensors", + "model.layers.48.mlp.up_proj.weight": "model-00034-of-00086.safetensors", + "model.layers.48.mlp.up_proj.weight_scale": "model-00034-of-00086.safetensors", + "model.layers.48.post_attention_layernorm.weight": "model-00034-of-00086.safetensors", + "model.layers.48.self_attn.k_proj.weight": "model-00033-of-00086.safetensors", + "model.layers.48.self_attn.k_proj.weight_scale": "model-00033-of-00086.safetensors", + "model.layers.48.self_attn.o_proj.weight": "model-00033-of-00086.safetensors", + "model.layers.48.self_attn.o_proj.weight_scale": "model-00033-of-00086.safetensors", + "model.layers.48.self_attn.q_proj.weight": "model-00033-of-00086.safetensors", + "model.layers.48.self_attn.q_proj.weight_scale": "model-00033-of-00086.safetensors", + "model.layers.48.self_attn.v_proj.weight": "model-00033-of-00086.safetensors", + "model.layers.48.self_attn.v_proj.weight_scale": "model-00033-of-00086.safetensors", + "model.layers.49.input_layernorm.weight": "model-00035-of-00086.safetensors", + "model.layers.49.mlp.down_proj.weight": "model-00035-of-00086.safetensors", + "model.layers.49.mlp.down_proj.weight_scale": "model-00035-of-00086.safetensors", + "model.layers.49.mlp.gate_proj.weight": "model-00034-of-00086.safetensors", + "model.layers.49.mlp.gate_proj.weight_scale": "model-00034-of-00086.safetensors", + "model.layers.49.mlp.up_proj.weight": "model-00034-of-00086.safetensors", + "model.layers.49.mlp.up_proj.weight_scale": "model-00034-of-00086.safetensors", + 
"model.layers.49.post_attention_layernorm.weight": "model-00035-of-00086.safetensors", + "model.layers.49.self_attn.k_proj.weight": "model-00034-of-00086.safetensors", + "model.layers.49.self_attn.k_proj.weight_scale": "model-00034-of-00086.safetensors", + "model.layers.49.self_attn.o_proj.weight": "model-00034-of-00086.safetensors", + "model.layers.49.self_attn.o_proj.weight_scale": "model-00034-of-00086.safetensors", + "model.layers.49.self_attn.q_proj.weight": "model-00034-of-00086.safetensors", + "model.layers.49.self_attn.q_proj.weight_scale": "model-00034-of-00086.safetensors", + "model.layers.49.self_attn.v_proj.weight": "model-00034-of-00086.safetensors", + "model.layers.49.self_attn.v_proj.weight_scale": "model-00034-of-00086.safetensors", + "model.layers.5.input_layernorm.weight": "model-00005-of-00086.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00005-of-00086.safetensors", + "model.layers.5.mlp.down_proj.weight_scale": "model-00005-of-00086.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00005-of-00086.safetensors", + "model.layers.5.mlp.gate_proj.weight_scale": "model-00005-of-00086.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00005-of-00086.safetensors", + "model.layers.5.mlp.up_proj.weight_scale": "model-00005-of-00086.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00005-of-00086.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00005-of-00086.safetensors", + "model.layers.5.self_attn.k_proj.weight_scale": "model-00005-of-00086.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00005-of-00086.safetensors", + "model.layers.5.self_attn.o_proj.weight_scale": "model-00005-of-00086.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00005-of-00086.safetensors", + "model.layers.5.self_attn.q_proj.weight_scale": "model-00005-of-00086.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00005-of-00086.safetensors", + 
"model.layers.5.self_attn.v_proj.weight_scale": "model-00005-of-00086.safetensors", + "model.layers.50.input_layernorm.weight": "model-00035-of-00086.safetensors", + "model.layers.50.mlp.down_proj.weight": "model-00035-of-00086.safetensors", + "model.layers.50.mlp.down_proj.weight_scale": "model-00035-of-00086.safetensors", + "model.layers.50.mlp.gate_proj.weight": "model-00035-of-00086.safetensors", + "model.layers.50.mlp.gate_proj.weight_scale": "model-00035-of-00086.safetensors", + "model.layers.50.mlp.up_proj.weight": "model-00035-of-00086.safetensors", + "model.layers.50.mlp.up_proj.weight_scale": "model-00035-of-00086.safetensors", + "model.layers.50.post_attention_layernorm.weight": "model-00035-of-00086.safetensors", + "model.layers.50.self_attn.k_proj.weight": "model-00035-of-00086.safetensors", + "model.layers.50.self_attn.k_proj.weight_scale": "model-00035-of-00086.safetensors", + "model.layers.50.self_attn.o_proj.weight": "model-00035-of-00086.safetensors", + "model.layers.50.self_attn.o_proj.weight_scale": "model-00035-of-00086.safetensors", + "model.layers.50.self_attn.q_proj.weight": "model-00035-of-00086.safetensors", + "model.layers.50.self_attn.q_proj.weight_scale": "model-00035-of-00086.safetensors", + "model.layers.50.self_attn.v_proj.weight": "model-00035-of-00086.safetensors", + "model.layers.50.self_attn.v_proj.weight_scale": "model-00035-of-00086.safetensors", + "model.layers.51.input_layernorm.weight": "model-00036-of-00086.safetensors", + "model.layers.51.mlp.down_proj.weight": "model-00036-of-00086.safetensors", + "model.layers.51.mlp.down_proj.weight_scale": "model-00036-of-00086.safetensors", + "model.layers.51.mlp.gate_proj.weight": "model-00036-of-00086.safetensors", + "model.layers.51.mlp.gate_proj.weight_scale": "model-00036-of-00086.safetensors", + "model.layers.51.mlp.up_proj.weight": "model-00036-of-00086.safetensors", + "model.layers.51.mlp.up_proj.weight_scale": "model-00036-of-00086.safetensors", + 
"model.layers.51.post_attention_layernorm.weight": "model-00036-of-00086.safetensors", + "model.layers.51.self_attn.k_proj.weight": "model-00035-of-00086.safetensors", + "model.layers.51.self_attn.k_proj.weight_scale": "model-00035-of-00086.safetensors", + "model.layers.51.self_attn.o_proj.weight": "model-00035-of-00086.safetensors", + "model.layers.51.self_attn.o_proj.weight_scale": "model-00035-of-00086.safetensors", + "model.layers.51.self_attn.q_proj.weight": "model-00035-of-00086.safetensors", + "model.layers.51.self_attn.q_proj.weight_scale": "model-00035-of-00086.safetensors", + "model.layers.51.self_attn.v_proj.weight": "model-00035-of-00086.safetensors", + "model.layers.51.self_attn.v_proj.weight_scale": "model-00035-of-00086.safetensors", + "model.layers.52.input_layernorm.weight": "model-00037-of-00086.safetensors", + "model.layers.52.mlp.down_proj.weight": "model-00037-of-00086.safetensors", + "model.layers.52.mlp.down_proj.weight_scale": "model-00037-of-00086.safetensors", + "model.layers.52.mlp.gate_proj.weight": "model-00036-of-00086.safetensors", + "model.layers.52.mlp.gate_proj.weight_scale": "model-00036-of-00086.safetensors", + "model.layers.52.mlp.up_proj.weight": "model-00036-of-00086.safetensors", + "model.layers.52.mlp.up_proj.weight_scale": "model-00036-of-00086.safetensors", + "model.layers.52.post_attention_layernorm.weight": "model-00037-of-00086.safetensors", + "model.layers.52.self_attn.k_proj.weight": "model-00036-of-00086.safetensors", + "model.layers.52.self_attn.k_proj.weight_scale": "model-00036-of-00086.safetensors", + "model.layers.52.self_attn.o_proj.weight": "model-00036-of-00086.safetensors", + "model.layers.52.self_attn.o_proj.weight_scale": "model-00036-of-00086.safetensors", + "model.layers.52.self_attn.q_proj.weight": "model-00036-of-00086.safetensors", + "model.layers.52.self_attn.q_proj.weight_scale": "model-00036-of-00086.safetensors", + "model.layers.52.self_attn.v_proj.weight": "model-00036-of-00086.safetensors", + 
"model.layers.52.self_attn.v_proj.weight_scale": "model-00036-of-00086.safetensors", + "model.layers.53.input_layernorm.weight": "model-00037-of-00086.safetensors", + "model.layers.53.mlp.down_proj.weight": "model-00037-of-00086.safetensors", + "model.layers.53.mlp.down_proj.weight_scale": "model-00037-of-00086.safetensors", + "model.layers.53.mlp.gate_proj.weight": "model-00037-of-00086.safetensors", + "model.layers.53.mlp.gate_proj.weight_scale": "model-00037-of-00086.safetensors", + "model.layers.53.mlp.up_proj.weight": "model-00037-of-00086.safetensors", + "model.layers.53.mlp.up_proj.weight_scale": "model-00037-of-00086.safetensors", + "model.layers.53.post_attention_layernorm.weight": "model-00037-of-00086.safetensors", + "model.layers.53.self_attn.k_proj.weight": "model-00037-of-00086.safetensors", + "model.layers.53.self_attn.k_proj.weight_scale": "model-00037-of-00086.safetensors", + "model.layers.53.self_attn.o_proj.weight": "model-00037-of-00086.safetensors", + "model.layers.53.self_attn.o_proj.weight_scale": "model-00037-of-00086.safetensors", + "model.layers.53.self_attn.q_proj.weight": "model-00037-of-00086.safetensors", + "model.layers.53.self_attn.q_proj.weight_scale": "model-00037-of-00086.safetensors", + "model.layers.53.self_attn.v_proj.weight": "model-00037-of-00086.safetensors", + "model.layers.53.self_attn.v_proj.weight_scale": "model-00037-of-00086.safetensors", + "model.layers.54.input_layernorm.weight": "model-00038-of-00086.safetensors", + "model.layers.54.mlp.down_proj.weight": "model-00038-of-00086.safetensors", + "model.layers.54.mlp.down_proj.weight_scale": "model-00038-of-00086.safetensors", + "model.layers.54.mlp.gate_proj.weight": "model-00038-of-00086.safetensors", + "model.layers.54.mlp.gate_proj.weight_scale": "model-00038-of-00086.safetensors", + "model.layers.54.mlp.up_proj.weight": "model-00038-of-00086.safetensors", + "model.layers.54.mlp.up_proj.weight_scale": "model-00038-of-00086.safetensors", + 
"model.layers.54.post_attention_layernorm.weight": "model-00038-of-00086.safetensors", + "model.layers.54.self_attn.k_proj.weight": "model-00037-of-00086.safetensors", + "model.layers.54.self_attn.k_proj.weight_scale": "model-00037-of-00086.safetensors", + "model.layers.54.self_attn.o_proj.weight": "model-00037-of-00086.safetensors", + "model.layers.54.self_attn.o_proj.weight_scale": "model-00037-of-00086.safetensors", + "model.layers.54.self_attn.q_proj.weight": "model-00037-of-00086.safetensors", + "model.layers.54.self_attn.q_proj.weight_scale": "model-00037-of-00086.safetensors", + "model.layers.54.self_attn.v_proj.weight": "model-00037-of-00086.safetensors", + "model.layers.54.self_attn.v_proj.weight_scale": "model-00037-of-00086.safetensors", + "model.layers.55.input_layernorm.weight": "model-00039-of-00086.safetensors", + "model.layers.55.mlp.down_proj.weight": "model-00039-of-00086.safetensors", + "model.layers.55.mlp.down_proj.weight_scale": "model-00039-of-00086.safetensors", + "model.layers.55.mlp.gate_proj.weight": "model-00038-of-00086.safetensors", + "model.layers.55.mlp.gate_proj.weight_scale": "model-00038-of-00086.safetensors", + "model.layers.55.mlp.up_proj.weight": "model-00038-of-00086.safetensors", + "model.layers.55.mlp.up_proj.weight_scale": "model-00038-of-00086.safetensors", + "model.layers.55.post_attention_layernorm.weight": "model-00039-of-00086.safetensors", + "model.layers.55.self_attn.k_proj.weight": "model-00038-of-00086.safetensors", + "model.layers.55.self_attn.k_proj.weight_scale": "model-00038-of-00086.safetensors", + "model.layers.55.self_attn.o_proj.weight": "model-00038-of-00086.safetensors", + "model.layers.55.self_attn.o_proj.weight_scale": "model-00038-of-00086.safetensors", + "model.layers.55.self_attn.q_proj.weight": "model-00038-of-00086.safetensors", + "model.layers.55.self_attn.q_proj.weight_scale": "model-00038-of-00086.safetensors", + "model.layers.55.self_attn.v_proj.weight": "model-00038-of-00086.safetensors", + 
"model.layers.55.self_attn.v_proj.weight_scale": "model-00038-of-00086.safetensors", + "model.layers.56.input_layernorm.weight": "model-00039-of-00086.safetensors", + "model.layers.56.mlp.down_proj.weight": "model-00039-of-00086.safetensors", + "model.layers.56.mlp.down_proj.weight_scale": "model-00039-of-00086.safetensors", + "model.layers.56.mlp.gate_proj.weight": "model-00039-of-00086.safetensors", + "model.layers.56.mlp.gate_proj.weight_scale": "model-00039-of-00086.safetensors", + "model.layers.56.mlp.up_proj.weight": "model-00039-of-00086.safetensors", + "model.layers.56.mlp.up_proj.weight_scale": "model-00039-of-00086.safetensors", + "model.layers.56.post_attention_layernorm.weight": "model-00039-of-00086.safetensors", + "model.layers.56.self_attn.k_proj.weight": "model-00039-of-00086.safetensors", + "model.layers.56.self_attn.k_proj.weight_scale": "model-00039-of-00086.safetensors", + "model.layers.56.self_attn.o_proj.weight": "model-00039-of-00086.safetensors", + "model.layers.56.self_attn.o_proj.weight_scale": "model-00039-of-00086.safetensors", + "model.layers.56.self_attn.q_proj.weight": "model-00039-of-00086.safetensors", + "model.layers.56.self_attn.q_proj.weight_scale": "model-00039-of-00086.safetensors", + "model.layers.56.self_attn.v_proj.weight": "model-00039-of-00086.safetensors", + "model.layers.56.self_attn.v_proj.weight_scale": "model-00039-of-00086.safetensors", + "model.layers.57.input_layernorm.weight": "model-00040-of-00086.safetensors", + "model.layers.57.mlp.down_proj.weight": "model-00040-of-00086.safetensors", + "model.layers.57.mlp.down_proj.weight_scale": "model-00040-of-00086.safetensors", + "model.layers.57.mlp.gate_proj.weight": "model-00040-of-00086.safetensors", + "model.layers.57.mlp.gate_proj.weight_scale": "model-00040-of-00086.safetensors", + "model.layers.57.mlp.up_proj.weight": "model-00040-of-00086.safetensors", + "model.layers.57.mlp.up_proj.weight_scale": "model-00040-of-00086.safetensors", + 
"model.layers.57.post_attention_layernorm.weight": "model-00040-of-00086.safetensors", + "model.layers.57.self_attn.k_proj.weight": "model-00039-of-00086.safetensors", + "model.layers.57.self_attn.k_proj.weight_scale": "model-00039-of-00086.safetensors", + "model.layers.57.self_attn.o_proj.weight": "model-00039-of-00086.safetensors", + "model.layers.57.self_attn.o_proj.weight_scale": "model-00039-of-00086.safetensors", + "model.layers.57.self_attn.q_proj.weight": "model-00039-of-00086.safetensors", + "model.layers.57.self_attn.q_proj.weight_scale": "model-00039-of-00086.safetensors", + "model.layers.57.self_attn.v_proj.weight": "model-00039-of-00086.safetensors", + "model.layers.57.self_attn.v_proj.weight_scale": "model-00039-of-00086.safetensors", + "model.layers.58.input_layernorm.weight": "model-00041-of-00086.safetensors", + "model.layers.58.mlp.down_proj.weight": "model-00041-of-00086.safetensors", + "model.layers.58.mlp.down_proj.weight_scale": "model-00041-of-00086.safetensors", + "model.layers.58.mlp.gate_proj.weight": "model-00040-of-00086.safetensors", + "model.layers.58.mlp.gate_proj.weight_scale": "model-00040-of-00086.safetensors", + "model.layers.58.mlp.up_proj.weight": "model-00040-of-00086.safetensors", + "model.layers.58.mlp.up_proj.weight_scale": "model-00040-of-00086.safetensors", + "model.layers.58.post_attention_layernorm.weight": "model-00041-of-00086.safetensors", + "model.layers.58.self_attn.k_proj.weight": "model-00040-of-00086.safetensors", + "model.layers.58.self_attn.k_proj.weight_scale": "model-00040-of-00086.safetensors", + "model.layers.58.self_attn.o_proj.weight": "model-00040-of-00086.safetensors", + "model.layers.58.self_attn.o_proj.weight_scale": "model-00040-of-00086.safetensors", + "model.layers.58.self_attn.q_proj.weight": "model-00040-of-00086.safetensors", + "model.layers.58.self_attn.q_proj.weight_scale": "model-00040-of-00086.safetensors", + "model.layers.58.self_attn.v_proj.weight": "model-00040-of-00086.safetensors", + 
"model.layers.58.self_attn.v_proj.weight_scale": "model-00040-of-00086.safetensors", + "model.layers.59.input_layernorm.weight": "model-00041-of-00086.safetensors", + "model.layers.59.mlp.down_proj.weight": "model-00041-of-00086.safetensors", + "model.layers.59.mlp.down_proj.weight_scale": "model-00041-of-00086.safetensors", + "model.layers.59.mlp.gate_proj.weight": "model-00041-of-00086.safetensors", + "model.layers.59.mlp.gate_proj.weight_scale": "model-00041-of-00086.safetensors", + "model.layers.59.mlp.up_proj.weight": "model-00041-of-00086.safetensors", + "model.layers.59.mlp.up_proj.weight_scale": "model-00041-of-00086.safetensors", + "model.layers.59.post_attention_layernorm.weight": "model-00041-of-00086.safetensors", + "model.layers.59.self_attn.k_proj.weight": "model-00041-of-00086.safetensors", + "model.layers.59.self_attn.k_proj.weight_scale": "model-00041-of-00086.safetensors", + "model.layers.59.self_attn.o_proj.weight": "model-00041-of-00086.safetensors", + "model.layers.59.self_attn.o_proj.weight_scale": "model-00041-of-00086.safetensors", + "model.layers.59.self_attn.q_proj.weight": "model-00041-of-00086.safetensors", + "model.layers.59.self_attn.q_proj.weight_scale": "model-00041-of-00086.safetensors", + "model.layers.59.self_attn.v_proj.weight": "model-00041-of-00086.safetensors", + "model.layers.59.self_attn.v_proj.weight_scale": "model-00041-of-00086.safetensors", + "model.layers.6.input_layernorm.weight": "model-00006-of-00086.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00006-of-00086.safetensors", + "model.layers.6.mlp.down_proj.weight_scale": "model-00006-of-00086.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00006-of-00086.safetensors", + "model.layers.6.mlp.gate_proj.weight_scale": "model-00006-of-00086.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00006-of-00086.safetensors", + "model.layers.6.mlp.up_proj.weight_scale": "model-00006-of-00086.safetensors", + 
"model.layers.6.post_attention_layernorm.weight": "model-00006-of-00086.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00005-of-00086.safetensors", + "model.layers.6.self_attn.k_proj.weight_scale": "model-00005-of-00086.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00005-of-00086.safetensors", + "model.layers.6.self_attn.o_proj.weight_scale": "model-00005-of-00086.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00005-of-00086.safetensors", + "model.layers.6.self_attn.q_proj.weight_scale": "model-00005-of-00086.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00005-of-00086.safetensors", + "model.layers.6.self_attn.v_proj.weight_scale": "model-00005-of-00086.safetensors", + "model.layers.60.input_layernorm.weight": "model-00042-of-00086.safetensors", + "model.layers.60.mlp.down_proj.weight": "model-00042-of-00086.safetensors", + "model.layers.60.mlp.down_proj.weight_scale": "model-00042-of-00086.safetensors", + "model.layers.60.mlp.gate_proj.weight": "model-00042-of-00086.safetensors", + "model.layers.60.mlp.gate_proj.weight_scale": "model-00042-of-00086.safetensors", + "model.layers.60.mlp.up_proj.weight": "model-00042-of-00086.safetensors", + "model.layers.60.mlp.up_proj.weight_scale": "model-00042-of-00086.safetensors", + "model.layers.60.post_attention_layernorm.weight": "model-00042-of-00086.safetensors", + "model.layers.60.self_attn.k_proj.weight": "model-00041-of-00086.safetensors", + "model.layers.60.self_attn.k_proj.weight_scale": "model-00041-of-00086.safetensors", + "model.layers.60.self_attn.o_proj.weight": "model-00041-of-00086.safetensors", + "model.layers.60.self_attn.o_proj.weight_scale": "model-00041-of-00086.safetensors", + "model.layers.60.self_attn.q_proj.weight": "model-00041-of-00086.safetensors", + "model.layers.60.self_attn.q_proj.weight_scale": "model-00041-of-00086.safetensors", + "model.layers.60.self_attn.v_proj.weight": "model-00041-of-00086.safetensors", + 
"model.layers.60.self_attn.v_proj.weight_scale": "model-00041-of-00086.safetensors", + "model.layers.61.input_layernorm.weight": "model-00043-of-00086.safetensors", + "model.layers.61.mlp.down_proj.weight": "model-00043-of-00086.safetensors", + "model.layers.61.mlp.down_proj.weight_scale": "model-00043-of-00086.safetensors", + "model.layers.61.mlp.gate_proj.weight": "model-00042-of-00086.safetensors", + "model.layers.61.mlp.gate_proj.weight_scale": "model-00042-of-00086.safetensors", + "model.layers.61.mlp.up_proj.weight": "model-00042-of-00086.safetensors", + "model.layers.61.mlp.up_proj.weight_scale": "model-00042-of-00086.safetensors", + "model.layers.61.post_attention_layernorm.weight": "model-00043-of-00086.safetensors", + "model.layers.61.self_attn.k_proj.weight": "model-00042-of-00086.safetensors", + "model.layers.61.self_attn.k_proj.weight_scale": "model-00042-of-00086.safetensors", + "model.layers.61.self_attn.o_proj.weight": "model-00042-of-00086.safetensors", + "model.layers.61.self_attn.o_proj.weight_scale": "model-00042-of-00086.safetensors", + "model.layers.61.self_attn.q_proj.weight": "model-00042-of-00086.safetensors", + "model.layers.61.self_attn.q_proj.weight_scale": "model-00042-of-00086.safetensors", + "model.layers.61.self_attn.v_proj.weight": "model-00042-of-00086.safetensors", + "model.layers.61.self_attn.v_proj.weight_scale": "model-00042-of-00086.safetensors", + "model.layers.62.input_layernorm.weight": "model-00043-of-00086.safetensors", + "model.layers.62.mlp.down_proj.weight": "model-00043-of-00086.safetensors", + "model.layers.62.mlp.down_proj.weight_scale": "model-00043-of-00086.safetensors", + "model.layers.62.mlp.gate_proj.weight": "model-00043-of-00086.safetensors", + "model.layers.62.mlp.gate_proj.weight_scale": "model-00043-of-00086.safetensors", + "model.layers.62.mlp.up_proj.weight": "model-00043-of-00086.safetensors", + "model.layers.62.mlp.up_proj.weight_scale": "model-00043-of-00086.safetensors", + 
"model.layers.62.post_attention_layernorm.weight": "model-00043-of-00086.safetensors", + "model.layers.62.self_attn.k_proj.weight": "model-00043-of-00086.safetensors", + "model.layers.62.self_attn.k_proj.weight_scale": "model-00043-of-00086.safetensors", + "model.layers.62.self_attn.o_proj.weight": "model-00043-of-00086.safetensors", + "model.layers.62.self_attn.o_proj.weight_scale": "model-00043-of-00086.safetensors", + "model.layers.62.self_attn.q_proj.weight": "model-00043-of-00086.safetensors", + "model.layers.62.self_attn.q_proj.weight_scale": "model-00043-of-00086.safetensors", + "model.layers.62.self_attn.v_proj.weight": "model-00043-of-00086.safetensors", + "model.layers.62.self_attn.v_proj.weight_scale": "model-00043-of-00086.safetensors", + "model.layers.63.input_layernorm.weight": "model-00044-of-00086.safetensors", + "model.layers.63.mlp.down_proj.weight": "model-00044-of-00086.safetensors", + "model.layers.63.mlp.down_proj.weight_scale": "model-00044-of-00086.safetensors", + "model.layers.63.mlp.gate_proj.weight": "model-00044-of-00086.safetensors", + "model.layers.63.mlp.gate_proj.weight_scale": "model-00044-of-00086.safetensors", + "model.layers.63.mlp.up_proj.weight": "model-00044-of-00086.safetensors", + "model.layers.63.mlp.up_proj.weight_scale": "model-00044-of-00086.safetensors", + "model.layers.63.post_attention_layernorm.weight": "model-00044-of-00086.safetensors", + "model.layers.63.self_attn.k_proj.weight": "model-00043-of-00086.safetensors", + "model.layers.63.self_attn.k_proj.weight_scale": "model-00043-of-00086.safetensors", + "model.layers.63.self_attn.o_proj.weight": "model-00043-of-00086.safetensors", + "model.layers.63.self_attn.o_proj.weight_scale": "model-00043-of-00086.safetensors", + "model.layers.63.self_attn.q_proj.weight": "model-00043-of-00086.safetensors", + "model.layers.63.self_attn.q_proj.weight_scale": "model-00043-of-00086.safetensors", + "model.layers.63.self_attn.v_proj.weight": "model-00043-of-00086.safetensors", + 
"model.layers.63.self_attn.v_proj.weight_scale": "model-00043-of-00086.safetensors", + "model.layers.64.input_layernorm.weight": "model-00045-of-00086.safetensors", + "model.layers.64.mlp.down_proj.weight": "model-00045-of-00086.safetensors", + "model.layers.64.mlp.down_proj.weight_scale": "model-00045-of-00086.safetensors", + "model.layers.64.mlp.gate_proj.weight": "model-00044-of-00086.safetensors", + "model.layers.64.mlp.gate_proj.weight_scale": "model-00044-of-00086.safetensors", + "model.layers.64.mlp.up_proj.weight": "model-00044-of-00086.safetensors", + "model.layers.64.mlp.up_proj.weight_scale": "model-00044-of-00086.safetensors", + "model.layers.64.post_attention_layernorm.weight": "model-00045-of-00086.safetensors", + "model.layers.64.self_attn.k_proj.weight": "model-00044-of-00086.safetensors", + "model.layers.64.self_attn.k_proj.weight_scale": "model-00044-of-00086.safetensors", + "model.layers.64.self_attn.o_proj.weight": "model-00044-of-00086.safetensors", + "model.layers.64.self_attn.o_proj.weight_scale": "model-00044-of-00086.safetensors", + "model.layers.64.self_attn.q_proj.weight": "model-00044-of-00086.safetensors", + "model.layers.64.self_attn.q_proj.weight_scale": "model-00044-of-00086.safetensors", + "model.layers.64.self_attn.v_proj.weight": "model-00044-of-00086.safetensors", + "model.layers.64.self_attn.v_proj.weight_scale": "model-00044-of-00086.safetensors", + "model.layers.65.input_layernorm.weight": "model-00045-of-00086.safetensors", + "model.layers.65.mlp.down_proj.weight": "model-00045-of-00086.safetensors", + "model.layers.65.mlp.down_proj.weight_scale": "model-00045-of-00086.safetensors", + "model.layers.65.mlp.gate_proj.weight": "model-00045-of-00086.safetensors", + "model.layers.65.mlp.gate_proj.weight_scale": "model-00045-of-00086.safetensors", + "model.layers.65.mlp.up_proj.weight": "model-00045-of-00086.safetensors", + "model.layers.65.mlp.up_proj.weight_scale": "model-00045-of-00086.safetensors", + 
"model.layers.65.post_attention_layernorm.weight": "model-00045-of-00086.safetensors", + "model.layers.65.self_attn.k_proj.weight": "model-00045-of-00086.safetensors", + "model.layers.65.self_attn.k_proj.weight_scale": "model-00045-of-00086.safetensors", + "model.layers.65.self_attn.o_proj.weight": "model-00045-of-00086.safetensors", + "model.layers.65.self_attn.o_proj.weight_scale": "model-00045-of-00086.safetensors", + "model.layers.65.self_attn.q_proj.weight": "model-00045-of-00086.safetensors", + "model.layers.65.self_attn.q_proj.weight_scale": "model-00045-of-00086.safetensors", + "model.layers.65.self_attn.v_proj.weight": "model-00045-of-00086.safetensors", + "model.layers.65.self_attn.v_proj.weight_scale": "model-00045-of-00086.safetensors", + "model.layers.66.input_layernorm.weight": "model-00046-of-00086.safetensors", + "model.layers.66.mlp.down_proj.weight": "model-00046-of-00086.safetensors", + "model.layers.66.mlp.down_proj.weight_scale": "model-00046-of-00086.safetensors", + "model.layers.66.mlp.gate_proj.weight": "model-00046-of-00086.safetensors", + "model.layers.66.mlp.gate_proj.weight_scale": "model-00046-of-00086.safetensors", + "model.layers.66.mlp.up_proj.weight": "model-00046-of-00086.safetensors", + "model.layers.66.mlp.up_proj.weight_scale": "model-00046-of-00086.safetensors", + "model.layers.66.post_attention_layernorm.weight": "model-00046-of-00086.safetensors", + "model.layers.66.self_attn.k_proj.weight": "model-00045-of-00086.safetensors", + "model.layers.66.self_attn.k_proj.weight_scale": "model-00045-of-00086.safetensors", + "model.layers.66.self_attn.o_proj.weight": "model-00045-of-00086.safetensors", + "model.layers.66.self_attn.o_proj.weight_scale": "model-00045-of-00086.safetensors", + "model.layers.66.self_attn.q_proj.weight": "model-00045-of-00086.safetensors", + "model.layers.66.self_attn.q_proj.weight_scale": "model-00045-of-00086.safetensors", + "model.layers.66.self_attn.v_proj.weight": "model-00045-of-00086.safetensors", + 
"model.layers.66.self_attn.v_proj.weight_scale": "model-00045-of-00086.safetensors", + "model.layers.67.input_layernorm.weight": "model-00047-of-00086.safetensors", + "model.layers.67.mlp.down_proj.weight": "model-00047-of-00086.safetensors", + "model.layers.67.mlp.down_proj.weight_scale": "model-00047-of-00086.safetensors", + "model.layers.67.mlp.gate_proj.weight": "model-00046-of-00086.safetensors", + "model.layers.67.mlp.gate_proj.weight_scale": "model-00046-of-00086.safetensors", + "model.layers.67.mlp.up_proj.weight": "model-00046-of-00086.safetensors", + "model.layers.67.mlp.up_proj.weight_scale": "model-00046-of-00086.safetensors", + "model.layers.67.post_attention_layernorm.weight": "model-00047-of-00086.safetensors", + "model.layers.67.self_attn.k_proj.weight": "model-00046-of-00086.safetensors", + "model.layers.67.self_attn.k_proj.weight_scale": "model-00046-of-00086.safetensors", + "model.layers.67.self_attn.o_proj.weight": "model-00046-of-00086.safetensors", + "model.layers.67.self_attn.o_proj.weight_scale": "model-00046-of-00086.safetensors", + "model.layers.67.self_attn.q_proj.weight": "model-00046-of-00086.safetensors", + "model.layers.67.self_attn.q_proj.weight_scale": "model-00046-of-00086.safetensors", + "model.layers.67.self_attn.v_proj.weight": "model-00046-of-00086.safetensors", + "model.layers.67.self_attn.v_proj.weight_scale": "model-00046-of-00086.safetensors", + "model.layers.68.input_layernorm.weight": "model-00047-of-00086.safetensors", + "model.layers.68.mlp.down_proj.weight": "model-00047-of-00086.safetensors", + "model.layers.68.mlp.down_proj.weight_scale": "model-00047-of-00086.safetensors", + "model.layers.68.mlp.gate_proj.weight": "model-00047-of-00086.safetensors", + "model.layers.68.mlp.gate_proj.weight_scale": "model-00047-of-00086.safetensors", + "model.layers.68.mlp.up_proj.weight": "model-00047-of-00086.safetensors", + "model.layers.68.mlp.up_proj.weight_scale": "model-00047-of-00086.safetensors", + 
"model.layers.68.post_attention_layernorm.weight": "model-00047-of-00086.safetensors", + "model.layers.68.self_attn.k_proj.weight": "model-00047-of-00086.safetensors", + "model.layers.68.self_attn.k_proj.weight_scale": "model-00047-of-00086.safetensors", + "model.layers.68.self_attn.o_proj.weight": "model-00047-of-00086.safetensors", + "model.layers.68.self_attn.o_proj.weight_scale": "model-00047-of-00086.safetensors", + "model.layers.68.self_attn.q_proj.weight": "model-00047-of-00086.safetensors", + "model.layers.68.self_attn.q_proj.weight_scale": "model-00047-of-00086.safetensors", + "model.layers.68.self_attn.v_proj.weight": "model-00047-of-00086.safetensors", + "model.layers.68.self_attn.v_proj.weight_scale": "model-00047-of-00086.safetensors", + "model.layers.69.input_layernorm.weight": "model-00048-of-00086.safetensors", + "model.layers.69.mlp.down_proj.weight": "model-00048-of-00086.safetensors", + "model.layers.69.mlp.down_proj.weight_scale": "model-00048-of-00086.safetensors", + "model.layers.69.mlp.gate_proj.weight": "model-00048-of-00086.safetensors", + "model.layers.69.mlp.gate_proj.weight_scale": "model-00048-of-00086.safetensors", + "model.layers.69.mlp.up_proj.weight": "model-00048-of-00086.safetensors", + "model.layers.69.mlp.up_proj.weight_scale": "model-00048-of-00086.safetensors", + "model.layers.69.post_attention_layernorm.weight": "model-00048-of-00086.safetensors", + "model.layers.69.self_attn.k_proj.weight": "model-00047-of-00086.safetensors", + "model.layers.69.self_attn.k_proj.weight_scale": "model-00047-of-00086.safetensors", + "model.layers.69.self_attn.o_proj.weight": "model-00047-of-00086.safetensors", + "model.layers.69.self_attn.o_proj.weight_scale": "model-00047-of-00086.safetensors", + "model.layers.69.self_attn.q_proj.weight": "model-00047-of-00086.safetensors", + "model.layers.69.self_attn.q_proj.weight_scale": "model-00047-of-00086.safetensors", + "model.layers.69.self_attn.v_proj.weight": "model-00047-of-00086.safetensors", + 
"model.layers.69.self_attn.v_proj.weight_scale": "model-00047-of-00086.safetensors", + "model.layers.7.input_layernorm.weight": "model-00007-of-00086.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00007-of-00086.safetensors", + "model.layers.7.mlp.down_proj.weight_scale": "model-00007-of-00086.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00006-of-00086.safetensors", + "model.layers.7.mlp.gate_proj.weight_scale": "model-00006-of-00086.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00006-of-00086.safetensors", + "model.layers.7.mlp.up_proj.weight_scale": "model-00006-of-00086.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00007-of-00086.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00006-of-00086.safetensors", + "model.layers.7.self_attn.k_proj.weight_scale": "model-00006-of-00086.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00006-of-00086.safetensors", + "model.layers.7.self_attn.o_proj.weight_scale": "model-00006-of-00086.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00006-of-00086.safetensors", + "model.layers.7.self_attn.q_proj.weight_scale": "model-00006-of-00086.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00006-of-00086.safetensors", + "model.layers.7.self_attn.v_proj.weight_scale": "model-00006-of-00086.safetensors", + "model.layers.70.input_layernorm.weight": "model-00049-of-00086.safetensors", + "model.layers.70.mlp.down_proj.weight": "model-00049-of-00086.safetensors", + "model.layers.70.mlp.down_proj.weight_scale": "model-00049-of-00086.safetensors", + "model.layers.70.mlp.gate_proj.weight": "model-00048-of-00086.safetensors", + "model.layers.70.mlp.gate_proj.weight_scale": "model-00048-of-00086.safetensors", + "model.layers.70.mlp.up_proj.weight": "model-00048-of-00086.safetensors", + "model.layers.70.mlp.up_proj.weight_scale": "model-00048-of-00086.safetensors", + 
"model.layers.70.post_attention_layernorm.weight": "model-00049-of-00086.safetensors", + "model.layers.70.self_attn.k_proj.weight": "model-00048-of-00086.safetensors", + "model.layers.70.self_attn.k_proj.weight_scale": "model-00048-of-00086.safetensors", + "model.layers.70.self_attn.o_proj.weight": "model-00048-of-00086.safetensors", + "model.layers.70.self_attn.o_proj.weight_scale": "model-00048-of-00086.safetensors", + "model.layers.70.self_attn.q_proj.weight": "model-00048-of-00086.safetensors", + "model.layers.70.self_attn.q_proj.weight_scale": "model-00048-of-00086.safetensors", + "model.layers.70.self_attn.v_proj.weight": "model-00048-of-00086.safetensors", + "model.layers.70.self_attn.v_proj.weight_scale": "model-00048-of-00086.safetensors", + "model.layers.71.input_layernorm.weight": "model-00049-of-00086.safetensors", + "model.layers.71.mlp.down_proj.weight": "model-00049-of-00086.safetensors", + "model.layers.71.mlp.down_proj.weight_scale": "model-00049-of-00086.safetensors", + "model.layers.71.mlp.gate_proj.weight": "model-00049-of-00086.safetensors", + "model.layers.71.mlp.gate_proj.weight_scale": "model-00049-of-00086.safetensors", + "model.layers.71.mlp.up_proj.weight": "model-00049-of-00086.safetensors", + "model.layers.71.mlp.up_proj.weight_scale": "model-00049-of-00086.safetensors", + "model.layers.71.post_attention_layernorm.weight": "model-00049-of-00086.safetensors", + "model.layers.71.self_attn.k_proj.weight": "model-00049-of-00086.safetensors", + "model.layers.71.self_attn.k_proj.weight_scale": "model-00049-of-00086.safetensors", + "model.layers.71.self_attn.o_proj.weight": "model-00049-of-00086.safetensors", + "model.layers.71.self_attn.o_proj.weight_scale": "model-00049-of-00086.safetensors", + "model.layers.71.self_attn.q_proj.weight": "model-00049-of-00086.safetensors", + "model.layers.71.self_attn.q_proj.weight_scale": "model-00049-of-00086.safetensors", + "model.layers.71.self_attn.v_proj.weight": "model-00049-of-00086.safetensors", + 
"model.layers.71.self_attn.v_proj.weight_scale": "model-00049-of-00086.safetensors", + "model.layers.72.input_layernorm.weight": "model-00050-of-00086.safetensors", + "model.layers.72.mlp.down_proj.weight": "model-00050-of-00086.safetensors", + "model.layers.72.mlp.down_proj.weight_scale": "model-00050-of-00086.safetensors", + "model.layers.72.mlp.gate_proj.weight": "model-00050-of-00086.safetensors", + "model.layers.72.mlp.gate_proj.weight_scale": "model-00050-of-00086.safetensors", + "model.layers.72.mlp.up_proj.weight": "model-00050-of-00086.safetensors", + "model.layers.72.mlp.up_proj.weight_scale": "model-00050-of-00086.safetensors", + "model.layers.72.post_attention_layernorm.weight": "model-00050-of-00086.safetensors", + "model.layers.72.self_attn.k_proj.weight": "model-00049-of-00086.safetensors", + "model.layers.72.self_attn.k_proj.weight_scale": "model-00049-of-00086.safetensors", + "model.layers.72.self_attn.o_proj.weight": "model-00049-of-00086.safetensors", + "model.layers.72.self_attn.o_proj.weight_scale": "model-00049-of-00086.safetensors", + "model.layers.72.self_attn.q_proj.weight": "model-00049-of-00086.safetensors", + "model.layers.72.self_attn.q_proj.weight_scale": "model-00049-of-00086.safetensors", + "model.layers.72.self_attn.v_proj.weight": "model-00049-of-00086.safetensors", + "model.layers.72.self_attn.v_proj.weight_scale": "model-00049-of-00086.safetensors", + "model.layers.73.input_layernorm.weight": "model-00051-of-00086.safetensors", + "model.layers.73.mlp.down_proj.weight": "model-00051-of-00086.safetensors", + "model.layers.73.mlp.down_proj.weight_scale": "model-00051-of-00086.safetensors", + "model.layers.73.mlp.gate_proj.weight": "model-00050-of-00086.safetensors", + "model.layers.73.mlp.gate_proj.weight_scale": "model-00050-of-00086.safetensors", + "model.layers.73.mlp.up_proj.weight": "model-00050-of-00086.safetensors", + "model.layers.73.mlp.up_proj.weight_scale": "model-00050-of-00086.safetensors", + 
"model.layers.73.post_attention_layernorm.weight": "model-00051-of-00086.safetensors", + "model.layers.73.self_attn.k_proj.weight": "model-00050-of-00086.safetensors", + "model.layers.73.self_attn.k_proj.weight_scale": "model-00050-of-00086.safetensors", + "model.layers.73.self_attn.o_proj.weight": "model-00050-of-00086.safetensors", + "model.layers.73.self_attn.o_proj.weight_scale": "model-00050-of-00086.safetensors", + "model.layers.73.self_attn.q_proj.weight": "model-00050-of-00086.safetensors", + "model.layers.73.self_attn.q_proj.weight_scale": "model-00050-of-00086.safetensors", + "model.layers.73.self_attn.v_proj.weight": "model-00050-of-00086.safetensors", + "model.layers.73.self_attn.v_proj.weight_scale": "model-00050-of-00086.safetensors", + "model.layers.74.input_layernorm.weight": "model-00051-of-00086.safetensors", + "model.layers.74.mlp.down_proj.weight": "model-00051-of-00086.safetensors", + "model.layers.74.mlp.down_proj.weight_scale": "model-00051-of-00086.safetensors", + "model.layers.74.mlp.gate_proj.weight": "model-00051-of-00086.safetensors", + "model.layers.74.mlp.gate_proj.weight_scale": "model-00051-of-00086.safetensors", + "model.layers.74.mlp.up_proj.weight": "model-00051-of-00086.safetensors", + "model.layers.74.mlp.up_proj.weight_scale": "model-00051-of-00086.safetensors", + "model.layers.74.post_attention_layernorm.weight": "model-00051-of-00086.safetensors", + "model.layers.74.self_attn.k_proj.weight": "model-00051-of-00086.safetensors", + "model.layers.74.self_attn.k_proj.weight_scale": "model-00051-of-00086.safetensors", + "model.layers.74.self_attn.o_proj.weight": "model-00051-of-00086.safetensors", + "model.layers.74.self_attn.o_proj.weight_scale": "model-00051-of-00086.safetensors", + "model.layers.74.self_attn.q_proj.weight": "model-00051-of-00086.safetensors", + "model.layers.74.self_attn.q_proj.weight_scale": "model-00051-of-00086.safetensors", + "model.layers.74.self_attn.v_proj.weight": "model-00051-of-00086.safetensors", + 
"model.layers.74.self_attn.v_proj.weight_scale": "model-00051-of-00086.safetensors", + "model.layers.75.input_layernorm.weight": "model-00052-of-00086.safetensors", + "model.layers.75.mlp.down_proj.weight": "model-00052-of-00086.safetensors", + "model.layers.75.mlp.down_proj.weight_scale": "model-00052-of-00086.safetensors", + "model.layers.75.mlp.gate_proj.weight": "model-00052-of-00086.safetensors", + "model.layers.75.mlp.gate_proj.weight_scale": "model-00052-of-00086.safetensors", + "model.layers.75.mlp.up_proj.weight": "model-00052-of-00086.safetensors", + "model.layers.75.mlp.up_proj.weight_scale": "model-00052-of-00086.safetensors", + "model.layers.75.post_attention_layernorm.weight": "model-00052-of-00086.safetensors", + "model.layers.75.self_attn.k_proj.weight": "model-00051-of-00086.safetensors", + "model.layers.75.self_attn.k_proj.weight_scale": "model-00051-of-00086.safetensors", + "model.layers.75.self_attn.o_proj.weight": "model-00051-of-00086.safetensors", + "model.layers.75.self_attn.o_proj.weight_scale": "model-00051-of-00086.safetensors", + "model.layers.75.self_attn.q_proj.weight": "model-00051-of-00086.safetensors", + "model.layers.75.self_attn.q_proj.weight_scale": "model-00051-of-00086.safetensors", + "model.layers.75.self_attn.v_proj.weight": "model-00051-of-00086.safetensors", + "model.layers.75.self_attn.v_proj.weight_scale": "model-00051-of-00086.safetensors", + "model.layers.76.input_layernorm.weight": "model-00053-of-00086.safetensors", + "model.layers.76.mlp.down_proj.weight": "model-00053-of-00086.safetensors", + "model.layers.76.mlp.down_proj.weight_scale": "model-00053-of-00086.safetensors", + "model.layers.76.mlp.gate_proj.weight": "model-00052-of-00086.safetensors", + "model.layers.76.mlp.gate_proj.weight_scale": "model-00052-of-00086.safetensors", + "model.layers.76.mlp.up_proj.weight": "model-00052-of-00086.safetensors", + "model.layers.76.mlp.up_proj.weight_scale": "model-00052-of-00086.safetensors", + 
"model.layers.76.post_attention_layernorm.weight": "model-00053-of-00086.safetensors", + "model.layers.76.self_attn.k_proj.weight": "model-00052-of-00086.safetensors", + "model.layers.76.self_attn.k_proj.weight_scale": "model-00052-of-00086.safetensors", + "model.layers.76.self_attn.o_proj.weight": "model-00052-of-00086.safetensors", + "model.layers.76.self_attn.o_proj.weight_scale": "model-00052-of-00086.safetensors", + "model.layers.76.self_attn.q_proj.weight": "model-00052-of-00086.safetensors", + "model.layers.76.self_attn.q_proj.weight_scale": "model-00052-of-00086.safetensors", + "model.layers.76.self_attn.v_proj.weight": "model-00052-of-00086.safetensors", + "model.layers.76.self_attn.v_proj.weight_scale": "model-00052-of-00086.safetensors", + "model.layers.77.input_layernorm.weight": "model-00053-of-00086.safetensors", + "model.layers.77.mlp.down_proj.weight": "model-00053-of-00086.safetensors", + "model.layers.77.mlp.down_proj.weight_scale": "model-00053-of-00086.safetensors", + "model.layers.77.mlp.gate_proj.weight": "model-00053-of-00086.safetensors", + "model.layers.77.mlp.gate_proj.weight_scale": "model-00053-of-00086.safetensors", + "model.layers.77.mlp.up_proj.weight": "model-00053-of-00086.safetensors", + "model.layers.77.mlp.up_proj.weight_scale": "model-00053-of-00086.safetensors", + "model.layers.77.post_attention_layernorm.weight": "model-00053-of-00086.safetensors", + "model.layers.77.self_attn.k_proj.weight": "model-00053-of-00086.safetensors", + "model.layers.77.self_attn.k_proj.weight_scale": "model-00053-of-00086.safetensors", + "model.layers.77.self_attn.o_proj.weight": "model-00053-of-00086.safetensors", + "model.layers.77.self_attn.o_proj.weight_scale": "model-00053-of-00086.safetensors", + "model.layers.77.self_attn.q_proj.weight": "model-00053-of-00086.safetensors", + "model.layers.77.self_attn.q_proj.weight_scale": "model-00053-of-00086.safetensors", + "model.layers.77.self_attn.v_proj.weight": "model-00053-of-00086.safetensors", + 
"model.layers.77.self_attn.v_proj.weight_scale": "model-00053-of-00086.safetensors", + "model.layers.78.input_layernorm.weight": "model-00054-of-00086.safetensors", + "model.layers.78.mlp.down_proj.weight": "model-00054-of-00086.safetensors", + "model.layers.78.mlp.down_proj.weight_scale": "model-00054-of-00086.safetensors", + "model.layers.78.mlp.gate_proj.weight": "model-00054-of-00086.safetensors", + "model.layers.78.mlp.gate_proj.weight_scale": "model-00054-of-00086.safetensors", + "model.layers.78.mlp.up_proj.weight": "model-00054-of-00086.safetensors", + "model.layers.78.mlp.up_proj.weight_scale": "model-00054-of-00086.safetensors", + "model.layers.78.post_attention_layernorm.weight": "model-00054-of-00086.safetensors", + "model.layers.78.self_attn.k_proj.weight": "model-00053-of-00086.safetensors", + "model.layers.78.self_attn.k_proj.weight_scale": "model-00053-of-00086.safetensors", + "model.layers.78.self_attn.o_proj.weight": "model-00053-of-00086.safetensors", + "model.layers.78.self_attn.o_proj.weight_scale": "model-00053-of-00086.safetensors", + "model.layers.78.self_attn.q_proj.weight": "model-00053-of-00086.safetensors", + "model.layers.78.self_attn.q_proj.weight_scale": "model-00053-of-00086.safetensors", + "model.layers.78.self_attn.v_proj.weight": "model-00053-of-00086.safetensors", + "model.layers.78.self_attn.v_proj.weight_scale": "model-00053-of-00086.safetensors", + "model.layers.79.input_layernorm.weight": "model-00055-of-00086.safetensors", + "model.layers.79.mlp.down_proj.weight": "model-00055-of-00086.safetensors", + "model.layers.79.mlp.down_proj.weight_scale": "model-00055-of-00086.safetensors", + "model.layers.79.mlp.gate_proj.weight": "model-00054-of-00086.safetensors", + "model.layers.79.mlp.gate_proj.weight_scale": "model-00054-of-00086.safetensors", + "model.layers.79.mlp.up_proj.weight": "model-00054-of-00086.safetensors", + "model.layers.79.mlp.up_proj.weight_scale": "model-00054-of-00086.safetensors", + 
"model.layers.79.post_attention_layernorm.weight": "model-00055-of-00086.safetensors", + "model.layers.79.self_attn.k_proj.weight": "model-00054-of-00086.safetensors", + "model.layers.79.self_attn.k_proj.weight_scale": "model-00054-of-00086.safetensors", + "model.layers.79.self_attn.o_proj.weight": "model-00054-of-00086.safetensors", + "model.layers.79.self_attn.o_proj.weight_scale": "model-00054-of-00086.safetensors", + "model.layers.79.self_attn.q_proj.weight": "model-00054-of-00086.safetensors", + "model.layers.79.self_attn.q_proj.weight_scale": "model-00054-of-00086.safetensors", + "model.layers.79.self_attn.v_proj.weight": "model-00054-of-00086.safetensors", + "model.layers.79.self_attn.v_proj.weight_scale": "model-00054-of-00086.safetensors", + "model.layers.8.input_layernorm.weight": "model-00007-of-00086.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00007-of-00086.safetensors", + "model.layers.8.mlp.down_proj.weight_scale": "model-00007-of-00086.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00007-of-00086.safetensors", + "model.layers.8.mlp.gate_proj.weight_scale": "model-00007-of-00086.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00007-of-00086.safetensors", + "model.layers.8.mlp.up_proj.weight_scale": "model-00007-of-00086.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00007-of-00086.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00007-of-00086.safetensors", + "model.layers.8.self_attn.k_proj.weight_scale": "model-00007-of-00086.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00007-of-00086.safetensors", + "model.layers.8.self_attn.o_proj.weight_scale": "model-00007-of-00086.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00007-of-00086.safetensors", + "model.layers.8.self_attn.q_proj.weight_scale": "model-00007-of-00086.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00007-of-00086.safetensors", + 
"model.layers.8.self_attn.v_proj.weight_scale": "model-00007-of-00086.safetensors", + "model.layers.80.input_layernorm.weight": "model-00055-of-00086.safetensors", + "model.layers.80.mlp.down_proj.weight": "model-00055-of-00086.safetensors", + "model.layers.80.mlp.down_proj.weight_scale": "model-00055-of-00086.safetensors", + "model.layers.80.mlp.gate_proj.weight": "model-00055-of-00086.safetensors", + "model.layers.80.mlp.gate_proj.weight_scale": "model-00055-of-00086.safetensors", + "model.layers.80.mlp.up_proj.weight": "model-00055-of-00086.safetensors", + "model.layers.80.mlp.up_proj.weight_scale": "model-00055-of-00086.safetensors", + "model.layers.80.post_attention_layernorm.weight": "model-00055-of-00086.safetensors", + "model.layers.80.self_attn.k_proj.weight": "model-00055-of-00086.safetensors", + "model.layers.80.self_attn.k_proj.weight_scale": "model-00055-of-00086.safetensors", + "model.layers.80.self_attn.o_proj.weight": "model-00055-of-00086.safetensors", + "model.layers.80.self_attn.o_proj.weight_scale": "model-00055-of-00086.safetensors", + "model.layers.80.self_attn.q_proj.weight": "model-00055-of-00086.safetensors", + "model.layers.80.self_attn.q_proj.weight_scale": "model-00055-of-00086.safetensors", + "model.layers.80.self_attn.v_proj.weight": "model-00055-of-00086.safetensors", + "model.layers.80.self_attn.v_proj.weight_scale": "model-00055-of-00086.safetensors", + "model.layers.81.input_layernorm.weight": "model-00056-of-00086.safetensors", + "model.layers.81.mlp.down_proj.weight": "model-00056-of-00086.safetensors", + "model.layers.81.mlp.down_proj.weight_scale": "model-00056-of-00086.safetensors", + "model.layers.81.mlp.gate_proj.weight": "model-00056-of-00086.safetensors", + "model.layers.81.mlp.gate_proj.weight_scale": "model-00056-of-00086.safetensors", + "model.layers.81.mlp.up_proj.weight": "model-00056-of-00086.safetensors", + "model.layers.81.mlp.up_proj.weight_scale": "model-00056-of-00086.safetensors", + 
"model.layers.81.post_attention_layernorm.weight": "model-00056-of-00086.safetensors", + "model.layers.81.self_attn.k_proj.weight": "model-00055-of-00086.safetensors", + "model.layers.81.self_attn.k_proj.weight_scale": "model-00055-of-00086.safetensors", + "model.layers.81.self_attn.o_proj.weight": "model-00055-of-00086.safetensors", + "model.layers.81.self_attn.o_proj.weight_scale": "model-00055-of-00086.safetensors", + "model.layers.81.self_attn.q_proj.weight": "model-00055-of-00086.safetensors", + "model.layers.81.self_attn.q_proj.weight_scale": "model-00055-of-00086.safetensors", + "model.layers.81.self_attn.v_proj.weight": "model-00055-of-00086.safetensors", + "model.layers.81.self_attn.v_proj.weight_scale": "model-00055-of-00086.safetensors", + "model.layers.82.input_layernorm.weight": "model-00057-of-00086.safetensors", + "model.layers.82.mlp.down_proj.weight": "model-00057-of-00086.safetensors", + "model.layers.82.mlp.down_proj.weight_scale": "model-00057-of-00086.safetensors", + "model.layers.82.mlp.gate_proj.weight": "model-00056-of-00086.safetensors", + "model.layers.82.mlp.gate_proj.weight_scale": "model-00056-of-00086.safetensors", + "model.layers.82.mlp.up_proj.weight": "model-00056-of-00086.safetensors", + "model.layers.82.mlp.up_proj.weight_scale": "model-00056-of-00086.safetensors", + "model.layers.82.post_attention_layernorm.weight": "model-00057-of-00086.safetensors", + "model.layers.82.self_attn.k_proj.weight": "model-00056-of-00086.safetensors", + "model.layers.82.self_attn.k_proj.weight_scale": "model-00056-of-00086.safetensors", + "model.layers.82.self_attn.o_proj.weight": "model-00056-of-00086.safetensors", + "model.layers.82.self_attn.o_proj.weight_scale": "model-00056-of-00086.safetensors", + "model.layers.82.self_attn.q_proj.weight": "model-00056-of-00086.safetensors", + "model.layers.82.self_attn.q_proj.weight_scale": "model-00056-of-00086.safetensors", + "model.layers.82.self_attn.v_proj.weight": "model-00056-of-00086.safetensors", + 
"model.layers.82.self_attn.v_proj.weight_scale": "model-00056-of-00086.safetensors", + "model.layers.83.input_layernorm.weight": "model-00057-of-00086.safetensors", + "model.layers.83.mlp.down_proj.weight": "model-00057-of-00086.safetensors", + "model.layers.83.mlp.down_proj.weight_scale": "model-00057-of-00086.safetensors", + "model.layers.83.mlp.gate_proj.weight": "model-00057-of-00086.safetensors", + "model.layers.83.mlp.gate_proj.weight_scale": "model-00057-of-00086.safetensors", + "model.layers.83.mlp.up_proj.weight": "model-00057-of-00086.safetensors", + "model.layers.83.mlp.up_proj.weight_scale": "model-00057-of-00086.safetensors", + "model.layers.83.post_attention_layernorm.weight": "model-00057-of-00086.safetensors", + "model.layers.83.self_attn.k_proj.weight": "model-00057-of-00086.safetensors", + "model.layers.83.self_attn.k_proj.weight_scale": "model-00057-of-00086.safetensors", + "model.layers.83.self_attn.o_proj.weight": "model-00057-of-00086.safetensors", + "model.layers.83.self_attn.o_proj.weight_scale": "model-00057-of-00086.safetensors", + "model.layers.83.self_attn.q_proj.weight": "model-00057-of-00086.safetensors", + "model.layers.83.self_attn.q_proj.weight_scale": "model-00057-of-00086.safetensors", + "model.layers.83.self_attn.v_proj.weight": "model-00057-of-00086.safetensors", + "model.layers.83.self_attn.v_proj.weight_scale": "model-00057-of-00086.safetensors", + "model.layers.84.input_layernorm.weight": "model-00058-of-00086.safetensors", + "model.layers.84.mlp.down_proj.weight": "model-00058-of-00086.safetensors", + "model.layers.84.mlp.down_proj.weight_scale": "model-00058-of-00086.safetensors", + "model.layers.84.mlp.gate_proj.weight": "model-00058-of-00086.safetensors", + "model.layers.84.mlp.gate_proj.weight_scale": "model-00058-of-00086.safetensors", + "model.layers.84.mlp.up_proj.weight": "model-00058-of-00086.safetensors", + "model.layers.84.mlp.up_proj.weight_scale": "model-00058-of-00086.safetensors", + 
"model.layers.84.post_attention_layernorm.weight": "model-00058-of-00086.safetensors", + "model.layers.84.self_attn.k_proj.weight": "model-00057-of-00086.safetensors", + "model.layers.84.self_attn.k_proj.weight_scale": "model-00057-of-00086.safetensors", + "model.layers.84.self_attn.o_proj.weight": "model-00057-of-00086.safetensors", + "model.layers.84.self_attn.o_proj.weight_scale": "model-00057-of-00086.safetensors", + "model.layers.84.self_attn.q_proj.weight": "model-00057-of-00086.safetensors", + "model.layers.84.self_attn.q_proj.weight_scale": "model-00057-of-00086.safetensors", + "model.layers.84.self_attn.v_proj.weight": "model-00057-of-00086.safetensors", + "model.layers.84.self_attn.v_proj.weight_scale": "model-00057-of-00086.safetensors", + "model.layers.85.input_layernorm.weight": "model-00059-of-00086.safetensors", + "model.layers.85.mlp.down_proj.weight": "model-00059-of-00086.safetensors", + "model.layers.85.mlp.down_proj.weight_scale": "model-00059-of-00086.safetensors", + "model.layers.85.mlp.gate_proj.weight": "model-00058-of-00086.safetensors", + "model.layers.85.mlp.gate_proj.weight_scale": "model-00058-of-00086.safetensors", + "model.layers.85.mlp.up_proj.weight": "model-00058-of-00086.safetensors", + "model.layers.85.mlp.up_proj.weight_scale": "model-00058-of-00086.safetensors", + "model.layers.85.post_attention_layernorm.weight": "model-00059-of-00086.safetensors", + "model.layers.85.self_attn.k_proj.weight": "model-00058-of-00086.safetensors", + "model.layers.85.self_attn.k_proj.weight_scale": "model-00058-of-00086.safetensors", + "model.layers.85.self_attn.o_proj.weight": "model-00058-of-00086.safetensors", + "model.layers.85.self_attn.o_proj.weight_scale": "model-00058-of-00086.safetensors", + "model.layers.85.self_attn.q_proj.weight": "model-00058-of-00086.safetensors", + "model.layers.85.self_attn.q_proj.weight_scale": "model-00058-of-00086.safetensors", + "model.layers.85.self_attn.v_proj.weight": "model-00058-of-00086.safetensors", + 
"model.layers.85.self_attn.v_proj.weight_scale": "model-00058-of-00086.safetensors", + "model.layers.86.input_layernorm.weight": "model-00059-of-00086.safetensors", + "model.layers.86.mlp.down_proj.weight": "model-00059-of-00086.safetensors", + "model.layers.86.mlp.down_proj.weight_scale": "model-00059-of-00086.safetensors", + "model.layers.86.mlp.gate_proj.weight": "model-00059-of-00086.safetensors", + "model.layers.86.mlp.gate_proj.weight_scale": "model-00059-of-00086.safetensors", + "model.layers.86.mlp.up_proj.weight": "model-00059-of-00086.safetensors", + "model.layers.86.mlp.up_proj.weight_scale": "model-00059-of-00086.safetensors", + "model.layers.86.post_attention_layernorm.weight": "model-00059-of-00086.safetensors", + "model.layers.86.self_attn.k_proj.weight": "model-00059-of-00086.safetensors", + "model.layers.86.self_attn.k_proj.weight_scale": "model-00059-of-00086.safetensors", + "model.layers.86.self_attn.o_proj.weight": "model-00059-of-00086.safetensors", + "model.layers.86.self_attn.o_proj.weight_scale": "model-00059-of-00086.safetensors", + "model.layers.86.self_attn.q_proj.weight": "model-00059-of-00086.safetensors", + "model.layers.86.self_attn.q_proj.weight_scale": "model-00059-of-00086.safetensors", + "model.layers.86.self_attn.v_proj.weight": "model-00059-of-00086.safetensors", + "model.layers.86.self_attn.v_proj.weight_scale": "model-00059-of-00086.safetensors", + "model.layers.87.input_layernorm.weight": "model-00060-of-00086.safetensors", + "model.layers.87.mlp.down_proj.weight": "model-00060-of-00086.safetensors", + "model.layers.87.mlp.down_proj.weight_scale": "model-00060-of-00086.safetensors", + "model.layers.87.mlp.gate_proj.weight": "model-00060-of-00086.safetensors", + "model.layers.87.mlp.gate_proj.weight_scale": "model-00060-of-00086.safetensors", + "model.layers.87.mlp.up_proj.weight": "model-00060-of-00086.safetensors", + "model.layers.87.mlp.up_proj.weight_scale": "model-00060-of-00086.safetensors", + 
"model.layers.87.post_attention_layernorm.weight": "model-00060-of-00086.safetensors", + "model.layers.87.self_attn.k_proj.weight": "model-00059-of-00086.safetensors", + "model.layers.87.self_attn.k_proj.weight_scale": "model-00059-of-00086.safetensors", + "model.layers.87.self_attn.o_proj.weight": "model-00059-of-00086.safetensors", + "model.layers.87.self_attn.o_proj.weight_scale": "model-00059-of-00086.safetensors", + "model.layers.87.self_attn.q_proj.weight": "model-00059-of-00086.safetensors", + "model.layers.87.self_attn.q_proj.weight_scale": "model-00059-of-00086.safetensors", + "model.layers.87.self_attn.v_proj.weight": "model-00059-of-00086.safetensors", + "model.layers.87.self_attn.v_proj.weight_scale": "model-00059-of-00086.safetensors", + "model.layers.88.input_layernorm.weight": "model-00061-of-00086.safetensors", + "model.layers.88.mlp.down_proj.weight": "model-00061-of-00086.safetensors", + "model.layers.88.mlp.down_proj.weight_scale": "model-00061-of-00086.safetensors", + "model.layers.88.mlp.gate_proj.weight": "model-00060-of-00086.safetensors", + "model.layers.88.mlp.gate_proj.weight_scale": "model-00060-of-00086.safetensors", + "model.layers.88.mlp.up_proj.weight": "model-00060-of-00086.safetensors", + "model.layers.88.mlp.up_proj.weight_scale": "model-00060-of-00086.safetensors", + "model.layers.88.post_attention_layernorm.weight": "model-00061-of-00086.safetensors", + "model.layers.88.self_attn.k_proj.weight": "model-00060-of-00086.safetensors", + "model.layers.88.self_attn.k_proj.weight_scale": "model-00060-of-00086.safetensors", + "model.layers.88.self_attn.o_proj.weight": "model-00060-of-00086.safetensors", + "model.layers.88.self_attn.o_proj.weight_scale": "model-00060-of-00086.safetensors", + "model.layers.88.self_attn.q_proj.weight": "model-00060-of-00086.safetensors", + "model.layers.88.self_attn.q_proj.weight_scale": "model-00060-of-00086.safetensors", + "model.layers.88.self_attn.v_proj.weight": "model-00060-of-00086.safetensors", + 
"model.layers.88.self_attn.v_proj.weight_scale": "model-00060-of-00086.safetensors", + "model.layers.89.input_layernorm.weight": "model-00061-of-00086.safetensors", + "model.layers.89.mlp.down_proj.weight": "model-00061-of-00086.safetensors", + "model.layers.89.mlp.down_proj.weight_scale": "model-00061-of-00086.safetensors", + "model.layers.89.mlp.gate_proj.weight": "model-00061-of-00086.safetensors", + "model.layers.89.mlp.gate_proj.weight_scale": "model-00061-of-00086.safetensors", + "model.layers.89.mlp.up_proj.weight": "model-00061-of-00086.safetensors", + "model.layers.89.mlp.up_proj.weight_scale": "model-00061-of-00086.safetensors", + "model.layers.89.post_attention_layernorm.weight": "model-00061-of-00086.safetensors", + "model.layers.89.self_attn.k_proj.weight": "model-00061-of-00086.safetensors", + "model.layers.89.self_attn.k_proj.weight_scale": "model-00061-of-00086.safetensors", + "model.layers.89.self_attn.o_proj.weight": "model-00061-of-00086.safetensors", + "model.layers.89.self_attn.o_proj.weight_scale": "model-00061-of-00086.safetensors", + "model.layers.89.self_attn.q_proj.weight": "model-00061-of-00086.safetensors", + "model.layers.89.self_attn.q_proj.weight_scale": "model-00061-of-00086.safetensors", + "model.layers.89.self_attn.v_proj.weight": "model-00061-of-00086.safetensors", + "model.layers.89.self_attn.v_proj.weight_scale": "model-00061-of-00086.safetensors", + "model.layers.9.input_layernorm.weight": "model-00008-of-00086.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00008-of-00086.safetensors", + "model.layers.9.mlp.down_proj.weight_scale": "model-00008-of-00086.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00008-of-00086.safetensors", + "model.layers.9.mlp.gate_proj.weight_scale": "model-00008-of-00086.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00008-of-00086.safetensors", + "model.layers.9.mlp.up_proj.weight_scale": "model-00008-of-00086.safetensors", + 
"model.layers.9.post_attention_layernorm.weight": "model-00008-of-00086.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00007-of-00086.safetensors", + "model.layers.9.self_attn.k_proj.weight_scale": "model-00007-of-00086.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00007-of-00086.safetensors", + "model.layers.9.self_attn.o_proj.weight_scale": "model-00007-of-00086.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00007-of-00086.safetensors", + "model.layers.9.self_attn.q_proj.weight_scale": "model-00007-of-00086.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00007-of-00086.safetensors", + "model.layers.9.self_attn.v_proj.weight_scale": "model-00007-of-00086.safetensors", + "model.layers.90.input_layernorm.weight": "model-00062-of-00086.safetensors", + "model.layers.90.mlp.down_proj.weight": "model-00062-of-00086.safetensors", + "model.layers.90.mlp.down_proj.weight_scale": "model-00062-of-00086.safetensors", + "model.layers.90.mlp.gate_proj.weight": "model-00062-of-00086.safetensors", + "model.layers.90.mlp.gate_proj.weight_scale": "model-00062-of-00086.safetensors", + "model.layers.90.mlp.up_proj.weight": "model-00062-of-00086.safetensors", + "model.layers.90.mlp.up_proj.weight_scale": "model-00062-of-00086.safetensors", + "model.layers.90.post_attention_layernorm.weight": "model-00062-of-00086.safetensors", + "model.layers.90.self_attn.k_proj.weight": "model-00061-of-00086.safetensors", + "model.layers.90.self_attn.k_proj.weight_scale": "model-00061-of-00086.safetensors", + "model.layers.90.self_attn.o_proj.weight": "model-00061-of-00086.safetensors", + "model.layers.90.self_attn.o_proj.weight_scale": "model-00061-of-00086.safetensors", + "model.layers.90.self_attn.q_proj.weight": "model-00061-of-00086.safetensors", + "model.layers.90.self_attn.q_proj.weight_scale": "model-00061-of-00086.safetensors", + "model.layers.90.self_attn.v_proj.weight": "model-00061-of-00086.safetensors", + 
"model.layers.90.self_attn.v_proj.weight_scale": "model-00061-of-00086.safetensors", + "model.layers.91.input_layernorm.weight": "model-00063-of-00086.safetensors", + "model.layers.91.mlp.down_proj.weight": "model-00063-of-00086.safetensors", + "model.layers.91.mlp.down_proj.weight_scale": "model-00063-of-00086.safetensors", + "model.layers.91.mlp.gate_proj.weight": "model-00062-of-00086.safetensors", + "model.layers.91.mlp.gate_proj.weight_scale": "model-00062-of-00086.safetensors", + "model.layers.91.mlp.up_proj.weight": "model-00062-of-00086.safetensors", + "model.layers.91.mlp.up_proj.weight_scale": "model-00062-of-00086.safetensors", + "model.layers.91.post_attention_layernorm.weight": "model-00063-of-00086.safetensors", + "model.layers.91.self_attn.k_proj.weight": "model-00062-of-00086.safetensors", + "model.layers.91.self_attn.k_proj.weight_scale": "model-00062-of-00086.safetensors", + "model.layers.91.self_attn.o_proj.weight": "model-00062-of-00086.safetensors", + "model.layers.91.self_attn.o_proj.weight_scale": "model-00062-of-00086.safetensors", + "model.layers.91.self_attn.q_proj.weight": "model-00062-of-00086.safetensors", + "model.layers.91.self_attn.q_proj.weight_scale": "model-00062-of-00086.safetensors", + "model.layers.91.self_attn.v_proj.weight": "model-00062-of-00086.safetensors", + "model.layers.91.self_attn.v_proj.weight_scale": "model-00062-of-00086.safetensors", + "model.layers.92.input_layernorm.weight": "model-00063-of-00086.safetensors", + "model.layers.92.mlp.down_proj.weight": "model-00063-of-00086.safetensors", + "model.layers.92.mlp.down_proj.weight_scale": "model-00063-of-00086.safetensors", + "model.layers.92.mlp.gate_proj.weight": "model-00063-of-00086.safetensors", + "model.layers.92.mlp.gate_proj.weight_scale": "model-00063-of-00086.safetensors", + "model.layers.92.mlp.up_proj.weight": "model-00063-of-00086.safetensors", + "model.layers.92.mlp.up_proj.weight_scale": "model-00063-of-00086.safetensors", + 
"model.layers.92.post_attention_layernorm.weight": "model-00063-of-00086.safetensors", + "model.layers.92.self_attn.k_proj.weight": "model-00063-of-00086.safetensors", + "model.layers.92.self_attn.k_proj.weight_scale": "model-00063-of-00086.safetensors", + "model.layers.92.self_attn.o_proj.weight": "model-00063-of-00086.safetensors", + "model.layers.92.self_attn.o_proj.weight_scale": "model-00063-of-00086.safetensors", + "model.layers.92.self_attn.q_proj.weight": "model-00063-of-00086.safetensors", + "model.layers.92.self_attn.q_proj.weight_scale": "model-00063-of-00086.safetensors", + "model.layers.92.self_attn.v_proj.weight": "model-00063-of-00086.safetensors", + "model.layers.92.self_attn.v_proj.weight_scale": "model-00063-of-00086.safetensors", + "model.layers.93.input_layernorm.weight": "model-00064-of-00086.safetensors", + "model.layers.93.mlp.down_proj.weight": "model-00064-of-00086.safetensors", + "model.layers.93.mlp.down_proj.weight_scale": "model-00064-of-00086.safetensors", + "model.layers.93.mlp.gate_proj.weight": "model-00064-of-00086.safetensors", + "model.layers.93.mlp.gate_proj.weight_scale": "model-00064-of-00086.safetensors", + "model.layers.93.mlp.up_proj.weight": "model-00064-of-00086.safetensors", + "model.layers.93.mlp.up_proj.weight_scale": "model-00064-of-00086.safetensors", + "model.layers.93.post_attention_layernorm.weight": "model-00064-of-00086.safetensors", + "model.layers.93.self_attn.k_proj.weight": "model-00063-of-00086.safetensors", + "model.layers.93.self_attn.k_proj.weight_scale": "model-00063-of-00086.safetensors", + "model.layers.93.self_attn.o_proj.weight": "model-00063-of-00086.safetensors", + "model.layers.93.self_attn.o_proj.weight_scale": "model-00063-of-00086.safetensors", + "model.layers.93.self_attn.q_proj.weight": "model-00063-of-00086.safetensors", + "model.layers.93.self_attn.q_proj.weight_scale": "model-00063-of-00086.safetensors", + "model.layers.93.self_attn.v_proj.weight": "model-00063-of-00086.safetensors", + 
"model.layers.93.self_attn.v_proj.weight_scale": "model-00063-of-00086.safetensors", + "model.layers.94.input_layernorm.weight": "model-00065-of-00086.safetensors", + "model.layers.94.mlp.down_proj.weight": "model-00065-of-00086.safetensors", + "model.layers.94.mlp.down_proj.weight_scale": "model-00065-of-00086.safetensors", + "model.layers.94.mlp.gate_proj.weight": "model-00064-of-00086.safetensors", + "model.layers.94.mlp.gate_proj.weight_scale": "model-00064-of-00086.safetensors", + "model.layers.94.mlp.up_proj.weight": "model-00064-of-00086.safetensors", + "model.layers.94.mlp.up_proj.weight_scale": "model-00064-of-00086.safetensors", + "model.layers.94.post_attention_layernorm.weight": "model-00065-of-00086.safetensors", + "model.layers.94.self_attn.k_proj.weight": "model-00064-of-00086.safetensors", + "model.layers.94.self_attn.k_proj.weight_scale": "model-00064-of-00086.safetensors", + "model.layers.94.self_attn.o_proj.weight": "model-00064-of-00086.safetensors", + "model.layers.94.self_attn.o_proj.weight_scale": "model-00064-of-00086.safetensors", + "model.layers.94.self_attn.q_proj.weight": "model-00064-of-00086.safetensors", + "model.layers.94.self_attn.q_proj.weight_scale": "model-00064-of-00086.safetensors", + "model.layers.94.self_attn.v_proj.weight": "model-00064-of-00086.safetensors", + "model.layers.94.self_attn.v_proj.weight_scale": "model-00064-of-00086.safetensors", + "model.layers.95.input_layernorm.weight": "model-00065-of-00086.safetensors", + "model.layers.95.mlp.down_proj.weight": "model-00065-of-00086.safetensors", + "model.layers.95.mlp.down_proj.weight_scale": "model-00065-of-00086.safetensors", + "model.layers.95.mlp.gate_proj.weight": "model-00065-of-00086.safetensors", + "model.layers.95.mlp.gate_proj.weight_scale": "model-00065-of-00086.safetensors", + "model.layers.95.mlp.up_proj.weight": "model-00065-of-00086.safetensors", + "model.layers.95.mlp.up_proj.weight_scale": "model-00065-of-00086.safetensors", + 
"model.layers.95.post_attention_layernorm.weight": "model-00065-of-00086.safetensors", + "model.layers.95.self_attn.k_proj.weight": "model-00065-of-00086.safetensors", + "model.layers.95.self_attn.k_proj.weight_scale": "model-00065-of-00086.safetensors", + "model.layers.95.self_attn.o_proj.weight": "model-00065-of-00086.safetensors", + "model.layers.95.self_attn.o_proj.weight_scale": "model-00065-of-00086.safetensors", + "model.layers.95.self_attn.q_proj.weight": "model-00065-of-00086.safetensors", + "model.layers.95.self_attn.q_proj.weight_scale": "model-00065-of-00086.safetensors", + "model.layers.95.self_attn.v_proj.weight": "model-00065-of-00086.safetensors", + "model.layers.95.self_attn.v_proj.weight_scale": "model-00065-of-00086.safetensors", + "model.layers.96.input_layernorm.weight": "model-00066-of-00086.safetensors", + "model.layers.96.mlp.down_proj.weight": "model-00066-of-00086.safetensors", + "model.layers.96.mlp.down_proj.weight_scale": "model-00066-of-00086.safetensors", + "model.layers.96.mlp.gate_proj.weight": "model-00066-of-00086.safetensors", + "model.layers.96.mlp.gate_proj.weight_scale": "model-00066-of-00086.safetensors", + "model.layers.96.mlp.up_proj.weight": "model-00066-of-00086.safetensors", + "model.layers.96.mlp.up_proj.weight_scale": "model-00066-of-00086.safetensors", + "model.layers.96.post_attention_layernorm.weight": "model-00066-of-00086.safetensors", + "model.layers.96.self_attn.k_proj.weight": "model-00065-of-00086.safetensors", + "model.layers.96.self_attn.k_proj.weight_scale": "model-00065-of-00086.safetensors", + "model.layers.96.self_attn.o_proj.weight": "model-00065-of-00086.safetensors", + "model.layers.96.self_attn.o_proj.weight_scale": "model-00065-of-00086.safetensors", + "model.layers.96.self_attn.q_proj.weight": "model-00065-of-00086.safetensors", + "model.layers.96.self_attn.q_proj.weight_scale": "model-00065-of-00086.safetensors", + "model.layers.96.self_attn.v_proj.weight": "model-00065-of-00086.safetensors", + 
"model.layers.96.self_attn.v_proj.weight_scale": "model-00065-of-00086.safetensors", + "model.layers.97.input_layernorm.weight": "model-00067-of-00086.safetensors", + "model.layers.97.mlp.down_proj.weight": "model-00067-of-00086.safetensors", + "model.layers.97.mlp.down_proj.weight_scale": "model-00067-of-00086.safetensors", + "model.layers.97.mlp.gate_proj.weight": "model-00066-of-00086.safetensors", + "model.layers.97.mlp.gate_proj.weight_scale": "model-00066-of-00086.safetensors", + "model.layers.97.mlp.up_proj.weight": "model-00066-of-00086.safetensors", + "model.layers.97.mlp.up_proj.weight_scale": "model-00066-of-00086.safetensors", + "model.layers.97.post_attention_layernorm.weight": "model-00067-of-00086.safetensors", + "model.layers.97.self_attn.k_proj.weight": "model-00066-of-00086.safetensors", + "model.layers.97.self_attn.k_proj.weight_scale": "model-00066-of-00086.safetensors", + "model.layers.97.self_attn.o_proj.weight": "model-00066-of-00086.safetensors", + "model.layers.97.self_attn.o_proj.weight_scale": "model-00066-of-00086.safetensors", + "model.layers.97.self_attn.q_proj.weight": "model-00066-of-00086.safetensors", + "model.layers.97.self_attn.q_proj.weight_scale": "model-00066-of-00086.safetensors", + "model.layers.97.self_attn.v_proj.weight": "model-00066-of-00086.safetensors", + "model.layers.97.self_attn.v_proj.weight_scale": "model-00066-of-00086.safetensors", + "model.layers.98.input_layernorm.weight": "model-00067-of-00086.safetensors", + "model.layers.98.mlp.down_proj.weight": "model-00067-of-00086.safetensors", + "model.layers.98.mlp.down_proj.weight_scale": "model-00067-of-00086.safetensors", + "model.layers.98.mlp.gate_proj.weight": "model-00067-of-00086.safetensors", + "model.layers.98.mlp.gate_proj.weight_scale": "model-00067-of-00086.safetensors", + "model.layers.98.mlp.up_proj.weight": "model-00067-of-00086.safetensors", + "model.layers.98.mlp.up_proj.weight_scale": "model-00067-of-00086.safetensors", + 
"model.layers.98.post_attention_layernorm.weight": "model-00067-of-00086.safetensors", + "model.layers.98.self_attn.k_proj.weight": "model-00067-of-00086.safetensors", + "model.layers.98.self_attn.k_proj.weight_scale": "model-00067-of-00086.safetensors", + "model.layers.98.self_attn.o_proj.weight": "model-00067-of-00086.safetensors", + "model.layers.98.self_attn.o_proj.weight_scale": "model-00067-of-00086.safetensors", + "model.layers.98.self_attn.q_proj.weight": "model-00067-of-00086.safetensors", + "model.layers.98.self_attn.q_proj.weight_scale": "model-00067-of-00086.safetensors", + "model.layers.98.self_attn.v_proj.weight": "model-00067-of-00086.safetensors", + "model.layers.98.self_attn.v_proj.weight_scale": "model-00067-of-00086.safetensors", + "model.layers.99.input_layernorm.weight": "model-00068-of-00086.safetensors", + "model.layers.99.mlp.down_proj.weight": "model-00068-of-00086.safetensors", + "model.layers.99.mlp.down_proj.weight_scale": "model-00068-of-00086.safetensors", + "model.layers.99.mlp.gate_proj.weight": "model-00068-of-00086.safetensors", + "model.layers.99.mlp.gate_proj.weight_scale": "model-00068-of-00086.safetensors", + "model.layers.99.mlp.up_proj.weight": "model-00068-of-00086.safetensors", + "model.layers.99.mlp.up_proj.weight_scale": "model-00068-of-00086.safetensors", + "model.layers.99.post_attention_layernorm.weight": "model-00068-of-00086.safetensors", + "model.layers.99.self_attn.k_proj.weight": "model-00067-of-00086.safetensors", + "model.layers.99.self_attn.k_proj.weight_scale": "model-00067-of-00086.safetensors", + "model.layers.99.self_attn.o_proj.weight": "model-00067-of-00086.safetensors", + "model.layers.99.self_attn.o_proj.weight_scale": "model-00067-of-00086.safetensors", + "model.layers.99.self_attn.q_proj.weight": "model-00067-of-00086.safetensors", + "model.layers.99.self_attn.q_proj.weight_scale": "model-00067-of-00086.safetensors", + "model.layers.99.self_attn.v_proj.weight": "model-00067-of-00086.safetensors", + 
"model.layers.99.self_attn.v_proj.weight_scale": "model-00067-of-00086.safetensors", + "model.norm.weight": "model-00085-of-00086.safetensors" + } +} diff --git a/recipe.yaml b/recipe.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9c7755fcffd47fc652a1078c28b5ad89671bc70f --- /dev/null +++ b/recipe.yaml @@ -0,0 +1,9 @@ +quant_stage: + quant_modifiers: + QuantizationModifier: + ignore: [lm_head] + config_groups: + group_0: + weights: {num_bits: 8, type: float, strategy: channel, dynamic: false, symmetric: true} + input_activations: {num_bits: 8, type: float, strategy: token, dynamic: true, symmetric: true} + targets: [Linear] diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..89af31a3c96c39011ffec99c32a770062be9dc7c --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..548e7b196b97b38c45068d7a2bcedfbb444546cd --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9400df98529060210393c40f08cb127f7c0df584338b3fbfdba8cf82a33c1ade +size 17210102 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ef8218534a4cd20717275b6ae849b35d4821ba1d --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,2072 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + 
"content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + 
"128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + 
"special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": 
"<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + 
"128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": 
false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": 
false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": 
"<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": 
"<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128256": { + "content": "<pad>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{% for message in messages %}{% if message['role'] == 'system' %}{{ '<|system|>\n' + message['content'] + '\n' }}{% elif message['role'] == 'user' %}{{ '<|user|>\n' + message['content'] + '\n' }}{% elif message['role'] == 'assistant' %}{% if not loop.last %}{{ '<|assistant|>\n' + message['content'] + eos_token + '\n' }}{% else %}{{ '<|assistant|>\n' + message['content'] + eos_token }}{% endif %}{% endif %}{% if loop.last and add_generation_prompt %}{{ '<|assistant|>\n' }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + 
"pad_token": "<pad>", + "tokenizer_class": "PreTrainedTokenizerFast" +}