diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..52373fe24473b1aa44333d318f578ae6bf04b49b 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f699144219a862848ba79aa92803b6b233aa5454 --- /dev/null +++ b/README.md @@ -0,0 +1,58 @@ +## DFloat11 Compressed Model: `deepseek-ai/DeepSeek-R1-Distill-Qwen-14B` + +This is a **losslessly compressed** version of [`deepseek-ai/DeepSeek-R1-Distill-Qwen-14B`](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B) using our custom **DFloat11** format. The outputs of this compressed model are **bit-for-bit identical** to the original BFloat16 model, while reducing GPU memory consumption by approximately **30%**. + +### 🔍 How It Works + +DFloat11 compresses model weights using **Huffman coding** of BFloat16 exponent bits, combined with **hardware-aware algorithmic designs** that enable efficient on-the-fly decompression directly on the GPU. During inference, the weights remain compressed in GPU memory and are **decompressed just before matrix multiplications**, then **immediately discarded after use** to minimize memory footprint. + +Key benefits: + +* **No CPU decompression or host-device data transfer** -- all operations are handled entirely on the GPU. +* **Decompression overhead is constant** per forward pass and **independent of batch size**, making DFloat11 increasingly efficient at larger batch sizes. +* DFloat11 is **much faster than CPU-offloading approaches**, enabling practical deployment in memory-constrained environments. 
+* At **batch size = 1**, inference is approximately **2× slower** than the original BF16 model, but the performance gap **narrows significantly** with larger batches. +* The compression is **fully lossless**, guaranteeing that the model’s outputs are **bit-for-bit identical** to those of the original model. + +### 🔧 How to Use + +1. Install the DFloat11 pip package *(installs the CUDA kernel automatically; requires a CUDA-compatible GPU and PyTorch installed)*: + + ```bash + pip install dfloat11[cuda12] + # or if you have CUDA version 11: + # pip install dfloat11[cuda11] + ``` + +2. To use the DFloat11 model, run the following example code in Python: + + ```python + import torch + from dfloat11 import DFloat11Model + from transformers import AutoTokenizer + + model_id = "DFloat11/DeepSeek-R1-Distill-Qwen-14B-DF11" + + model = DFloat11Model.from_pretrained(model_id, device_map="auto") + + tokenizer = AutoTokenizer.from_pretrained(model_id) + tokenizer.pad_token = tokenizer.eos_token + + prompt = "Question: What is a binary tree and its applications? 
Answer:" + inputs = tokenizer(prompt, return_tensors="pt", padding=True).to(model.device) + + with torch.no_grad(): + output = model.generate( + **inputs, + max_new_tokens=256, + do_sample=True, + ) + + print(tokenizer.batch_decode(output, skip_special_tokens=True)) + ``` + +### 📄 Learn More + +* **Paper**: [70% Size, 100% Accuracy: Lossless LLM Compression for Efficient GPU Inference via Dynamic-Length Float](https://arxiv.org/abs/2504.11651) +* **GitHub**: [https://github.com/LeanModels/DFloat11](https://github.com/LeanModels/DFloat11) +* **HuggingFace**: [https://huggingface.co/DFloat11](https://huggingface.co/DFloat11) \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..db6f62792374f2017b8af0be96309c22025c149c --- /dev/null +++ b/config.json @@ -0,0 +1,48 @@ +{ + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "dfloat11_config": { + "bytes_per_thread": 8, + "pattern_dict": { + "lm_head": [], + "model.embed_tokens": [], + "model.layers.\\d+": [ + "self_attn.q_proj", + "self_attn.k_proj", + "self_attn.v_proj", + "self_attn.o_proj", + "mlp.gate_proj", + "mlp.up_proj", + "mlp.down_proj" + ] + }, + "threads_per_block": [ + 512 + ], + "version": "0.2.0" + }, + "eos_token_id": 151643, + "hidden_act": "silu", + "hidden_size": 5120, + "initializer_range": 0.02, + "intermediate_size": 13824, + "max_position_embeddings": 131072, + "max_window_layers": 48, + "model_type": "qwen2", + "num_attention_heads": 40, + "num_hidden_layers": 48, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": 131072, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.51.3", + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 
0000000000000000000000000000000000000000..92878bd36a6f22c0ad39d3eecd6839be7eeab4ab --- /dev/null +++ b/generation_config.json @@ -0,0 +1,9 @@ +{ + "_from_model_config": true, + "bos_token_id": 151646, + "do_sample": true, + "eos_token_id": 151643, + "temperature": 0.6, + "top_p": 0.95, + "transformers_version": "4.51.3" +} diff --git a/lm_head.safetensors b/lm_head.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..332cdcfaa76a428dd3fc7bef618b8d675b0a968b --- /dev/null +++ b/lm_head.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4eecf4772bdac3bb9c84d51fa2228cbd2c3ce165d1328dd3155615381c1c18e +size 1056784505 diff --git a/model.safetensors b/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5ce4e7abe78379c7a63144e1270307278749fe2f --- /dev/null +++ b/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2cffc93ba4adf070473e7ce99d9cf0e86c18f6ebeb2b8ff31aa77c1add4ea57 +size 10360 diff --git a/model_embed_tokens.safetensors b/model_embed_tokens.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3df89ec4b66b3e8daea8c176f0e64a03e3b1e11d --- /dev/null +++ b/model_embed_tokens.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c40cc4c5d2d761f18fff8a7123cae61ba0409c966a0ddb087bf6aee86ac8bd1 +size 1071226508 diff --git a/model_layers_0.safetensors b/model_layers_0.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d8ac565dfe990547a29de98d23cc0509cdf4126c --- /dev/null +++ b/model_layers_0.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64592e1de150efa621f05780c7f600d470871b87535ccdb91f755dce1c0cfd52 +size 373047105 diff --git a/model_layers_1.safetensors b/model_layers_1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c0971cdc31933836063f5f02a7b2da44e2036c09 --- /dev/null +++ 
b/model_layers_1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:466ac8e8297745cec7a05a96e47f462aca00e9dd5d520ef5c1e164c5a973d475 +size 407124933 diff --git a/model_layers_10.safetensors b/model_layers_10.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5fd29432b817d673d3b8a438afa56071898279d1 --- /dev/null +++ b/model_layers_10.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c71eaa3f0fa3de50249353c28d03302b71d5f5eca3c889fa36e21d4927744d8a +size 372696451 diff --git a/model_layers_11.safetensors b/model_layers_11.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..07f3c614d659225d77e9ce38c667dff268eee88e --- /dev/null +++ b/model_layers_11.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2e58cfeeb590d2bb82379ea6dfb8f53c5c43dc7024b5250e75d9b0d62d64b9d +size 372735217 diff --git a/model_layers_12.safetensors b/model_layers_12.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..44dd3be62bcfcb0da656b7a6d446713bef110010 --- /dev/null +++ b/model_layers_12.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:127023e6a433ee5b95102faa6925d247708df11021852b107db2b10a389650a8 +size 372512186 diff --git a/model_layers_13.safetensors b/model_layers_13.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a124be78001eb9ac0cd7958f21a435b62f5d7a6d --- /dev/null +++ b/model_layers_13.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1ab9aa91cbefae8dec0fb7720a1b3d1cd5bea9ad879d52b8e2979644c5567cf +size 372644304 diff --git a/model_layers_14.safetensors b/model_layers_14.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fdebb401ed518644f0b6d37ea96982b7bdf18455 --- /dev/null +++ b/model_layers_14.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 
+oid sha256:1f1d1e5f24f38635c8621062de0e5d55146a359a77f11d9653848d79a348f9e3 +size 372689109 diff --git a/model_layers_15.safetensors b/model_layers_15.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..646b32ee920cd2bb9555f67263106d78049e562c --- /dev/null +++ b/model_layers_15.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a563e438369a75b25fb2ddda9a794fb2607104022573d45671d18097563fb173 +size 372827573 diff --git a/model_layers_16.safetensors b/model_layers_16.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a0933ba57f84c60837313adbd005e9d2e7b917fb --- /dev/null +++ b/model_layers_16.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14c54b5cac32bd8adc33a770fb36ed078c075b403737bfc4cd889b99269876d1 +size 372841593 diff --git a/model_layers_17.safetensors b/model_layers_17.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..05eaeecb8ac5a843c81d217a8b848b6a9f280c8c --- /dev/null +++ b/model_layers_17.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0e359b3e1354b2b33bd067facc9f7b73560c32d7b5606dc49c0d007effa0512 +size 372850701 diff --git a/model_layers_18.safetensors b/model_layers_18.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9475824b81b9ce1711d88dae77bb3bda1dd002ec --- /dev/null +++ b/model_layers_18.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d3f49e2e46e29f2445b4052d18b8977900591f39d57e3e139510a552bdfd4a5 +size 372969609 diff --git a/model_layers_19.safetensors b/model_layers_19.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..038b15a09d2abc81b576f57f1cf9e086175383dd --- /dev/null +++ b/model_layers_19.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2e31168db3bfa6e72083fa50eec09d3a42c409d80562bf71719eb267a99f14a +size 
372981033 diff --git a/model_layers_2.safetensors b/model_layers_2.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0e2358f5e40f92b47e49f0a3502093001a13d64a --- /dev/null +++ b/model_layers_2.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd9b8c08dc171b33db62865878cdd91e13253b1a43bc36775b9dd1e8ef0db9a5 +size 404482467 diff --git a/model_layers_20.safetensors b/model_layers_20.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ffff80f0b503df553f451109077a78dafc20be7a --- /dev/null +++ b/model_layers_20.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66a570c8005cfebf78591c31b15a921fe5ca89682c1980b1a678beec6abea6d4 +size 373027443 diff --git a/model_layers_21.safetensors b/model_layers_21.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..64b5ca9a825568dab297039884eb65dd9c3d07de --- /dev/null +++ b/model_layers_21.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:485274bd83d85e4d9525ffc38a91fe51f531ac230481c04fec8d0414d6a1fab9 +size 372985375 diff --git a/model_layers_22.safetensors b/model_layers_22.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d067d529e56a211f3b7feb6ce1c70b78647895a2 --- /dev/null +++ b/model_layers_22.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe00a055b7145fa00824418f1bfcb8954a697de04e1967efc683a3a106f27218 +size 373042609 diff --git a/model_layers_23.safetensors b/model_layers_23.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0eea6e0a25dddb82e4f508e8614b4651f28368b5 --- /dev/null +++ b/model_layers_23.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9184467af1930ceb44a3509e8e65807be0b9e9c6887ebecf0d906dea7e6e44de +size 373149813 diff --git a/model_layers_24.safetensors b/model_layers_24.safetensors new file 
mode 100644 index 0000000000000000000000000000000000000000..a8c50d749f28b4ad18c8616839c8bb1035a3f22e --- /dev/null +++ b/model_layers_24.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89740d9a88aab40899c7110af66825acef3fc530c27b34bf0f6418a8ceb114ce +size 373064580 diff --git a/model_layers_25.safetensors b/model_layers_25.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5b6d39920f77ee97f3b62c8e5d8c3d0c32a000d2 --- /dev/null +++ b/model_layers_25.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc413b679743fd878d69fc17f082e37c8a50b11b7e9c9b24ba1b5b934f132f2c +size 373101108 diff --git a/model_layers_26.safetensors b/model_layers_26.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bb50090335852e75ca111fcf5688bb0d53d6c79b --- /dev/null +++ b/model_layers_26.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d2e46a671e40725463997bc5252911fb04f7d874d4e66b128b2d24dc3f3037e +size 373020025 diff --git a/model_layers_27.safetensors b/model_layers_27.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e12108c124f4116e3ae0c6bb3674543320eb4ce8 --- /dev/null +++ b/model_layers_27.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38e0e436ec35a653564e30985233b3e0d6be927c20c11c821dcf96d405f6f2c8 +size 373088055 diff --git a/model_layers_28.safetensors b/model_layers_28.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1ac65b7c7953da8ec88832b4aa415dd9d4dc58cb --- /dev/null +++ b/model_layers_28.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbbcca033611ca315614aebc33dc905e6fd662b62a8f0992565835cb4a6acfc0 +size 373286696 diff --git a/model_layers_29.safetensors b/model_layers_29.safetensors new file mode 100644 index 
0000000000000000000000000000000000000000..6880d84ad184fcd948e860b28f360e40f1bd289b --- /dev/null +++ b/model_layers_29.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64278000c1b16ab10ec6d6b873453c54d81b9056def35a4806abbf31390b622d +size 373241854 diff --git a/model_layers_3.safetensors b/model_layers_3.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3284e5350541a0a14d70f1925585ffa86dcbd736 --- /dev/null +++ b/model_layers_3.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:620cc20a943e8dee242ad79567f511e535f8a2dac4029595cdc1c0d33b6ab530 +size 399937743 diff --git a/model_layers_30.safetensors b/model_layers_30.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7fe2510584cb586ffdeff5af42a981d4d32d917d --- /dev/null +++ b/model_layers_30.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68b6bfbc38362951fc091dba8add7bdc8da4200b2a57ed3bcc1a1a8ff99cc8e7 +size 373062312 diff --git a/model_layers_31.safetensors b/model_layers_31.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a0324657649cccbfa66c6bc6d79632ea444bf8c6 --- /dev/null +++ b/model_layers_31.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4aa426af68b53e64575db31045967c9bdb0807706ec35f2e062033bde8b9570f +size 372957134 diff --git a/model_layers_32.safetensors b/model_layers_32.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..736c1d6dd094f26923e1597be296a8c0feccf1cf --- /dev/null +++ b/model_layers_32.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2963fcdb6dbb0a0d26caa89c7702bba3d4229eebc13164ea7edf2ad42ae25557 +size 372970261 diff --git a/model_layers_33.safetensors b/model_layers_33.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b3d8f060ba2033b38aef33d4447404b7f7ae778 --- 
/dev/null +++ b/model_layers_33.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75ededb70f30c148589eee0e2932066da0d5ffe5097d640d8483eccfbadc1e3c +size 373000655 diff --git a/model_layers_34.safetensors b/model_layers_34.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dd78fd11b3358fe27b629a27c0b526f2d9f64a0d --- /dev/null +++ b/model_layers_34.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec98b16ba564b3da43c95106ebaa13a77eb597a9ea23c090b04325eda62be552 +size 372965181 diff --git a/model_layers_35.safetensors b/model_layers_35.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..78e49a61e9d4389195c0eb7aad8498bb715994f8 --- /dev/null +++ b/model_layers_35.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0188c4d576a6c9fb8e1e3de08cfc5737702b1629b34ebb451d3b439478c409a6 +size 372928983 diff --git a/model_layers_36.safetensors b/model_layers_36.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6597752e94ebf0202bffb2cddb303070ab3379c1 --- /dev/null +++ b/model_layers_36.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03013e93fa7a89b0dbe2c9169385f55be68dda93d0378aeb899ed38e808eda67 +size 373134987 diff --git a/model_layers_37.safetensors b/model_layers_37.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dc3c5eeb006f0c0f8e0e7cad84db51c24df8c491 --- /dev/null +++ b/model_layers_37.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a7f5ad89914849f2d941b76e69240cb38bfecadb433ddc7bd64cfcb527d86ff +size 372845454 diff --git a/model_layers_38.safetensors b/model_layers_38.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..378b5ee26a30f771337ed9e9e82bc83440bf3a2a --- /dev/null +++ b/model_layers_38.safetensors @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:ea3532d5f432c5337310ddf2600e883f909fb834f95f8546a88649b1e02e487b +size 372803552 diff --git a/model_layers_39.safetensors b/model_layers_39.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8cbc79bee17cec061302d1c3ff3b3eeef5b4524f --- /dev/null +++ b/model_layers_39.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9985c3fdd12379c58fd8de831d326435630a4028f699cae2278e3a6fac087c95 +size 372868360 diff --git a/model_layers_4.safetensors b/model_layers_4.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7e2b289f2eca44cf46feea3d9d2af91dc420eaeb --- /dev/null +++ b/model_layers_4.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6badf9c17e52e2e528ff39ab353d39d3f995900a368d8872fd961dfd5c459766 +size 399649128 diff --git a/model_layers_40.safetensors b/model_layers_40.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3671fc42191b96d7ab02ea61d9a3df801f638bca --- /dev/null +++ b/model_layers_40.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf93dfd5a74ec1b40d12cfa506fb60ab59a6095319729a3c63f2ff7d9785bc1b +size 372918947 diff --git a/model_layers_41.safetensors b/model_layers_41.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ad6404f1e66a71b9e7c18a1bd48c097fbf1454b8 --- /dev/null +++ b/model_layers_41.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26d845994e38c1a4ff8a431427294c7ec8d9624ce7bf998364dac36faa273b8d +size 372935894 diff --git a/model_layers_42.safetensors b/model_layers_42.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..88aa15a6073c39113918dc768deb56fb69046649 --- /dev/null +++ b/model_layers_42.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:42ad6f8434059cbefa0883b51ef4498bfbcbc6f80723d90b1f0328dc23d6a8d3 +size 372957980 diff --git a/model_layers_43.safetensors b/model_layers_43.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6af4b36fe6c31e55eb2c49e33f8068ac0105fb6a --- /dev/null +++ b/model_layers_43.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dad2b60612dc3c0156fd633effc2afe9cbb19c685b34a9940e2ccfd2d18fa1b1 +size 373331033 diff --git a/model_layers_44.safetensors b/model_layers_44.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9f968dc700c909d7fafb51866b5c2635eceef752 --- /dev/null +++ b/model_layers_44.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47815aa00bbdd8712ee2f23131d8f3ca05750660c5570f7dfa3628d9036846da +size 373460684 diff --git a/model_layers_45.safetensors b/model_layers_45.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..247dd454bf8b6344d0cabe696c37e2b859f0bf1b --- /dev/null +++ b/model_layers_45.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b24af7998a022273d5eff0d65108fe424b9e103f061faf73bda8aa7dcb7d255 +size 373484283 diff --git a/model_layers_46.safetensors b/model_layers_46.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a1028e4ae0f6e892acf02c39eca098e59a669871 --- /dev/null +++ b/model_layers_46.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:781ed23e51fe41dbef445aff47bebd0d41eeab2eae1149f24b917d6cc50ae7f1 +size 373640710 diff --git a/model_layers_47.safetensors b/model_layers_47.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5048571fbe9eea467168a5ca0c5e683b612e3657 --- /dev/null +++ b/model_layers_47.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fc3c41c904f744d70044c8004eed03b10ad3e3e2e14177a0894af169b408b90 +size 373214307 
diff --git a/model_layers_5.safetensors b/model_layers_5.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..409ce6960bdcfa8521ec165e1e07d287beb46232 --- /dev/null +++ b/model_layers_5.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4677f587620c1eb953c1196fd898c0a5cab0d29fe177d6108760d4026d5daf36 +size 391166646 diff --git a/model_layers_6.safetensors b/model_layers_6.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..80f2298ee1ffb7efcc82597168fd082da0c94ccd --- /dev/null +++ b/model_layers_6.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcfafa9567e687738ff9bfb58b3787f308c8599ca1562e7a2a7c5a7989146d99 +size 383215379 diff --git a/model_layers_7.safetensors b/model_layers_7.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3f43a1e2f95ef98ac745feb4f66b89f534f5ff9e --- /dev/null +++ b/model_layers_7.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b20ee4dae9eceb152eeffaa88452f287cddc9fd967fe2ad98e5f95bc8969b56 +size 373295455 diff --git a/model_layers_8.safetensors b/model_layers_8.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bca192b13524c27cb32269f3b81528aa8cee6dd7 --- /dev/null +++ b/model_layers_8.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13c2341abd0c170411191e2a7faf92483b039bc5f756a096f6bd07c069d6d596 +size 373083229 diff --git a/model_layers_9.safetensors b/model_layers_9.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..aaa302020dfa79d77fee7bbb3f49324787780e58 --- /dev/null +++ b/model_layers_9.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384720e15c99e5e7dea8993e2bce2e007a68e6fae90d32aa10d488c680818ba7 +size 372651593 diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 
0000000000000000000000000000000000000000..1d385d62cf08bca35254547902b792c243656ec1 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|end▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1a2db243e47cbc113f6b2ddcc388aeeb8fe1a94c --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e20ddafc659ba90242154b55275402edeca0715e5dbb30f56815a4ce081f4893 +size 11422778 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ef6e98c3e0446cad00c5e6fb6bf2f5bbaf2eb0bd --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,195 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "add_prefix_space": null, + "added_tokens_decoder": { + "151643": { + "content": "<|end▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151644": { + "content": "<|User|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151645": { + "content": "<|Assistant|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151646": { + "content": "<|begin▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151647": { + "content": "<|EOT|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, 
+ "special": false + }, + "151648": { + "content": "<think>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151649": { + "content": "</think>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151650": { + "content": "<|quad_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151651": { + "content": "<|quad_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151652": { + "content": "<|vision_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151653": { + "content": "<|vision_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151654": { + "content": "<|vision_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151655": { + "content": "<|image_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151656": { + "content": "<|video_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151657": { + "content": "<tool_call>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151658": { + "content": "</tool_call>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151659": { + "content": "<|fim_prefix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151660": { + "content": "<|fim_middle|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151661": { + "content": 
"<|fim_suffix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151662": { + "content": "<|fim_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151663": { + "content": "<|repo_name|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151664": { + "content": "<|file_sep|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "bos_token": "<|begin▁of▁sentence|>", + "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + 
'<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|><think>\\n'}}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "<|end▁of▁sentence|>", + "extra_special_tokens": {}, + "legacy": true, + "model_max_length": 16384, + "pad_token": "<|end▁of▁sentence|>", + "sp_model_kwargs": {}, + "tokenizer_class": "LlamaTokenizerFast", + "unk_token": null, + "use_default_system_prompt": false +}