danielhanchen committed
Commit fb3ec6c · verified · 1 Parent(s): 8922d26

Add files using upload-large-folder tool

LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2023 DeepSeek
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
config.json CHANGED
@@ -32,19 +32,11 @@
   "num_hidden_layers": 61,
   "num_key_value_heads": 128,
   "num_nextn_predict_layers": 1,
+  "pad_token_id": 128815,
   "pretraining_tp": 1,
   "q_lora_rank": 1536,
   "qk_nope_head_dim": 128,
   "qk_rope_head_dim": 64,
-  "quantization_config": {
-    "activation_scheme": "dynamic",
-    "fmt": "e4m3",
-    "quant_method": "fp8",
-    "weight_block_size": [
-      128,
-      128
-    ]
-  },
   "rms_norm_eps": 1e-06,
   "rope_scaling": {
     "beta_fast": 32,
@@ -63,7 +55,8 @@
   "topk_group": 4,
   "topk_method": "noaux_tc",
   "torch_dtype": "bfloat16",
-  "transformers_version": "4.46.3",
+  "transformers_version": "4.48.1",
+  "unsloth_fixed": true,
   "use_cache": true,
   "v_head_dim": 128,
   "vocab_size": 129280
figures/benchmark.jpg ADDED
generation_config.json ADDED
@@ -0,0 +1,9 @@
+ {
+   "_from_model_config": true,
+   "bos_token_id": 0,
+   "eos_token_id": 1,
+   "do_sample": true,
+   "temperature": 0.6,
+   "top_p": 0.95,
+   "transformers_version": "4.39.3"
+ }
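The new generation_config.json turns on sampling by default, with temperature 0.6 and nucleus sampling at top_p 0.95. A minimal sketch of reading those defaults, under the same placeholder repo id assumption:

# Minimal sketch: the sampling defaults shipped in generation_config.json.
# "org/model" is a placeholder repo id.
from transformers import GenerationConfig

gen_config = GenerationConfig.from_pretrained("org/model")
print(gen_config.do_sample)    # True
print(gen_config.temperature)  # 0.6
print(gen_config.top_p)        # 0.95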
special_tokens_map.json ADDED
@@ -0,0 +1,17 @@
+ {
+   "bos_token": {
+     "content": "<|begin▁of▁sentence|>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "<|end▁of▁sentence|>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": "<|PAD▁TOKEN|>"
+ }
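special_tokens_map.json defines the BOS/EOS markers and, notably, a dedicated pad token rather than reusing EOS for padding. A minimal sketch of inspecting them through the tokenizer, again with a placeholder repo id:

# Minimal sketch: the special tokens defined in special_tokens_map.json.
# "org/model" is a placeholder repo id.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("org/model")
print(tok.bos_token)  # "<|begin▁of▁sentence|>"
print(tok.eos_token)  # "<|end▁of▁sentence|>"
print(tok.pad_token)  # "<|PAD▁TOKEN|>"

# A real pad token lets batched generation pad inputs without
# overloading the eos token; config.json pins its id to 128815.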
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
The diff for this file is too large to render. See raw diff
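When a diff is too large to render here, the raw files at each revision can still be pulled and compared locally. A minimal sketch using huggingface_hub, with the same placeholder repo id; the commit hashes are the ones shown in this commit's header:

# Minimal sketch: diff a too-large-to-render file locally.
# "org/model" is a placeholder repo id; fb3ec6c is this commit and
# 8922d26 its parent, per the header above.
import difflib
from huggingface_hub import hf_hub_download

new_path = hf_hub_download("org/model", "tokenizer_config.json", revision="fb3ec6c")
old_path = hf_hub_download("org/model", "tokenizer_config.json", revision="8922d26")

with open(old_path) as f_old, open(new_path) as f_new:
    for line in difflib.unified_diff(f_old.readlines(), f_new.readlines(),
                                     fromfile="old", tofile="new"):
        print(line, end="")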