Update config.json
config.json +26 -29
config.json
CHANGED
@@ -26,41 +26,38 @@
   "use_cache": true,
   "vocab_size": 32000,
   "quantization_config": {
-    "quantization_config": {
-      "config_groups": {
-        "group_0": {
-          "input_activations": null,
-          "output_activations": null,
-          "targets": [
-            "Linear"
-          ],
-          "weights": {
-            "block_structure": null,
-            "dynamic": false,
-            "group_size": 128,
-            "num_bits": 4,
-            "observer": "minmax",
-            "observer_kwargs": {},
-            "strategy": "group",
-            "symmetric": true,
-            "type": "int"
-          }
+    "config_groups": {
+      "group_0": {
+        "input_activations": null,
+        "output_activations": null,
+        "targets": [
+          "Linear"
+        ],
+        "weights": {
+          "block_structure": null,
+          "dynamic": false,
+          "group_size": 128,
+          "num_bits": 4,
+          "observer": "minmax",
+          "observer_kwargs": {},
+          "strategy": "group",
+          "symmetric": true,
+          "type": "int"
         }
-      },
-      "format": "pack-quantized",
-      "global_compression_ratio": 2.2202386697247802,
-      "ignore": [
-        "lm_head"
-      ],
-      "quant_method": "sparseml",
-      "quantization_status": "frozen"
+      }
     },
+    "quant_method": "sparseml",
+    "format": "pack-quantized",
+    "quantization_status": "frozen",
+    "global_compression_ratio": 2.2202386697247802,
+    "ignore": [
+      "lm_head"
+    ],
     "sparsity_config": {
       "format": "dense",
       "global_sparsity": 13.035292091297688,
       "registry_requires_subclass": false,
       "sparsity_structure": "unstructured"
-    },
-    "quant_method": "compressed-tensors"
+    }
   }
 }
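For reference, a minimal sketch of how the updated file could be sanity-checked locally, assuming the config.json from this commit has been downloaded into the working directory (the file path is a placeholder, not part of the commit):

import json

# Load the edited config.json from the current directory (placeholder path).
with open("config.json") as f:
    config = json.load(f)

qcfg = config["quantization_config"]

# After this commit the quantization settings sit directly under
# "quantization_config" instead of inside a nested wrapper object.
assert qcfg["quant_method"] == "sparseml"
assert qcfg["format"] == "pack-quantized"
assert "lm_head" in qcfg["ignore"]

# Weight-quantization scheme applied to the "Linear" targets.
weights = qcfg["config_groups"]["group_0"]["weights"]
print(weights["num_bits"], weights["type"], weights["symmetric"],
      weights["strategy"], weights["group_size"])

Per the diff, the weights block describes symmetric 4-bit integer quantization applied group-wise (group_size 128) to Linear layers, with lm_head excluded, while the dense sparsity_config entry is left unchanged.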