add mixed quantized model
openvino_config.json CHANGED
@@ -1,72 +1,3 @@
-
-
-
-  "optimum_version": "1.26.1",
-  "quantization_config": {
-    "dataset": "contextual",
-    "ignored_scope": null,
-    "num_samples": 200,
-    "processor": null,
-    "quantization_configs": {
-      "lm_model": {
-        "bits": 8,
-        "dataset": null,
-        "dtype": "int8",
-        "fast_bias_correction": true,
-        "ignored_scope": null,
-        "model_type": "transformer",
-        "num_samples": null,
-        "overflow_fix": "disable",
-        "processor": null,
-        "smooth_quant_alpha": null,
-        "sym": false,
-        "tokenizer": null,
-        "trust_remote_code": false
-      },
-      "text_embeddings_model": {
-        "all_layers": null,
-        "backup_precision": null,
-        "bits": 8,
-        "dataset": null,
-        "dtype": "int8",
-        "gptq": null,
-        "group_size": -1,
-        "ignored_scope": null,
-        "lora_correction": null,
-        "num_samples": null,
-        "processor": null,
-        "quant_method": "default",
-        "ratio": 1.0,
-        "scale_estimation": null,
-        "sensitivity_metric": null,
-        "sym": false,
-        "tokenizer": null,
-        "trust_remote_code": false
-      },
-      "vision_embeddings_model": {
-        "all_layers": null,
-        "backup_precision": null,
-        "bits": 8,
-        "dataset": null,
-        "dtype": "int8",
-        "gptq": null,
-        "group_size": -1,
-        "ignored_scope": null,
-        "lora_correction": null,
-        "num_samples": null,
-        "processor": null,
-        "quant_method": "default",
-        "ratio": 1.0,
-        "scale_estimation": null,
-        "sensitivity_metric": null,
-        "sym": false,
-        "tokenizer": null,
-        "trust_remote_code": false
-      }
-    },
-    "tokenizer": null,
-    "trust_remote_code": false
-  },
-  "save_onnx_model": false,
-  "transformers_version": "4.52.4"
-}
+version https://git-lfs.github.com/spec/v1
+oid sha256:cad74dd7a61f3d7f164762b506879ac89894aba6dcdb3ff28bab1a6a3e0740da
+size 2022
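The removed openvino_config.json spells out the mixed scheme: full static INT8 quantization for the language model and INT8 weight-only compression for the text and vision embedding submodels, calibrated on 200 samples of the "contextual" dataset. The sketch below shows one way such a per-submodel configuration could be assembled with optimum-intel; OVPipelineQuantizationConfig, the submodel names, and the source checkpoint id are taken from the config keys above or are hypothetical, so treat this as a sketch rather than the exact command behind this commit.

# Hedged sketch: assumes optimum-intel's OVPipelineQuantizationConfig is available
# (recent releases) and that the submodel names match the keys in openvino_config.json.
from optimum.intel import (
    OVModelForVisualCausalLM,
    OVPipelineQuantizationConfig,
    OVQuantizationConfig,
    OVWeightQuantizationConfig,
)

MODEL_ID = "org/source-vlm"  # hypothetical source checkpoint

quant_config = OVPipelineQuantizationConfig(
    quantization_configs={
        # Full static INT8 quantization (weights + activations) for the language model.
        # The serialized config records dataset/num_samples at the pipeline level;
        # here they are attached to the lm_model config for simplicity.
        "lm_model": OVQuantizationConfig(bits=8, sym=False, dataset="contextual", num_samples=200),
        # INT8 per-channel weight-only compression for the embedding submodels.
        "text_embeddings_model": OVWeightQuantizationConfig(bits=8, sym=False, group_size=-1, ratio=1.0),
        "vision_embeddings_model": OVWeightQuantizationConfig(bits=8, sym=False, group_size=-1, ratio=1.0),
    },
)

model = OVModelForVisualCausalLM.from_pretrained(
    MODEL_ID, export=True, quantization_config=quant_config
)
model.save_pretrained("model-int8-mixed")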
openvino_language_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:88460ca0040d4138a0dc22ccb757fe8628ac3fcf6a0721834c3648eaeafa7ce3
+size 135308933
openvino_language_model.xml CHANGED
The diff for this file is too large to render. See raw diff.
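Both .bin payloads in this commit are Git LFS pointers, so the actual weight blobs live in LFS storage and are resolved on download. A minimal loading sketch, assuming the hypothetical repository id below and the optimum-intel OpenVINO integration:

from optimum.intel import OVModelForVisualCausalLM
from transformers import AutoProcessor

repo_id = "org/model-int8-mixed"  # hypothetical repo id standing in for this repository
processor = AutoProcessor.from_pretrained(repo_id)
# from_pretrained resolves the LFS pointers, downloads the .xml/.bin pairs,
# and compiles the OpenVINO IR for the default device.
model = OVModelForVisualCausalLM.from_pretrained(repo_id)

# Usage then mirrors the transformers generate API, e.g.:
# inputs = processor(images=[image], text=prompt, return_tensors="pt")
# output_ids = model.generate(**inputs, max_new_tokens=64)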
openvino_text_embeddings_model.xml CHANGED
@@ -1,5 +1,5 @@
 <?xml version="1.0"?>
-<net name="
+<net name="Model3" version="11">
 <layers>
 <layer id="0" name="input" type="Parameter" version="opset1">
 <data shape="?,?" element_type="i64" />
@@ -19,7 +19,7 @@
 </port>
 </output>
 </layer>
-<layer id="2" name="
+<layer id="2" name="Convert_891888" type="Convert" version="opset1">
 <data destination_type="f16" />
 <input>
 <port id="0" precision="U8">
@@ -43,7 +43,7 @@
 </port>
 </output>
 </layer>
-<layer id="4" name="
+<layer id="4" name="Convert_891891" type="Convert" version="opset1">
 <data destination_type="f16" />
 <input>
 <port id="0" precision="U8">
@@ -162,7 +162,7 @@
 </port>
 </output>
 </layer>
-<layer id="12" name="
+<layer id="12" name="Result_10181" type="Result" version="opset1" output_names="inputs_embeds">
 <input>
 <port id="0" precision="FP32">
 <dim>-1</dim>
@@ -187,16 +187,16 @@
 <edge from-layer="11" from-port="3" to-layer="12" to-port="0" />
 </edges>
 <rt_info>
-<Runtime_version value="2025.
+<Runtime_version value="2025.3.0-19807-44526285f24-releases/2025/3" />
 <conversion_parameters>
 <framework value="pytorch" />
 <is_python_object value="True" />
 </conversion_parameters>
 <nncf>
 <friendly_names_were_updated value="True" />
-<version value="2.
+<version value="2.18.0" />
 <weight_compression>
-<advanced_parameters value="{'statistics_path': None, 'awq_params': {'subset_size': 32, 'percent_to_apply': 0.002, 'alpha_min': 0.0, 'alpha_max': 1.0, 'steps': 100, 'prefer_data_aware_scaling': True}, 'scale_estimation_params': {'subset_size': 64, 'initial_steps': 5, 'scale_steps': 5, 'weight_penalty': -1.0}, 'gptq_params': {'damp_percent': 0.1, 'block_size': 128, 'subset_size': 128}, 'lora_correction_params': {'adapter_rank': 8, 'num_iterations': 3, 'apply_regularization': True, 'subset_size': 128, 'use_int8_adapters': True}, '
+<advanced_parameters value="{'statistics_path': None, 'lora_adapter_rank': 256, 'group_size_fallback_mode': 'ignore', 'min_adjusted_group_size': 16, 'awq_params': {'subset_size': 32, 'percent_to_apply': 0.002, 'alpha_min': 0.0, 'alpha_max': 1.0, 'steps': 100, 'prefer_data_aware_scaling': True}, 'scale_estimation_params': {'subset_size': 64, 'initial_steps': 5, 'scale_steps': 5, 'weight_penalty': -1.0}, 'gptq_params': {'damp_percent': 0.1, 'block_size': 128, 'subset_size': 128}, 'lora_correction_params': {'adapter_rank': 8, 'num_iterations': 3, 'apply_regularization': True, 'subset_size': 128, 'use_int8_adapters': True}, 'backend_params': {}, 'codebook': None}" />
 <all_layers value="False" />
 <awq value="False" />
 <backup_mode value="int8_asym" />
@@ -212,11 +212,11 @@
 </weight_compression>
 </nncf>
 <optimum>
-<nncf_version value="2.
-<optimum_intel_version value="1.
-<optimum_version value="
-<pytorch_version value="2.
-<transformers_version value="4.
+<nncf_version value="2.18.0" />
+<optimum_intel_version value="1.26.0.dev0+f9cff03" />
+<optimum_version value="2.0.0.dev0" />
+<pytorch_version value="2.8.0" />
+<transformers_version value="4.55.4" />
 </optimum>
 </rt_info>
 </net>
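The rt_info block updated in this diff records the export toolchain (OpenVINO runtime, NNCF, optimum, optimum-intel, PyTorch and transformers versions) together with the NNCF weight-compression parameters. A small sketch of reading those fields back from the IR with the OpenVINO Python API, assuming the local filename from this repository; the key paths follow the XML tree shown above:

import openvino as ov

core = ov.Core()
ov_model = core.read_model("openvino_text_embeddings_model.xml")

# rt_info entries are addressed by their path inside <rt_info>.
print(ov_model.get_rt_info(["Runtime_version"]))
print(ov_model.get_rt_info(["nncf", "version"]))
print(ov_model.get_rt_info(["optimum", "nncf_version"]))
print(ov_model.get_rt_info(["optimum", "transformers_version"]))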
openvino_vision_embeddings_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:57e5aca259ca410ab26f6563bb00793c2d58b80eb58a3227c1faa7dd79ccddfa
+size 94526592
openvino_vision_embeddings_model.xml CHANGED
The diff for this file is too large to render. See raw diff.