echarlaix HF Staff committed on
Commit
459bb29
·
1 Parent(s): cdd64ce

add mixed quantized model

Browse files
openvino_config.json CHANGED
@@ -1,72 +1,3 @@
1
- {
2
- "dtype": "int8_int8_int8",
3
- "input_info": null,
4
- "optimum_version": "1.26.1",
5
- "quantization_config": {
6
- "dataset": "contextual",
7
- "ignored_scope": null,
8
- "num_samples": 200,
9
- "processor": null,
10
- "quantization_configs": {
11
- "lm_model": {
12
- "bits": 8,
13
- "dataset": null,
14
- "dtype": "int8",
15
- "fast_bias_correction": true,
16
- "ignored_scope": null,
17
- "model_type": "transformer",
18
- "num_samples": null,
19
- "overflow_fix": "disable",
20
- "processor": null,
21
- "smooth_quant_alpha": null,
22
- "sym": false,
23
- "tokenizer": null,
24
- "trust_remote_code": false
25
- },
26
- "text_embeddings_model": {
27
- "all_layers": null,
28
- "backup_precision": null,
29
- "bits": 8,
30
- "dataset": null,
31
- "dtype": "int8",
32
- "gptq": null,
33
- "group_size": -1,
34
- "ignored_scope": null,
35
- "lora_correction": null,
36
- "num_samples": null,
37
- "processor": null,
38
- "quant_method": "default",
39
- "ratio": 1.0,
40
- "scale_estimation": null,
41
- "sensitivity_metric": null,
42
- "sym": false,
43
- "tokenizer": null,
44
- "trust_remote_code": false
45
- },
46
- "vision_embeddings_model": {
47
- "all_layers": null,
48
- "backup_precision": null,
49
- "bits": 8,
50
- "dataset": null,
51
- "dtype": "int8",
52
- "gptq": null,
53
- "group_size": -1,
54
- "ignored_scope": null,
55
- "lora_correction": null,
56
- "num_samples": null,
57
- "processor": null,
58
- "quant_method": "default",
59
- "ratio": 1.0,
60
- "scale_estimation": null,
61
- "sensitivity_metric": null,
62
- "sym": false,
63
- "tokenizer": null,
64
- "trust_remote_code": false
65
- }
66
- },
67
- "tokenizer": null,
68
- "trust_remote_code": false
69
- },
70
- "save_onnx_model": false,
71
- "transformers_version": "4.52.4"
72
- }
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cad74dd7a61f3d7f164762b506879ac89894aba6dcdb3ff28bab1a6a3e0740da
3
+ size 2022
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
openvino_language_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e0c144f9eb51ff2a1546d12b0a4b96b4deb544e05ebede49a7f6c45e43f69c60
3
- size 135909156
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88460ca0040d4138a0dc22ccb757fe8628ac3fcf6a0721834c3648eaeafa7ce3
3
+ size 135308933
openvino_language_model.xml CHANGED
The diff for this file is too large to render. See raw diff
 
openvino_text_embeddings_model.xml CHANGED
@@ -1,5 +1,5 @@
1
  <?xml version="1.0"?>
2
- <net name="Model54" version="11">
3
  <layers>
4
  <layer id="0" name="input" type="Parameter" version="opset1">
5
  <data shape="?,?" element_type="i64" />
@@ -19,7 +19,7 @@
19
  </port>
20
  </output>
21
  </layer>
22
- <layer id="2" name="Convert_185024" type="Convert" version="opset1">
23
  <data destination_type="f16" />
24
  <input>
25
  <port id="0" precision="U8">
@@ -43,7 +43,7 @@
43
  </port>
44
  </output>
45
  </layer>
46
- <layer id="4" name="Convert_185027" type="Convert" version="opset1">
47
  <data destination_type="f16" />
48
  <input>
49
  <port id="0" precision="U8">
@@ -162,7 +162,7 @@
162
  </port>
163
  </output>
164
  </layer>
165
- <layer id="12" name="Result_189847" type="Result" version="opset1" output_names="inputs_embeds">
166
  <input>
167
  <port id="0" precision="FP32">
168
  <dim>-1</dim>
@@ -187,16 +187,16 @@
187
  <edge from-layer="11" from-port="3" to-layer="12" to-port="0" />
188
  </edges>
189
  <rt_info>
190
- <Runtime_version value="2025.2.0-19140-c01cd93e24d-releases/2025/2" />
191
  <conversion_parameters>
192
  <framework value="pytorch" />
193
  <is_python_object value="True" />
194
  </conversion_parameters>
195
  <nncf>
196
  <friendly_names_were_updated value="True" />
197
- <version value="2.17.0" />
198
  <weight_compression>
199
- <advanced_parameters value="{'statistics_path': None, 'awq_params': {'subset_size': 32, 'percent_to_apply': 0.002, 'alpha_min': 0.0, 'alpha_max': 1.0, 'steps': 100, 'prefer_data_aware_scaling': True}, 'scale_estimation_params': {'subset_size': 64, 'initial_steps': 5, 'scale_steps': 5, 'weight_penalty': -1.0}, 'gptq_params': {'damp_percent': 0.1, 'block_size': 128, 'subset_size': 128}, 'lora_correction_params': {'adapter_rank': 8, 'num_iterations': 3, 'apply_regularization': True, 'subset_size': 128, 'use_int8_adapters': True}, 'lora_adapter_rank': 256, 'backend_params': {}}" />
200
  <all_layers value="False" />
201
  <awq value="False" />
202
  <backup_mode value="int8_asym" />
@@ -212,11 +212,11 @@
212
  </weight_compression>
213
  </nncf>
214
  <optimum>
215
- <nncf_version value="2.17.0" />
216
- <optimum_intel_version value="1.25.0.dev0+06c0a597" />
217
- <optimum_version value="1.26.1" />
218
- <pytorch_version value="2.7.1+cpu" />
219
- <transformers_version value="4.52.4" />
220
  </optimum>
221
  </rt_info>
222
  </net>
 
1
  <?xml version="1.0"?>
2
+ <net name="Model3" version="11">
3
  <layers>
4
  <layer id="0" name="input" type="Parameter" version="opset1">
5
  <data shape="?,?" element_type="i64" />
 
19
  </port>
20
  </output>
21
  </layer>
22
+ <layer id="2" name="Convert_891888" type="Convert" version="opset1">
23
  <data destination_type="f16" />
24
  <input>
25
  <port id="0" precision="U8">
 
43
  </port>
44
  </output>
45
  </layer>
46
+ <layer id="4" name="Convert_891891" type="Convert" version="opset1">
47
  <data destination_type="f16" />
48
  <input>
49
  <port id="0" precision="U8">
 
162
  </port>
163
  </output>
164
  </layer>
165
+ <layer id="12" name="Result_10181" type="Result" version="opset1" output_names="inputs_embeds">
166
  <input>
167
  <port id="0" precision="FP32">
168
  <dim>-1</dim>
 
187
  <edge from-layer="11" from-port="3" to-layer="12" to-port="0" />
188
  </edges>
189
  <rt_info>
190
+ <Runtime_version value="2025.3.0-19807-44526285f24-releases/2025/3" />
191
  <conversion_parameters>
192
  <framework value="pytorch" />
193
  <is_python_object value="True" />
194
  </conversion_parameters>
195
  <nncf>
196
  <friendly_names_were_updated value="True" />
197
+ <version value="2.18.0" />
198
  <weight_compression>
199
+ <advanced_parameters value="{'statistics_path': None, 'lora_adapter_rank': 256, 'group_size_fallback_mode': 'ignore', 'min_adjusted_group_size': 16, 'awq_params': {'subset_size': 32, 'percent_to_apply': 0.002, 'alpha_min': 0.0, 'alpha_max': 1.0, 'steps': 100, 'prefer_data_aware_scaling': True}, 'scale_estimation_params': {'subset_size': 64, 'initial_steps': 5, 'scale_steps': 5, 'weight_penalty': -1.0}, 'gptq_params': {'damp_percent': 0.1, 'block_size': 128, 'subset_size': 128}, 'lora_correction_params': {'adapter_rank': 8, 'num_iterations': 3, 'apply_regularization': True, 'subset_size': 128, 'use_int8_adapters': True}, 'backend_params': {}, 'codebook': None}" />
200
  <all_layers value="False" />
201
  <awq value="False" />
202
  <backup_mode value="int8_asym" />
 
212
  </weight_compression>
213
  </nncf>
214
  <optimum>
215
+ <nncf_version value="2.18.0" />
216
+ <optimum_intel_version value="1.26.0.dev0+f9cff03" />
217
+ <optimum_version value="2.0.0.dev0" />
218
+ <pytorch_version value="2.8.0" />
219
+ <transformers_version value="4.55.4" />
220
  </optimum>
221
  </rt_info>
222
  </net>
openvino_vision_embeddings_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:109f9b085ef81295816a6687636a21147cb2aafafb345846da7ad2c29cc73b7c
3
- size 94133488
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57e5aca259ca410ab26f6563bb00793c2d58b80eb58a3227c1faa7dd79ccddfa
3
+ size 94526592
openvino_vision_embeddings_model.xml CHANGED
The diff for this file is too large to render. See raw diff