Create app.py
app.py
ADDED
@@ -0,0 +1,304 @@
import gradio as gr


class ConversionTool:
    def __init__(self):
        # Initialize widgets
        self.intro = gr.Markdown("""
### Optimum CLI Export Tool

This tool helps organize the process of creating conversion commands when using Intel Optimum for Transformers to convert models outside of a script.

My goal was to make it easier to construct commands for the [Optimum CLI conversion tool](https://huggingface.co/docs/optimum/main/en/intel/openvino/export), which converts models to the OpenVINO Intermediate Representation
outside of the from_pretrained method used in Transformers with the OpenVINO-related classes like OVModelForCausalLM, OVModelForSeq2SeqLM, OVModelForQuestionAnswering, etc.
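
Once converted, the model loads back through the matching OV class. A minimal sketch (the directory name here is just whatever output path you picked):

```python
from optimum.intel import OVModelForCausalLM

model = OVModelForCausalLM.from_pretrained("my_ov_model")
```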

## Usage
Here I'm assuming you have followed the instructions in the documentation and have all your dependencies in order.

Run this to get the latest version:
```
pip install --upgrade --upgrade-strategy eager optimum[openvino]
```

Intended workflow:
- Select parameters.
- Copy the command (see the example below).
- Execute it in your environment.
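
For example, a filled-out command from this tool might look like this (illustrative values, not a recommendation):
```
optimum-cli export openvino -m "microsoft/phi-2" "phi2_ov" --weight-format int4 --trust-remote-code
```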

Note: Conversion can take a while.
Expect slow performance and rejoice. After all, OpenVINO supports Intel CPUs from 6th gen forward, so you can
squeeze extra performance out of hardware you already own.

## Discussion

Leveraging hardware acceleration from OpenVINO requires converting a model into an intermediate format derived from ONNX. Basically, the command we execute rebuilds the model graph from its source to be optimized for how OpenVINO uses this graph in memory.

Using OpenVINO effectively requires considering facts about what hardware the Intel PC running the code has available to it, alongside details about the model you're working with.
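
One quick way to see what OpenVINO itself detects on your machine (a minimal sketch, assuming the `openvino` package from the install step above):

```python
import openvino as ov

# Lists the devices the runtime can target, e.g. ['CPU', 'GPU']
print(ov.Core().available_devices)
```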

So here are some questions you should be able to answer before using this tool:
- What data types does my CPU support?
- What instruction sets?
- How will I be using the model?
- Do I have enough system memory for this task?

Visit the [Intel Ark](https://www.intel.com/content/www/us/en/products/details/processors.html) product database to find this information. It's *the* ground truth on these sorts of specs. Even so, results can vary when testing with different model architectures.
""")

        self.model_input = gr.Textbox(
            label='Model',
            placeholder='Model ID on huggingface.co or path on disk',
            info="The model to convert. This can be a model ID on Hugging Face or a path on disk."
        )
        self.output_path = gr.Textbox(
            label='Output Directory',
            placeholder='Path to store the generated OV model',
            info="The directory where the converted OpenVINO model will be written."
        )
        self.task = gr.Dropdown(
            label='Task',
            choices=['auto'] + [
                'image-to-image', 'image-segmentation', 'inpainting',
                'sentence-similarity', 'text-to-audio', 'image-to-text',
                'automatic-speech-recognition', 'token-classification',
                'text-to-image', 'audio-classification', 'feature-extraction',
                'semantic-segmentation', 'masked-im', 'audio-xvector',
                'audio-frame-classification', 'text2text-generation',
                'multiple-choice', 'depth-estimation', 'image-classification',
                'fill-mask', 'zero-shot-object-detection', 'object-detection',
                'question-answering', 'zero-shot-image-classification',
                'mask-generation', 'text-generation', 'text-classification'
            ],
            value=None
        )
        self.framework = gr.Dropdown(
            label='Framework',
            choices=['pt', 'tf'],
            value=None
        )
        self.weight_format = gr.Dropdown(
            label='Weight Format',
            choices=['fp32', 'fp16', 'int8', 'int4', 'mxfp4', 'nf4'],
            value=None,
            info="The level of compression we apply to the intermediate representation."
        )
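        # Note: as far as I can tell, the ratio, group size and backup
        # precision options below only take effect with 4-bit weight
        # compression (--weight-format int4).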
        self.library = gr.Dropdown(
            label='Library',
            choices=[
                'auto', 'transformers', 'diffusers', 'timm',
                'sentence_transformers', 'open_clip'
            ],
            value=None
        )
        self.ratio = gr.Number(
            label='Ratio',
            value=None,
            minimum=0.0,
            maximum=1.0,
            step=0.1
        )
        self.group_size = gr.Number(
            label='Group Size',
            value=None,
            step=1
        )
        self.backup_precision = gr.Dropdown(
            label='Backup Precision',
            choices=['', 'int8_sym', 'int8_asym'],
            # value=None
        )
        self.dataset = gr.Dropdown(
            label='Dataset',
            choices=['none', 'auto', 'wikitext2', 'c4', 'c4-new', 'contextual',
                     'conceptual_captions', 'laion/220k-GPT4Vision-captions-from-LIVIS',
                     'laion/filtered-wit'],
            value=None
        )
        self.trust_remote_code = gr.Checkbox(label='Trust Remote Code', value=False)
        self.disable_stateful = gr.Checkbox(label='Disable Stateful', value=False, info="Disables stateful inference. This is required for multi-GPU inference due to how OpenVINO uses the KV cache.")
        self.disable_convert_tokenizer = gr.Checkbox(label='Disable Convert Tokenizer', value=False, info="Disables the tokenizer conversion. Use when models have custom tokenizers whose formatting Optimum does not expect.")
        self.all_layers = gr.Checkbox(label='All Layers', value=False)
        self.awq = gr.Checkbox(label='AWQ', value=False, info="Activation-aware weight quantization algorithm from NNCF. Requires a dataset, which can also be a path.")
        self.scale_estimation = gr.Checkbox(label='Scale Estimation', value=False)
        self.gptq = gr.Checkbox(label='GPTQ', value=False)
        self.lora_correction = gr.Checkbox(label='LoRA Correction', value=False)
        self.sym = gr.Checkbox(label='Symmetric Quantization', value=False)
        self.quant_mode = gr.Dropdown(
            label='Quantization Mode',
            choices=['sym', 'asym'],
            value=None
        )
        self.cache_dir = gr.Textbox(
            label='Cache Directory',
            placeholder='Path to cache directory'
        )
        self.pad_token_id = gr.Number(
            label='Pad Token ID',
            value=None,
            step=1,
            info="Will be inferred from the model if not provided."
        )
        self.sensitivity_metric = gr.Dropdown(
            label='Sensitivity Metric',
            choices=['mse', 'snr'],
            value=None
        )
        self.num_samples = gr.Number(
            label='Number of Samples',
            value=None,
            step=1
        )
        self.smooth_quant_alpha = gr.Number(
            label='Smooth Quant Alpha',
            value=None,
            minimum=0.0,
            maximum=1.0,
            step=0.1
        )
        self.command_output = gr.TextArea(
            label='Generated Command',
            placeholder='Generated command will appear here...',
            show_label=True,
            show_copy_button=True,
            lines=5  # Adjust height
        )

    def construct_command(self, model_input, output_path, task, framework, weight_format, library,
                          ratio, group_size, backup_precision, dataset,
                          trust_remote_code, disable_stateful, disable_convert_tokenizer,
                          all_layers, awq, scale_estimation, gptq, lora_correction, sym,
                          quant_mode, cache_dir, pad_token_id, sensitivity_metric, num_samples,
                          smooth_quant_alpha):
        """Construct the command string."""
        if not model_input or not output_path:
            return ''
        cmd_parts = ['optimum-cli export openvino']

        # Required arguments
        cmd_parts.append(f'-m "{model_input}"')
        cmd_parts.append(f'"{output_path}"')

        # Optional arguments. Unset widgets come through as None (or '' for
        # dropdowns without a value), so guard against that before comparing
        # against the CLI defaults.
        if task and task != 'auto':
            cmd_parts.append(f'--task {task}')

        if framework:
            cmd_parts.append(f'--framework {framework}')

        if weight_format and weight_format != 'fp32':
            cmd_parts.append(f'--weight-format {weight_format}')

        if library and library != 'auto':
            cmd_parts.append(f'--library {library}')

        if ratio is not None and ratio != 1.0:
            cmd_parts.append(f'--ratio {ratio}')

        if group_size is not None and group_size != 128:
            cmd_parts.append(f'--group-size {group_size}')

        if backup_precision and backup_precision != 'int8_asym':
            cmd_parts.append(f'--backup-precision {backup_precision}')

        if dataset and dataset != 'none':
            cmd_parts.append(f'--dataset {dataset}')

        # Flags
        if trust_remote_code:
            cmd_parts.append('--trust-remote-code')
        if disable_stateful:
            cmd_parts.append('--disable-stateful')
        if disable_convert_tokenizer:
            cmd_parts.append('--disable-convert-tokenizer')
        if all_layers:
            cmd_parts.append('--all-layers')
        if awq:
            cmd_parts.append('--awq')
        if scale_estimation:
            cmd_parts.append('--scale-estimation')
        if gptq:
            cmd_parts.append('--gptq')
        if lora_correction:
            cmd_parts.append('--lora-correction')
        if sym:
            cmd_parts.append('--sym')

        # New optional arguments
        if quant_mode:
            cmd_parts.append(f'--quant-mode {quant_mode}')
        if cache_dir:
            cmd_parts.append(f'--cache_dir {cache_dir}')
        if pad_token_id:
            cmd_parts.append(f'--pad-token-id {pad_token_id}')
        if sensitivity_metric:
            cmd_parts.append(f'--sensitivity-metric {sensitivity_metric}')
        if num_samples:
            cmd_parts.append(f'--num-samples {num_samples}')
        if smooth_quant_alpha:
            cmd_parts.append(f'--smooth-quant-alpha {smooth_quant_alpha}')

        constructed_command = ' '.join(cmd_parts)
        return constructed_command

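    # For example (hypothetical values): with model_input='microsoft/phi-2',
    # output_path='phi2_ov', task='text-generation', weight_format='int4',
    # trust_remote_code=True and everything else left unset, this returns:
    #   optimum-cli export openvino -m "microsoft/phi-2" "phi2_ov" --task text-generation --weight-format int4 --trust-remote-code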
    def gradio_app(self):
        """Create and run the Gradio interface."""
        inputs = [
            self.model_input,
            self.output_path,
            self.task,
            self.framework,
            self.weight_format,
            self.library,
            self.ratio,
            self.group_size,
            self.backup_precision,
            self.dataset,
            self.trust_remote_code,
            self.disable_stateful,
            self.disable_convert_tokenizer,
            self.all_layers,
            self.awq,
            self.scale_estimation,
            self.gptq,
            self.lora_correction,
            self.sym,
            self.quant_mode,
            self.cache_dir,
            self.pad_token_id,
            self.sensitivity_metric,
            self.num_samples,
            self.smooth_quant_alpha,
        ]
        interface = gr.Interface(
            fn=self.construct_command,
            inputs=inputs,
            outputs=self.command_output,
            title="OpenVINO Conversion Tool",
            description="Enter your model information to generate the `optimum-cli` command."
        )

        # Add custom CSS to make labels bold
        interface.css = """
        label {
            font-weight: bold !important;
        }
        """

        return interface


if __name__ == "__main__":
    tool = ConversionTool()
    app = tool.gradio_app()
    app.launch(share=False)
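
# To try it locally: run `python app.py` and open the URL Gradio prints
# (http://127.0.0.1:7860 by default).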