Echo9Zulu committed (verified)
Commit 7573377 · 1 Parent(s): a8739b7

Create app.py

Files changed (1)
app.py +304 -0
app.py ADDED
@@ -0,0 +1,304 @@
+ import gradio as gr
+
+ class ConversionTool:
+     def __init__(self):
+         # Initialize widgets
+         self.intro = gr.Markdown("""
+ ### Optimum CLI Export Tool
+
+ This tool helps organize the process of constructing conversion commands when using Optimum Intel to convert Transformers models outside of a script.
+
+ My goal was to make it easier to construct commands for the [Optimum CLI conversion tool](https://huggingface.co/docs/optimum/main/en/intel/openvino/export), which enables converting models to the OpenVINO Intermediate Representation
+ outside of the from_pretrained method used in Transformers with the OpenVINO-related classes like OVModelForCausalLM, OVModelForSeq2SeqLM, OVModelForQuestionAnswering, etc.
+
+ ## Usage
+ Here I'm assuming you have followed the instructions in the documentation and have all your dependencies in order.
+
+ Run this to get the latest version:
+ ```
+ pip install --upgrade --upgrade-strategy eager optimum[openvino]
+ ```
+
+ Intended workflow:
+ - Select parameters.
+ - Copy the command.
+ - Execute it in your environment.
+
+ Note: Conversion can take a while.
+ Expect slow performance and rejoice. After all, OpenVINO supports Intel CPUs from 6th gen forward, so you can
+ squeeze performance out of hardware you already own.
+
+ ## Discussion
+
+ Leveraging hardware acceleration from OpenVINO requires converting a model into an intermediate format derived from ONNX. Essentially, the command we execute rebuilds the model graph from its source so it is optimized for how OpenVINO uses this graph in memory.
+
+ Using OpenVINO effectively requires weighing what hardware the Intel machine running the code has available alongside details about the model you're working with.
+
+ So here are some questions you should be able to answer before using this tool:
+ - What data types does my CPU support?
+ - What instruction sets?
+ - How will I be using the model?
+ - Do I have enough system memory for this task?
+
+ Visit the [Intel Ark](https://www.intel.com/content/www/us/en/products/details/processors.html) product database to find this information. It's *the* ground truth on these sorts of specs. Even so, expect some trial and error when testing different model architectures.
+         """)
+
+         self.model_input = gr.Textbox(
+             label='Model',
+             placeholder='Model ID on huggingface.co or path on disk',
+             info="The model to convert. This can be a model ID on Hugging Face or a path on disk."
+         )
+         self.output_path = gr.Textbox(
+             label='Output Directory',
+             placeholder='Path to store the generated OV model',
+             info="The directory where the converted OpenVINO model will be saved."
+         )
+         self.task = gr.Dropdown(
+             label='Task',
+             choices=['auto'] + [
+                 'image-to-image', 'image-segmentation', 'inpainting',
+                 'sentence-similarity', 'text-to-audio', 'image-to-text',
+                 'automatic-speech-recognition', 'token-classification',
+                 'text-to-image', 'audio-classification', 'feature-extraction',
+                 'semantic-segmentation', 'masked-im', 'audio-xvector',
+                 'audio-frame-classification', 'text2text-generation',
+                 'multiple-choice', 'depth-estimation', 'image-classification',
+                 'fill-mask', 'zero-shot-object-detection', 'object-detection',
+                 'question-answering', 'zero-shot-image-classification',
+                 'mask-generation', 'text-generation', 'text-classification'
+             ],
+             value=None
+         )
+         self.framework = gr.Dropdown(
+             label='Framework',
+             choices=['pt', 'tf'],
+             value=None
+         )
+         self.weight_format = gr.Dropdown(
+             label='Weight Format',
+             choices=['fp32', 'fp16', 'int8', 'int4', 'mxfp4', 'nf4'],
+             value=None,
+             info="The level of compression we apply to the intermediate representation."
+         )
+         self.library = gr.Dropdown(
+             label='Library',
+             choices=[
+                 'auto', 'transformers', 'diffusers', 'timm',
+                 'sentence_transformers', 'open_clip'
+             ],
+             value=None
+         )
+         self.ratio = gr.Number(
+             label='Ratio',
+             value=None,
+             minimum=0.0,
+             maximum=1.0,
+             step=0.1
+         )
+         self.group_size = gr.Number(
+             label='Group Size',
+             value=None,
+             step=1
+         )
+         self.backup_precision = gr.Dropdown(
+             label='Backup Precision',
+             choices=['', 'int8_sym', 'int8_asym'],
+             # value=None
+         )
+         self.dataset = gr.Dropdown(
+             label='Dataset',
+             choices=['none', 'auto', 'wikitext2', 'c4', 'c4-new', 'contextual',
+                      'conceptual_captions', 'laion/220k-GPT4Vision-captions-from-LIVIS',
+                      'laion/filtered-wit'],
+             value=None
+         )
+         self.trust_remote_code = gr.Checkbox(label='Trust Remote Code', value=False)
+         self.disable_stateful = gr.Checkbox(label='Disable Stateful', value=False, info="Disables stateful inference. This is required for multi-GPU inference due to how OpenVINO uses the KV cache.")
+         self.disable_convert_tokenizer = gr.Checkbox(label='Disable Convert Tokenizer', value=False, info="Disables the tokenizer conversion. Use when models have custom tokenizers which might have formatting Optimum does not expect.")
+         self.all_layers = gr.Checkbox(label='All Layers', value=False)
+         self.awq = gr.Checkbox(label='AWQ', value=False, info="Activation-aware quantization algorithm from NNCF. Requires a dataset, which can also be a path.")
+         self.scale_estimation = gr.Checkbox(label='Scale Estimation', value=False)
+         self.gptq = gr.Checkbox(label='GPTQ', value=False)
+         self.lora_correction = gr.Checkbox(label='LoRA Correction', value=False)
+         self.sym = gr.Checkbox(label='Symmetric Quantization', value=False)
+         self.quant_mode = gr.Dropdown(
+             label='Quantization Mode',
+             choices=['sym', 'asym'],
+             value=None
+         )
+         self.cache_dir = gr.Textbox(
+             label='Cache Directory',
+             placeholder='Path to cache directory'
+         )
+         self.pad_token_id = gr.Number(
+             label='Pad Token ID',
+             value=None,
+             step=1,
+             info="Will be inferred from the model if not provided."
+         )
+         self.sensitivity_metric = gr.Dropdown(
+             label='Sensitivity Metric',
+             choices=['mse', 'snr'],
+             value=None
+         )
+         self.num_samples = gr.Number(
+             label='Number of Samples',
+             value=None,
+             step=1
+         )
+         self.smooth_quant_alpha = gr.Number(
+             label='Smooth Quant Alpha',
+             value=None,
+             minimum=0.0,
+             maximum=1.0,
+             step=0.1
+         )
+         self.command_output = gr.TextArea(
+             label='Generated Command',
+             placeholder='Generated command will appear here...',
+             show_label=True,
+             show_copy_button=True,
+             lines=5  # adjust height
+         )
+
+     def construct_command(self, model_input, output_path, task, framework, weight_format, library,
+                           ratio, group_size, backup_precision, dataset,
+                           trust_remote_code, disable_stateful, disable_convert_tokenizer,
+                           all_layers, awq, scale_estimation, gptq, lora_correction, sym,
+                           quant_mode, cache_dir, pad_token_id, sensitivity_metric, num_samples,
+                           smooth_quant_alpha):
+         """Construct the command string."""
+         if not model_input or not output_path:
+             return ''
+         cmd_parts = ['optimum-cli export openvino']
+
+         # Required arguments
+         cmd_parts.append(f'-m "{model_input}"')
+         cmd_parts.append(f'"{output_path}"')
+
+         # Optional arguments (only emitted when set and different from the CLI defaults;
+         # the None guards prevent emitting e.g. "--task None" when a field is left blank)
+         if task and task != 'auto':
+             cmd_parts.append(f'--task {task}')
+
+         if framework:
+             cmd_parts.append(f'--framework {framework}')
+
+         if weight_format and weight_format != 'fp32':
+             cmd_parts.append(f'--weight-format {weight_format}')
+
+         if library and library != 'auto':
+             cmd_parts.append(f'--library {library}')
+
+         if ratio is not None and ratio != 1.0:
+             cmd_parts.append(f'--ratio {ratio}')
+
+         if group_size is not None and group_size != 128:
+             cmd_parts.append(f'--group-size {group_size}')
+
+         if backup_precision and backup_precision != 'int8_asym':
+             cmd_parts.append(f'--backup-precision {backup_precision}')
+
+         if dataset and dataset != 'none':
+             cmd_parts.append(f'--dataset {dataset}')
+
+         # Flags
+         if trust_remote_code:
+             cmd_parts.append('--trust-remote-code')
+         if disable_stateful:
+             cmd_parts.append('--disable-stateful')
+         if disable_convert_tokenizer:
+             cmd_parts.append('--disable-convert-tokenizer')
+         if all_layers:
+             cmd_parts.append('--all-layers')
+         if awq:
+             cmd_parts.append('--awq')
+         if scale_estimation:
+             cmd_parts.append('--scale-estimation')
+         if gptq:
+             cmd_parts.append('--gptq')
+         if lora_correction:
+             cmd_parts.append('--lora-correction')
+         if sym:
+             cmd_parts.append('--sym')
+
+         # Additional optional arguments ("is not None" guards keep valid zero values)
+         if quant_mode:
+             cmd_parts.append(f'--quant-mode {quant_mode}')
+         if cache_dir:
+             cmd_parts.append(f'--cache_dir "{cache_dir}"')
+         if pad_token_id is not None:
+             cmd_parts.append(f'--pad-token-id {pad_token_id}')
+         if sensitivity_metric:
+             cmd_parts.append(f'--sensitivity-metric {sensitivity_metric}')
+         if num_samples:
+             cmd_parts.append(f'--num-samples {num_samples}')
+         if smooth_quant_alpha is not None:
+             cmd_parts.append(f'--smooth-quant-alpha {smooth_quant_alpha}')
+
+         constructed_command = ' '.join(cmd_parts)
+         return constructed_command
+
+     def gradio_app(self):
+         """Create and return the Gradio interface."""
+         inputs = [
+             self.model_input,
+             self.output_path,
+             self.task,
+             self.framework,
+             self.weight_format,
+             self.library,
+             self.ratio,
+             self.group_size,
+             self.backup_precision,
+             self.dataset,
+             self.trust_remote_code,
+             self.disable_stateful,
+             self.disable_convert_tokenizer,
+             self.all_layers,
+             self.awq,
+             self.scale_estimation,
+             self.gptq,
+             self.lora_correction,
+             self.sym,
+             self.quant_mode,
+             self.cache_dir,
+             self.pad_token_id,
+             self.sensitivity_metric,
+             self.num_samples,
+             self.smooth_quant_alpha,
+         ]
+         interface = gr.Interface(
+             fn=self.construct_command,
+             inputs=inputs,
+             outputs=self.command_output,
+             title="OpenVINO Conversion Tool",
+             description="Enter your model information to generate the `optimum-cli` command."
+         )
+
+         # Add custom CSS to make labels bold
+         interface.css = """
+         label {
+             font-weight: bold !important;
+         }
+         """
+
+         return interface
+
+
+ if __name__ == "__main__":
+     tool = ConversionTool()
+     app = tool.gradio_app()
+     app.launch(share=False)
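
For reference, here is a minimal sketch of what the command builder produces without launching the UI, assuming the file above is saved as `app.py`. The model ID and output path are placeholder examples, and the keyword values mirror the blank UI defaults:

```python
# A minimal sketch, assuming this file is saved as app.py.
# The model ID and output path below are hypothetical examples.
from app import ConversionTool

tool = ConversionTool()
cmd = tool.construct_command(
    model_input="HuggingFaceTB/SmolLM2-135M-Instruct",  # example model
    output_path="./smollm2-ov",
    task="text-generation",
    framework=None,
    weight_format="int4",
    library=None,
    ratio=None,
    group_size=None,
    backup_precision=None,
    dataset=None,
    trust_remote_code=False,
    disable_stateful=False,
    disable_convert_tokenizer=False,
    all_layers=False,
    awq=False,
    scale_estimation=False,
    gptq=False,
    lora_correction=False,
    sym=False,
    quant_mode=None,
    cache_dir=None,
    pad_token_id=None,
    sensitivity_metric=None,
    num_samples=None,
    smooth_quant_alpha=None,
)
print(cmd)
# optimum-cli export openvino -m "HuggingFaceTB/SmolLM2-135M-Instruct" "./smollm2-ov" --task text-generation --weight-format int4
```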
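Once the generated command has run, the output directory can be loaded with the OV classes mentioned in the intro. A minimal sketch for a text-generation model, assuming the hypothetical `./smollm2-ov` directory from the example above:

```python
# A minimal sketch, assuming ./smollm2-ov holds a model converted by the command above.
from optimum.intel import OVModelForCausalLM
from transformers import AutoTokenizer

model = OVModelForCausalLM.from_pretrained("./smollm2-ov")
tokenizer = AutoTokenizer.from_pretrained("./smollm2-ov")

# Alternatively, skip the CLI entirely and export on the fly:
# model = OVModelForCausalLM.from_pretrained(model_id, export=True)

inputs = tokenizer("OpenVINO runs on", return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```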