Echo9Zulu committed
Commit 7a4e720 · verified · 1 Parent(s): 6ad6e23

Update app.py

Files changed (1): app.py (+96 -24)
app.py CHANGED
@@ -58,47 +58,69 @@ class ConversionTool:
             placeholder='Model ID on huggingface.co or path on disk',
             info="The model to convert. This can be a model ID on Hugging Face or a path on disk."
         )
+
         self.output_path = gr.Textbox(
             label='Output Directory',
             placeholder='Path to store the generated OV model',
             info="We are storing some text here"
         )
+
         self.task = gr.Dropdown(
             label='Task',
             choices=['auto'] + [
-                'image-to-image', 'image-segmentation', 'inpainting',
-                'sentence-similarity', 'text-to-audio', 'image-to-text',
-                'automatic-speech-recognition', 'token-classification',
-                'text-to-image', 'audio-classification', 'feature-extraction',
-                'semantic-segmentation', 'masked-im', 'audio-xvector',
-                'audio-frame-classification', 'text2text-generation',
-                'multiple-choice', 'depth-estimation', 'image-classification',
+                'image-to-image',
+                'image-segmentation',
+                'inpainting',
+                'sentence-similarity',
+                'text-to-audio',
+                'image-to-text',
+                'automatic-speech-recognition',
+                'token-classification',
+                'text-to-image',
+                'audio-classification',
+                'feature-extraction',
+                'semantic-segmentation',
+                'masked-im',
+                'audio-xvector',
+                'audio-frame-classification',
+                'text2text-generation',
+                'multiple-choice',
+                'depth-estimation',
+                'image-classification',
                 'fill-mask', 'zero-shot-object-detection', 'object-detection',
                 'question-answering', 'zero-shot-image-classification',
                 'mask-generation', 'text-generation', 'text-classification',
-                'text-to-text-generation', 'text-generation-with-past'
+                'text-generation-with-past'
             ],
             value=None
         )
+
         self.framework = gr.Dropdown(
             label='Framework',
             choices=['pt', 'tf'],
             value=None
         )
+
         self.weight_format = gr.Dropdown(
             label='Weight Format',
             choices=['fp32', 'fp16', 'int8', 'int4', 'mxfp4', 'nf4'],
             value=None,
             info="The level of compression we apply to the intermediate representation."
         )
+
         self.library = gr.Dropdown(
             label='Library',
             choices=[
-                'auto', 'transformers', 'diffusers', 'timm',
-                'sentence_transformers', 'open_clip'
+                'auto',
+                'transformers',
+                'diffusers',
+                'timm',
+                'sentence_transformers',
+                'open_clip'
             ],
             value=None
         )
+
         self.ratio = gr.Number(
             label='Ratio',
             value=None,
@@ -106,57 +128,106 @@ class ConversionTool:
             maximum=1.0,
             step=0.1
         )
+
         self.group_size = gr.Number(
             label='Group Size',
             value=None,
             step=1
         )
+
         self.backup_precision = gr.Dropdown(
             label='Backup Precision',
             choices=['', 'int8_sym', 'int8_asym'],
             # value=None
         )
+
         self.dataset = gr.Dropdown(
             label='Dataset',
-            choices=['none', 'auto', 'wikitext2', 'c4', 'c4-new', 'contextual',
-                     'conceptual_captions', 'laion/220k-GPT4Vision-captions-from-LIVIS',
+            choices=['none',
+                     'auto',
+                     'wikitext2',
+                     'c4',
+                     'c4-new',
+                     'contextual',
+                     'conceptual_captions',
+                     'laion/220k-GPT4Vision-captions-from-LIVIS',
                      'laion/filtered-wit'],
             value=None
         )
-        self.trust_remote_code = gr.Checkbox(label='Trust Remote Code', value=False)
-        self.disable_stateful = gr.Checkbox(label='Disable Stateful', value=False, info="Disables stateful for inference. This is required for multi GPU inference due to how OpenVINO uses the KV cache. ")
-        self.disable_convert_tokenizer = gr.Checkbox(label='Disable Convert Tokenizer', value=False, info="Disables the tokenizer conversion. Use when models have custom tokenizers which might have formatting Optimum does not expect.")
-        self.all_layers = gr.Checkbox(label='All Layers', value=False)
-        self.awq = gr.Checkbox(label='AWQ', value=False, info="Activation aware quantization algorithm from NNCF. Requires a dataset, which can also be a path. ")
-        self.scale_estimation = gr.Checkbox(label='Scale Estimation', value=False)
-        self.gptq = gr.Checkbox(label='GPTQ', value=False)
-        self.lora_correction = gr.Checkbox(label='LoRA Correction', value=False)
-        self.sym = gr.Checkbox(label='Symmetric Quantization', value=False)
+
+        self.trust_remote_code = gr.Checkbox(
+            label='Trust Remote Code',
+            value=False)
+
+        self.disable_stateful = gr.Checkbox(
+            label='Disable Stateful',
+            value=False,
+            info="Disables stateful inference. This is required for multi GPU inference due to how OpenVINO uses the KV cache. ")
+
+        self.disable_convert_tokenizer = gr.Checkbox(
+            label='Disable Convert Tokenizer',
+            value=False,
+            info="Disables the tokenizer conversion. Use when models have custom tokenizers which might have formatting Optimum does not expect."
+        )
+
+        self.all_layers = gr.Checkbox(
+            label='All Layers',
+            value=False)
+
+        self.awq = gr.Checkbox(
+            label='AWQ',
+            value=False,
+            info="Activation aware quantization algorithm from NNCF. Requires a dataset, which can also be a path. ")
+
+        self.scale_estimation = gr.Checkbox(
+            label='Scale Estimation',
+            value=False)
+
+        self.gptq = gr.Checkbox(
+            label='GPTQ',
+            value=False)
+
+        self.lora_correction = gr.Checkbox(
+            label='LoRA Correction',
+            value=False)
+
+        self.sym = gr.Checkbox(
+            label='Symmetric Quantization',
+            value=False,
+            info="Symmetric quantization is faster and uses less memory. It is recommended for most use cases."
+        )
+
         self.quant_mode = gr.Dropdown(
             label='Quantization Mode',
             choices=['sym', 'asym'],
             value=None
         )
+
         self.cache_dir = gr.Textbox(
             label='Cache Directory',
             placeholder='Path to cache directory'
         )
+
         self.pad_token_id = gr.Number(
             label='Pad Token ID',
             value=None,
             step=1,
-            info="Will infer from the model if not provided."
+            info="Will try to infer from tokenizer if not provided."
         )
+
         self.sensitivity_metric = gr.Dropdown(
             label='Sensitivity Metric',
-            choices=['mse', 'snr'],
+            choices=['weight_quantization_error', 'hessian_input_activation',
+                     'mean_activation_variance', 'max_activation_variance', 'mean_activation_magnitude'],
             value=None
         )
+
         self.num_samples = gr.Number(
             label='Number of Samples',
             value=None,
             step=1
         )
+
         self.smooth_quant_alpha = gr.Number(
             label='Smooth Quant Alpha',
             value=None,
@@ -164,6 +235,7 @@ class ConversionTool:
             maximum=1.0,
             step=0.1
         )
+
         self.command_output = gr.TextArea(
             label='Generated Command',
             placeholder='Generated command will appear here...',
@@ -283,7 +355,7 @@ class ConversionTool:
             outputs=self.command_output,
             title="OpenVINO Conversion Tool",
             description="Enter model information to generate an `optimum-cli` export command.",
-            article=INTRODUCTION,
+            # article=INTRODUCTION,
             allow_flagging='auto'
         )
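
For context on what the form above produces: the interface description states that it generates an `optimum-cli` export command from the entered model information. The sketch below is a minimal, hypothetical illustration of that mapping and is not code from this commit; the helper name `build_export_command`, the subset of flags included, and the example model `gpt2` are assumptions, although the flags shown (`--model`, `--task`, `--weight-format`, `--trust-remote-code`) do correspond to documented `optimum-cli export openvino` options.

    # Hypothetical sketch only -- this helper is NOT part of the commit above.
    # It illustrates how the Gradio field values collected by ConversionTool
    # could be assembled into an `optimum-cli export openvino` command string.
    from typing import Optional


    def build_export_command(
        model: str,
        output_path: str,
        task: Optional[str] = None,
        weight_format: Optional[str] = None,
        trust_remote_code: bool = False,
    ) -> str:
        """Return an export command, appending only the flags the user actually set."""
        parts = ["optimum-cli", "export", "openvino", "--model", model]
        if task and task != "auto":
            parts += ["--task", task]
        if weight_format:
            parts += ["--weight-format", weight_format]
        if trust_remote_code:
            parts.append("--trust-remote-code")
        parts.append(output_path)  # the output directory is a positional argument
        return " ".join(parts)


    if __name__ == "__main__":
        # Prints, for example:
        # optimum-cli export openvino --model gpt2 --task text-generation-with-past --weight-format int4 ./gpt2-ov
        print(build_export_command("gpt2", "./gpt2-ov",
                                   task="text-generation-with-past",
                                   weight_format="int4"))

Only values the user actually sets are appended, so the generated command stays as short as the defaults allow.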