Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -58,47 +58,69 @@ class ConversionTool:
|
|
58 |
placeholder='Model ID on huggingface.co or path on disk',
|
59 |
info="The model to convert. This can be a model ID on Hugging Face or a path on disk."
|
60 |
)
|
|
|
61 |
self.output_path = gr.Textbox(
|
62 |
label='Output Directory',
|
63 |
placeholder='Path to store the generated OV model',
|
64 |
info="We are storing some text here"
|
65 |
)
|
|
|
66 |
self.task = gr.Dropdown(
|
67 |
label='Task',
|
68 |
choices=['auto'] + [
|
69 |
-
'image-to-image',
|
70 |
-
'
|
71 |
-
'
|
72 |
-
'
|
73 |
-
'
|
74 |
-
'
|
75 |
-
'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
76 |
'fill-mask', 'zero-shot-object-detection', 'object-detection',
|
77 |
'question-answering', 'zero-shot-image-classification',
|
78 |
'mask-generation', 'text-generation', 'text-classification',
|
79 |
-
'text-
|
80 |
],
|
81 |
value=None
|
82 |
)
|
|
|
83 |
self.framework = gr.Dropdown(
|
84 |
label='Framework',
|
85 |
choices=['pt', 'tf'],
|
86 |
value=None
|
87 |
)
|
|
|
88 |
self.weight_format = gr.Dropdown(
|
89 |
label='Weight Format',
|
90 |
choices=['fp32', 'fp16', 'int8', 'int4', 'mxfp4', 'nf4'],
|
91 |
value=None,
|
92 |
info="The level of compression we apply to the intermediate representation."
|
93 |
)
|
|
|
94 |
self.library = gr.Dropdown(
|
95 |
label='Library',
|
96 |
choices=[
|
97 |
-
'auto',
|
98 |
-
'
|
|
|
|
|
|
|
|
|
99 |
],
|
100 |
value=None
|
101 |
)
|
|
|
102 |
self.ratio = gr.Number(
|
103 |
label='Ratio',
|
104 |
value=None,
|
@@ -106,57 +128,106 @@ class ConversionTool:
|
|
106 |
maximum=1.0,
|
107 |
step=0.1
|
108 |
)
|
|
|
109 |
self.group_size = gr.Number(
|
110 |
label='Group Size',
|
111 |
value=None,
|
112 |
step=1
|
113 |
)
|
|
|
114 |
self.backup_precision = gr.Dropdown(
|
115 |
label='Backup Precision',
|
116 |
choices=['', 'int8_sym', 'int8_asym'],
|
117 |
# value=None
|
118 |
)
|
|
|
119 |
self.dataset = gr.Dropdown(
|
120 |
label='Dataset',
|
121 |
-
choices=['none',
|
122 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
123 |
'laion/filtered-wit'],
|
124 |
value=None
|
125 |
)
|
126 |
-
|
127 |
-
self.
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
self.
|
132 |
-
|
133 |
-
|
134 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
135 |
self.quant_mode = gr.Dropdown(
|
136 |
label='Quantization Mode',
|
137 |
choices=['sym', 'asym'],
|
138 |
value=None
|
139 |
)
|
|
|
140 |
self.cache_dir = gr.Textbox(
|
141 |
label='Cache Directory',
|
142 |
placeholder='Path to cache directory'
|
143 |
)
|
|
|
144 |
self.pad_token_id = gr.Number(
|
145 |
label='Pad Token ID',
|
146 |
value=None,
|
147 |
step=1,
|
148 |
-
info="Will infer from
|
149 |
)
|
|
|
150 |
self.sensitivity_metric = gr.Dropdown(
|
151 |
label='Sensitivity Metric',
|
152 |
-
choices=['
|
|
|
153 |
value=None
|
154 |
)
|
|
|
155 |
self.num_samples = gr.Number(
|
156 |
label='Number of Samples',
|
157 |
value=None,
|
158 |
step=1
|
159 |
)
|
|
|
160 |
self.smooth_quant_alpha = gr.Number(
|
161 |
label='Smooth Quant Alpha',
|
162 |
value=None,
|
@@ -164,6 +235,7 @@ class ConversionTool:
|
|
164 |
maximum=1.0,
|
165 |
step=0.1
|
166 |
)
|
|
|
167 |
self.command_output = gr.TextArea(
|
168 |
label='Generated Command',
|
169 |
placeholder='Generated command will appear here...',
|
@@ -283,7 +355,7 @@ class ConversionTool:
|
|
283 |
outputs=self.command_output,
|
284 |
title="OpenVINO Conversion Tool",
|
285 |
description="Enter model information to generate an `optimum-cli` export command.",
|
286 |
-
article=INTRODUCTION,
|
287 |
allow_flagging='auto'
|
288 |
)
|
289 |
|
|
|
58 |
placeholder='Model ID on huggingface.co or path on disk',
|
59 |
info="The model to convert. This can be a model ID on Hugging Face or a path on disk."
|
60 |
)
|
61 |
+
|
62 |
self.output_path = gr.Textbox(
|
63 |
label='Output Directory',
|
64 |
placeholder='Path to store the generated OV model',
|
65 |
info="We are storing some text here"
|
66 |
)
|
67 |
+
|
68 |
self.task = gr.Dropdown(
|
69 |
label='Task',
|
70 |
choices=['auto'] + [
|
71 |
+
'image-to-image',
|
72 |
+
'image-segmentation',
|
73 |
+
'inpainting',
|
74 |
+
'sentence-similarity',
|
75 |
+
'text-to-audio',
|
76 |
+
'image-to-text',
|
77 |
+
'automatic-speech-recognition',
|
78 |
+
'token-classification',
|
79 |
+
'text-to-image',
|
80 |
+
'audio-classification',
|
81 |
+
'feature-extraction',
|
82 |
+
'semantic-segmentation',
|
83 |
+
'masked-im',
|
84 |
+
'audio-xvector',
|
85 |
+
'audio-frame-classification',
|
86 |
+
'text2text-generation',
|
87 |
+
'multiple-choice',
|
88 |
+
'depth-estimation',
|
89 |
+
'image-classification',
|
90 |
'fill-mask', 'zero-shot-object-detection', 'object-detection',
|
91 |
'question-answering', 'zero-shot-image-classification',
|
92 |
'mask-generation', 'text-generation', 'text-classification',
|
93 |
+
'text-generation-with-past'
|
94 |
],
|
95 |
value=None
|
96 |
)
|
97 |
+
|
98 |
self.framework = gr.Dropdown(
|
99 |
label='Framework',
|
100 |
choices=['pt', 'tf'],
|
101 |
value=None
|
102 |
)
|
103 |
+
|
104 |
self.weight_format = gr.Dropdown(
|
105 |
label='Weight Format',
|
106 |
choices=['fp32', 'fp16', 'int8', 'int4', 'mxfp4', 'nf4'],
|
107 |
value=None,
|
108 |
info="The level of compression we apply to the intermediate representation."
|
109 |
)
|
110 |
+
|
111 |
self.library = gr.Dropdown(
|
112 |
label='Library',
|
113 |
choices=[
|
114 |
+
'auto',
|
115 |
+
'transformers',
|
116 |
+
'diffusers',
|
117 |
+
'timm',
|
118 |
+
'sentence_transformers',
|
119 |
+
'open_clip'
|
120 |
],
|
121 |
value=None
|
122 |
)
|
123 |
+
|
124 |
self.ratio = gr.Number(
|
125 |
label='Ratio',
|
126 |
value=None,
|
|
|
128 |
maximum=1.0,
|
129 |
step=0.1
|
130 |
)
|
131 |
+
|
132 |
self.group_size = gr.Number(
|
133 |
label='Group Size',
|
134 |
value=None,
|
135 |
step=1
|
136 |
)
|
137 |
+
|
138 |
self.backup_precision = gr.Dropdown(
|
139 |
label='Backup Precision',
|
140 |
choices=['', 'int8_sym', 'int8_asym'],
|
141 |
# value=None
|
142 |
)
|
143 |
+
|
144 |
self.dataset = gr.Dropdown(
|
145 |
label='Dataset',
|
146 |
+
choices=['none',
|
147 |
+
'auto',
|
148 |
+
'wikitext2',
|
149 |
+
'c4',
|
150 |
+
'c4-new',
|
151 |
+
'contextual',
|
152 |
+
'conceptual_captions',
|
153 |
+
'laion/220k-GPT4Vision-captions-from-LIVIS',
|
154 |
'laion/filtered-wit'],
|
155 |
value=None
|
156 |
)
|
157 |
+
|
158 |
+
self.trust_remote_code = gr.Checkbox(
|
159 |
+
label='Trust Remote Code',
|
160 |
+
value=False)
|
161 |
+
|
162 |
+
self.disable_stateful = gr.Checkbox(
|
163 |
+
label='Disable Stateful',
|
164 |
+
value=False,
|
165 |
+
info="Disables stateful inference. This is required for multi GPU inference due to how OpenVINO uses the KV cache. ")
|
166 |
+
|
167 |
+
self.disable_convert_tokenizer = gr.Checkbox(
|
168 |
+
label='Disable Convert Tokenizer',
|
169 |
+
value=False,
|
170 |
+
info="Disables the tokenizer conversion. Use when models have custom tokenizers which might have formatting Optimum does not expect."
|
171 |
+
)
|
172 |
+
|
173 |
+
self.all_layers = gr.Checkbox(
|
174 |
+
label='All Layers',
|
175 |
+
value=False)
|
176 |
+
|
177 |
+
self.awq = gr.Checkbox(
|
178 |
+
label='AWQ',
|
179 |
+
value=False,
|
180 |
+
info="Activation aware quantization algorithm from NNCF. Requires a dataset, which can also be a path. ")
|
181 |
+
|
182 |
+
self.scale_estimation = gr.Checkbox(
|
183 |
+
label='Scale Estimation',
|
184 |
+
value=False)
|
185 |
+
|
186 |
+
self.gptq = gr.Checkbox(
|
187 |
+
label='GPTQ',
|
188 |
+
value=False)
|
189 |
+
|
190 |
+
self.lora_correction = gr.Checkbox(
|
191 |
+
label='LoRA Correction',
|
192 |
+
value=False)
|
193 |
+
|
194 |
+
self.sym = gr.Checkbox(
|
195 |
+
label='Symmetric Quantization',
|
196 |
+
value=False,
|
197 |
+
info="Symmetric quantization is faster and uses less memory. It is recommended for most use cases."
|
198 |
+
)
|
199 |
+
|
200 |
self.quant_mode = gr.Dropdown(
|
201 |
label='Quantization Mode',
|
202 |
choices=['sym', 'asym'],
|
203 |
value=None
|
204 |
)
|
205 |
+
|
206 |
self.cache_dir = gr.Textbox(
|
207 |
label='Cache Directory',
|
208 |
placeholder='Path to cache directory'
|
209 |
)
|
210 |
+
|
211 |
self.pad_token_id = gr.Number(
|
212 |
label='Pad Token ID',
|
213 |
value=None,
|
214 |
step=1,
|
215 |
+
info="Will try to infer from tokenizer if not provided."
|
216 |
)
|
217 |
+
|
218 |
self.sensitivity_metric = gr.Dropdown(
|
219 |
label='Sensitivity Metric',
|
220 |
+
choices=['weight_quantization_error', 'hessian_input_activation',
|
221 |
+
'mean_activation_variance', 'max_activation_variance', 'mean_activation_magnitude'],
|
222 |
value=None
|
223 |
)
|
224 |
+
|
225 |
self.num_samples = gr.Number(
|
226 |
label='Number of Samples',
|
227 |
value=None,
|
228 |
step=1
|
229 |
)
|
230 |
+
|
231 |
self.smooth_quant_alpha = gr.Number(
|
232 |
label='Smooth Quant Alpha',
|
233 |
value=None,
|
|
|
235 |
maximum=1.0,
|
236 |
step=0.1
|
237 |
)
|
238 |
+
|
239 |
self.command_output = gr.TextArea(
|
240 |
label='Generated Command',
|
241 |
placeholder='Generated command will appear here...',
|
|
|
355 |
outputs=self.command_output,
|
356 |
title="OpenVINO Conversion Tool",
|
357 |
description="Enter model information to generate an `optimum-cli` export command.",
|
358 |
+
# article=INTRODUCTION,
|
359 |
allow_flagging='auto'
|
360 |
)
|
361 |
|