Spaces:

Echo9Zulu
/

Optimum-CLI-Tool_tool

Running

App Files Community

Echo9Zulu committed on Jan 18

Commit

7a4e720

verified ·

1 Parent(s): 6ad6e23

Update app.py

Browse files

Files changed (1) hide show

app.py +96 -24

app.py CHANGED Viewed

@@ -58,47 +58,69 @@ class ConversionTool:
             placeholder='Model ID on huggingface.co or path on disk',
             info="The model to convert. This can be a model ID on Hugging Face or a path on disk."
         )
         self.output_path = gr.Textbox(
             label='Output Directory',
             placeholder='Path to store the generated OV model',
             info="We are storing some text here"
         )
         self.task = gr.Dropdown(
             label='Task',
             choices=['auto'] + [
-                'image-to-image', 'image-segmentation', 'inpainting',
-                'sentence-similarity', 'text-to-audio', 'image-to-text',
-                'automatic-speech-recognition', 'token-classification',
-                'text-to-image', 'audio-classification', 'feature-extraction',
-                'semantic-segmentation', 'masked-im', 'audio-xvector',
-                'audio-frame-classification', 'text2text-generation',
-                'multiple-choice', 'depth-estimation', 'image-classification',
                 'fill-mask', 'zero-shot-object-detection', 'object-detection',
                 'question-answering', 'zero-shot-image-classification',
                 'mask-generation', 'text-generation', 'text-classification',
-                'text-to-text-generation', 'text-generation-with-past'
             ],
             value=None
         )
         self.framework = gr.Dropdown(
             label='Framework',
             choices=['pt', 'tf'],
             value=None
         )
         self.weight_format = gr.Dropdown(
             label='Weight Format',
             choices=['fp32', 'fp16', 'int8', 'int4', 'mxfp4', 'nf4'],
             value=None,
             info="The level of compression we apply to the intermediate representation."
         )
         self.library = gr.Dropdown(
             label='Library',
             choices=[
-                'auto', 'transformers', 'diffusers', 'timm',
-                'sentence_transformers', 'open_clip'
             ],
             value=None
         )
         self.ratio = gr.Number(
             label='Ratio',
             value=None,
@@ -106,57 +128,106 @@ class ConversionTool:
             maximum=1.0,
             step=0.1
         )
         self.group_size = gr.Number(
             label='Group Size',
             value=None,
             step=1
         )
         self.backup_precision = gr.Dropdown(
             label='Backup Precision',
             choices=['', 'int8_sym', 'int8_asym'],
             # value=None
         )
         self.dataset = gr.Dropdown(
             label='Dataset',
-            choices=['none', 'auto', 'wikitext2', 'c4', 'c4-new', 'contextual',
-                    'conceptual_captions', 'laion/220k-GPT4Vision-captions-from-LIVIS',
                     'laion/filtered-wit'],
             value=None
         )
-        self.trust_remote_code = gr.Checkbox(label='Trust Remote Code', value=False)
-        self.disable_stateful = gr.Checkbox(label='Disable Stateful', value=False, info="Disables stateful for inference. This is required for multi GPU inference due to how OpenVINO uses the KV cache. ")
-        self.disable_convert_tokenizer = gr.Checkbox(label='Disable Convert Tokenizer', value=False, info="Disables the tokenizer conversion. Use when models have custom tokenizers which might have formatting Optimum does not expect.")
-        self.all_layers = gr.Checkbox(label='All Layers', value=False)
-        self.awq = gr.Checkbox(label='AWQ', value=False, info="Activation aware quantization algorithm from NNCF. Requires a dataset, which can also be a path. ")
-        self.scale_estimation = gr.Checkbox(label='Scale Estimation', value=False)
-        self.gptq = gr.Checkbox(label='GPTQ', value=False)
-        self.lora_correction = gr.Checkbox(label='LoRA Correction', value=False)
-        self.sym = gr.Checkbox(label='Symmetric Quantization', value=False)
         self.quant_mode = gr.Dropdown(
             label='Quantization Mode',
             choices=['sym', 'asym'],
             value=None
         )
         self.cache_dir = gr.Textbox(
             label='Cache Directory',
             placeholder='Path to cache directory'
         )
         self.pad_token_id = gr.Number(
             label='Pad Token ID',
             value=None,
             step=1,
-            info="Will infer from the model if not provided."
         )
         self.sensitivity_metric = gr.Dropdown(
             label='Sensitivity Metric',
-            choices=['mse', 'snr'],
             value=None
         )
         self.num_samples = gr.Number(
             label='Number of Samples',
             value=None,
             step=1
         )
         self.smooth_quant_alpha = gr.Number(
             label='Smooth Quant Alpha',
             value=None,
@@ -164,6 +235,7 @@ class ConversionTool:
             maximum=1.0,
             step=0.1
         )
         self.command_output = gr.TextArea(
             label='Generated Command',
             placeholder='Generated command will appear here...',
@@ -283,7 +355,7 @@ class ConversionTool:
             outputs=self.command_output,
             title="OpenVINO Conversion Tool",
             description="Enter model information to generate an `optimum-cli` export command.",
-            article=INTRODUCTION,
             allow_flagging='auto'
         )

             placeholder='Model ID on huggingface.co or path on disk',
             info="The model to convert. This can be a model ID on Hugging Face or a path on disk."
         )
         self.output_path = gr.Textbox(
             label='Output Directory',
             placeholder='Path to store the generated OV model',
             info="We are storing some text here"
         )
         self.task = gr.Dropdown(
             label='Task',
             choices=['auto'] + [
+                'image-to-image',
+                'image-segmentation',
+                'inpainting',
+                'sentence-similarity',
+                'text-to-audio',
+                'image-to-text',
+                'automatic-speech-recognition',
+                'token-classification',
+                'text-to-image',
+                'audio-classification',
+                'feature-extraction',
+                'semantic-segmentation',
+                'masked-im',
+                'audio-xvector',
+                'audio-frame-classification',
+                'text2text-generation',
+                'multiple-choice',
+                'depth-estimation',
+                'image-classification',
                 'fill-mask', 'zero-shot-object-detection', 'object-detection',
                 'question-answering', 'zero-shot-image-classification',
                 'mask-generation', 'text-generation', 'text-classification',
+                'text-generation-with-past'
             ],
             value=None
         )
         self.framework = gr.Dropdown(
             label='Framework',
             choices=['pt', 'tf'],
             value=None
         )
         self.weight_format = gr.Dropdown(
             label='Weight Format',
             choices=['fp32', 'fp16', 'int8', 'int4', 'mxfp4', 'nf4'],
             value=None,
             info="The level of compression we apply to the intermediate representation."
         )
         self.library = gr.Dropdown(
             label='Library',
             choices=[
+                'auto',
+                'transformers',
+                'diffusers',
+                'timm',
+                'sentence_transformers',
+                'open_clip'
             ],
             value=None
         )
         self.ratio = gr.Number(
             label='Ratio',
             value=None,
             maximum=1.0,
             step=0.1
         )
         self.group_size = gr.Number(
             label='Group Size',
             value=None,
             step=1
         )
         self.backup_precision = gr.Dropdown(
             label='Backup Precision',
             choices=['', 'int8_sym', 'int8_asym'],
             # value=None
         )
         self.dataset = gr.Dropdown(
             label='Dataset',
+            choices=['none',
+                     'auto',
+                     'wikitext2',
+                     'c4',
+                     'c4-new',
+                     'contextual',
+                    'conceptual_captions',
+                    'laion/220k-GPT4Vision-captions-from-LIVIS',
                     'laion/filtered-wit'],
             value=None
         )
+        self.trust_remote_code = gr.Checkbox(
+            label='Trust Remote Code',
+            value=False)
+        self.disable_stateful = gr.Checkbox(
+            label='Disable Stateful',
+            value=False,
+            info="Disables stateful inference. This is required for multi GPU inference due to how OpenVINO uses the KV cache. ")
+        self.disable_convert_tokenizer = gr.Checkbox(
+            label='Disable Convert Tokenizer',
+            value=False,
+            info="Disables the tokenizer conversion. Use when models have custom tokenizers which might have formatting Optimum does not expect."
+        )
+        self.all_layers = gr.Checkbox(
+            label='All Layers',
+            value=False)
+        self.awq = gr.Checkbox(
+            label='AWQ',
+            value=False,
+            info="Activation aware quantization algorithm from NNCF. Requires a dataset, which can also be a path. ")
+        self.scale_estimation = gr.Checkbox(
+            label='Scale Estimation',
+            value=False)
+        self.gptq = gr.Checkbox(
+            label='GPTQ',
+            value=False)
+        self.lora_correction = gr.Checkbox(
+            label='LoRA Correction',
+            value=False)
+        self.sym = gr.Checkbox(
+            label='Symmetric Quantization',
+            value=False,
+            info="Symmetric quantization is faster and uses less memory. It is recommended for most use cases."
+        )
         self.quant_mode = gr.Dropdown(
             label='Quantization Mode',
             choices=['sym', 'asym'],
             value=None
         )
         self.cache_dir = gr.Textbox(
             label='Cache Directory',
             placeholder='Path to cache directory'
         )
         self.pad_token_id = gr.Number(
             label='Pad Token ID',
             value=None,
             step=1,
+            info="Will try to infer from tokenizer if not provided."
         )
         self.sensitivity_metric = gr.Dropdown(
             label='Sensitivity Metric',
+            choices=['weight_quantization_error', 'hessian_input_activation',
+                    'mean_activation_variance', 'max_activation_variance', 'mean_activation_magnitude'],
             value=None
         )
         self.num_samples = gr.Number(
             label='Number of Samples',
             value=None,
             step=1
         )
         self.smooth_quant_alpha = gr.Number(
             label='Smooth Quant Alpha',
             value=None,
             maximum=1.0,
             step=0.1
         )
         self.command_output = gr.TextArea(
             label='Generated Command',
             placeholder='Generated command will appear here...',
             outputs=self.command_output,
             title="OpenVINO Conversion Tool",
             description="Enter model information to generate an `optimum-cli` export command.",
+            # article=INTRODUCTION,
             allow_flagging='auto'
         )