Spaces:

HF-Quantization
/

TorchAO

Running

App Files Community

MekkCyber committed on Jan 8

Commit

1fdbd50

1 Parent(s): 82f366f

fix

Browse files

Files changed (1) hide show

app.py +30 -27

app.py CHANGED Viewed

@@ -91,9 +91,9 @@ def save_model(model, model_name, quantization_type, group_size=128, username=No
             repo_name = f"{username}/{quantized_model_name}"
         else :
             if quantization_type == "int4_weight_only" :
-                repo_name = f"{username}/{model_name.split('/')[-1]}-torchao-{quantization_type.lower()}-gs_{group_size}"
             else :
-                repo_name = f"{username}/{model_name.split('/')[-1]}-torchao-{quantization_type.lower()}"
         model_card = create_model_card(repo_name, quantization_type, group_size)
         with open(os.path.join(tmpdirname, "README.md"), "w") as f:
@@ -143,31 +143,34 @@ with gr.Blocks(theme=gr.themes.Soft()) as app:
     with gr.Row():
         with gr.Column():
-            model_name = HuggingfaceHubSearch(
-                label="Hub Model ID",
-                placeholder="Search for model id on Huggingface",
-                search_type="model",
-            )
-            quantization_type = gr.Dropdown(
-                label="Quantization Type",
-                choices=["int4_weight_only", "int8_weight_only", "int8_dynamic_activation_int8_weight"],
-                value="int8_weight_only"
-            )
-            group_size = gr.Number(
-                label="Group Size (only for int4_weight_only)",
-                value=128,
-                interactive=True
-            )
-            # device = gr.Dropdown(
-            #     label="Device (int4 only works with cuda)",
-            #     choices=["cuda", "cpu"],
-            #     value="cuda"
-            # )
-            quantized_model_name = gr.Textbox(
-                label="Model Name (optional : to override default)",
-                value="",
-                interactive=True
-            )
             # with gr.Row():
             #     username = gr.Textbox(
             #         label="Hugging Face Username",

             repo_name = f"{username}/{quantized_model_name}"
         else :
             if quantization_type == "int4_weight_only" :
+                repo_name = f"{username}/{model_name.split('/')[-1]}-torchao-{MAP_QUANT_TYPE_TO_NAME[quantization_type.lower()]}-gs{group_size}"
             else :
+                repo_name = f"{username}/{model_name.split('/')[-1]}-torchao-{MAP_QUANT_TYPE_TO_NAME[quantization_type.lower()]}"
         model_card = create_model_card(repo_name, quantization_type, group_size)
         with open(os.path.join(tmpdirname, "README.md"), "w") as f:
     with gr.Row():
         with gr.Column():
+            with gr.Row():
+                model_name = HuggingfaceHubSearch(
+                    label="Hub Model ID",
+                    placeholder="Search for model id on Huggingface",
+                    search_type="model",
+                )
+            with gr.Row():
+                quantization_type = gr.Dropdown(
+                    label="Quantization Type",
+                    choices=["int4_weight_only", "int8_weight_only", "int8_dynamic_activation_int8_weight"],
+                    value="int8_weight_only"
+                )
+                group_size = gr.Number(
+                    label="Group Size (only for int4_weight_only)",
+                    value=128,
+                    interactive=True
+                )
+                # device = gr.Dropdown(
+                #     label="Device (int4 only works with cuda)",
+                #     choices=["cuda", "cpu"],
+                #     value="cuda"
+                # )
+                quantized_model_name = gr.Textbox(
+                    label="Model Name (optional : to override default)",
+                    value="",
+                    interactive=True
+                )
             # with gr.Row():
             #     username = gr.Textbox(
             #         label="Hugging Face Username",