Spaces:

HF-Quantization
/

TorchAO

Running

App Files Files Community

MekkCyber committed on Mar 31

Commit

b5887d5

1 Parent(s): 931ff17

first

Browse files

Files changed (3) hide show

.gradio/certificate.pem +31 -0
app.py +414 -120
requirements.txt +1 -1

.gradio/certificate.pem ADDED Viewed

	@@ -0,0 +1,31 @@

+-----BEGIN CERTIFICATE-----
+MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
+TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
+cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
+WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
+ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
+MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
+h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
+0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
+A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
+T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
+B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
+B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
+KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
+OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
+jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
+qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
+rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
+HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
+hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
+ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
+3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
+NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
+ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
+TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
+jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
+oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
+4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
+mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
+emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
+-----END CERTIFICATE-----

app.py CHANGED Viewed

@@ -2,16 +2,31 @@ import gradio as gr
 import torch
 from transformers import TorchAoConfig, AutoModelForCausalLM, AutoTokenizer, AutoModel
 import tempfile
-from huggingface_hub import HfApi
 from huggingface_hub import list_models
 from gradio_huggingfacehub_search import HuggingfaceHubSearch
 from packaging import version
 import os
-import spaces
 MAP_QUANT_TYPE_TO_NAME = {
-    "int4_weight_only": "int4wo", "int8_weight_only": "int8wo", "int8_dynamic_activation_int8_weight": "int8da8w"
 }
 def hello(profile: gr.OAuthProfile | None, oauth_token: gr.OAuthToken | None) -> str:
     # ^ expect a gr.OAuthProfile object as input to get the user's profile
@@ -20,19 +35,29 @@ def hello(profile: gr.OAuthProfile | None, oauth_token: gr.OAuthToken | None) ->
         return "Hello !"
     return f"Hello {profile.name} !"
-def check_model_exists(oauth_token: gr.OAuthToken | None, username, quantization_type, group_size, model_name, quantized_model_name):
     """Check if a model exists in the user's Hugging Face repository."""
     try:
         models = list_models(author=username, token=oauth_token.token)
         model_names = [model.id for model in models]
-        if quantized_model_name :
             repo_name = f"{username}/{quantized_model_name}"
-        else :
-            if quantization_type == "int4_weight_only" :
-                repo_name = f"{username}/{model_name.split('/')[-1]}-torchao-{MAP_QUANT_TYPE_TO_NAME[quantization_type.lower()]}-gs{group_size}"
-            else :
-                repo_name = f"{username}/{model_name.split('/')[-1]}-torchao-{MAP_QUANT_TYPE_TO_NAME[quantization_type.lower()]}"
         if repo_name in model_names:
             return f"Model '{repo_name}' already exists in your repository."
         else:
@@ -40,62 +65,160 @@ def check_model_exists(oauth_token: gr.OAuthToken | None, username, quantization
     except Exception as e:
         return f"Error checking model existence: {str(e)}"
 def create_model_card(model_name, quantization_type, group_size):
-    model_card = f"""---
 base_model:
-- {model_name}
----
 # {model_name} (Quantized)
 ## Description
-This model is a quantized version of the original model `{model_name}`. It has been quantized using {quantization_type} quantization with torchao.
 ## Quantization Details
 - **Quantization Type**: {quantization_type}
-- **Group Size**: {group_size if quantization_type == "int4_weight_only" else None}
-## Usage
-You can use this model in your applications by loading it directly from the Hugging Face Hub:
-```python
-from transformers import AutoModel
-model = AutoModel.from_pretrained("{model_name}")"""
     return model_card
-def load_model(model_name, quantization_config, auth_token) :
-    return AutoModel.from_pretrained(model_name, torch_dtype=torch.bfloat16, quantization_config=quantization_config, device_map="cpu", use_auth_token=auth_token.token)
-def load_model_cpu(model_name, quantization_config, auth_token) :
-    return AutoModel.from_pretrained(model_name, torch_dtype=torch.bfloat16, quantization_config=quantization_config, use_auth_token=auth_token.token)
-def quantize_model(model_name, quantization_type, group_size=128, auth_token=None, username=None):
     print(f"Quantizing model: {quantization_type}")
-    if quantization_type == "int4_weight_only" :
         quantization_config = TorchAoConfig(quantization_type, group_size=group_size)
-    else :
         quantization_config = TorchAoConfig(quantization_type)
-    model = load_model(model_name, quantization_config=quantization_config, auth_token=auth_token)
     return model
-def save_model(model, model_name, quantization_type, group_size=128, username=None, auth_token=None, quantized_model_name=None):
     print("Saving quantized model")
     with tempfile.TemporaryDirectory() as tmpdirname:
-        model.save_pretrained(tmpdirname, safe_serialization=False, use_auth_token=auth_token.token)
-        if quantized_model_name :
             repo_name = f"{username}/{quantized_model_name}"
-        else :
-            if quantization_type == "int4_weight_only" :
-                repo_name = f"{username}/{model_name.split('/')[-1]}-torchao-{MAP_QUANT_TYPE_TO_NAME[quantization_type.lower()]}-gs{group_size}"
-            else :
-                repo_name = f"{username}/{model_name.split('/')[-1]}-torchao-{MAP_QUANT_TYPE_TO_NAME[quantization_type.lower()]}"
-        model_card = create_model_card(repo_name, quantization_type, group_size)
         with open(os.path.join(tmpdirname, "README.md"), "w") as f:
             f.write(model_card)
         # Push to Hub
@@ -106,130 +229,301 @@ def save_model(model, model_name, quantization_type, group_size=128, username=No
             repo_id=repo_name,
             repo_type="model",
         )
-    return f'<h1> 🤗 DONE</h1><br/>Find your repo here: <a href="https://huggingface.co/{repo_name}" target="_blank" style="text-decoration:underline">{repo_name}</a>'
-def quantize_and_save(profile: gr.OAuthProfile | None, oauth_token: gr.OAuthToken | None, model_name, quantization_type, group_size, quantized_model_name):
-    if oauth_token is None :
-        return "Error : Please Sign In to your HuggingFace account to use the quantizer"
     if not profile:
-        return "Error: Please Sign In to your HuggingFace account to use the quantizer"
-    exists_message = check_model_exists(oauth_token, profile.username, quantization_type, group_size, model_name, quantized_model_name)
-    if exists_message :
-        return exists_message
-    if quantization_type == "int4_weight_only" :
-        return "int4_weight_only not supported on cpu"
-    if not group_size.isdigit() :
-        return "group_size must be a number"
-    group_size = int(group_size)
     try:
-        quantized_model = quantize_model(model_name, quantization_type, group_size, oauth_token, profile.username)
-        return save_model(quantized_model, model_name, quantization_type, group_size, profile.username, oauth_token, quantized_model_name)
-    except Exception as e :
-        return e
-css="""/* Custom CSS to allow scrolling */
 .gradio-container {overflow-y: auto;}
 """
-with gr.Blocks(theme=gr.themes.Ocean(), css=css) as app:
     gr.Markdown(
         """
-        # 🤗 LLM Model TorchAO Quantization App
         Quantize your favorite Hugging Face models using TorchAO and save them to your profile!
         """
     )
     gr.LoginButton(elem_id="login-button", elem_classes="center-button", min_width=250)
     m1 = gr.Markdown()
-    app.load(hello, inputs=None, outputs=m1)
-    radio = gr.Radio(["show", "hide"], label="Show Instructions", value="hide")
-    instructions = gr.Markdown(
-        """
-        ## Instructions
-        1. Login to your HuggingFace account
-        2. Enter the name of the Hugging Face LLM model you want to quantize (Make sure you have access to it)
-        3. Choose the quantization type.
-        4. Optionally, specify the group size.
-        5. Optionally, choose a custom name for the quantized model
-        6. Click "Quantize and Save Model" to start the process.
-        7. Once complete, you'll receive a link to the quantized model on Hugging Face.
-        Note: This process may take some time depending on the model size and your hardware you can check the container logs to see where are you at in the process!
-        """,
-        visible=False
-    )
-    def update_visibility(radio):
-        value = radio
-        if value == "show":
-            return gr.Textbox(visible=True)
-        else:
-            return gr.Textbox(visible=False)
-    radio.change(update_visibility, radio, instructions)
     with gr.Row():
         with gr.Column():
             with gr.Row():
                 model_name = HuggingfaceHubSearch(
-                    label="Hub Model ID",
                     placeholder="Search for model id on Huggingface",
                     search_type="model",
-                    scale=2
                 )
             with gr.Row():
                 with gr.Column():
                     quantization_type = gr.Dropdown(
-                        info="Quantization Type",
-                        choices=["int4_weight_only", "int8_weight_only", "int8_dynamic_activation_int8_weight"],
                         value="int8_weight_only",
                         filterable=False,
                         show_label=False,
                     )
                     group_size = gr.Textbox(
-                        info="Group Size (only for int4_weight_only)",
-                        value=128,
                         interactive=True,
-                        show_label=False
                     )
                     quantized_model_name = gr.Textbox(
-                        info="Model Name (optional : to override default)",
                         value="",
                         interactive=True,
-                        show_label=False
                     )
         with gr.Column():
-            quantize_button = gr.Button("Quantize and Save Model", variant="primary")
-            output_link = gr.Markdown(label="Quantized Model Link", container=True, min_height=40)
-    # Adding CSS styles for the username box
-    app.css = """
-    #username-box {
-        background-color: #f0f8ff; /* Light color */
-        border-radius: 8px;
-        padding: 10px;
-    }
-    """
-    app.css = """
-    .center-button {
-        display: flex;
-        justify-content: center;
-        align-items: center;
-        margin: 0 auto; /* Center horizontally */
-    }
-    """
     quantize_button.click(
         fn=quantize_and_save,
         inputs=[model_name, quantization_type, group_size, quantized_model_name],
-        outputs=[output_link]
     )
 # Launch the app
-app.launch()

 import torch
 from transformers import TorchAoConfig, AutoModelForCausalLM, AutoTokenizer, AutoModel
 import tempfile
+from huggingface_hub import HfApi, snapshot_download
 from huggingface_hub import list_models
 from gradio_huggingfacehub_search import HuggingfaceHubSearch
 from packaging import version
 import os
+from torchao.quantization import (
+    Int4WeightOnlyConfig,
+    Int8WeightOnlyConfig,
+    Int8DynamicActivationInt8WeightConfig,
+    Float8WeightOnlyConfig,
+)
+# Short suffix for each quantization type; it is looked up when the default
+# repository name ("<model>-ao-<suffix>[-gs<group_size>]") is built.
 MAP_QUANT_TYPE_TO_NAME = {
+    "int4_weight_only": "int4wo",
+    "int8_weight_only": "int8wo",
+    "int8_dynamic_activation_int8_weight": "int8da8w",
+    "autoquant": "autoquant",
 }
+# torchao config class for each quantization type.
+# NOTE(review): this mapping is not referenced anywhere else in the visible
+# code, and its keys differ from MAP_QUANT_TYPE_TO_NAME ("float8_weight_only"
+# here vs. "autoquant" there) - confirm which set of types is meant to be
+# supported.
+MAP_QUANT_TYPE_TO_CONFIG = {
+    "int4_weight_only": Int4WeightOnlyConfig,
+    "int8_weight_only": Int8WeightOnlyConfig,
+    "int8_dynamic_activation_int8_weight": Int8DynamicActivationInt8WeightConfig,
+    "float8_weight_only": Float8WeightOnlyConfig,
+}
 def hello(profile: gr.OAuthProfile | None, oauth_token: gr.OAuthToken | None) -> str:
     # ^ expect a gr.OAuthProfile object as input to get the user's profile
         return "Hello !"
     return f"Hello {profile.name} !"
+def check_model_exists(
+    oauth_token: gr.OAuthToken | None,
+    username,
+    quantization_type,
+    group_size,
+    model_name,
+    quantized_model_name,
+):
     """Check if a model exists in the user's Hugging Face repository."""
     try:
         models = list_models(author=username, token=oauth_token.token)
         model_names = [model.id for model in models]
+        if quantized_model_name:
             repo_name = f"{username}/{quantized_model_name}"
+        else:
+            if (
+                quantization_type == "int4_weight_only"
+                or quantization_type == "int8_weight_only"
+            ) and (group_size is not None):
+                repo_name = f"{username}/{model_name.split('/')[-1]}-ao-{MAP_QUANT_TYPE_TO_NAME[quantization_type.lower()]}-gs{group_size}"
+            else:
+                repo_name = f"{username}/{model_name.split('/')[-1]}-ao-{MAP_QUANT_TYPE_TO_NAME[quantization_type.lower()]}"
         if repo_name in model_names:
             return f"Model '{repo_name}' already exists in your repository."
         else:
     except Exception as e:
         return f"Error checking model existence: {str(e)}"
 def create_model_card(model_name, quantization_type, group_size):
+    """Build the README.md content for the quantized repository.
+
+    The card is made of a YAML front matter (``base_model`` set to
+    ``model_name``, every other field of the original card, and the
+    ``torchao-my-repo`` tag), a short quantization summary, and the body of
+    the original model card when it could be downloaded.
+
+    Args:
+        model_name: Hub id of the original model.
+        quantization_type: Name of the quantization type that was applied.
+        group_size: Group size that was used, or None.
+
+    Returns:
+        str: The full model card text.
+    """
+    original_yaml_header, original_readme = _load_original_card(model_name)
+    yaml_header = _build_yaml_header(model_name, original_yaml_header)
+    # Create the quantization info section
+    quant_info = f"""
 # {model_name} (Quantized)
 ## Description
+This model is a quantized version of the original model [`{model_name}`](https://huggingface.co/{model_name}).
+It's quantized using the TorchAO library using the [torchao-my-repo](https://huggingface.co/spaces/pytorch/torchao-my-repo) space.
 ## Quantization Details
 - **Quantization Type**: {quantization_type}
+- **Group Size**: {group_size}
+"""
+    # Combine everything
+    model_card = yaml_header + quant_info
+    # Append original README content if available
+    if original_readme and not original_readme.isspace():
+        model_card += "\n\n# 📄 Original Model Information\n\n" + original_readme
     return model_card
+# Tag added to every generated card so repositories created here can be found.
+_REPO_TAG = "torchao-my-repo"
+def _load_original_card(model_name):
+    """Download the original README.md and return ``(yaml_header, body)``.
+
+    Best effort: a missing or unreadable card is logged and reported as two
+    empty strings so that it never blocks the upload.
+    """
+    try:
+        model_path = snapshot_download(
+            repo_id=model_name, allow_patterns=["README.md"], repo_type="model"
+        )
+        readme_path = os.path.join(model_path, "README.md")
+        if not os.path.exists(readme_path):
+            return "", ""
+        with open(readme_path, "r", encoding="utf-8") as f:
+            content = f.read()
+    except Exception as e:
+        print(f"Error reading original README: {str(e)}")
+        return "", ""
+    if content.startswith("---"):
+        # parts == ["", <front matter>, <body>] when the closing "---" exists
+        parts = content.split("---", 2)
+        if len(parts) >= 3:
+            return parts[1], parts[2]
+    return "", content
+def _build_yaml_header(model_name, original_yaml_header):
+    """Return the front matter: new base_model, the original fields, the repo tag."""
+    header_lines = ["---", "base_model:", f"- {model_name}"]
+    cleaned = original_yaml_header.strip()
+    source_lines = cleaned.split("\n") if cleaned else []
+    in_base_model_section = False
+    found_tags = False
+    for index, line in enumerate(source_lines):
+        stripped = line.strip()
+        if in_base_model_section:
+            # Drop the items of the original multi-line base_model list
+            if stripped.startswith("-") or not stripped or line.startswith(" "):
+                continue
+            in_base_model_section = False
+        if stripped.startswith("base_model:"):
+            # An inline value ("base_model: org/name") has no continuation lines
+            in_base_model_section = not stripped[len("base_model:"):].strip()
+            continue
+        # Only a top-level key counts: a nested "tags:" belongs to another mapping
+        if line.startswith("tags:") and not found_tags:
+            found_tags = True
+            header_lines.extend(_tag_lines(line, source_lines[index + 1 :]))
+            continue
+        header_lines.append(line)
+    # Also reached when the original card had no front matter at all
+    if not found_tags:
+        header_lines += ["tags:", f"- {_REPO_TAG}"]
+    header_lines.append("---")
+    return "\n".join(header_lines)
+def _tag_lines(tags_line, following_lines):
+    """Return the ``tags:`` line(s) with the repo tag added, keeping the YAML valid."""
+    inline_value = tags_line.split(":", 1)[1].strip()
+    if inline_value.startswith("[") and inline_value.endswith("]"):
+        # Flow-style list: "tags: [a, b]" or "tags: []"
+        items = inline_value[1:-1].strip().rstrip(",").strip()
+        merged = f"{items}, {_REPO_TAG}" if items else _REPO_TAG
+        return [f"tags: [{merged}]"]
+    if inline_value:
+        # Unrecognised inline value: leave it untouched rather than corrupt it
+        return [tags_line]
+    # Block-style list: reuse the indentation of the first existing item so the
+    # new entry is a sibling of the others instead of breaking the sequence
+    indent = ""
+    if following_lines and following_lines[0].lstrip().startswith("-"):
+        first_item = following_lines[0]
+        indent = first_item[: len(first_item) - len(first_item.lstrip())]
+    return [tags_line, f"{indent}- {_REPO_TAG}"]
+def quantize_model(
+    model_name, quantization_type, group_size=128, auth_token=None, username=None
+):
+    """Load ``model_name`` on CPU with a TorchAO quantization config applied.
+
+    Args:
+        model_name: Hub id of the model to quantize.
+        quantization_type: Quantization type name passed to ``TorchAoConfig``.
+        group_size: Forwarded to ``TorchAoConfig`` for ``int4_weight_only``
+            and ``int8_weight_only`` only.
+        auth_token: Object exposing the Hub token as ``.token``, or None to
+            load without authentication.
+        username: Unused; kept so existing callers keep working.
+
+    Returns:
+        The model returned by ``AutoModel.from_pretrained``.
+    """
     print(f"Quantizing model: {quantization_type}")
+    if quantization_type in ("int4_weight_only", "int8_weight_only"):
         quantization_config = TorchAoConfig(quantization_type, group_size=group_size)
+    else:
         quantization_config = TorchAoConfig(quantization_type)
+    # ``use_auth_token`` is deprecated in transformers in favour of ``token``.
+    # The default auth_token=None must not crash on ``.token`` either.
+    hub_token = auth_token.token if auth_token is not None else None
+    model = AutoModel.from_pretrained(
+        model_name,
+        torch_dtype="auto",
+        quantization_config=quantization_config,
+        device_map="cpu",
+        token=hub_token,
+    )
     return model
+def save_model(
+    model,
+    model_name,
+    quantization_type,
+    group_size=128,
+    username=None,
+    auth_token=None,
+    quantized_model_name=None,
+):
     print("Saving quantized model")
     with tempfile.TemporaryDirectory() as tmpdirname:
+        # Load and save the tokenizer
+        tokenizer = AutoTokenizer.from_pretrained(
+            model_name, use_auth_token=auth_token.token
+        )
+        tokenizer.save_pretrained(tmpdirname, use_auth_token=auth_token.token)
+        # Save the model
+        model.save_pretrained(
+            tmpdirname, safe_serialization=False, use_auth_token=auth_token.token
+        )
+        if quantized_model_name:
             repo_name = f"{username}/{quantized_model_name}"
+        else:
+            if (
+                quantization_type == "int4_weight_only"
+                or quantization_type == "int8_weight_only"
+            ) and (group_size is not None):
+                repo_name = f"{username}/{model_name.split('/')[-1]}-ao-{MAP_QUANT_TYPE_TO_NAME[quantization_type.lower()]}-gs{group_size}"
+            else:
+                repo_name = f"{username}/{model_name.split('/')[-1]}-ao-{MAP_QUANT_TYPE_TO_NAME[quantization_type.lower()]}"
+        model_card = create_model_card(model_name, quantization_type, group_size)
         with open(os.path.join(tmpdirname, "README.md"), "w") as f:
             f.write(model_card)
         # Push to Hub
             repo_id=repo_name,
             repo_type="model",
         )
+    import io
+    from contextlib import redirect_stdout
+    import html
+    # Capture the model architecture string
+    f = io.StringIO()
+    with redirect_stdout(f):
+        print(model)
+    model_architecture_str = f.getvalue()
+    # Escape HTML characters and format with line breaks
+    model_architecture_str_html = html.escape(model_architecture_str).replace(
+        "\n", "<br/>"
+    )
+    # Format it for display in markdown with proper styling
+    model_architecture_info = f"""
+    <div class="model-architecture-container" style="margin-top: 20px; margin-bottom: 20px; background-color: #f8f9fa; padding: 15px; border-radius: 8px; border-left: 4px solid #4CAF50;">
+        <h3 style="margin-top: 0; color: #2E7D32;">📋 Model Architecture</h3>
+        <div class="model-architecture" style="max-height: 500px; overflow-y: auto; overflow-x: auto; background-color: #f5f5f5; padding: 5px; border-radius: 8px; font-family: monospace; white-space: pre-wrap;">
+        <div style="line-height: 1.2; font-size: 0.75em;">{model_architecture_str_html}</div>
+        </div>
+    </div>
+    """
+    repo_link = f"""
+    <div class="repo-link" style="margin-top: 20px; margin-bottom: 20px; background-color: #f8f9fa; padding: 15px; border-radius: 8px; border-left: 4px solid #4CAF50;">
+        <h3 style="margin-top: 0; color: #2E7D32;">🔗 Repository Link</h3>
+        <p>Find your repo here: <a href="https://huggingface.co/{repo_name}" target="_blank" style="text-decoration:underline">{repo_name}</a></p>
+    </div>
+    """
+    return (
+        f"<h1>🎉 Quantization Completed</h1><br/>{repo_link}{model_architecture_info}"
+    )
+def quantize_and_save(
+    profile: gr.OAuthProfile | None,
+    oauth_token: gr.OAuthToken | None,
+    model_name,
+    quantization_type,
+    group_size,
+    quantized_model_name,
+):
+    """Validate the request, quantize the model and push it to the user's account.
+
+    Args:
+        profile: Profile of the signed-in user, or None when signed out.
+        oauth_token: OAuth token of the signed-in user, or None.
+        model_name: Hub id of the model to quantize.
+        quantization_type: Name of the quantization type to apply.
+        group_size: Group size as typed by the user; empty means "not set".
+        quantized_model_name: Optional custom name for the target repository.
+
+    Returns:
+        str: HTML to display - the report returned by ``save_model`` on
+        success, otherwise an error or warning box.
+    """
+    import html
+    def message_box(css_class, title, text):
+        # The text may carry user input or exception messages: escape it
+        return f"""
+        <div class="{css_class}">
+            <h3>{title}</h3>
+            <p>{html.escape(str(text), quote=False)}</p>
+        </div>
+        """
+    if oauth_token is None or not profile:
+        return message_box(
+            "error-box",
+            "❌ Authentication Error",
+            "Please sign in to your HuggingFace account to use the quantizer.",
+        )
+    if not model_name:
+        return message_box(
+            "error-box", "❌ Model Error", "Please select a model to quantize."
+        )
+    # Normalise first so None and surrounding whitespace cannot break parsing
+    raw_group_size = "" if group_size is None else str(group_size).strip()
+    if not raw_group_size:
+        group_size = None
+    elif raw_group_size.isdecimal():
+        # isdecimal() (unlike isdigit()) accepts only characters int() can parse
+        group_size = int(raw_group_size)
+    else:
+        return message_box(
+            "error-box",
+            "❌ Group Size Error",
+            "Group Size is a number for int4_weight_only and int8_weight_only or empty for int8_weight_only",
+        )
+    exists_message = check_model_exists(
+        oauth_token,
+        profile.username,
+        quantization_type,
+        group_size,
+        model_name,
+        quantized_model_name,
+    )
+    if exists_message:
+        # check_model_exists reports lookup failures as "Error checking ..."
+        if exists_message.startswith("Error"):
+            return message_box(
+                "error-box", "❌ Repository Check Failed", exists_message
+            )
+        return message_box("warning-box", "⚠️ Model Already Exists", exists_message)
     try:
+        quantized_model = quantize_model(
+            model_name, quantization_type, group_size, oauth_token, profile.username
+        )
+        return save_model(
+            quantized_model,
+            model_name,
+            quantization_type,
+            group_size,
+            profile.username,
+            oauth_token,
+            quantized_model_name,
+        )
+    except Exception as e:
+        # UI boundary: log the failure and show it instead of crashing the handler
+        print(f"Quantization failed: {e}")
+        return message_box("error-box", "❌ Quantization Error", e)
+def get_model_size(model):
+    """Return the size of ``model``'s state dict in gigabytes.
+
+    Every entry of ``model.state_dict()`` contributes its element count
+    times its per-element size in bytes.
+
+    Args:
+        model: PyTorch model
+
+    Returns:
+        float: Size in GB (1 GB = 1024**3 bytes), rounded to two decimals
+    """
+    bytes_per_gb = 1024**3
+    total_bytes = sum(
+        tensor.nelement() * tensor.element_size()
+        for tensor in model.state_dict().values()
+    )
+    return round(total_bytes / bytes_per_gb, 2)
+# Stylesheet handed to gr.Blocks(css=...) when the UI is built.
+# NOTE(review): selectors such as `.gradio-radio`, `.gradio-dropdown` and
+# `button[variant="primary"]` assume Gradio renders those class/attribute
+# names - confirm against the generated DOM.
+css = """
+/* Custom CSS for enhanced UI */
 .gradio-container {overflow-y: auto;}
+/* Fix alignment for radio buttons and dropdowns */
+.gradio-radio, .gradio-dropdown {
+    display: flex !important;
+    align-items: center !important;
+    margin: 10px 0 !important;
+}
+/* Consistent spacing and alignment */
+.gradio-dropdown, .gradio-textbox, .gradio-radio {
+    margin-bottom: 12px !important;
+    width: 100% !important;
+}
+/* Quantize button styling with glow effect */
+button[variant="primary"] {
+    background: linear-gradient(135deg, #3B82F6, #10B981) !important;
+    color: white !important;
+    padding: 16px 32px !important;
+    font-size: 1.1rem !important;
+    font-weight: 700 !important;
+    border: none !important;
+    border-radius: 12px !important;
+    box-shadow: 0 0 15px rgba(59, 130, 246, 0.5) !important;
+    transition: all 0.3s cubic-bezier(0.25, 0.8, 0.25, 1) !important;
+    position: relative;
+    overflow: hidden;
+    animation: glow 1.5s ease-in-out infinite alternate;
+}
+button[variant="primary"]::before {
+    content: "✨ ";
+}
+button[variant="primary"]:hover {
+    transform: translateY(-5px) scale(1.05) !important;
+    box-shadow: 0 10px 25px rgba(59, 130, 246, 0.7) !important;
+}
+@keyframes glow {
+    from {
+        box-shadow: 0 0 10px rgba(59, 130, 246, 0.5);
+    }
+    to {
+        box-shadow: 0 0 20px rgba(59, 130, 246, 0.8), 0 0 30px rgba(16, 185, 129, 0.5);
+    }
+}
+/* Login button styling */
+#login-button {
+    background: linear-gradient(135deg, #3B82F6, #10B981) !important;
+    color: white !important;
+    font-weight: 700 !important;
+    border: none !important;
+    border-radius: 12px !important;
+    box-shadow: 0 0 15px rgba(59, 130, 246, 0.5) !important;
+    transition: all 0.3s ease !important;
+    max-width: 300px !important;
+    margin: 0 auto !important;
+}
 """
+# Build the UI: header and login, the quantization form, the result panel,
+# and a help accordion.
+with gr.Blocks(theme=gr.themes.Ocean(), css=css) as demo:
     gr.Markdown(
         """
+        # 🤗 TorchAO Model Quantizer ✨
         Quantize your favorite Hugging Face models using TorchAO and save them to your profile!
+        <br/>
         """
     )
+    # NOTE(review): the visible `css` string defines `#login-button` but no
+    # `.center-button` rule - confirm the class is still needed.
     gr.LoginButton(elem_id="login-button", elem_classes="center-button", min_width=250)
     m1 = gr.Markdown()
+    # Fill the greeting when the page loads
+    demo.load(hello, inputs=None, outputs=m1)
     with gr.Row():
         with gr.Column():
             with gr.Row():
                 model_name = HuggingfaceHubSearch(
+                    label="🔍 Hub Model ID",
                     placeholder="Search for model id on Huggingface",
                     search_type="model",
                 )
+            gr.Markdown("""### ⚙️ Quantization Settings""")
             with gr.Row():
                 with gr.Column():
                     quantization_type = gr.Dropdown(
+                        info="Select the Quantization method",
+                        choices=[
+                            "int4_weight_only",
+                            "int8_weight_only",
+                            "int8_dynamic_activation_int8_weight",
+                            "autoquant",
+                        ],
                         value="int8_weight_only",
                         filterable=False,
                         show_label=False,
                     )
+                    # Kept as text: quantize_and_save parses it and treats an
+                    # empty value as "no group size"
                     group_size = gr.Textbox(
+                        info="Group Size (only for int4_weight_only and int8_weight_only)",
+                        value="128",
                         interactive=True,
+                        show_label=False,
                     )
                     quantized_model_name = gr.Textbox(
+                        info="Custom name for your quantized model (optional)",
                         value="",
                         interactive=True,
+                        show_label=False,
                     )
         with gr.Column():
+            quantize_button = gr.Button(
+                "🚀 Quantize and Push to Hub", variant="primary"
+            )
+            output_link = gr.Markdown(
+                label="🔗 Quantized Model Info", container=True, min_height=200
+            )
+    # Static help text shown below the form (expanded by default)
+    with gr.Accordion("📚 About TorchAO Quantization", open=True):
+        gr.Markdown(
+            """
+            ## 📝 Quantization Options
+            ### Quantization Types
+            - **int4_weight_only**: 4-bit weight-only quantization
+            - **int8_weight_only**: 8-bit weight-only quantization
+            - **int8_dynamic_activation_int8_weight**: 8-bit quantization for both weights and activations
+            ### Group Size
+            - Only applicable for int4_weight_only and int8_weight_only quantization
+            - Default value is 128
+            - Affects the granularity of quantization
+            ## 🔍 How It Works
+            1. Downloads the original model
+            2. Applies TorchAO quantization with your selected settings
+            3. Uploads the quantized model to your HuggingFace account
+            ## 📊 Memory Benefits
+            - int4_weight_only can reduce model size by up to 75%
+            - int8_weight_only typically reduces size by about 50%
+            """
+        )
+    # Run quantize_and_save on click. Only the four form fields are listed as
+    # inputs although the handler also declares `profile` and `oauth_token`.
+    # NOTE(review): those two are presumably injected by Gradio from the
+    # handler's type annotations - confirm.
     quantize_button.click(
         fn=quantize_and_save,
         inputs=[model_name, quantization_type, group_size, quantized_model_name],
+        outputs=[output_link],
     )
 # Launch the app
+# No share=True: share links are not supported on Hugging Face Spaces, and in
+# a local run they would expose this OAuth-enabled app through a public tunnel.
+demo.launch()

requirements.txt CHANGED Viewed

@@ -1,4 +1,4 @@
-git+https://github.com/huggingface/transformers.git@main#egg=transformers
 accelerate
 torchao
 huggingface-hub

+transformers
 accelerate
 torchao
 huggingface-hub