Update optimum_neuron_export.py

optimum_neuron_export.py CHANGED (+634 -119)
@@ -1,32 +1,113 @@
+
 import os
 import shutil
-from tempfile import TemporaryDirectory
-from typing import List, Optional, Tuple, Dict, Any
+from tempfile import TemporaryDirectory, NamedTemporaryFile
+from typing import List, Union, Optional, Tuple, Dict, Any, Generator
+from pathlib import Path
+import torch
 from huggingface_hub import (
     CommitOperationAdd,
     HfApi,
     ModelCard,
     Discussion,
     CommitInfo,
+    create_repo,
+    RepoUrl,
 )
 from huggingface_hub.file_download import repo_folder_name
-from optimum.exporters.neuron import main_export
 from optimum.exporters.tasks import TasksManager
+from optimum.exporters.neuron.model_configs import *
+from optimum.neuron import (
+    NeuronModelForFeatureExtraction,
+    NeuronModelForSentenceTransformers,
+    NeuronModelForMaskedLM,
+    NeuronModelForQuestionAnswering,
+    NeuronModelForSequenceClassification,
+    NeuronModelForTokenClassification,
+    NeuronModelForMultipleChoice,
+    NeuronModelForImageClassification,
+    NeuronModelForSemanticSegmentation,
+    NeuronModelForObjectDetection,
+    NeuronModelForAudioClassification,
+    NeuronModelForAudioFrameClassification,
+    NeuronModelForCTC,
+    NeuronModelForXVector,
+    NeuronModelForCausalLM,
+    NeuronModelForSeq2SeqLM,
+    NeuronModelForConditionalGeneration,
+)
+from optimum.neuron import (
+    NeuronDiffusionPipelineBase,
+    NeuronStableDiffusionPipeline,
+    NeuronStableDiffusionImg2ImgPipeline,
+    NeuronStableDiffusionInpaintPipeline,
+    NeuronStableDiffusionInstructPix2PixPipeline,
+    NeuronLatentConsistencyModelPipeline,
+    NeuronStableDiffusionXLPipeline,
+    NeuronStableDiffusionXLImg2ImgPipeline,
+    NeuronStableDiffusionXLInpaintPipeline,
+    NeuronStableDiffusionControlNetPipeline,
+    NeuronStableDiffusionXLControlNetPipeline,
+    NeuronPixArtAlphaPipeline,
+    NeuronPixArtSigmaPipeline,
+    NeuronFluxPipeline,
+)
+from optimum.neuron.cache.entries.cache_entry import ModelCacheEntry
 
 SPACES_URL = "https://huggingface.co/spaces/optimum/neuron-export"
+CACHE_REPO_ID = "badaoui/optimum-neuron_compile-cache"
 
-
-
-
-
-""
-
-
-
-
-
-
+# Task to NeuronModel mapping for transformers
+TASK_TO_MODEL_CLASS = {
+    "feature-extraction": NeuronModelForFeatureExtraction,
+    "sentence-transformers": NeuronModelForSentenceTransformers,
+    "fill-mask": NeuronModelForMaskedLM,
+    "question-answering": NeuronModelForQuestionAnswering,
+    "text-classification": NeuronModelForSequenceClassification,
+    "token-classification": NeuronModelForTokenClassification,
+    "multiple-choice": NeuronModelForMultipleChoice,
+    "image-classification": NeuronModelForImageClassification,
+    "semantic-segmentation": NeuronModelForSemanticSegmentation,
+    "object-detection": NeuronModelForObjectDetection,
+    "audio-classification": NeuronModelForAudioClassification,
+    "audio-frame-classification": NeuronModelForAudioFrameClassification,
+    "automatic-speech-recognition": NeuronModelForCTC,
+    "audio-xvector": NeuronModelForXVector,
+    "text-generation": NeuronModelForCausalLM,
+    "text2text-generation": NeuronModelForSeq2SeqLM,
+}
+
+# Diffusion pipeline mapping
+DIFFUSION_PIPELINE_MAPPING = {
+    "text-to-image": NeuronStableDiffusionPipeline,
+    "image-to-image": NeuronStableDiffusionImg2ImgPipeline,
+    "inpaint": NeuronStableDiffusionInpaintPipeline,
+    "instruct-pix2pix": NeuronStableDiffusionInstructPix2PixPipeline,
+    "latent-consistency": NeuronLatentConsistencyModelPipeline,
+    "stable_diffusion": NeuronStableDiffusionPipeline,
+    "stable-diffusion-xl": NeuronStableDiffusionXLPipeline,
+    "stable-diffusion-xl-img2img": NeuronStableDiffusionXLImg2ImgPipeline,
+    "stable-diffusion-xl-inpaint": NeuronStableDiffusionXLInpaintPipeline,
+    "controlnet": NeuronStableDiffusionControlNetPipeline,
+    "controlnet-xl": NeuronStableDiffusionXLControlNetPipeline,
+    "pixart-alpha": NeuronPixArtAlphaPipeline,
+    "pixart-sigma": NeuronPixArtSigmaPipeline,
+    "flux": NeuronFluxPipeline,
+}
+
+def get_default_input_shapes(task_or_pipeline: str) -> Dict[str, int]:
+    """Get default input shapes based on task type or diffusion pipeline type."""
+    if task_or_pipeline in ["feature-extraction", "sentence-transformers", "fill-mask", "question-answering", "text-classification", "token-classification", "text-generation", "text2text-generation"]:
+        return {"batch_size": 1, "sequence_length": 128}
+    elif task_or_pipeline == "multiple-choice":
+        return {"batch_size": 1, "num_choices": 4, "sequence_length": 128}
+    elif task_or_pipeline in ["image-classification", "semantic-segmentation", "object-detection"]:
+        return {"batch_size": 1, "num_channels": 3, "height": 224, "width": 224}
+    elif task_or_pipeline in ["audio-classification", "audio-frame-classification", "automatic-speech-recognition", "audio-xvector"]:
+        return {"batch_size": 1, "audio_sequence_length": 16000}
+    elif task_or_pipeline in DIFFUSION_PIPELINE_MAPPING:
+        return {"batch_size": 1, "height": 1024, "width": 1024, "num_images_per_prompt": 1}
+    else:
+        # Default to text-based shapes
+        return {"batch_size": 1, "sequence_length": 128}
 
 def previous_pr(api: "HfApi", model_id: str, pr_title: str) -> Optional["Discussion"]:
     try:
@@ -42,144 +123,578 @@ def previous_pr(api: "HfApi", model_id: str, pr_title: str) -> Optional["Discussion"]:
         return discussion
     return None
 
-
-
-
 
-
-
-    try:
-        task = TasksManager.infer_task_from_model(model_id)
-    except Exception as e:
-        raise Exception(f"Could not infer task for model {model_id}: {e}")
 
-
 
-    try:
-
-
-
-
-
-
-        token=token,
-
-
-        dynamic_batch_size=False,
-        do_validation=False,  # Disable validation for now to avoid issues
-        trust_remote_code=False,
-        force_download=False,
-        local_files_only=False,
-        # Default optimization level (O2 is the default from original code)
-        optlevel="2",
-        # Other defaults
-        tensor_parallel_size=1,
-        disable_neuron_cache=False,
-        inline_weights_to_neff=True,
-        output_attentions=False,
-        output_hidden_states=False,
-        # Add input shapes for common models
-        batch_size=1,
-        sequence_length=128,
-    )
-
-    except Exception as e:
-
-        raise
 
-    operations
-
-            path_in_repo=os.path.join("neuron", file_name),
-            path_or_fileobj=os.path.join(folder, file_name),
-        )
-        for file_name in os.listdir(folder)
-        if os.path.isfile(os.path.join(folder, file_name))  # Only add files, not directories
-    ]
-
-    try:
-        card = ModelCard.load(model_id, token=token)
-        if card.data.tags is None:
-            card.data.tags = []
-        if "neuron" not in card.data.tags:
-            card.data.tags.append("neuron")
-
-
-
-
-
-        )
-    )
-    except Exception as e:
-
-
 
-
 
-def convert(
-    api: "HfApi",
-    model_id: str,
-
-    force: bool = False,
-    token: str = None,
-
-
-    info = api.model_info(model_id, token=token)
-    filenames =
-    requesting_user = api.whoami(token=token)["name"]
 
-
-
-    os.makedirs(folder, exist_ok=True)
-    new_pr = None
-
-    try:
-
 
-        if
-            raise Exception(
-
-
-
-
-
-
-
-            )
-        else:
-            operations = export_and_git_add(model_id, task, folder, token=token)
 
-
-
 
-
-🤖 Neuron Export Bot: On behalf of [{requesting_user}](https://huggingface.co/{requesting_user}), adding AWS Neuron-optimized model files.
-
-Neuron-optimized models can achieve high-performance inference on AWS Inferentia and Trainium chips. Learn more:
-- [AWS Neuron Documentation](https://awsdocs-neuron.readthedocs-hosted.com)
-- [🤗 Optimum Neuron Guide](https://huggingface.co/docs/optimum-neuron/index)
-"""
 
-
-
-
-
-
-            create_pr=True,
-            token=token,
-        )
 
-
-
-
-
-
-
-
-
-
-
-
-
 
 
+
+def get_local_cache_structure(local_cache_base: str = "/var/tmp/neuron-compile-cache") -> Dict[str, List[str]]:
+    """
+    Get the structure of the local Neuron cache to preserve it in the hub.
+    Returns a dict mapping neuronxcc folders to their MODULE folders.
+    """
+    cache_structure = {}
+
+    if not os.path.exists(local_cache_base):
+        return cache_structure
+
+    try:
+        for item in os.listdir(local_cache_base):
+            item_path = os.path.join(local_cache_base, item)
+            if os.path.isdir(item_path) and item.startswith('neuronxcc-'):
+                modules = []
+                for subitem in os.listdir(item_path):
+                    subitem_path = os.path.join(item_path, subitem)
+                    if os.path.isdir(subitem_path) and subitem.startswith('MODULE_'):
+                        modules.append(subitem)
+
+                if modules:
+                    cache_structure[item] = modules
+
+    except Exception as e:
+        print(f"Warning: Could not read local cache structure: {e}")
+
+    return cache_structure
+
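+# Shape note: the returned mapping looks like {"neuronxcc-<compiler-version>": ["MODULE_<hash>", ...]}
+# (hypothetical keys), mirroring the on-disk layout under /var/tmp/neuron-compile-cache.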
+def upload_cache_files(cache_dir: str, cache_repo_id: str, token: str) -> Generator[Union[str, CommitInfo], None, None]:
+    """
+    Upload cache files to the cache repository and create PR.
+    This is a generator function.
+    """
+    try:
+        api = HfApi(token=token)
+
+        # Create cache operations
+        cache_operations = []
+        for root, _, files in os.walk(cache_dir):
+            for file in files:
+                file_path = os.path.join(root, file)
+                rel_path = os.path.relpath(file_path, cache_dir)
+                cache_operations.append(
+                    CommitOperationAdd(
+                        path_in_repo=rel_path,
+                        path_or_fileobj=file_path,
+                    )
+                )
+
+        yield f"📤 Found {len(cache_operations)} cache files to upload."
+
+        if cache_operations:
+            # Create PR in cache repository
+            cache_pr_title = f"Add Neuron cache for {os.path.basename(cache_dir)}"
+            cache_commit_description = """
+🤖 Neuron Cache Bot: Adding compiled Neuron cache artifacts.
+
+This PR contains the compiled neuronxcc cache files that can be used to speed up model loading for AWS Neuron devices.
+"""
+
+            cache_pr = api.create_commit(
+                repo_id=cache_repo_id,
+                operations=cache_operations,
+                commit_message=cache_pr_title,
+                commit_description=cache_commit_description,
+                create_pr=True,
+                token=token,
+            )
+
+            yield f"✅ Cache PR created successfully: https://huggingface.co/{cache_repo_id}/discussions/{cache_pr.pr_num}"
+            # Yield the final PR object so the caller can use it
+            yield cache_pr
+        else:
+            yield "⚠️ No cache files found to upload."
+            yield None
+
+    except Exception as e:
+        yield f"❌ Cache upload failed: {e}"
+        raise
+
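+# Consumption sketch (hypothetical caller): progress strings and the final CommitInfo share
+# one generator, so callers type-check each yielded item:
+#     for item in upload_cache_files(cache_dir, CACHE_REPO_ID, token):
+#         if isinstance(item, str):
+#             print(item)       # progress message
+#         else:
+#             cache_pr = item   # CommitInfo (or None when nothing to upload)
+# which is exactly how convert() below consumes it.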
+def export_and_git_add(model_id: str, task_or_pipeline: str, model_type: str, folder: str, token: str) -> Any:
+    if task_or_pipeline == "auto":
+        try:
+            task_or_pipeline = TasksManager.infer_task_from_model(model_id)
+        except Exception as e:
+            raise Exception(f"❌ Could not infer task for model {model_id}: {e}")
+
+    yield f"📦 Exporting model `{model_id}` for task `{task_or_pipeline}`..."
+
+    model_class = TASK_TO_MODEL_CLASS.get(task_or_pipeline) if model_type == "transformers" else DIFFUSION_PIPELINE_MAPPING.get(task_or_pipeline)
+    if model_class is None:
+        supported = list(TASK_TO_MODEL_CLASS.keys()) if model_type == "transformers" else list(DIFFUSION_PIPELINE_MAPPING.keys())
+        raise Exception(f"❌ Unsupported task/pipeline: {task_or_pipeline}. Supported: {supported}")
+
+    input_shapes = get_default_input_shapes(task_or_pipeline)
+    yield f"🔧 Using input shapes: {input_shapes}"
+
+    try:
+        model = model_class.from_pretrained(
+            model_id,
+            torch_dtype=torch.bfloat16,
+            export=True,
+            token=token,
+            tensor_parallel_size=4,
+            **input_shapes,
+        )
+        model.save_pretrained(folder)
+        yield "✅ Export completed successfully."
+    except Exception as e:
+        yield f"❌ Export failed with error: {e}"
+        raise
+
+    operations = []
+    for root, _, files in os.walk(folder):
+        for filename in files:
+            file_path = os.path.join(root, filename)
+            repo_path = os.path.relpath(file_path, folder)
+            operations.append(CommitOperationAdd(path_in_repo=repo_path, path_or_fileobj=file_path))
+
+    yield f"📁 Found {len(operations)} files to upload"
+
+    try:
+        card = ModelCard.load(model_id, token=token)
+        if not hasattr(card.data, "tags") or card.data.tags is None:
+            card.data.tags = []
+        if "neuron" not in card.data.tags:
+            card.data.tags.append("neuron")
+
+        readme_path = os.path.join(folder, "README.md")
+        card.save(readme_path)
+
+        # Check if README.md is already in operations, if so update, else add
+        readme_op = next((op for op in operations if op.path_in_repo == "README.md"), None)
+        if readme_op:
+            readme_op.path_or_fileobj = readme_path
+        else:
+            operations.append(CommitOperationAdd(path_in_repo="README.md", path_or_fileobj=readme_path))
+
+    except Exception as e:
+        yield f"⚠️ Warning: Could not update model card: {e}"
+
+    yield ("__RETURN__", operations)
+
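+# The ("__RETURN__", operations) sentinel exists because the consumers iterate this generator
+# with a plain for-loop and would otherwise lose a return value; convert() below breaks on
+# the sentinel to collect the CommitOperationAdd list.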
+def generate_neuron_repo_name(api, original_model_id: str, task_or_pipeline: str, token: str) -> str:
+    """Generate a name for the Neuron-optimized repository."""
+    # Replace '/' with '-' and add neuron suffix
+    requesting_user = api.whoami(token=token)["name"]
+    base_name = original_model_id.replace('/', '-')
+    return f"{requesting_user}/{base_name}-neuron"
+
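+# e.g. "org/bert-base" exported by user "alice" -> "alice/org-bert-base-neuron" (hypothetical names).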
+def create_neuron_repo_and_upload(
+    operations: List[CommitOperationAdd],
+    original_model_id: str,
+    model_type: str,
+    task_or_pipeline: str,
+    requesting_user: str,
+    token: str,
+) -> Generator[Union[str, RepoUrl], None, None]:
+    """
+    Creates a new repository with Neuron files and uploads them.
+    """
+    api = HfApi(token=token)
+
+    if task_or_pipeline == "auto":
+        try:
+            task_or_pipeline = TasksManager.infer_task_from_model(original_model_id)
+        except Exception as e:
+            raise Exception(f"❌ Could not infer task for model {original_model_id}: {e}")
+
+    # Generate repository name
+    neuron_repo_name = generate_neuron_repo_name(api, original_model_id, task_or_pipeline, token)
+
+    yield f"🏗️ Creating new repository: {neuron_repo_name}"
+
+    try:
+        # Create the repository
+        repo_url = create_repo(
+            repo_id=neuron_repo_name,
+            token=token,
+            repo_type="model",
+            private=False,
+            exist_ok=True,
+        )
+
+        yield f"✅ Repository created: {repo_url}"
+
+        # Get the appropriate class name for the Python example
+        if model_type == "transformers":
+            model_class = TASK_TO_MODEL_CLASS.get(task_or_pipeline)
+        else:
+            model_class = DIFFUSION_PIPELINE_MAPPING.get(task_or_pipeline)
+
+        model_class_name = model_class.__name__ if model_class else "NeuronModel"
+
+        # Create enhanced model card for the Neuron repo
+        neuron_readme_content = f"""---
+tags:
+- neuron
+- optimized
+- aws-neuron
+- {task_or_pipeline}
+base_model: {original_model_id}
+---
+
+# Neuron-Optimized {original_model_id}
+
+This repository contains AWS Neuron-optimized files for [{original_model_id}](https://huggingface.co/{original_model_id}).
+
+## Model Details
+
+- **Base Model**: [{original_model_id}](https://huggingface.co/{original_model_id})
+- **Task**: {task_or_pipeline}
+- **Optimization**: AWS Neuron compilation
+- **Generated by**: [{requesting_user}](https://huggingface.co/{requesting_user})
+- **Generated using**: [Optimum Neuron Compiler Space]({SPACES_URL})
+
+## Usage
+
+This model has been optimized for AWS Neuron devices (Inferentia/Trainium). To use it:
+
+```python
+from optimum.neuron import {model_class_name}
+
+model = {model_class_name}.from_pretrained("{neuron_repo_name}")
+```
+
+## Performance
+
+These files are pre-compiled for AWS Neuron devices and should provide improved inference performance compared to the original model when deployed on Inferentia or Trainium instances.
+
+## Original Model
+
+For the original model, training details, and more information, please visit: [{original_model_id}](https://huggingface.co/{original_model_id})
+"""
+
+        # Update the README in operations
+        readme_op = next((op for op in operations if op.path_in_repo == "README.md"), None)
+        if readme_op:
+            # Create a temporary file with the new content
+            with NamedTemporaryFile(mode='w', suffix='.md', delete=False) as f:
+                f.write(neuron_readme_content)
+                readme_op.path_or_fileobj = f.name
+        else:
+            # Add new README operation
+            with NamedTemporaryFile(mode='w', suffix='.md', delete=False) as f:
+                f.write(neuron_readme_content)
+                operations.append(CommitOperationAdd(path_in_repo="README.md", path_or_fileobj=f.name))
+
+        # Upload files to the new repository
+        commit_message = f"Add Neuron-optimized files for {original_model_id}"
+        commit_description = f"""
+🤖 Neuron Export Bot: Adding AWS Neuron-optimized model files.
+
+Original model: [{original_model_id}](https://huggingface.co/{original_model_id})
+Task: {task_or_pipeline}
+Generated by: [{requesting_user}](https://huggingface.co/{requesting_user})
+Generated using: [Optimum Neuron Compiler Space]({SPACES_URL})
+
+These files have been pre-compiled for AWS Neuron devices (Inferentia/Trainium) and should provide improved inference performance.
+"""
+
+        yield f"📤 Uploading {len(operations)} files to {neuron_repo_name}..."
+
+        commit_info = api.create_commit(
+            repo_id=neuron_repo_name,
+            operations=operations,
+            commit_message=commit_message,
+            commit_description=commit_description,
+            token=token,
+        )
+
+        yield f"✅ Files uploaded successfully to: https://huggingface.co/{neuron_repo_name}"
+        yield repo_url
+
+    except Exception as e:
+        yield f"❌ Failed to create/upload to Neuron repository: {e}"
+        raise
+
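+# Note: exist_ok=True above makes repo creation idempotent, so re-running the export for the
+# same model reuses the existing Neuron repo and simply pushes a fresh commit.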
+def create_readme_pr_for_original_model(
+    original_model_id: str,
+    neuron_repo_name: str,
+    task_or_pipeline: str,
+    requesting_user: str,
+    token: str,
+) -> Generator[Union[str, CommitInfo], None, None]:
+    """
+    Creates a PR on the original model repository to add a link to the Neuron-optimized version.
+    """
+    api = HfApi(token=token)
+
+    yield f"🔗 Creating PR to add Neuron repo link in {original_model_id}..."
+
+    try:
+        # Check if there's already an open PR
+        pr_title = "Add link to Neuron-optimized version"
+        existing_pr = previous_pr(api, original_model_id, pr_title)
+
+        if existing_pr:
+            yield f"⚠️ PR already exists: https://huggingface.co/{original_model_id}/discussions/{existing_pr.num}"
+            return
+
+        # Get the current README
+        try:
+            current_readme_path = api.hf_hub_download(
+                repo_id=original_model_id,
+                filename="README.md",
+                token=token,
+            )
+            with open(current_readme_path, 'r', encoding='utf-8') as f:
+                readme_content = f.read()
+        except Exception:
+            # If README doesn't exist, create a basic one
+            readme_content = f"# {original_model_id}\n\n"
+
+        # Add Neuron optimization section, separated by a horizontal rule
+        neuron_section = f"""
+---
+## 🚀 AWS Neuron Optimized Version Available
+
+A Neuron-optimized version of this model is available for improved performance on AWS Inferentia/Trainium instances:
+
+**[{neuron_repo_name}](https://huggingface.co/{neuron_repo_name})**
+
+The Neuron-optimized version provides:
+- Pre-compiled artifacts for faster loading
+- Optimized performance on AWS Neuron devices
+- Same model capabilities with improved inference speed
+"""
+
+        # Append the Neuron section to the end of the README
+        updated_readme = readme_content.rstrip() + "\n" + neuron_section
+
+        # Create temporary file with updated README
+        with NamedTemporaryFile(mode='w', suffix='.md', delete=False, encoding="utf-8") as f:
+            f.write(updated_readme)
+            temp_readme_path = f.name
+
+        # Create the PR
+        operations = [CommitOperationAdd(path_in_repo="README.md", path_or_fileobj=temp_readme_path)]
+
+        commit_description = f"""
+🤖 Neuron Export Bot: Adding link to Neuron-optimized version.
+
+A Neuron-optimized version of this model has been created at [{neuron_repo_name}](https://huggingface.co/{neuron_repo_name}).
+
+The optimized version provides improved performance on AWS Inferentia/Trainium instances with pre-compiled artifacts.
+
+Generated by: [{requesting_user}](https://huggingface.co/{requesting_user})
+Generated using: [Optimum Neuron Compiler Space]({SPACES_URL})
+"""
+
+        pr = api.create_commit(
+            repo_id=original_model_id,
+            operations=operations,
+            commit_message=pr_title,
+            commit_description=commit_description,
+            create_pr=True,
+            token=token,
+        )
+
+        yield f"✅ README PR created: https://huggingface.co/{original_model_id}/discussions/{pr.pr_num}"
+        yield pr
+
+        # Clean up temporary file
+        os.unlink(temp_readme_path)
+
+    except Exception as e:
+        yield f"❌ Failed to create README PR: {e}"
+        raise
+
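+# Note: the previous_pr() check above keeps this step idempotent as well: an already-open
+# "Add link to Neuron-optimized version" PR short-circuits instead of filing a duplicate.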
+# --- Updated upload_to_custom_repo function (unchanged) ---
+def upload_to_custom_repo(
+    operations: List[CommitOperationAdd],
+    custom_repo_id: str,
+    original_model_id: str,
+    requesting_user: str,
+    token: str,
+) -> Generator[Union[str, CommitInfo], None, None]:
+    """
+    Uploads neuron files to a custom repository and creates a PR.
+    """
+    yield f"📤 Preparing to upload to custom repo: {custom_repo_id}"
+    api = HfApi(token=token)
+
+    try:
+        # Ensure the custom repo exists
+        api.repo_info(repo_id=custom_repo_id, repo_type="model")
+    except Exception as e:
+        yield f"❌ Could not access custom repository `{custom_repo_id}`. Please ensure it exists and you have write access. Error: {e}"
+        raise
+
+    pr_title = f"Add Neuron-optimized files for {original_model_id}"
+    commit_description = f"""
+🤖 Neuron Export Bot: On behalf of [{requesting_user}](https://huggingface.co/{requesting_user}), adding AWS Neuron-optimized model files for `{original_model_id}`.
+
+These files were generated using the [Optimum Neuron Compiler Space](https://huggingface.co/spaces/optimum/neuron-export).
+"""
+
+    try:
+        custom_pr = api.create_commit(
+            repo_id=custom_repo_id,
+            operations=operations,
+            commit_message=pr_title,
+            commit_description=commit_description,
+            create_pr=True,
+            token=token,
+        )
+        yield f"✅ Custom PR created successfully: https://huggingface.co/{custom_repo_id}/discussions/{custom_pr.pr_num}"
+        yield custom_pr
+
+    except Exception as e:
+        yield f"❌ Failed to create PR in custom repository: {e}"
+        raise
+
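+# pr_options (inferred from the checks below) carries the boolean flags "create_neuron_repo",
+# "create_cache_pr", "create_custom_pr" and, for the latter, a "custom_repo_id" string.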
+def convert(
+    api: "HfApi",
+    model_id: str,
+    task_or_pipeline: str,
+    model_type: str = "transformers",
+    force: bool = False,
+    token: str = None,
+    pr_options: Dict = None,
+) -> Generator[Tuple[str, Any], None, None]:
+    if pr_options is None:
+        pr_options = {}
+
+    info = api.model_info(model_id, token=token)
+    filenames = {s.rfilename for s in info.siblings}
+    requesting_user = api.whoami(token=token)["name"]
+
+    if not any(pr_options.values()):
+        yield "1", "⚠️ No option selected. Please choose at least one option."
+        return
+
+    if pr_options.get("create_custom_pr") and not pr_options.get("custom_repo_id"):
+        yield "1", "⚠️ Custom PR selected but no repository ID was provided."
+        return
+
+    yield "0", f"🚀 Starting export process with options: {pr_options}..."
+
+    with TemporaryDirectory() as temp_dir:
+        export_folder = os.path.join(temp_dir, "export")
+        cache_mirror_dir = os.path.join(temp_dir, "cache_mirror")
+        os.makedirs(export_folder, exist_ok=True)
+        os.makedirs(cache_mirror_dir, exist_ok=True)
+
+        result_info = {}
+
+        try:
+            # --- Export Logic ---
+            export_gen = export_and_git_add(model_id, task_or_pipeline, model_type, export_folder, token=token)
+            operations = None
+            for message in export_gen:
+                if isinstance(message, tuple) and message[0] == "__RETURN__":
+                    operations = message[1]
+                    break
+                else:
+                    yield "0", message
+
+            if not operations:
+                raise Exception("Export process did not produce any files to commit.")
+
+            # --- Cache Handling ---
+            cache_files_available = False
+            if pr_options.get("create_cache_pr"):
+                yield "0", "Checking for local cache files..."
+                local_cache_structure = get_local_cache_structure()
+                yield "0", f"🗂️ Found cache structure: {len(local_cache_structure)} neuronxcc folders"
+
+                if local_cache_structure:
+                    cache_files_available = True
+                    local_cache_base = "/var/tmp/neuron-compile-cache"
+                    # Copy cache files to a temporary mirror directory for upload
+                    shutil.copytree(local_cache_base, cache_mirror_dir, dirs_exist_ok=True)
+                    yield "0", "Copied cache files to a temporary location for upload."
+
+            # --- New Repository Creation (Replaces Model PR) ---
+            if pr_options.get("create_neuron_repo"):
+                yield "0", "🏗️ Creating new Neuron-optimized repository..."
+                neuron_repo_url = None
+                # Generate the repo name first so we can use it consistently
+                neuron_repo_name = generate_neuron_repo_name(api, model_id, task_or_pipeline, token)
+
+                repo_creation_gen = create_neuron_repo_and_upload(
+                    operations, model_id, model_type, task_or_pipeline, requesting_user, token
+                )
+
+                for msg in repo_creation_gen:
+                    if isinstance(msg, str):
+                        yield "0", msg
+                    else:
+                        neuron_repo_url = msg
+
+                result_info["neuron_repo"] = f"https://huggingface.co/{neuron_repo_name}"
+
+                # Automatically create a PR on the original model to add a link
+                yield "0", "🔗 Creating PR to add Neuron repo link to original model..."
+                readme_pr = None
+                readme_pr_gen = create_readme_pr_for_original_model(
+                    model_id, neuron_repo_name, task_or_pipeline, requesting_user, token
+                )
+                for msg in readme_pr_gen:
+                    if isinstance(msg, str):
+                        yield "0", msg
+                    else:
+                        readme_pr = msg
+
+                if readme_pr:
+                    result_info["readme_pr"] = f"https://huggingface.co/{model_id}/discussions/{readme_pr.pr_num}"
+
+            # --- Cache Repository PR ---
+            if pr_options.get("create_cache_pr"):
+                if cache_files_available:
+                    yield "0", "📤 Creating PR in cache repository..."
+                    cache_pr = None
+                    cache_upload_gen = upload_cache_files(cache_mirror_dir, CACHE_REPO_ID, token)
+                    for msg in cache_upload_gen:
+                        if isinstance(msg, str):
+                            yield "0", msg
+                        else:
+                            cache_pr = msg
+                    if cache_pr:
+                        result_info["cache_pr"] = f"https://huggingface.co/{CACHE_REPO_ID}/discussions/{cache_pr.pr_num}"
+                else:
+                    yield "0", "⚠️ No new cache files were generated to upload."
+
+            # --- Custom Repository PR ---
+            if pr_options.get("create_custom_pr"):
+                custom_repo_id = pr_options["custom_repo_id"]
+                yield "0", f"📤 Creating PR in custom repository: {custom_repo_id}..."
+                custom_pr = None
+                custom_upload_gen = upload_to_custom_repo(operations, custom_repo_id, model_id, requesting_user, token)
+                for msg in custom_upload_gen:
+                    if isinstance(msg, str):
+                        yield "0", msg
+                    else:
+                        custom_pr = msg
+                if custom_pr:
+                    result_info["custom_pr"] = f"https://huggingface.co/{custom_repo_id}/discussions/{custom_pr.pr_num}"
+
+            yield "0", result_info
+
+        except Exception as e:
+            yield "1", f"❌ Conversion failed with a critical error: {e}"
+            # Re-raise the exception to be caught by the outer try-except in the Gradio app if needed
+            raise
+
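+# Consumption sketch (hypothetical UI callback): convert() streams (code, payload) tuples,
+# where code "1" flags a user-facing error and the final "0" payload is result_info:
+#     for code, payload in convert(api, model_id, task, pr_options=opts):
+#         if isinstance(payload, dict):
+#             links = payload   # e.g. {"neuron_repo": ..., "readme_pr": ..., "cache_pr": ...}
+#         else:
+#             print(payload)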
+def list_cached_models(cache_repo_id: str, token: str = None) -> Dict[str, List[str]]:
+    """
+    List all cached neuronxcc folders in the repository.
+    """
+    try:
+        api = HfApi(token=token)
+        repo_files = api.list_repo_files(cache_repo_id, token=token)
+
+        # Group files by neuronxcc folder
+        neuronxcc_cache = {}
+        for file_path in repo_files:
+            # Extract neuronxcc folder from path
+            parts = file_path.split('/')
+            if len(parts) >= 3 and parts[0].startswith('neuronxcc-'):
+                neuronxcc_folder = parts[0]
+                module_folder = parts[1]
+
+                if neuronxcc_folder not in neuronxcc_cache:
+                    neuronxcc_cache[neuronxcc_folder] = set()
+                neuronxcc_cache[neuronxcc_folder].add(module_folder)
+
+        # Convert sets to lists
+        return {k: list(v) for k, v in neuronxcc_cache.items()}
+
+    except Exception as e:
+        print(f"Failed to list cached models: {e}")
+        return {}
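+# e.g. list_cached_models(CACHE_REPO_ID) might return
+# {"neuronxcc-2.14.227.0": ["MODULE_0123abc", "MODULE_4567def"]} (hypothetical values).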